view tailc.patch @ 516:4cd7d914b0e3

rebase to current hsx/hotspot-comp
author jrose
date Sun, 06 Oct 2013 23:32:13 -0700
parents ef9d5753e526
diff -r aa0c48844632 -r a7d54b98ca4a src/cpu/sparc/vm/sparc.ad
--- a/src/cpu/sparc/vm/sparc.ad	Thu May 14 10:57:58 2009 -0700
+++ b/src/cpu/sparc/vm/sparc.ad	Wed Jun 03 16:27:17 2009 +0200
@@ -1552,7 +1552,7 @@ void emit_java_to_interp(CodeBuffer &cbu
   // jmp -1
 
   address mark = cbuf.inst_mark();  // get mark within main instrs section
-
+  tty->print_cr("java_to_interp "INTPTR_FORMAT, mark);
   MacroAssembler _masm(&cbuf);
 
   address base =
diff -r aa0c48844632 -r a7d54b98ca4a src/cpu/x86/vm/assembler_x86.cpp
--- a/src/cpu/x86/vm/assembler_x86.cpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/cpu/x86/vm/assembler_x86.cpp	Wed Jun 03 16:27:17 2009 +0200
@@ -1348,6 +1348,15 @@ void Assembler::jmp(Address adr) {
   emit_byte(0xFF);
   emit_operand(rsp, adr);
 }
+// Tail call.
+void Assembler::mov_label(Address dst,  Label& L, relocInfo::relocType rtype) {
+  InstructionMark im(this);
+  L.add_patch_at(code(), locator());
+  assert(is8bit(dst.disp()), "Assume 8bit immediate in pd_patch_instruction"); 
+  emit_byte(0xC7);
+  emit_operand(rax, dst, 4);
+  emit_data(0, rtype, 0);
+}
 
 void Assembler::jmp(Label& L, relocInfo::relocType rtype) {
   if (L.is_bound()) {
@@ -3335,6 +3344,15 @@ void Assembler::shrdl(Register dst, Regi
   emit_byte(0xC0 | src->encoding() << 3 | dst->encoding());
 }
 
+void MacroAssembler::parent_is_not_interpreter_jcc(Register base_pointer, Register temp, Label& is_not_interpreter_continutation) {
+  movl(temp, Address(base_pointer, frame::return_addr_offset * wordSize));
+  cmp32 (temp, ExternalAddress(Interpreter::interpreter_code_begin_address()));
+  jcc(Assembler::less, is_not_interpreter_continutation);
+  cmp32 (temp, ExternalAddress(Interpreter::interpreter_code_end_address()));
+  jcc(Assembler::greater, is_not_interpreter_continutation);
+}
+
+
 #else // LP64
 
 // 64bit only pieces of the assembler
@@ -3795,6 +3813,15 @@ void Assembler::cvttss2siq(Register dst,
   emit_byte(0x0F);
   emit_byte(0x2C);
   emit_byte(0xC0 | encode);
+}
+
+void MacroAssembler::parent_is_not_interpreter_jcc(Register temp, Label& is_not_interpreter_continutation) {
+  assert(0, "update this code");
+  movl(temp, Address(rbp, frame::return_addr_offset * wordSize));
+  cmp32 (temp, ExternalAddress(Interpreter::interpreter_code_begin_address()));
+  jcc(Assembler::less, is_not_interpreter_continutation);
+  cmp32 (temp, ExternalAddress(Interpreter::interpreter_code_end_address()));
+  jcc(Assembler::greater, is_not_interpreter_continutation);
 }
 
 void Assembler::decl(Register dst) {
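(Aside, not part of the patch.) The parent_is_not_interpreter_jcc helper added above is just a range check: it loads the caller's return address from the frame and branches to the continuation label unless that address falls inside the interpreter's code region. A minimal stand-alone C++ sketch of the same test, with hypothetical bounds standing in for Interpreter::interpreter_code_begin/end_address():

#include <cstdint>

// Hypothetical interpreter code bounds (illustration only).
static const uintptr_t interp_begin = 0x08000000;
static const uintptr_t interp_end   = 0x08040000;

// True iff the caller's return address lies inside the interpreter,
// i.e. neither the jcc(less, ...) nor the jcc(greater, ...) above is taken.
static bool parent_is_interpreted(uintptr_t return_addr) {
  return return_addr >= interp_begin && return_addr <= interp_end;
}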
diff -r aa0c48844632 -r a7d54b98ca4a src/cpu/x86/vm/assembler_x86.hpp
--- a/src/cpu/x86/vm/assembler_x86.hpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/cpu/x86/vm/assembler_x86.hpp	Wed Jun 03 16:27:17 2009 +0200
@@ -606,7 +606,12 @@ private:
 
   void call_literal(address entry, RelocationHolder const& rspec);
   void jmp_literal(address entry, RelocationHolder const& rspec);
-
+public:
+  // Server compiler needs this method when emitting tail calls.
+  void tail_call_jmp_literal(address entry, RelocationHolder const& rspec) {
+    jmp_literal(entry, rspec);
+  }
+protected:  
   // Avoid using directly section
   // Instructions in this section are actually usable by anyone without danger
   // of failure but have performance issues that are addressed my enhanced
@@ -1119,7 +1124,8 @@ private:
   void movl(Register dst, Register src);
   void movl(Register dst, Address src);
   void movl(Address dst, Register src);
-
+  // Tail call support.
+  void mov_label(Address dst,  Label& L, relocInfo::relocType rtype = relocInfo::internal_word_type);
   // These dummies prevent using movl from converting a zero (like NULL) into Register
   // by giving the compiler two choices it can't resolve
 
@@ -2183,6 +2189,8 @@ public:
   // Can push value or effective address
   void pushptr(AddressLiteral src);
 
+  void parent_is_not_interpreter_jcc(Register base_pointer, Register temp, Label& is_not_interpreter_continutation);
+
   void pushptr(Address src) { LP64_ONLY(pushq(src)) NOT_LP64(pushl(src)); }
   void popptr(Address src) { LP64_ONLY(popq(src)) NOT_LP64(popl(src)); }
 
@@ -2191,7 +2199,6 @@ public:
   // sign extend as need a l to ptr sized element
   void movl2ptr(Register dst, Address src) { LP64_ONLY(movslq(dst, src)) NOT_LP64(movl(dst, src)); }
   void movl2ptr(Register dst, Register src) { LP64_ONLY(movslq(dst, src)) NOT_LP64(if (dst != src) movl(dst, src)); }
-
 
 #undef VIRTUAL
 
diff -r aa0c48844632 -r a7d54b98ca4a src/cpu/x86/vm/assembler_x86.inline.hpp
--- a/src/cpu/x86/vm/assembler_x86.inline.hpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/cpu/x86/vm/assembler_x86.inline.hpp	Wed Jun 03 16:27:17 2009 +0200
@@ -24,14 +24,19 @@
 
 inline void MacroAssembler::pd_patch_instruction(address branch, address target) {
   unsigned char op = branch[0];
-  assert(op == 0xE8 /* call */ ||
+  assert(op == 0xC7 /* movl reg $label*/||
+         op == 0xE8 /* call */ ||
          op == 0xE9 /* jmp */ ||
          op == 0xEB /* short jmp */ ||
          (op & 0xF0) == 0x70 /* short jcc */ ||
          op == 0x0F && (branch[1] & 0xF0) == 0x80 /* jcc */,
          "Invalid opcode at patch point");
 
-  if (op == 0xEB || (op & 0xF0) == 0x70) {
+  if (op == 0xC7) {
+    int * disp = (int*) &branch[4];
+    assert (*disp == 0, "offset wrong");
+    *disp = (int)target;
+  } else if (op == 0xEB || (op & 0xF0) == 0x70) {
     // short offset operators (jmp and jcc)
     char* disp = (char*) &branch[1];
     int imm8 = target - (address) &disp[1];
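(Aside, not part of the patch.) In the new 0xC7 case the 32-bit immediate that mov_label emitted as zero sits 4 bytes into the instruction (opcode, ModRM, SIB, disp8, then imm32), which is why the patch writes through &branch[4]. A stand-alone sketch of the same patching step over a raw byte buffer, 32-bit only:

#include <cassert>
#include <cstdint>
#include <cstring>

// branch points at the 0xC7 opcode; target is the absolute address to store.
static void patch_mov_label(unsigned char* branch, uintptr_t target) {
  assert(branch[0] == 0xC7);                 // movl mem, imm32
  uint32_t disp;
  memcpy(&disp, branch + 4, sizeof(disp));   // imm32 follows opcode+ModRM+SIB+disp8
  assert(disp == 0);                         // placeholder written by mov_label
  uint32_t imm = (uint32_t)target;
  memcpy(branch + 4, &imm, sizeof(imm));
}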
diff -r aa0c48844632 -r a7d54b98ca4a src/cpu/x86/vm/c1_CodeStubs_x86.cpp
--- a/src/cpu/x86/vm/c1_CodeStubs_x86.cpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/cpu/x86/vm/c1_CodeStubs_x86.cpp	Wed Jun 03 16:27:17 2009 +0200
@@ -441,7 +441,7 @@ void ArrayCopyStub::emit_code(LIR_Assemb
     }
   }
 
-  ce->align_call(lir_static_call);
+  ce->align_call(lir_static_call, false);
 
   ce->emit_static_call_stub();
   AddressLiteral resolve(SharedRuntime::get_resolve_static_call_stub(),
diff -r aa0c48844632 -r a7d54b98ca4a src/cpu/x86/vm/c1_FrameMap_x86.cpp
--- a/src/cpu/x86/vm/c1_FrameMap_x86.cpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/cpu/x86/vm/c1_FrameMap_x86.cpp	Wed Jun 03 16:27:17 2009 +0200
@@ -27,7 +27,7 @@
 
 const int FrameMap::pd_c_runtime_reserved_arg_size = 0;
 
-LIR_Opr FrameMap::map_to_opr(BasicType type, VMRegPair* reg, bool) {
+LIR_Opr FrameMap::map_to_opr(BasicType type, VMRegPair* reg, bool, bool is_tail_call) {
   LIR_Opr opr = LIR_OprFact::illegalOpr;
   VMReg r_1 = reg->first();
   VMReg r_2 = reg->second();
@@ -36,7 +36,13 @@ LIR_Opr FrameMap::map_to_opr(BasicType t
     // The calling convention does not count the SharedRuntime::out_preserve_stack_slots() value
     // so we must add it in here.
     int st_off = (r_1->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
-    opr = LIR_OprFact::address(new LIR_Address(rsp_opr, st_off, type));
+    if (!is_tail_call)
+      opr = LIR_OprFact::address(new LIR_Address(rsp_opr, st_off, type));
+    else {
+      // Ignore RET address and RBP.
+      st_off += (2* VMRegImpl::stack_slot_size);
+      opr = LIR_OprFact::address(new LIR_Address(rbp_opr, st_off, type));
+    }
   } else if (r_1->is_Register()) {
     Register reg = r_1->as_Register();
     if (r_2->is_Register() && (type == T_LONG || type == T_DOUBLE)) {
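(Worked example, not part of the patch.) For a tail call the stack argument is addressed off rbp rather than rsp, and two extra 4-byte slots are skipped for the saved return address and saved rbp. With stack_slot_size == 4, no out-preserve slots, and an argument at reg2stack() == 1:

  normal call:  st_off = (1 + 0) * 4   = 4    // rsp-relative
  tail call:    st_off = 4 + 2 * 4     = 12   // rbp-relative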
diff -r aa0c48844632 -r a7d54b98ca4a src/cpu/x86/vm/c1_LIRAssembler_x86.cpp
--- a/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp	Wed Jun 03 16:27:17 2009 +0200
@@ -510,6 +510,296 @@ void LIR_Assembler::emit_deopt_handler()
 
 }
 
+void LIR_Assembler::emit_static_not_sibling_tail_call_stub() {
+  // if the last instruction is a call (typically to do a throw which
+  // is coming at the end after block reordering) the return address
+  // must still point into the code area in order to avoid assertion
+  // failures when searching for the corresponding bci => add a nop
+  // (was bug 5/14/1999 - gri)
+  __ nop();
+  CallingConvention * incoming_args = frame_map()->incoming_arguments();
+  Label call_to_interpreter;
+  assert(incoming_args!=NULL, "ops");
+  int arg_slots = incoming_args->reserved_stack_slots();
+  int stub_size = 8*arg_slots + static_not_sibling_tail_call_stub_size;
+  // Generate code for static tail call (we know that klass and protection
+  // domain is correct).
+  address handler_base = __ start_a_stub(stub_size);
+  if (handler_base == NULL) {
+    // not enough space left for the handler
+    bailout("static tail call stub overflow");
+    return;
+  }
+#ifdef ASSERT
+  int offset = code_offset();
+#endif // ASSERT
+  __ align(CodeEntryAlignment);
+  compilation()->offsets()->set_value(CodeOffsets::Verified_Not_Sibling_Tail_Call_Entry, code_offset());
+  if (TraceTailCalls) __ warn("Compiled entry point: Verified_Not_Sibling_Tail_Call_Entry");
+  // Fast case: parent is interpreter. This means we can extend its stack frame.
+  // Assume: rax, rbx are scratch. rbx would hold methodOop, rax the IC_klass token
+  Register tmp = rax; // scratch
+  Register last_sp = rbx; // scratch
+  __ parent_is_not_interpreter_jcc(rbp, rbx, call_to_interpreter);
+  // Store old rbp
+  __ movl(tmp, Address(rbp, frame::link_offset*wordSize));
+  __ push_reg(tmp);
+  // Store ret address.
+  __ movl(tmp, Address(rbp, frame::return_addr_offset * wordSize));
+  __ push_reg(tmp);
+  
+  // when debugging the return addr pointer remove the 2 __ a_long() lines in
+  // templateInterpreter_x86_32.cpp to get a sensible assembler output
+  //__ stop("static_not_sibling_call, parent is interpreted rax contains ret_entry");
+  
+  // Get last_sp from parent frame.
+  __ movl(tmp, Address(rbp, frame::link_offset * wordSize)); // old rbp
+  __ movl(last_sp, Address(tmp, frame::interpreter_frame_last_sp_offset * wordSize));
+  // Shuffle arguments
+  for (int src_slot = arg_slots, dest_slot=-1; src_slot > 0; src_slot--, dest_slot--) {
+    // saved old_rbp, old_retaddr on top of stack => +2
+    Address src(rsp, VMRegImpl::stack_slot_size * (2+src_slot));
+    Address dest(last_sp, VMRegImpl::stack_slot_size * (dest_slot));
+    __ movl(tmp, src);
+    __ movl(dest, tmp);
+  }
+  // Set return address.
+  __ subl(last_sp, (1+arg_slots)*wordSize);
+  __ pop_reg(tmp);
+  __ movl(Address(last_sp, 0), tmp);
+  // Set new rbp
+  __ pop_reg(rbp);
+  // Set new rsp. Need to do this after the pop!
+  __ movl(rsp, last_sp);
+  // jump to VEP
+   address vep_entry = compilation()->code()->insts()->start() +  
+    compilation()->offsets()->value(CodeOffsets::Verified_Entry);
+  RelocationHolder rh = section_call_Relocation::spec(vep_entry, CodeBuffer::SECT_INSTS);
+  // Jump to vep.
+  __ jump(AddressLiteral((address)vep_entry, rh));
+  // Slow case: parent is not interpreted. Jump to interpreter entry of called
+  // function in order to lazily create an interpreted frame on the stack.
+  __ bind(call_to_interpreter);
+  if (TraceTailCalls) __ warn("c1 LIR: call_to_interpreter");
+  compilation()->offsets()->set_value(CodeOffsets::Verified_Not_Sibling_Tail_Call_Set_Data_Entry, code_offset());
+  // Set methodoop.
+  __ movoop(rbx, (jobject)Universe::non_oop_word());
+  // Jump to C2I Entry Point
+  __ jump(RuntimeAddress((address)-1));
+  // TODO: adapt static_tail_call_stub_size
+  assert(code_offset() - offset <= stub_size, "overflow");
+  __ end_a_stub();
+}
+
+void LIR_Assembler::emit_static_tail_call_stub() {
+  // if the last instruction is a call (typically to do a throw which
+  // is coming at the end after block reordering) the return address
+  // must still point into the code area in order to avoid assertion
+  // failures when searching for the corresponding bci => add a nop
+  // (was bug 5/14/1999 - gri)
+
+  __ nop();
+  CallingConvention * incoming_args = frame_map()->incoming_arguments();
+  assert(incoming_args!=NULL, "ops");
+  int arg_slots = incoming_args->reserved_stack_slots();
+  int stub_size = 8*arg_slots + static_tail_call_stub_size;
+  // Generate code for static tail call (we know that klass and protection
+  // domain is correct).
+  address handler_base = __ start_a_stub(stub_size);
+  if (handler_base == NULL) {
+    // not enough space left for the handler
+    bailout("static tail call stub overflow");
+    return;
+  }
+#ifdef ASSERT
+  int offset = code_offset();
+#endif // ASSERT
+  __ align(CodeEntryAlignment);
+  compilation()->offsets()->set_value(CodeOffsets::Verified_Tail_Call_Entry, code_offset());
+  if (TraceTailCalls) __ warn("Compiled entry point: Verified_Tail_Call_Entry");
+  __ generate_stack_overflow_check(initial_frame_size_in_bytes());
+ 
+  // Move arguments.
+  emit_tail_call_argument_move(arg_slots);
+  // Remove tail calling caller's stack frame.
+  Address new_stack_pointer(rbp, -1*initial_frame_size_in_bytes());
+  __ leal(rsp, new_stack_pointer);
+  // Compute target of jump: the Frame_Complete point of the current method.
+  address vep_entry = compilation()->code()->insts()->start() + 
+    compilation()->offsets()->value(CodeOffsets::Frame_Complete);
+    //compilation()->offsets()->value(CodeOffsets::Verified_Entry);
+  RelocationHolder rh = section_call_Relocation::spec(vep_entry, CodeBuffer::SECT_INSTS);
+  // Jump to vep.
+  __ jump(AddressLiteral((address)vep_entry, rh));
+  
+  // TODO: adapt static_tail_call_stub_size
+  assert(code_offset() - offset <= stub_size, "overflow");
+  __ end_a_stub();
+}
+
+void LIR_Assembler::emit_tail_call_argument_move(int arg_slots) {
+  // Copy the args to tail call position using register rbx.
+  int sizeargs = 0;
+  BasicTypeArray* sig = FrameMap::signature_type_array_for(method());
+  for (int i = 0; i < sig->length(); i++) {
+    sizeargs += type2size[sig->at(i)];
+  }
+  VMRegPair* regs = NEW_RESOURCE_ARRAY(VMRegPair, sizeargs);
+  BasicType * sig_bt = NEW_RESOURCE_ARRAY(BasicType, sizeargs);
+  int sig_index = 0;
+  for (int i = 0; i < sizeargs; i++, sig_index++) {
+    sig_bt[i] = sig->at(sig_index);
+    if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
+      sig_bt[i + 1] = T_VOID;
+      i++;
+    }
+  }
+  // Get exact (hence the 3) arg size on stack.
+  Register tmp = rbx;
+  Register tmp2 = rax;
+  arg_slots = SharedRuntime::java_calling_convention(sig_bt, regs, sizeargs, 3);
+  for (int slot = 1+SharedRuntime::tail_call_protection_domain_slots();
+      slot <= arg_slots; slot++) {
+    Address src (rsp, VMRegImpl::stack_slot_size * (SharedRuntime::out_preserve_stack_slots()+slot));
+    // Need to account for the saved eip slot, hence slot+1 below.
+    Address dest(rbp, VMRegImpl::stack_slot_size * (SharedRuntime::out_preserve_stack_slots()+slot+1));
+    __ movl(tmp, src);
+    __ movl(dest, tmp);
+  }
+}
+
+void LIR_Assembler::emit_not_sibling_monomorphic_tail_call_stub() {
+  // if the last instruction is a call (typically to do a throw which
+  // is coming at the end after block reordering) the return address
+  // must still point into the code area in order to avoid assertion
+  // failures when searching for the corresponding bci => add a nop
+  // (was bug 5/14/1999 - gri)
+  __ nop();
+  CallingConvention * incoming_args = frame_map()->incoming_arguments();
+  Label call_to_interpreter;
+  assert(incoming_args!=NULL, "ops");
+  int arg_slots = incoming_args->reserved_stack_slots();
+  int stub_size = 8*arg_slots + monomorphic_not_sibling_tail_call_stub_size;
+  // Generate code for static tail call (we know that klass and protection
+  // domain is correct).
+  address handler_base = __ start_a_stub(stub_size);
+  if (handler_base == NULL) {
+    // not enough space left for the handler
+    bailout("static tail call stub overflow");
+    return;
+  }
+#ifdef ASSERT
+  int offset = code_offset();
+#endif // ASSERT
+  __ align(CodeEntryAlignment);
+  compilation()->offsets()->set_value(CodeOffsets::Not_Sibling_Tail_Call_Entry, code_offset());
+  if (TraceTailCalls) __ warn("Compiled entry point: Not_Sibling_Tail_Call_Entry");
+  check_icache();
+  // Fast case: parent is interpreter. This means we can extend its stack frame.
+  // Assume: rax, rbx are scratch here since rax is needed only for
+  // check_icache. rbx would hold methodOop, rax the IC_klass token
+  Register tmp = rax; // scratch
+  Register last_sp = rbx; // scratch
+  __ parent_is_not_interpreter_jcc(rbp, tmp, call_to_interpreter);
+  // Store old rbp
+  __ movl(tmp, Address(rbp, frame::link_offset*wordSize));
+  __ push_reg(tmp);
+  // Store ret address.
+  __ movl(tmp, Address(rbp, frame::return_addr_offset * wordSize));
+  __ push_reg(tmp);
+  
+  // when debugging the return addr pointer remove the 2 __ a_long() lines in
+  // templateInterpreter_x86_32.cpp to get a sensible assembler output
+  //__ stop("static_not_sibling_call, parent is interpreted rax contains ret_entry");
+  
+  // Get last_sp from parent frame.
+  __ movl(tmp, Address(rbp, frame::link_offset * wordSize)); // old rbp
+  __ movl(last_sp, Address(tmp, frame::interpreter_frame_last_sp_offset * wordSize));
+  // Shuffle arguments
+  for (int src_slot = arg_slots, dest_slot=-1; src_slot > 0; src_slot--, dest_slot--) {
+    // saved old_rbp, old_retaddr on top of stack => +2
+    Address src(rsp, VMRegImpl::stack_slot_size * (2+src_slot));
+    Address dest(last_sp, VMRegImpl::stack_slot_size * (dest_slot));
+    __ movl(tmp, src);
+    __ movl(dest, tmp);
+  }
+  // Set return address.
+  __ subl(last_sp, (1+arg_slots)*wordSize);
+  __ pop_reg(tmp);
+  __ movl(Address(last_sp, 0), tmp);
+  // Set new rbp
+  __ pop_reg(rbp);
+  // Set new rsp. Need to do this after the pop!
+  __ movl(rsp, last_sp);
+  // jump to VEP
+   address vep_entry = compilation()->code()->insts()->start() +  
+    compilation()->offsets()->value(CodeOffsets::Verified_Entry);
+  RelocationHolder rh = section_call_Relocation::spec(vep_entry, CodeBuffer::SECT_INSTS);
+  // Jump to vep.
+  __ jump(AddressLiteral((address)vep_entry, rh));
+  // Slow case: parent is not interpreted. Jump to interpreter entry of called
+  // function in order to lazily create an interpreted frame on the stack.
+  __ bind(call_to_interpreter);
+  compilation()->offsets()->set_value(CodeOffsets::Not_Sibling_Tail_Call_Set_Data_Entry, code_offset());
+  // Set methodoop.
+  __ movoop(rbx, (jobject)Universe::non_oop_word());
+  // Jump to C2I Entry Point
+  __ jump(RuntimeAddress((address)-1));
+  // TODO: adapt static_tail_call_stub_size
+  assert(code_offset() - offset <= stub_size, "overflow");
+  __ end_a_stub();
+}
+
+void LIR_Assembler::emit_monomorphic_tail_call_stub() {
+  // if the last instruction is a call (typically to do a throw which
+  // is coming at the end after block reordering) the return address
+  // must still point into the code area in order to avoid assertion
+  // failures when searching for the corresponding bci => add a nop
+  // (was bug 5/14/1999 - gri)
+
+  __ nop();
+  CallingConvention * incoming_args = frame_map()->incoming_arguments();
+  assert(incoming_args!=NULL, "ops");
+  int arg_slots = incoming_args->reserved_stack_slots();
+  int stub_size = 8*arg_slots + monomorphic_tail_call_stub_size;
+  // Generate code for monomorphic tail call (we know that protection
+  // domain is correct).
+  address handler_base = __ start_a_stub(stub_size);
+  if (handler_base == NULL) {
+    // not enough space left for the handler
+    bailout("static tail call stub overflow");
+    return;
+  }
+#ifdef ASSERT
+  int offset = code_offset();
+#endif // ASSERT
+  __ align(CodeEntryAlignment);
+  compilation()->offsets()->set_value(CodeOffsets::Tail_Call_Entry, code_offset());
+  if (TraceTailCalls) __ warn("Compiled entry point: Tail_Call_Entry");
+  // Check inline cache - needs to be done before popping the frame.
+  // If the check is done after popping the frame and the icache check fails,
+  // the frame would be popped again by handle_ic_miss code path.
+  check_icache();
+  __ generate_stack_overflow_check(initial_frame_size_in_bytes());
+  // Move arguments.
+  emit_tail_call_argument_move(arg_slots);
+
+  // Remove tail calling caller's stack frame.
+  Address new_stack_pointer(rbp, -1*initial_frame_size_in_bytes());
+  __ leal(rsp, new_stack_pointer);
+
+  // Compute target of jump: the Frame_Complete point of the current method.
+  address vep_entry = compilation()->code()->insts()->start() + 
+    //compilation()->offsets()->value(CodeOffsets::Verified_Entry);
+    compilation()->offsets()->value(CodeOffsets::Frame_Complete);
+  RelocationHolder rh = section_call_Relocation::spec(vep_entry, CodeBuffer::SECT_INSTS);
+  // Jump to vep.
+  __ jump(AddressLiteral((address)vep_entry, rh));
+  
+  // TODO: adapt size
+  assert(code_offset() - offset <= stub_size, "overflow");
+  __ end_a_stub();
+}
 
 // This is the fast version of java.lang.String.compare; it has not
 // OSR-entry and therefore, we generate a slow version for OSR's
@@ -2726,7 +3016,7 @@ void LIR_Assembler::comp_fl2i(LIR_Code c
 }
 
 
-void LIR_Assembler::align_call(LIR_Code code) {
+void LIR_Assembler::align_call(LIR_Code code, bool is_tail_call) {
   if (os::is_MP()) {
     // make sure that the displacement word of the call ends up word aligned
     int offset = __ offset();
@@ -2734,9 +3024,14 @@ void LIR_Assembler::align_call(LIR_Code 
       case lir_static_call:
       case lir_optvirtual_call:
         offset += NativeCall::displacement_offset;
-        break;
+        if (is_tail_call) offset += NativeJump::tail_call_push_ret_offset;
+      break;
       case lir_icvirtual_call:
-        offset += NativeCall::displacement_offset + NativeMovConstReg::instruction_size;
+        if (is_tail_call) {
+          offset += NativeCall::displacement_offset + NativeMovConstReg::instruction_size+NativeMovConstProtectionDomain::instruction_size;
+          offset += NativeJump::tail_call_push_ret_offset;
+        } else
+          offset += NativeCall::displacement_offset + NativeMovConstReg::instruction_size;
       break;
       case lir_virtual_call:  // currently, sparc-specific for niagara
       default: ShouldNotReachHere();
@@ -2747,22 +3042,93 @@ void LIR_Assembler::align_call(LIR_Code 
   }
 }
 
+void LIR_Assembler::set_protection_domain_token() {
+  // needs 7 bytes on x86-32
+  __ movoop(Address(rsp, 0), (jobject)Universe::non_oop_word()); 
+}
+
+#ifdef ASSERT
+void check_call_alignment(C1_MacroAssembler * _masm) {
+  assert(!os::is_MP() || ((__ offset() + NativeCall::displacement_offset) % BytesPerWord == 0),
+      "must be aligned");
+}
+#else
+void check_call_alignment(C1_MacroAssembler * _masm) {}
+#endif
+
+
+void LIR_Assembler::tail_call(address entry, RelocationHolder& rh) {
+  Label return_address;
+  __ mov_label(Address(rsp,-4), return_address);
+  __ subl (rsp, wordSize);
+  check_call_alignment(_masm);
+  __ jmp_literal(entry, rh);
+  __ bind (return_address);
+}
 
 void LIR_Assembler::call(address entry, relocInfo::relocType rtype, CodeEmitInfo* info) {
-  assert(!os::is_MP() || (__ offset() + NativeCall::displacement_offset) % BytesPerWord == 0,
-         "must be aligned");
-  __ call(AddressLiteral(entry, rtype));
+  if (entry == SharedRuntime::get_resolve_not_sibling_static_tail_call_stub()) {
+    assert(rtype == relocInfo::static_call_type, "expect static call");
+  
+    RelocationHolder rh = static_call_Relocation::spec(relocInfo::not_sibling_tail_call_type);
+    tail_call(entry,rh);
+  } else if (entry == SharedRuntime::get_resolve_static_tail_call_stub() ) {
+    assert(rtype==relocInfo::static_call_type, "expect static call");
+
+    RelocationHolder rh = static_call_Relocation::spec(relocInfo::sibling_tail_call_type);
+    tail_call(entry, rh);
+  } else if (entry == SharedRuntime::get_resolve_opt_virtual_tail_call_stub()) {
+    assert(rtype==relocInfo::opt_virtual_call_type, "expect opt virtual call");
+
+    RelocationHolder rh = opt_virtual_call_Relocation::spec(relocInfo::sibling_tail_call_type);
+    tail_call(entry, rh);
+  } else if (entry == SharedRuntime::get_resolve_opt_not_sibling_virtual_tail_call_stub()) {
+    assert(rtype==relocInfo::opt_virtual_call_type, "expect opt virtual call");
+
+    RelocationHolder rh = opt_virtual_call_Relocation::spec(relocInfo::not_sibling_tail_call_type);
+    tail_call(entry, rh);
+  } else {
+    check_call_alignment(_masm);
+    __ call(AddressLiteral(entry, rtype));
+  }
   add_call_info(code_offset(), info);
 }
 
+static relocInfo::tailCallType tail_call_type_from_resolve_stub(address entry) {
+   
+   assert((entry == SharedRuntime::get_resolve_virtual_tail_call_stub()) ||
+          (entry == SharedRuntime::get_resolve_virtual_call_stub()) ||
+          (entry == SharedRuntime::get_resolve_not_sibling_virtual_tail_call_stub()),
+          "must be virtual resolve stub");
+   
+   relocInfo::tailCallType type = relocInfo::not_tail_call;
+   if (entry == SharedRuntime::get_resolve_virtual_tail_call_stub()) {
+     type = relocInfo::sibling_tail_call_type;
+   
+   } else if (entry == SharedRuntime::get_resolve_not_sibling_virtual_tail_call_stub()) {
+     type = relocInfo::not_sibling_tail_call_type;
+   }
 
-void LIR_Assembler::ic_call(address entry, CodeEmitInfo* info) {
-  RelocationHolder rh = virtual_call_Relocation::spec(pc());
+   return type;
+}
+
+void LIR_Assembler::ic_call(address entry, CodeEmitInfo* info, bool is_tail_call) {
+  
+  if (is_tail_call) {
+    set_protection_domain_token();
+  }
+
+  relocInfo::tailCallType type = tail_call_type_from_resolve_stub(entry);
+  RelocationHolder rh = virtual_call_Relocation::spec(pc(), NULL, type);
   __ movoop(IC_Klass, (jobject)Universe::non_oop_word());
-  assert(!os::is_MP() ||
+  if (is_tail_call)
+    tail_call(entry, rh);
+  else {
+    assert(!os::is_MP() ||
          (__ offset() + NativeCall::displacement_offset) % BytesPerWord == 0,
          "must be aligned");
-  __ call(AddressLiteral(entry, rh));
+    __ call(AddressLiteral(entry, rh));
+  }
   add_call_info(code_offset(), info);
 }
 
diff -r aa0c48844632 -r a7d54b98ca4a src/cpu/x86/vm/c1_LIRAssembler_x86.hpp
--- a/src/cpu/x86/vm/c1_LIRAssembler_x86.hpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/cpu/x86/vm/c1_LIRAssembler_x86.hpp	Wed Jun 03 16:27:17 2009 +0200
@@ -36,6 +36,9 @@
   address float_constant(float f);
   address double_constant(double d);
 
+
+  void emit_tail_call_argument_move(int arg_slots);
+
   bool is_literal_address(LIR_Address* addr);
 
   // When we need to use something other than rscratch1 use this
@@ -51,5 +54,9 @@ public:
 
   enum { call_stub_size = NOT_LP64(15) LP64_ONLY(28),
          exception_handler_size = DEBUG_ONLY(1*K) NOT_DEBUG(175),
+         static_tail_call_stub_size = NOT_LP64(64) LP64_ONLY(xxx), //TODO: proper size ;)
+         static_not_sibling_tail_call_stub_size = NOT_LP64(300) LP64_ONLY(xxx),
+         monomorphic_tail_call_stub_size = NOT_LP64(128) LP64_ONLY(xxx),
+         monomorphic_not_sibling_tail_call_stub_size = NOT_LP64(300) LP64_ONLY(xxx),
          deopt_handler_size = NOT_LP64(10) LP64_ONLY(17)
        };
diff -r aa0c48844632 -r a7d54b98ca4a src/cpu/x86/vm/frame_x86.hpp
--- a/src/cpu/x86/vm/frame_x86.hpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/cpu/x86/vm/frame_x86.hpp	Wed Jun 03 16:27:17 2009 +0200
@@ -109,7 +109,9 @@
     interpreter_frame_cache_offset                   = interpreter_frame_mdx_offset - 1,
     interpreter_frame_locals_offset                  = interpreter_frame_cache_offset - 1,
     interpreter_frame_bcx_offset                     = interpreter_frame_locals_offset - 1,
-    interpreter_frame_initial_sp_offset              = interpreter_frame_bcx_offset - 1,
+    interpreter_frame_osr_offset                     = interpreter_frame_bcx_offset - 1,
+    //interpreter_frame_initial_sp_offset              = interpreter_frame_bcx_offset - 1,
+    interpreter_frame_initial_sp_offset              = interpreter_frame_osr_offset - 1,
 
     interpreter_frame_monitor_block_top_offset       = interpreter_frame_initial_sp_offset,
     interpreter_frame_monitor_block_bottom_offset    = interpreter_frame_initial_sp_offset,
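(Aside, not part of the patch.) The new interpreter_frame_osr_offset slot is inserted between bcx and initial_sp, so initial_sp (and with it the monitor block top/bottom) moves down by one word. Presumably this slot carries the per-frame OSR flag used by the tail-call handshake seen later in the c2i adapter. Relative layout after the change, one word per line:

  interpreter_frame_bcx_offset
  interpreter_frame_osr_offset          // new slot
  interpreter_frame_initial_sp_offset   // now one word lower than before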
diff -r aa0c48844632 -r a7d54b98ca4a src/cpu/x86/vm/frame_x86.inline.hpp
--- a/src/cpu/x86/vm/frame_x86.inline.hpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/cpu/x86/vm/frame_x86.inline.hpp	Wed Jun 03 16:27:17 2009 +0200
@@ -191,6 +191,9 @@ inline intptr_t* frame::interpreter_fram
 #else /* asm interpreter */
 inline intptr_t*    frame::sender_sp()        const { return            addr_at(   sender_sp_offset); }
 
+inline int32_t* frame::interpreter_frame_osr_addr() const {
+  return (int32_t*)addr_at(interpreter_frame_osr_offset);
+}
 inline intptr_t** frame::interpreter_frame_locals_addr() const {
   return (intptr_t**)addr_at(interpreter_frame_locals_offset);
 }
diff -r aa0c48844632 -r a7d54b98ca4a src/cpu/x86/vm/interp_masm_x86_32.cpp
--- a/src/cpu/x86/vm/interp_masm_x86_32.cpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/cpu/x86/vm/interp_masm_x86_32.cpp	Wed Jun 03 16:27:17 2009 +0200
@@ -568,18 +568,22 @@ void InterpreterMacroAssembler::super_ca
 }
 
 
-void InterpreterMacroAssembler::prepare_to_jump_from_interpreted() {
+void InterpreterMacroAssembler::prepare_to_jump_from_interpreted(bool is_tail_call) {
   // set sender sp
-  lea(rsi, Address(rsp, wordSize));
-  // record last_sp
-  movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), rsi);
+  // Tail call code sets its own sender sp, so only set sp if the current
+  // call is not a tail call.
+  if (is_tail_call==false) {
+    leal(rsi, Address(rsp, wordSize));
+    // record last_sp
+    movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), rsi);
+  }
 }
 
 
 // Jump to from_interpreted entry of a call unless single stepping is possible
 // in this thread in which case we must call the i2i entry
-void InterpreterMacroAssembler::jump_from_interpreted(Register method, Register temp) {
-  prepare_to_jump_from_interpreted();
+void InterpreterMacroAssembler::jump_from_interpreted(Register method, Register temp, bool is_tail_call) {
+  prepare_to_jump_from_interpreted(is_tail_call);
 
   if (JvmtiExport::can_post_interpreter_events()) {
     Label run_compiled_code;
diff -r aa0c48844632 -r a7d54b98ca4a src/cpu/x86/vm/interp_masm_x86_32.hpp
--- a/src/cpu/x86/vm/interp_masm_x86_32.hpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/cpu/x86/vm/interp_masm_x86_32.hpp	Wed Jun 03 16:27:17 2009 +0200
@@ -162,8 +162,8 @@ class InterpreterMacroAssembler: public 
 
 
   // jump to an invoked target
-  void prepare_to_jump_from_interpreted();
-  void jump_from_interpreted(Register method, Register temp);
+  void prepare_to_jump_from_interpreted(bool is_tail_call=false);
+  void jump_from_interpreted(Register method, Register temp, bool is_tail_call=false);
 
   // Returning from interpreted functions
   //
diff -r aa0c48844632 -r a7d54b98ca4a src/cpu/x86/vm/nativeInst_x86.cpp
--- a/src/cpu/x86/vm/nativeInst_x86.cpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/cpu/x86/vm/nativeInst_x86.cpp	Wed Jun 03 16:27:17 2009 +0200
@@ -114,7 +114,18 @@ void NativeCall::replace_mt_safe(address
 
 }
 
-
+void NativeCall::replace_with_jmp_mt_safe(address instr_addr, address jmp_dest) {
+  unsigned char code_buf[5];
+  int32_t rel_jmp_dest = jmp_dest - (instr_addr+NativeCall::return_address_offset);
+  unsigned char * jmp_dest_ptr = (unsigned char *)&rel_jmp_dest;
+  code_buf[0] = NativeJump::instruction_code;
+  code_buf[1] = jmp_dest_ptr[0];
+  code_buf[2] = jmp_dest_ptr[1];
+  code_buf[3] = jmp_dest_ptr[2];
+  code_buf[4] = jmp_dest_ptr[3];
+  replace_mt_safe(instr_addr, code_buf);
+  
+}
 // Similar to replace_mt_safe, but just changes the destination.  The
 // important thing is that free-running threads are able to execute this
 // call instruction at all times.  If the displacement field is aligned
@@ -129,7 +140,8 @@ void NativeCall::replace_mt_safe(address
 // Used in the runtime linkage of calls; see class CompiledIC.
 // (Cf. 4506997 and 4479829, where threads witnessed garbage displacements.)
 void NativeCall::set_destination_mt_safe(address dest) {
-  debug_only(verify());
+  //debug_only(verify()); TODO: verify only if this is really a call and not a jmp.
+
   // Make sure patching code is locked.  No two threads can patch at the same
   // time but one may be executing this code.
   assert(Patching_lock->is_locked() ||
@@ -199,6 +211,21 @@ void NativeCall::set_destination_mt_safe
 }
 
 
+void NativeMovConstProtectionDomain::verify() {
+#ifdef AMD64
+  assert(false, "not implemented");
+#else
+   u_char test_byte = *(u_char*)instruction_address();
+   if (test_byte != instruction_code) fatal("not a mov (%reg, #) imm32");
+#endif
+}
+
+void NativeMovConstProtectionDomain::print() {
+  tty->print_cr(PTR_FORMAT ": mov (%%reg,#), " INTPTR_FORMAT,
+                instruction_address(), data());
+}
+
+
 void NativeMovConstReg::verify() {
 #ifdef AMD64
   // make sure code pattern is actually a mov reg64, imm64 instruction
@@ -211,7 +238,17 @@ void NativeMovConstReg::verify() {
   // make sure code pattern is actually a mov reg, imm32 instruction
   u_char test_byte = *(u_char*)instruction_address();
   u_char test_byte_2 = test_byte & ( 0xff ^ register_mask);
-  if (test_byte_2 != instruction_code) fatal("not a mov reg, imm32");
+  if (test_byte_2 != instruction_code) {
+    HandleMark hm;
+    CodeBlob* cb = CodeCache::find_blob_unsafe(instruction_address());
+    assert(cb != NULL && cb->is_nmethod(), "must be nmethod");
+    nmethod * nm = (nmethod*) cb;
+    nm->print();
+    nm->print_code();
+    nm->print_pcs();
+    nm->print_relocations();
+    fatal("not a mov reg, imm32");
+  }
 #endif // AMD64
 }
 
@@ -565,8 +602,6 @@ void NativeGeneralJump::replace_mt_safe(
 
 }
 
-
-
 address NativeGeneralJump::jump_destination() const {
   int op_code = ubyte_at(0);
   bool is_rel32off = (op_code == 0xE9 || op_code == 0x0F);
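(Aside, not part of the patch.) replace_with_jmp_mt_safe above overwrites a 5-byte call with a 5-byte jmp rel32; the displacement is measured from the end of the instruction, hence jmp_dest - (instr_addr + NativeCall::return_address_offset). A stand-alone sketch of building that replacement image:

#include <cstdint>
#include <cstring>

// Build the 5-byte "jmp rel32" that replaces a call located at instr_addr.
static void make_jmp_image(unsigned char out[5], uintptr_t instr_addr, uintptr_t jmp_dest) {
  const unsigned char jmp_opcode = 0xE9;                 // NativeJump::instruction_code
  int32_t rel = (int32_t)(jmp_dest - (instr_addr + 5));  // relative to the next instruction
  out[0] = jmp_opcode;
  memcpy(&out[1], &rel, sizeof(rel));                    // little-endian imm32
}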
diff -r aa0c48844632 -r a7d54b98ca4a src/cpu/x86/vm/nativeInst_x86.hpp
--- a/src/cpu/x86/vm/nativeInst_x86.hpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/cpu/x86/vm/nativeInst_x86.hpp	Wed Jun 03 16:27:17 2009 +0200
@@ -154,6 +154,8 @@ class NativeCall: public NativeInstructi
   static void insert(address code_pos, address entry);
 
   static void replace_mt_safe(address instr_addr, address code_buffer);
+  // MT-safe patch to jmp (rel32) instruction.
+  static void replace_with_jmp_mt_safe(address instr_addr, address jmp_dest);
 };
 
 inline NativeCall* nativeCall_at(address address) {
@@ -170,6 +172,48 @@ inline NativeCall* nativeCall_before(add
   call->verify();
 #endif
   return call;
+}
+
+// An interface for accessing/manipulating mov (%esp,offset) imm32
+// protection domain token instructions.
+class NativeMovConstProtectionDomain : public NativeInstruction {
+  public:
+  enum Intel_specific_constants {
+    instruction_code            = 0xC7,
+    instruction_size            =    1 + 2 + wordSize,
+    instruction_offset          =    0,
+    data_offset                 =    1 + 2,
+    next_instruction_offset     =    instruction_size
+  };
+
+  address instruction_address() const       { return addr_at(instruction_offset); }
+  address next_instruction_address() const  { return addr_at(next_instruction_offset); }
+  oop * oop_address()  const                { return (oop*)addr_at(data_offset); }
+  intptr_t data() const                     { return ptr_at(data_offset); }
+  void  set_data(intptr_t x)                { set_ptr_at(data_offset, x); }
+
+  void  verify();
+  void  print();
+
+  // Creation
+  inline friend NativeMovConstProtectionDomain* nativeMovConstPD_at(address address);
+  inline friend NativeMovConstProtectionDomain* nativeMovConstPD_before(address address);
+};
+
+inline NativeMovConstProtectionDomain* nativeMovConstPD_at(address address) {
+  NativeMovConstProtectionDomain* test = (NativeMovConstProtectionDomain*)(address - NativeMovConstProtectionDomain::instruction_offset);
+#ifdef ASSERT
+  test->verify();
+#endif
+  return test;
+}
+
+inline NativeMovConstProtectionDomain* nativeMovConstPD_before(address address) {
+  NativeMovConstProtectionDomain* test = (NativeMovConstProtectionDomain*)(address - NativeMovConstProtectionDomain::instruction_size - NativeMovConstProtectionDomain::instruction_offset);
+#ifdef ASSERT
+  test->verify();
+#endif
+  return test;
 }
 
 // An interface for accessing/manipulating native mov reg, imm32 instructions.
@@ -372,7 +416,10 @@ class NativeJump: public NativeInstructi
     instruction_size            =    5,
     instruction_offset          =    0,
     data_offset                 =    1,
-    next_instruction_offset     =    5
+    next_instruction_offset     =    5,
+    // A tail call replaces the call instruction with
+    // movl [esp-4], $ret_addr ; subl esp, 4 ; jmp target
+    tail_call_push_ret_offset   =    11
   };
 
   address instruction_address() const       { return addr_at(instruction_offset); }
@@ -413,8 +460,23 @@ class NativeJump: public NativeInstructi
   // MT-safe insertion of native jump at verified method entry
   static void check_verified_entry_alignment(address entry, address verified_entry);
   static void patch_verified_entry(address entry, address verified_entry, address dest);
+
+  static bool is_jump_at(address instr) {
+    return ((*instr) & 0xFF) == NativeJump::instruction_code;
+  }
+
+  static bool is_jump_before(address return_address) {
+    return is_jump_at(return_address - NativeCall::return_address_offset);
+  }
 };
 
+inline NativeJump* nativeJump_before(address return_address) {
+  NativeJump* call = (NativeJump*)(return_address - NativeCall::return_address_offset);
+#ifdef ASSERT
+  call->verify();
+#endif
+  return call;
+}
 inline NativeJump* nativeJump_at(address address) {
   NativeJump* jump = (NativeJump*)(address - NativeJump::instruction_offset);
 #ifdef ASSERT
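(Worked layout, not part of the patch.) The protection-domain token written by set_protection_domain_token() is a movl (%esp), imm32, which on 32-bit x86 is one opcode byte plus a 2-byte ModRM/SIB pair plus the 4-byte token:

  offset 0: 0xC7        opcode (movl r/m32, imm32)
  offset 1: 0x04        ModRM  (reg field /0, r/m = SIB)
  offset 2: 0x24        SIB    (base = esp)
  offset 3..6: imm32    the token itself

That is where NativeMovConstProtectionDomain's instruction_size = 1 + 2 + wordSize and data_offset = 1 + 2 come from.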
diff -r aa0c48844632 -r a7d54b98ca4a src/cpu/x86/vm/sharedRuntime_x86_32.cpp
--- a/src/cpu/x86/vm/sharedRuntime_x86_32.cpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/cpu/x86/vm/sharedRuntime_x86_32.cpp	Wed Jun 03 16:27:17 2009 +0200
@@ -38,6 +38,12 @@ RuntimeStub*       SharedRuntime::_resol
 RuntimeStub*       SharedRuntime::_resolve_opt_virtual_call_blob;
 RuntimeStub*       SharedRuntime::_resolve_virtual_call_blob;
 RuntimeStub*       SharedRuntime::_resolve_static_call_blob;
+RuntimeStub*       SharedRuntime::_resolve_static_tail_call_blob;
+RuntimeStub*       SharedRuntime::_resolve_not_sibling_static_tail_call_blob;
+RuntimeStub*       SharedRuntime::_resolve_virtual_tail_call_blob;
+RuntimeStub*       SharedRuntime::_resolve_not_sibling_virtual_tail_call_blob;
+RuntimeStub*       SharedRuntime::_resolve_opt_virtual_tail_call_blob;
+RuntimeStub*       SharedRuntime::_resolve_opt_not_sibling_virtual_tail_call_blob;
 
 const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size;
 
@@ -318,6 +324,13 @@ static int reg2offset_out(VMReg r) {
   return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
 }
 
+
+// Tail call support: slots used on top of stack for the protection domain
+// token. Currently set to 2 to help find bugs; normally only one would be needed.
+int SharedRuntime::tail_call_protection_domain_slots() {
+    if (TailCalls) return 2;
+    else return 0;
+}
 // ---------------------------------------------------------------------------
 // Read the array of BasicTypes from a signature, and compute where the
 // arguments should go.  Values in the VMRegPair regs array refer to 4-byte
@@ -349,12 +362,14 @@ int SharedRuntime::java_calling_conventi
                                            VMRegPair *regs,
                                            int total_args_passed,
                                            int is_outgoing) {
-  uint    stack = 0;          // Starting stack position for args on stack
+  uint    stack = tail_call_protection_domain_slots();          // Starting stack position for args on stack
 
 
   // Pass first two oop/int args in registers ECX and EDX.
   uint reg_arg0 = 9999;
   uint reg_arg1 = 9999;
+  // +TailCall edx is used for security token
+  // uint reg_arg1 = 0; 
 
   // Pass first two float/double args in registers XMM0 and XMM1.
   // Doubles have precedence, so if you pass a mix of floats and doubles
@@ -383,7 +398,7 @@ int SharedRuntime::java_calling_conventi
       stack += 2;
     }
   }
-  int dstack = 0;             // Separate counter for placing doubles
+  int dstack = tail_call_protection_domain_slots();             // Separate counter for placing doubles
 
   // Now pick where all else goes.
   for( i = 0; i < total_args_passed; i++) {
@@ -441,7 +456,8 @@ int SharedRuntime::java_calling_conventi
       break;
     }
   }
-
+  // Return the exact stack size. Used by tail calls when moving their arguments.
+  if (is_outgoing==3) return stack;
   // return value can be odd number of VMRegImpl stack slots make multiple of 2
   return round_to(stack, 2);
 }
@@ -538,7 +554,8 @@ static void gen_c2i_adapter(MacroAssembl
                             int comp_args_on_stack,
                             const BasicType *sig_bt,
                             const VMRegPair *regs,
-                            Label& skip_fixup) {
+                            Label& skip_fixup,
+                            bool is_tail_call) {
   // Before we get into the guts of the C2I adapter, see if we should be here
   // at all.  We've come from compiled code and are attempting to jump to the
   // interpreter, which means the caller made a static call to get here
@@ -566,7 +583,8 @@ static void gen_c2i_adapter(MacroAssembl
   __ pop(rax);
 
   // set senderSP value
-  __ movptr(rsi, rsp);
+  if (is_tail_call==false)
+    __ movptr(rsi, rsp);
 
   __ subptr(rsp, extraspace);
 
@@ -913,6 +931,181 @@ AdapterHandlerEntry* SharedRuntime::gene
   gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs);
 
   // -------------------------------------------------------------------------
+  // Generate tail call C2I adapters.
+  
+  assert (SharedRuntime::out_preserve_stack_slots() == 0, "This code assumes that there are no preserved stack slots");
+  Label skip_fixup_tail_call;
+  Label parent_is_not_interpreted;
+  Label verified_tail_call;
+  Label verified_tail_call_not_sibling;
+  Label continue_in_interpreter;
+  Label skip_fixup_tailcall;
+  Label skip_fixup;
+
+#ifdef COMPILER2
+  // opto puts the 'base pointer' into rsi when tail calling.
+  Register base_pointer = rsi;
+#else
+  Register base_pointer = rbp;
+#endif
+
+  address c2i_unverified_not_sibling_tail_call_entry = __ pc();
+  {
+
+    Label missed;
+    Register holder = rax;
+    Register receiver = rcx;
+    Register temp = rbx;
+
+    __ verify_oop(holder);
+    __ movl(temp, Address(receiver, oopDesc::klass_offset_in_bytes()));
+    __ verify_oop(temp);
+
+    __ cmpl(temp, Address(holder, compiledICHolderOopDesc::holder_klass_offset()));
+    __ movl(rbx, Address(holder, compiledICHolderOopDesc::holder_method_offset()));
+    __ jcc(Assembler::notEqual, missed);
+    // Method might have been compiled since the call site was patched to
+    // interpreted if that is the case treat it as a miss so we can get
+    // the call site corrected.
+    __ cmpl(Address(rbx, in_bytes(methodOopDesc::code_offset())), NULL_WORD);
+    __ jcc(Assembler::equal, verified_tail_call_not_sibling);
+
+    __ bind(missed);
+    __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
+  }
+
+  address c2i_verified_not_sibling_tail_call_entry = __ pc();
+  {
+    //__ warn("c2i_not_sibling_tail_call_entry");
+    patch_callers_callsite(masm);
+    __ verify_oop(rbx); // Rbx should contain methodoop of callee
+    Register tmp = rax; // Rax (IC_klass) is not used for static calls.
+    __ bind(verified_tail_call_not_sibling);
+    if (TraceTailCalls) __ warn("c2i static not sib tail call entry");
+    __ parent_is_not_interpreter_jcc(base_pointer, tmp, continue_in_interpreter);
+    //__ warn("c2i_not_sibling_tail_call_entry: parent is interpreted");
+    // Parent is interpreted: can use code path of static tail call.
+    // It moves the arguments relative to the last_sp of the parent frame.
+    __ jmp(verified_tail_call);
+    __ bind(continue_in_interpreter);
+    if (TraceTailCalls) __ warn("c2i continue in interpreter");
+    //__ warn("c2i_not_sibling_tail_call_entry: parent is compiled: continue in interpreted");
+    // Leave an int frame - lazily create an interpreter frame (the callee frame)
+    // Since we want to guarantee an interpreter frame on the stack we turn off
+    // on stack replacement (OSR) for one run (frame) of the called function.
+    // See InterpreterGenerator::generate_native_entry(bool synchronized) for the
+    // corresponding handshake code.
+    const Address disable_osr_for_frame(tmp,
+          in_bytes(JavaThread::disable_osr_for_frame_offset()));
+    __ push(tmp); // Probably don't have to save it. Just to be safe.
+    __ get_thread(tmp);
+    __ movbool(disable_osr_for_frame, true);
+    __ pop(tmp);
+    __ jmp(skip_fixup); // to interpreter.
+  }
+
+  address c2i_unverified_tail_call_entry = __ pc();
+  {
+
+    Label missed;
+    Register holder = rax;
+    Register receiver = rcx;
+    Register temp = rbx;
+
+    __ verify_oop(holder);
+    __ movl(temp, Address(receiver, oopDesc::klass_offset_in_bytes()));
+    __ verify_oop(temp);
+
+    __ cmpl(temp, Address(holder, compiledICHolderOopDesc::holder_klass_offset()));
+    __ movl(rbx, Address(holder, compiledICHolderOopDesc::holder_method_offset()));
+    __ jcc(Assembler::notEqual, missed);
+    // Method might have been compiled since the call site was patched to
+    // interpreted if that is the case treat it as a miss so we can get
+    // the call site corrected.
+    __ cmpl(Address(rbx, in_bytes(methodOopDesc::code_offset())), NULL_WORD);
+    __ jcc(Assembler::equal, verified_tail_call);
+
+    __ bind(missed);
+    __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
+  }
+
+  // Static tail call entry: We know that the protection domain and klass is
+  // correct.
+  
+  address verified_tail_call_c2i_entry = __ pc();
+  patch_callers_callsite(masm);
+  __ bind (verified_tail_call);
+  if (TraceTailCalls) __ warn("c2i static tail call entry");
+  // Check whether parent frame is interpreter.
+  Register tmp = rax; // Klass not used if static call.
+  
+
+  __ parent_is_not_interpreter_jcc(base_pointer, tmp, parent_is_not_interpreted);
+  //__ warn("c2i_static_tail_call_entry: parent is interpreted");
+  // Parent is interpreted
+  // Save some registers.
+  __ push(rax);
+  __ push(rbx);
+  // store rbp
+  __ movl(tmp, Address(base_pointer, frame::link_offset*wordSize));
+  __ push(tmp);
+  // Store ret address.
+  __ movl(tmp, Address(base_pointer, frame::return_addr_offset * wordSize));
+  __ push(tmp);
+  // Get last_sp from parent frame.
+  Register last_sp = tmp; tmp = rbx;
+  __ movl(tmp, Address(base_pointer, frame::link_offset * wordSize)); // old rbp
+  __ movl(last_sp, Address(tmp, frame::interpreter_frame_last_sp_offset * wordSize));
+  __ movl(rsi, last_sp); // old_sp for interpreter
+  // Shuffle arguments
+  for (int src_slot = comp_args_on_stack, dest_slot=-1; src_slot > 0; src_slot--, dest_slot--) {
+    // rax,rbx,old_rbp,old_retaddr => +4
+    Address src(rsp, VMRegImpl::stack_slot_size * (4+src_slot));
+    Address dest(last_sp, VMRegImpl::stack_slot_size * (dest_slot));
+    __ movl(tmp, src);
+    __ movl(dest, tmp);
+  }
+  // Set return address.
+  __ subl(last_sp, (1+comp_args_on_stack)*wordSize);
+  __ pop(tmp);
+  __ movl(Address(last_sp, 0), tmp);
+  // Set rbp
+  __ pop(rbp);
+  // Restore used registers.
+  __ pop(rbx);
+  // Set new rsp.
+  __ movl(rsp, last_sp);
+  // Set new rbp
+  // BUG: might be overwritten! See the not-sibling entry point: push rbp
+  // at the beginning.
+  //__ movl(rbp, Address(rbp, frame::link_offset * wordSize)); // old rbp
+  __ jmp(skip_fixup_tail_call);
+  
+  // Parent is compiled.
+  __ bind(parent_is_not_interpreted);
+  tmp = rax;
+  for (int slot = 1; slot <= comp_args_on_stack; slot++) {
+    Address src (rsp, VMRegImpl::stack_slot_size * (SharedRuntime::out_preserve_stack_slots()+slot));
+    // Need to account for the saved rbp slot, hence slot+1 below.
+    Address dest(base_pointer, VMRegImpl::stack_slot_size * (SharedRuntime::out_preserve_stack_slots()+slot+1));
+    __ movl(tmp, src);
+    __ movl(dest, tmp);
+  }
+  // pop frame
+#ifdef COMPILER2
+  __ mov(rsp, rsi); // Rsi contains base_pointer for opto.
+  __ pop(rbp);
+#else
+  __ leave();
+#endif
+  // set  old_sp
+  __ leal(rsi,Address(rsp, wordSize)); // ret addr on stack
+  // jump to normal c2i entry
+  __ jmp(skip_fixup_tail_call);
+  gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup_tail_call, true);
+  // End of static tail call entry.
+
+  // -------------------------------------------------------------------------
   // Generate a C2I adapter.  On entry we know rbx, holds the methodOop during calls
   // to the interpreter.  The args start out packed in the compiled layout.  They
   // need to be unpacked into the interpreter layout.  This will almost always
@@ -922,7 +1115,8 @@ AdapterHandlerEntry* SharedRuntime::gene
   // compiled code, which relys solely on SP and not EBP, get sick).
 
   address c2i_unverified_entry = __ pc();
-  Label skip_fixup;
+  if (TraceTailCalls) __ warn("c2i_unverified_entry (not tail call)");
+  //Label skip_fixup;
 
   Register holder = rax;
   Register receiver = rcx;
@@ -948,13 +1142,22 @@ AdapterHandlerEntry* SharedRuntime::gene
     __ bind(missed);
     __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
   }
-
+  // For tail calls (lazy adapter / interpreter frame) we skip the fixup check.
+  address c2i_entry_skip_fixup = __ pc();
+  if (TraceTailCalls) __ warn("c2i entry skip fixup");
+  __ jmp(continue_in_interpreter);
+  //__ jmp(skip_fixup);
+  
   address c2i_entry = __ pc();
-
-  gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup);
+  //if (TraceTailCalls) __ warn("c2i_entry (not tail call)");
+  gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup,  false);
 
   __ flush();
-  return new AdapterHandlerEntry(i2c_entry, c2i_entry, c2i_unverified_entry);
+  return new AdapterHandlerEntry(i2c_entry, c2i_entry, c2i_unverified_entry, 
+                                 verified_tail_call_c2i_entry, c2i_unverified_tail_call_entry, 
+                                 c2i_entry_skip_fixup, 
+                                 c2i_verified_not_sibling_tail_call_entry, 
+                                 c2i_unverified_not_sibling_tail_call_entry);
 }
 
 int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
@@ -1453,7 +1656,9 @@ nmethod *SharedRuntime::generate_native_
   // sure we can capture all the incoming oop args from the
   // caller.
   //
-  OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
+  //OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
+  OopMap* map = new OopMap((tail_call_protection_domain_slots()+stack_slots) * 2, 0 /* arg_slots*/);
+  // oopmap that copes with size of tail_call_pd_slots
 
   // Mark location of rbp,
   // map->set_callee_saved(VMRegImpl::stack2reg( stack_slots - 2), stack_slots * 2, 0, rbp->as_VMReg());
@@ -2335,7 +2540,7 @@ void SharedRuntime::generate_deopt_blob(
   // allocate space for the code
   ResourceMark rm;
   // setup code generation tools
-  CodeBuffer   buffer("deopt_blob", 1024, 1024);
+  CodeBuffer   buffer("deopt_blob", 2048, 2048);
   MacroAssembler* masm = new MacroAssembler(&buffer);
   int frame_size_in_words;
   OopMap* map = NULL;
@@ -2397,6 +2602,29 @@ void SharedRuntime::generate_deopt_blob(
   __ push(Deoptimization::Unpack_reexecute);
   __ jmp(cont);
 
+  // Stack compression case
+  int stack_compression_offset = __ pc() - start;
+  Label stack_comp_cont; // Where to continue after we fetch the unroll info.
+  {
+    OopMap* comp_map = RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words);
+    __ push(Deoptimization::Unpack_stack_compression);
+    __ empty_FPU_stack();
+    // Call C code.  Need thread and this frame, but NOT official VM entry
+    // crud.  We cannot block on this call, no GC can happen.
+    __ get_thread(rcx);
+    __ push(rcx);
+    // fetch_unroll_info_stack_compression needs to call last_java_frame()
+    __ set_last_Java_frame(rcx, noreg, noreg, NULL);
+    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info_stack_compression)));
+    oop_maps->add_gc_map( __ pc()-start, comp_map);
+    // If fetch_unroll_info_stack_compression returned NULL it means
+    // stack compression failed. Throw an exception.
+    __ cmpl(rax, (intptr_t)0);
+    __ jcc(Assembler::notEqual, stack_comp_cont);
+    __ pop(rax); __ pop(rax);
+    RegisterSaver::restore_live_registers(masm);
+    __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
+  }
   int exception_offset = __ pc() - start;
 
   // Prolog for exception case
@@ -2476,6 +2704,8 @@ void SharedRuntime::generate_deopt_blob(
 
   oop_maps->add_gc_map( __ pc()-start, map);
 
+  __ bind(stack_comp_cont); // coming from fetch_unroll_info_stack_compression.
+
   // Discard arg to fetch_unroll_info
   __ pop(rcx);
 
@@ -2536,7 +2766,7 @@ void SharedRuntime::generate_deopt_blob(
 
   // Stack bang to make sure there's enough room for these interpreter frames.
   if (UseStackBanging) {
-    __ movl(rbx, Address(rdi ,Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes()));
+    __ movl(rbx, Address(rdi, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes()));
     __ bang_stack_size(rbx, rcx);
   }
 
@@ -2556,9 +2786,9 @@ void SharedRuntime::generate_deopt_blob(
   // Pick up the initial fp we should save
   __ movptr(rbp, Address(rdi, Deoptimization::UnrollBlock::initial_fp_offset_in_bytes()));
 
-  // Now adjust the caller's stack to make up for the extra locals
-  // but record the original sp so that we can save it in the skeletal interpreter
-  // frame and the stack walking of interpreter_sender will get the unextended sp
+  // Now adjust the caller's stack to make up for the extra locals but record
+  // the original sp so that we can save it in the skeletal interpreter frame
+  // and the stack walking of interpreter_sender will get the unextended sp
   // value and not the "real" sp value.
 
   Address sp_temp(rdi, Deoptimization::UnrollBlock::sender_sp_temp_offset_in_bytes());
@@ -2674,7 +2904,7 @@ void SharedRuntime::generate_deopt_blob(
   // make sure all code is generated
   masm->flush();
 
-  _deopt_blob = DeoptimizationBlob::create( &buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words);
+  _deopt_blob = DeoptimizationBlob::create( &buffer, oop_maps, 0, exception_offset, reexecute_offset, stack_compression_offset, frame_size_in_words);
   _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
 }
 
@@ -2959,7 +3189,7 @@ static SafepointBlob* generate_handler_b
 // but since this is generic code we don't know what they are and the caller
 // must do any gc of the args.
 //
-static RuntimeStub* generate_resolve_blob(address destination, const char* name) {
+static RuntimeStub* generate_resolve_blob(address destination, const char* name, bool is_tail_call=false) {
   assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before");
 
   // allocate space for the code
@@ -3014,10 +3244,24 @@ static RuntimeStub* generate_resolve_blo
 
   __ movptr(Address(rsp, RegisterSaver::rax_offset() * wordSize), rax);
 
-  RegisterSaver::restore_live_registers(masm);
+  //RegisterSaver::restore_live_registers(masm);
 
   // We are back the the original state on entry and ready to go.
-
+  /* Don't need to do the following here since this is performed by jumping to
+    verified_static_tail_call_code_entry.
+    if (is_tail_call) {
+    Label normal_call;
+    __ movl(rbx, Address(rbx, methodOopDesc::code_offset()));
+    __ cmpl(rax, Address(rbx, nmethod::verified_entry_point_offset()));
+    __ jcc(Assembler::notEqual, normal_call);
+    // Remove caller's stack frame.
+    RegisterSaver::restore_live_registers(masm);
+    __ leave();
+    // Tail call.
+    __ jmp(rax);
+    __ bind (normal_call);
+    }*/
+  RegisterSaver::restore_live_registers(masm);
   __ jmp(rax);
 
   // Pending exception after the safepoint
@@ -3059,6 +3303,23 @@ void SharedRuntime::generate_stubs() {
   _resolve_static_call_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address, SharedRuntime::resolve_static_call_C),
                                         "resolve_static_call");
 
+  _resolve_static_tail_call_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address, SharedRuntime::resolve_static_tail_call_C),
+                                                         "resolve_static_tail_call", true);
+
+  _resolve_not_sibling_static_tail_call_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address, SharedRuntime::resolve_not_sibling_static_tail_call_C),
+                                                         "resolve_not_sibling_static_tail_call", true);
+
+  _resolve_virtual_tail_call_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address, SharedRuntime::resolve_virtual_tail_call_C),
+                                                          "resolve_virtual_tail_call", true);
+
+  _resolve_not_sibling_virtual_tail_call_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address, SharedRuntime::resolve_not_sibling_virtual_tail_call_C),
+                                                          "resolve_not_sibling_virtual_tail_call", true);
+
+  _resolve_opt_virtual_tail_call_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address, SharedRuntime::resolve_opt_virtual_tail_call_C),
+                                                          "resolve_opt_virtual_tail_call", true);
+  _resolve_opt_not_sibling_virtual_tail_call_blob =  generate_resolve_blob(CAST_FROM_FN_PTR(address, SharedRuntime::resolve_opt_not_sibling_virtual_tail_call_C),
+                                                          "resolve_opt_not_sibling_virtual_tail_call", true);
+
   _polling_page_safepoint_handler_blob =
     generate_handler_blob(CAST_FROM_FN_PTR(address,
                    SafepointSynchronize::handle_polling_page_exception), false);
@@ -3068,6 +3329,10 @@ void SharedRuntime::generate_stubs() {
                    SafepointSynchronize::handle_polling_page_exception), true);
 
   generate_deopt_blob();
+  // We now have the stack compression entry point and can set its address in
+  // the interpreter.
+  NativeJump * jmp = nativeJump_at(TemplateInterpreter::tail_call_handle_stack_overflow_patch_addr());
+  jmp->set_jump_destination(_deopt_blob->stack_compression());
 #ifdef COMPILER2
   generate_uncommon_trap_blob();
 #endif // COMPILER2
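To make the two-stage wiring above easier to follow: the interpreter's tail-call stack-overflow handler (emitted later in this patch in templateInterpreter_x86_32.cpp) jumps to a placeholder address and records the location of that jump; only here, once generate_deopt_blob() has produced the stack-compression entry, is the jump redirected. A minimal conceptual sketch of what redirecting such a jump amounts to on x86, assuming a plain five-byte `jmp rel32`; the real code goes through HotSpot's NativeJump, which also handles instruction-cache coherence:

    #include <cstdint>
    #include <cstring>

    // Conceptual sketch only, not HotSpot's NativeJump.
    static void redirect_jmp_rel32(unsigned char* instr, unsigned char* dest) {
      // instr[0] is assumed to be 0xE9 (jmp rel32); the displacement is relative
      // to the address of the next instruction, i.e. instr + 5.
      int32_t disp = (int32_t)(dest - (instr + 5));
      std::memcpy(instr + 1, &disp, sizeof(disp));
    }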
diff -r aa0c48844632 -r a7d54b98ca4a src/cpu/x86/vm/stubGenerator_x86_32.cpp
--- a/src/cpu/x86/vm/stubGenerator_x86_32.cpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/cpu/x86/vm/stubGenerator_x86_32.cpp	Wed Jun 03 16:27:17 2009 +0200
@@ -2071,12 +2071,28 @@ class StubGenerator: public StubCodeGene
   // exceptions (e.g., NullPointerException or AbstractMethodError on entry) are
   // either at call sites or otherwise assume that stack unwinding will be initiated,
   // so caller saved registers were assumed volatile in the compiler.
+  //
+  // The tail call stack overflow stub saves the register parameters to the stack.
+  enum tail_call_safe_layout {
+    xmm0_off=0, xmm0H_off,
+    xmm1_off, xmm1H_off,
+    xmm2_off, xmm2H_off,
+    xmm3_off, xmm3H_off,
+    xmm4_off, xmm4H_off,
+    xmm5_off, xmm5H_off,
+    xmm6_off, xmm6H_off,
+    xmm7_off, xmm7H_off,
+    rdx_off,
+    rcx_off,
+    tail_call_safe_layout_size
+  };
   address generate_throw_exception(const char* name, address runtime_entry,
-                                   bool restore_saved_exception_pc) {
+                                   bool restore_saved_exception_pc,
+                                   bool is_tail_call_stack_overflow_stub = false) {
 
-    int insts_size = 256;
+    int insts_size = is_tail_call_stack_overflow_stub ? 512 : 256;
     int locs_size  = 32;
-
+    int frame_size = framesize;
     CodeBuffer code(name, insts_size, locs_size);
     OopMapSet* oop_maps  = new OopMapSet();
     MacroAssembler* masm = new MacroAssembler(&code);
@@ -2095,7 +2111,22 @@ class StubGenerator: public StubCodeGene
     }
 
     __ enter(); // required for proper stackwalking of RuntimeStub frame
-
+    if (is_tail_call_stack_overflow_stub) {
+      // Save parameter registers:
+      // rcx, rdx, xmm0 ... xmm7.
+      __ push(rcx);
+      __ push(rdx);
+      __ subptr(rsp,16*wordSize); // Push FPU registers space
+      assert(UseSSE>=2, "assume sse2 for now");
+      __ movdbl(Address(rsp,xmm0_off*wordSize),xmm0);
+      __ movdbl(Address(rsp,xmm1_off*wordSize),xmm1);
+      __ movdbl(Address(rsp,xmm2_off*wordSize),xmm2);
+      __ movdbl(Address(rsp,xmm3_off*wordSize),xmm3);
+      __ movdbl(Address(rsp,xmm4_off*wordSize),xmm4);
+      __ movdbl(Address(rsp,xmm5_off*wordSize),xmm5);
+      __ movdbl(Address(rsp,xmm6_off*wordSize),xmm6);
+      __ movdbl(Address(rsp,xmm7_off*wordSize),xmm7);
+    }
     // pc and rbp, already pushed
     __ subptr(rsp, (framesize-2) * wordSize); // prolog
 
@@ -2122,9 +2153,33 @@ class StubGenerator: public StubCodeGene
     __ get_thread(java_thread);
 
     __ reset_last_Java_frame(java_thread, true, false);
-
+    if (is_tail_call_stack_overflow_stub) {
+      // Restore saved parameter registers
+      __ addptr(rsp, (framesize-2) * wordSize);
+      
+      __ movdbl(xmm0,Address(rsp,xmm0_off*wordSize));
+      __ movdbl(xmm1,Address(rsp,xmm1_off*wordSize));
+      __ movdbl(xmm2,Address(rsp,xmm2_off*wordSize));
+      __ movdbl(xmm3,Address(rsp,xmm3_off*wordSize));
+      __ movdbl(xmm4,Address(rsp,xmm4_off*wordSize));
+      __ movdbl(xmm5,Address(rsp,xmm5_off*wordSize));
+      __ movdbl(xmm6,Address(rsp,xmm6_off*wordSize));
+      __ movdbl(xmm7,Address(rsp,xmm7_off*wordSize));
+      __ addptr(rsp, 16*wordSize);
+      __ pop(rdx);
+      __ pop(rcx);
+      // add tail call parameter slots
+      frame_size += tail_call_safe_layout_size;
+    }
     __ leave(); // required for proper stackwalking of RuntimeStub frame
-
+    
+    if (is_tail_call_stack_overflow_stub) {
+      Label L;
+      __ cmpptr(Address(java_thread, JavaThread::tail_call_do_stack_compression_offset()), (int32_t)NULL_WORD);
+      __ jcc(Assembler::equal, L);
+      __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->stack_compression()));
+      __ bind(L);
+    }
     // check for pending exceptions
 #ifdef ASSERT
     Label L;
@@ -2135,8 +2190,8 @@ class StubGenerator: public StubCodeGene
 #endif /* ASSERT */
     __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
 
-
-    RuntimeStub* stub = RuntimeStub::new_runtime_stub(name, &code, frame_complete, framesize, oop_maps, false);
+    assert( __ pc() - start < insts_size, "Enough space for instructions");
+    RuntimeStub* stub = RuntimeStub::new_runtime_stub(name, &code, frame_complete, frame_size, oop_maps, false);
     return stub->entry_point();
   }
 
@@ -2210,6 +2265,9 @@ class StubGenerator: public StubCodeGene
     StubRoutines::_throw_NullPointerException_entry        = generate_throw_exception("NullPointerException throw_exception",         CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException), true);
     StubRoutines::_throw_NullPointerException_at_call_entry= generate_throw_exception("NullPointerException at call throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException_at_call), false);
     StubRoutines::_throw_StackOverflowError_entry          = generate_throw_exception("StackOverflowError throw_exception",           CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError),   false);
+    StubRoutines::_throw_TailCallException_entry           = generate_throw_exception("TailCallException throw_exception",           CAST_FROM_FN_PTR(address, SharedRuntime::throw_TailCallException),   false);
+
+    StubRoutines::_tail_call_handle_stack_overflow_entry   = generate_throw_exception("Tail call handle stack overflow", CAST_FROM_FN_PTR(address, SharedRuntime::tail_call_handle_stack_overflow), false, true);
 
     //------------------------------------------------------------------------------------------------------------------------
     // entry points that are platform specific
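For reference, the save area built above by the two pushes and the 16-word subtraction works out as follows on x86_32; a sketch of the byte offsets implied by tail_call_safe_layout, assuming wordSize == 4:

    // After `subptr(rsp, 16*wordSize)` in the tail-call stack-overflow stub:
    //   rsp +  0 .. 63 : xmm0 .. xmm7, one two-slot (8-byte) entry each
    //   rsp + 64       : rdx   (rdx_off == 16)
    //   rsp + 68       : rcx   (rcx_off == 17, pushed first, so highest)
    // tail_call_safe_layout_size == 18 words, which is why frame_size is bumped
    // by that amount before the RuntimeStub is created.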
diff -r aa0c48844632 -r a7d54b98ca4a src/cpu/x86/vm/templateInterpreterGenerator_x86.hpp
--- a/src/cpu/x86/vm/templateInterpreterGenerator_x86.hpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/cpu/x86/vm/templateInterpreterGenerator_x86.hpp	Wed Jun 03 16:27:17 2009 +0200
@@ -24,6 +24,6 @@
 
  protected:
 
- void generate_fixed_frame(bool native_call);
+ void generate_fixed_frame(bool native_call, bool disable_osr=false);
 
  // address generate_asm_interpreter_entry(bool synchronized);
diff -r aa0c48844632 -r a7d54b98ca4a src/cpu/x86/vm/templateInterpreter_x86_32.cpp
--- a/src/cpu/x86/vm/templateInterpreter_x86_32.cpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/cpu/x86/vm/templateInterpreter_x86_32.cpp	Wed Jun 03 16:27:17 2009 +0200
@@ -34,6 +34,53 @@ const int locals_offset = frame::interpr
 const int locals_offset = frame::interpreter_frame_locals_offset * wordSize;
 
 //------------------------------------------------------------------------------------------------------------------------
+
+address TemplateInterpreterGenerator::generate_tail_call_stack_overflow_handler() {
+  address entry = __ pc();
+  // Check whether we should compress.
+  __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::tail_call_handle_stack_overflow));
+  // Check result of previous operation.
+  {
+    Label L;
+    __ get_thread(rax);
+    __ cmpptr(Address(rax, JavaThread::tail_call_do_stack_compression_offset()), (int32_t)NULL_WORD);
+    __ jcc(Assembler::equal, L);
+  // When we get here a new interpreter frame sits on the stack. The
+  // expression stack is empty and no interpreter return address is on the stack.
+  // Stack compression expects this frame to be removed, so we remove it here.
+    __ leave();
+    address patch_addr = __ pc();
+    TemplateInterpreter::_tail_call_handle_stack_overflow_patch_address = patch_addr;
+    __ jump(RuntimeAddress((address) -1));
+    __ bind(L);
+  }
+  // Note: There should be a minimal interpreter frame set up when stack
+  // overflow occurs since we check explicitly for it now.
+  //
+#ifdef ASSERT
+  { Label L;
+    __ lea(rax, Address(rbp,
+                frame::interpreter_frame_monitor_block_top_offset * wordSize));
+    __ cmpptr(rax, rsp);  // rax, = maximal rsp for current rbp,
+                        //  (stack grows negative)
+    __ jcc(Assembler::aboveEqual, L); // check if frame is complete
+    __ stop ("interpreter frame not set up");
+    __ bind(L);
+  }
+#endif // ASSERT
+  
+  // Restore bcp under the assumption that the current frame is still
+  // interpreted
+  __ restore_bcp();
+
+  // expression stack must be empty before entering the VM if an exception
+  // happened
+  __ empty_expression_stack();
+  __ empty_FPU_stack();
+  // throw exception
+  __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_StackOverflowError));
+  return entry;
+}
 
 address TemplateInterpreterGenerator::generate_StackOverflowError_handler() {
   address entry = __ pc();
@@ -168,7 +215,11 @@ address TemplateInterpreterGenerator::ge
 
   Label interpreter_entry;
   address compiled_entry = __ pc();
-
+  // Tail call invokes carry a wide prefix, so their length is one byte larger.
+  // TODO: Fix the tail call invokedynamic path, i.e. offsets relative to rsi and
+  // bcp offsets.
+  bool is_tail_call = (step == 4) || (step == 6);
+  int bcp_offset = is_tail_call ? 2 : 1;
 #ifdef COMPILER2
   // The FPU stack is clean if UseSSE >= 2 but must be cleaned in other cases
   if ((incoming_state == ftos && UseSSE < 1) || (incoming_state == dtos && UseSSE < 2)) {
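The offset adjustment above follows from the bytecode layout at a tail-call site; a sketch, assuming the `wide`-prefixed encoding this patch uses for tail-call invokes:

    // Return path after a regular invoke (bcp_offset == 1):
    //   bcp+0 : invokevirtual/special/static (3 bytes) or invokeinterface (5 bytes)
    //   bcp+1 : 2-byte constant-pool-cache index
    // Return path after a tail-call invoke (bcp_offset == 2):
    //   bcp+0 : wide prefix
    //   bcp+1 : the invoke opcode
    //   bcp+2 : 2-byte constant-pool-cache index
    // Hence step == 4 (wide + 3-byte invoke) or step == 6 (wide + invokeinterface)
    // identifies a tail-call return address, and the cache is fetched at bcp+2.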
@@ -262,7 +313,7 @@ address TemplateInterpreterGenerator::ge
     __ cmpb(Address(rsi, 0), Bytecodes::_invokedynamic);
     __ jcc(Assembler::equal, L_giant_index);
   }
-  __ get_cache_and_index_at_bcp(rbx, rcx, 1, false);
+  __ get_cache_and_index_at_bcp(rbx, rcx, bcp_offset, false);
   __ bind(L_got_cache);
   if (unbox && state == atos) {
     // insert a casting conversion, to keep verifier sane
@@ -290,6 +341,7 @@ address TemplateInterpreterGenerator::ge
     __ pop(rax);
     __ bind(L_ok);
   }
+
   __ movl(rbx, Address(rbx, rcx,
                     Address::times_ptr, constantPoolCacheOopDesc::base_offset() +
                     ConstantPoolCacheEntry::flags_offset()));
@@ -552,7 +604,6 @@ void InterpreterGenerator::generate_stac
   // for the additional locals.
   __ cmpl(rdx, (page_size - overhead_size)/Interpreter::stackElementSize());
   __ jcc(Assembler::belowEqual, after_frame_check);
-
   // compute rsp as if this were going to be the last frame on
   // the stack before the red zone
 
@@ -566,10 +617,8 @@ void InterpreterGenerator::generate_stac
 
   const Address stack_base(thread, Thread::stack_base_offset());
   const Address stack_size(thread, Thread::stack_size_offset());
-
   // locals + overhead, in bytes
   __ lea(rax, Address(noreg, rdx, Interpreter::stackElementScale(), overhead_size));
-
 #ifdef ASSERT
   Label stack_base_okay, stack_size_okay;
   // verify that thread stack base is non-zero
@@ -651,7 +700,7 @@ void InterpreterGenerator::lock_method(v
 // Generate a fixed interpreter frame. This is identical setup for interpreted methods
 // and for native methods hence the shared code.
 
-void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) {
+void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call, bool disable_osr) {
   // initialize fixed part of activation frame
   __ push(rax);                                       // save return address
   __ enter();                                         // save old & set new rbp,
@@ -683,6 +732,11 @@ void TemplateInterpreterGenerator::gener
   } else {
     __ push(rsi);                                     // set bcp
     }
+  // OSR token: non-zero temporarily disables on-stack replacement for this frame.
+  if (disable_osr)
+    __ push(1);
+  else
+    __ push(0);
   __ push(0);                                         // reserve word for pointer to expression stack bottom
   __ movptr(Address(rsp, 0), rsp);                    // set expression stack bottom
 }
@@ -873,7 +927,6 @@ address InterpreterGenerator::generate_n
 
   if (inc_counter) __ movl(rcx, invocation_counter);  // (pre-)fetch invocation count
   // initialize fixed part of activation frame
-
   generate_fixed_frame(true);
 
   // make sure method is native & not abstract
@@ -1318,7 +1371,39 @@ address InterpreterGenerator::generate_n
 
   if (inc_counter) __ movl(rcx, invocation_counter);  // (pre-)fetch invocation count
   // initialize fixed part of activation frame
+  // Tail calls want to disable OSR if we are coming from a compiled frame that
+  // tried to do a non-sibling tail call and failed because the parent was not
+  // interpreted.
+  Label disable_osr_frame_generated;
+  if (TailCalls) {
+    Register temp = rax;
+    Label normal_frame;
+    // save temp register
+    __ push(temp);
+    __ get_thread(temp);
+    const Address disable_osr_for_frame(temp,
+          in_bytes(JavaThread::disable_osr_for_frame_offset()));
+    __ movbool(temp, disable_osr_for_frame);
+    __ testbool(temp);
+    __ jcc(Assembler::zero, normal_frame);
+    // Generate a frame which temporarily disables OSR.
+    __ pop(temp); // restore register
+    generate_fixed_frame(false, true);
+    if (TraceTailCalls) __ warn("saw disabled osr");
+    __ push(temp);
+    // Turn OSR back on (that is, if DoOnStackReplacement is set to true).
+    __ get_thread(temp);
+    __ movbool(disable_osr_for_frame, false);
+    __ pop(temp);
+    __ jmp(disable_osr_frame_generated);
+    __ bind(normal_frame);
+    // restore register
+    __ pop(temp);
+  }
+      
   generate_fixed_frame(false);
+  __ bind(disable_osr_frame_generated);
+  // generate_fixed_frame(false);
 
   // make sure method is not native & not abstract
 #ifdef ASSERT
@@ -1361,7 +1446,7 @@ address InterpreterGenerator::generate_n
   }
   Label continue_after_compile;
   __ bind(continue_after_compile);
-
+  
   bang_stack_shadow_pages(false);
 
   // reset the _do_not_unlock_if_synchronized flag
@@ -1573,6 +1658,9 @@ int AbstractInterpreter::layout_activati
     assert(caller->unextended_sp() == interpreter_frame->interpreter_frame_sender_sp(), "Frame not properly walkable");
     assert(caller->sp() == interpreter_frame->sender_sp(), "Frame not properly walkable(2)");
 #endif
+
+    // set osr field (for tail calls)
+    //interpreter_frame->interpreter_frame_set_osr(0);
 
     interpreter_frame->interpreter_frame_set_method(method);
     // NOTE the difference in using sender_sp and interpreter_frame_sender_sp
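The pieces added to this file cooperate with the thread flag and the backedge check added below in templateTable_x86_32.cpp; in outline the OSR-disable handshake is (a sketch of the intended flow, not patch code):

    // 1. Compiled code that attempted a non-sibling tail call but found a
    //    non-interpreted parent causes JavaThread::disable_osr_for_frame to be
    //    set before re-entering the callee through the interpreter.
    // 2. generate_normal_entry sees the flag, builds the frame via
    //    generate_fixed_frame(false, /*disable_osr=*/true) and clears the flag.
    // 3. That frame now carries a non-zero OSR token at
    //    frame::interpreter_frame_osr_offset.
    // 4. On backedge-counter overflow, TemplateTable::branch reads the token and,
    //    if it is set, keeps interpreting instead of requesting on-stack
    //    replacement for this one activation.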
diff -r aa0c48844632 -r a7d54b98ca4a src/cpu/x86/vm/templateTable_x86_32.cpp
--- a/src/cpu/x86/vm/templateTable_x86_32.cpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/cpu/x86/vm/templateTable_x86_32.cpp	Wed Jun 03 16:27:17 2009 +0200
@@ -1699,6 +1699,23 @@ void TemplateTable::branch(bool is_jsr, 
 
       // invocation counter overflow
       __ bind(backedge_counter_overflow);
+
+      if (TailCalls) {
+        Label continue_osr;
+        // Tail calls might disable OSR during the execution of a method in
+        // the interpreter.
+        __ push (rcx);
+        Address disable_osr_offset(rbp, frame::interpreter_frame_osr_offset * wordSize);
+        __ movl(rcx, disable_osr_offset);
+        __ testl(rcx, rcx);
+        __ jcc(Assembler::zero, continue_osr);
+        __ pop(rcx);
+        if (TraceTailCalls) __ warn("continue with loop because osr disabled");
+        __ jmp(dispatch);
+        __ bind(continue_osr);
+        if (TraceTailCalls) __ warn("backedge_counter_overflow: may be doing osr.");
+        __ pop(rcx);
+      }
       __ negptr(rdx);
       __ addptr(rdx, rsi);        // branch bcp
       call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::frequency_counter_overflow), rdx);
@@ -1727,7 +1744,7 @@ void TemplateTable::branch(bool is_jsr, 
       call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::OSR_migration_begin));
       // rax, is OSR buffer, move it to expected parameter location
       __ mov(rcx, rax);
-
+      if (TraceTailCalls) __ warn("Performing OSR");
       // pop the interpreter frame
       __ movptr(rdx, Address(rbp, frame::interpreter_frame_sender_sp_offset * wordSize)); // get sender sp
       __ leave();                                // remove frame anchor
@@ -2103,7 +2120,7 @@ void TemplateTable::volatile_barrier(Ass
   __ membar(order_constraint);
 }
 
-void TemplateTable::resolve_cache_and_index(int byte_no, Register Rcache, Register index) {
+void TemplateTable::resolve_cache_and_index(int byte_no, Register Rcache, Register index, bool is_tail_call) {
   assert(byte_no == 1 || byte_no == 2, "byte_no out of range");
   bool is_invokedynamic = (bytecode() == Bytecodes::_invokedynamic);
 
@@ -2113,7 +2130,10 @@ void TemplateTable::resolve_cache_and_in
 
   const int shift_count = (1 + byte_no)*BitsPerByte;
   Label resolved;
-  __ get_cache_and_index_at_bcp(Rcache, index, 1, is_invokedynamic);
+  if (is_tail_call)
+    __ get_cache_and_index_at_bcp(Rcache, index, 2, is_invokedynamic); // Skip the wide instruction.
+  else
+    __ get_cache_and_index_at_bcp(Rcache, index, 1, is_invokedynamic);
   if (is_invokedynamic) {
     // we are resolved if the f1 field contains a non-null CallSite object
     __ cmpptr(Address(Rcache, index, Address::times_ptr, constantPoolCacheOopDesc::base_offset() + ConstantPoolCacheEntry::f1_offset()), (int32_t) NULL_WORD);
@@ -2144,7 +2164,10 @@ void TemplateTable::resolve_cache_and_in
   __ movl(temp, (int)bytecode());
   __ call_VM(noreg, entry, temp);
   // Update registers with resolved info
-  __ get_cache_and_index_at_bcp(Rcache, index, 1, is_invokedynamic);
+  if (is_tail_call)
+    __ get_cache_and_index_at_bcp(Rcache, index, 2, is_invokedynamic);
+  else
+    __ get_cache_and_index_at_bcp(Rcache, index, 1, is_invokedynamic);
   __ bind(resolved);
 }
 
@@ -2174,6 +2197,7 @@ void TemplateTable::load_field_cp_cache_
 }
 
 void TemplateTable::load_invoke_cp_cache_entry(int byte_no,
+                                               bool is_tail_call,
                                                Register method,
                                                Register itable_index,
                                                Register flags,
@@ -2200,13 +2224,30 @@ void TemplateTable::load_invoke_cp_cache
   const int index_offset = in_bytes(constantPoolCacheOopDesc::base_offset() +
                                     ConstantPoolCacheEntry::f2_offset());
 
-  resolve_cache_and_index(byte_no, cache, index);
+  resolve_cache_and_index(byte_no, cache, index, is_tail_call);
 
   __ movptr(method, Address(cache, index, Address::times_ptr, method_offset));
   if (itable_index != noreg) {
     __ movptr(itable_index, Address(cache, index, Address::times_ptr, index_offset));
   }
   __ movl(flags , Address(cache, index, Address::times_ptr, flags_offset ));
+}
+
+void TemplateTable::load_invoke_cp_cache_flags(int byte_no,
+                                               Register Rcache,
+                                               Register Rindex,
+                                               Register OutFlags) {
+  // Rcache and Rindex are clobbered. OutFlags contains the flags on return.
+  assert_different_registers(OutFlags, Rcache, Rindex);
+ 
+  const int flags_offset = in_bytes(constantPoolCacheOopDesc::base_offset() +
+                                    ConstantPoolCacheEntry::flags_offset());
+  // Access constant pool cache fields. Since this function is only called from
+  // tail_call, pass is_tail_call=true.
+  resolve_cache_and_index(byte_no, Rcache, Rindex, true);
+
+  assert(wordSize == 4, "adjust code below");
+  __ movl(OutFlags , Address(Rcache, Rindex, Address::times_4, flags_offset ));
 }
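The consumer of this helper in this patch is TemplateTable::tail_call further down, which needs the callee's parameter size; per the andl/shll pair used there, the low byte of the cp-cache flags word holds that size in stack slots. A small sketch of the same computation in C++, with 4-byte slots assumed as on x86_32:

    // Sketch: derive the argument byte count from a ConstantPoolCacheEntry flags
    // word the way TemplateTable::tail_call does (andl 0xFF, shll 2).
    static int tail_call_param_bytes(int cp_cache_flags) {
      int param_slots = cp_cache_flags & 0xFF;  // low byte: parameter size in words
      return param_slots * 4;                   // wordSize == 4 on x86_32
    }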
 
 
@@ -2260,7 +2301,7 @@ void TemplateTable::getfield_or_static(i
   const Register off   = rbx;
   const Register flags = rax;
 
-  resolve_cache_and_index(byte_no, cache, index);
+  resolve_cache_and_index(byte_no, cache, index, false);
   jvmti_post_field_access(cache, index, is_static, false);
   load_field_cp_cache_entry(obj, cache, index, off, flags, is_static);
 
@@ -2469,7 +2510,7 @@ void TemplateTable::putfield_or_static(i
   const Register off   = rbx;
   const Register flags = rax;
 
-  resolve_cache_and_index(byte_no, cache, index);
+  resolve_cache_and_index(byte_no, cache, index, false);
   jvmti_post_field_mod(cache, index, is_static);
   load_field_cp_cache_entry(obj, cache, index, off, flags, is_static);
 
@@ -2889,7 +2930,7 @@ void TemplateTable::count_calls(Register
 }
 
 
-void TemplateTable::prepare_invoke(Register method, Register index, int byte_no) {
+void TemplateTable::prepare_invoke(Register method, Register index, int byte_no, bool is_tail_call) {
   bool is_invdyn_bootstrap = (byte_no < 0);
   if (is_invdyn_bootstrap)  byte_no = -byte_no;
 
@@ -2912,7 +2953,7 @@ void TemplateTable::prepare_invoke(Regis
   // save 'interpreter return address'
   __ save_bcp();
 
-  load_invoke_cp_cache_entry(byte_no, method, index, flags, is_invokevirtual);
+  load_invoke_cp_cache_entry(byte_no, is_tail_call, method, index, flags, is_invokevirtual);
 
   // load receiver if needed (note: no return address pushed yet)
   if (load_receiver) {
@@ -2943,14 +2984,14 @@ void TemplateTable::prepare_invoke(Regis
   ConstantPoolCacheEntry::verify_tosBits();
   // load return address
   {
+    int instruction_size = is_invokeinterface ? 5 : 3;
+    if (is_tail_call) instruction_size += 1;
+    ExternalAddress table((address)Interpreter::return_addrs_by_index_table(instruction_size));
     address table_addr;
-    if (is_invdyn_bootstrap)
+    if (is_invdyn_bootstrap) {
       table_addr = (address)Interpreter::return_5_unbox_addrs_by_index_table();
-    else if (is_invokeinterface || is_invokedynamic)
-      table_addr = (address)Interpreter::return_5_addrs_by_index_table();
-    else
-      table_addr = (address)Interpreter::return_3_addrs_by_index_table();
-    ExternalAddress table(table_addr);
+      table = ExternalAddress(table_addr);
+    }
     __ movptr(flags, ArrayAddress(table, Address(noreg, flags, Address::times_ptr)));
   }
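A short summary of the return-address selection above; the sizes are those used elsewhere in this patch (the invokedynamic case is still covered by the TODO near the top of the file):

    // instruction_size passed to Interpreter::return_addrs_by_index_table():
    //   invokevirtual / invokespecial / invokestatic : 3   (tail call, wide: 4)
    //   invokeinterface                              : 5   (tail call, wide: 6)
    // The table is indexed by the TOS state taken from `flags`, and the selected
    // entry is the interpreter return point generated with the matching step, so
    // it re-fetches the cp cache at the right bcp offset (see the return-entry
    // change earlier in this patch).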
 
@@ -2965,15 +3006,46 @@ void TemplateTable::prepare_invoke(Regis
   }
 }
 
+// Load the klass holding the method passed in register 'method' to register
+// klass.
+void TemplateTable::load_pool_holder_of_method(Register method, Register klass) {
+  __ verify_oop(method);
+  __ movl (klass, Address(method, methodOopDesc::constants_offset() ));
+  __ verify_oop(klass);
+  __ movl (klass, Address(klass, constantPoolOopDesc::pool_holder_offset_in_bytes()));
+  __ verify_oop(klass);
+}
+
+void TemplateTable::jcc_protection_domain_mismatch(Register temp, Register temp2, Register recv_method, Label& mismatch_cont) {
+  // Check protection domains.
+  // Get receiver method PD.
+  load_pool_holder_of_method(recv_method, temp); 
+  __ verify_oop(temp);
+  __ movl (temp, Address(temp, instanceKlass::protection_domain_offset() * wordSize));
+  __ verify_oop(temp);
+  // Get caller PD.
+  __ movl (temp2, Address(rbp, frame::interpreter_frame_method_offset * wordSize));
+  __ verify_oop(temp2);
+  load_pool_holder_of_method(temp2, temp2);
+  __ movl (temp2, Address(temp2, instanceKlass::protection_domain_offset() * wordSize));
+  __ verify_oop(temp2);
+  __ cmpl(temp, temp2);
+  __ jcc(Assembler::notEqual, mismatch_cont);
+}
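Expressed in C++, the check emitted above compares the protection domain of the callee's holder with that of the interpreted caller's holder; a sketch in which the named accessors stand in for the raw offset loads (illustrative only):

    // `callee_method` corresponds to the `recv_method` register argument above;
    // `caller_method` is read from the current interpreter frame's method slot.
    oop callee_pd = instanceKlass::cast(
        callee_method->constants()->pool_holder())->protection_domain();
    oop caller_pd = instanceKlass::cast(
        caller_method->constants()->pool_holder())->protection_domain();
    if (callee_pd != caller_pd) {
      // Not a safe tail call: either continue as a regular call
      // (TailCallsStackCompression) or raise a TailCallException.
    }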
 
 void TemplateTable::invokevirtual_helper(Register index, Register recv,
-                        Register flags) {
+                                         Register flags, int byte_no, bool is_tail_call) {
 
   // Uses temporary registers rax, rdx
   assert_different_registers(index, recv, rax, rdx);
 
   // Test for an invoke of a final method
   Label notFinal;
+  // Tail call executed as a normal call (protection domain mismatch or parent
+  // frame is not an interpreter frame)
+  Label regular_call_continuation;
+  Label protection_domain_mismatch_cont;
+
   __ movl(rax, flags);
   __ andl(rax, (1 << ConstantPoolCacheEntry::vfinalMethod));
   __ jcc(Assembler::zero, notFinal);
@@ -2989,8 +3061,16 @@ void TemplateTable::invokevirtual_helper
 
   // profile this call
   __ profile_final_call(rax);
-
-  __ jump_from_interpreted(method, rax);
+  
+  // Prepare for tail call.
+  if (is_tail_call) {
+    if (TailCallsStackCompression)
+      jcc_protection_domain_mismatch(rax, rdx, method, regular_call_continuation);
+    else
+      jcc_protection_domain_mismatch(rax, rdx, method, protection_domain_mismatch_cont);
+    tail_call(byte_no, regular_call_continuation);
+  }
+  __ jump_from_interpreted(method, rax, is_tail_call);
 
   __ bind(notFinal);
 
@@ -3007,41 +3087,220 @@ void TemplateTable::invokevirtual_helper
   const int base = instanceKlass::vtable_start_offset() * wordSize;
   assert(vtableEntry::size() * wordSize == 4, "adjust the scaling in the code below");
   __ movptr(method, Address(rax, index, Address::times_ptr, base + vtableEntry::method_offset_in_bytes()));
-  __ jump_from_interpreted(method, rdx);
-}
-
+  
+  if (is_tail_call) {
+    // Check protection domain.
+    if (TailCallsStackCompression)
+      jcc_protection_domain_mismatch(rax, rdx, method, regular_call_continuation);
+    else
+      jcc_protection_domain_mismatch(rax, rdx, method, protection_domain_mismatch_cont);
+    // Shift arguments onto the caller's outgoing parameter area and
+    // pop the frame.
+    tail_call(byte_no, regular_call_continuation);
+  }
+  __ jump_from_interpreted(method, rdx, is_tail_call);
+
+  // We arrive here if the tail call should be performed as a regular call.
+  __ bind(regular_call_continuation);
+  __ jump_from_interpreted(method, rdx, false);
+  // Tail call exception on protection domain mismatch
+  __ bind(protection_domain_mismatch_cont);
+  __ restore_bcp();
+  __ call_VM(noreg, CAST_FROM_FN_PTR(address,
+                                     InterpreterRuntime::throw_TailCallException));
+}
+
+void TemplateTable::tail_call(int byte_no, Label& regular_call_continuation) {
+  Register temp = rdx;
+  // Only tail call if the parent frame is interpreted. Otherwise we might have a
+  // c2i adapter sitting around that nobody takes care of, and that would not be
+  // nice (i.e. it would result in a wrong rsp). This is also needed for lazy
+  // adapter frame creation on the non-sibling tail call code path.
+  __ parent_is_not_interpreter_jcc(rbp, temp, regular_call_continuation);
+  // Assumption: we can use rdx and rdi since they contain no vital info. Rsi is
+  // computed from sender_sp so it's free too. Other registers need to be saved.
+  // Store return_addr, link (old fp), sender_sp (old sp) to top of stack to
+  // prevent those values from being overwritten when moving parameters further
+  // down.
+  __ movl(temp, Address(rbp, frame::return_addr_offset * wordSize));
+  __ movl(Address(rsp, 0), temp);  // Save ret addr.
+  __ movl(temp, Address(rbp, frame::link_offset * wordSize));
+  __ push(temp); // Save old rbp.
+  __ movl(temp, Address(rbp, frame::interpreter_frame_sender_sp_offset * wordSize));
+  __ push(temp); // Save old rsp. Needed if the caller was compiled - c2i adapter
+                 // parameters may be lurking around!
+  // Save rax, rbx, rcx.
+  __ push(rax); // callVM in resolve_cache_and_index saves?
+  __ push(rbx); // Clobbered by resolve_cache_and_index.
+  __ push(rcx); // callVM saves?
+
+  Register param_end = rdx;
+  
+  // Clobbers caller saved registers rax,rcx,rdx.
+  load_invoke_cp_cache_flags(byte_no, rdi, rsi, param_end);
+  __ andl(param_end, 0xFF);
+  __ shll(param_end, 2); // a param needs 4 bytes. REFACTOR.
+  Register source = rsi;
+  Register dest = rdi;
+  temp = rbx;
+  assert_different_registers(param_end, source, dest);
+  assert_different_registers(temp, param_end, source);
+  assert_different_registers(dest, temp);
+ 
+  // Load copy-from start.
+  __ leal(source, Address(rsp, 5 * wordSize)); // eax, ebx, ecx, oldsp, oldbp, retaddr
+  __ addl(source, param_end);
+  __ leal(param_end, Address(rsp, 4 * wordSize)); // Also copy retaddr.
+  // Load copy-to destination start.
+  __ movl(dest, Address(rbp, frame::interpreter_frame_locals_offset* wordSize));
+
+  // Copy one word from source to dest, parameter-count + 1 times (the extra
+  // word is the return address). A smarter memcpy would be possible here.
+  { Label exit, loop;
+    // copy
+    __ bind(loop);
+    __ cmpl(param_end, source);
+    __ jcc(Assembler::equal, exit);
+    __ movl(temp, Address(source, 0));
+    __ movl(Address(dest, 0), temp);
+    __ subl(source, wordSize);
+    __ subl(dest, wordSize);
+
+    __ jmp(loop); // next iteration
+    __ bind(exit);
+  }
+  
+  // Restore rcx, rbx, rax.
+  __ pop(rcx);
+  __ pop(rbx);
+  __ pop(rax);
+  // Get old stack pointer.
+  __ pop(rsi); // Not needed unless our sender was compiled.
+  // Get old frame pointer.
+  __ pop(rbp);
+  // Adjust stack pointer to ret addr.
+  __ addl(dest, wordSize);
+  __ movl(rsp, dest);
+}
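In outline, the frame surgery performed by tail_call above is (a sketch under the routine's own assumptions: 32-bit, the parent frame is interpreted, parameters occupy 4-byte slots):

    // 1. Bail out to `regular_call_continuation` if the parent frame is not
    //    interpreted.
    // 2. Stash the caller's return address, saved rbp and sender sp on top of
    //    the expression stack, then save rax/rbx/rcx around the cp-cache flags
    //    lookup.
    // 3. param_bytes = (cp_cache_flags & 0xFF) * wordSize.
    // 4. Copy the outgoing arguments plus the return-address word downward,
    //    one word at a time, into the caller's incoming-locals area
    //    (interpreter_frame_locals_offset).
    // 5. Restore rax/rbx/rcx, reload the caller's sender sp (rsi) and old rbp,
    //    and point rsp at the relocated return address; jump_from_interpreted
    //    then enters the callee as if the tail-calling frame had already
    //    returned.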
 
 void TemplateTable::invokevirtual(int byte_no) {
   transition(vtos, vtos);
-  prepare_invoke(rbx, noreg, byte_no);
+  prepare_invoke(rbx, noreg, byte_no, false);
 
   // rbx,: index
   // rcx: receiver
   // rdx: flags
 
-  invokevirtual_helper(rbx, rcx, rdx);
-}
-
+  invokevirtual_helper(rbx, rcx, rdx, byte_no);
+}
+
+void TemplateTable::wide_invokevirtual(int byte_no) {
+  transition(vtos, vtos);
+  // Bcp points to wide, advance to invoke instruction.
+  //__ increment(rsi, 1);
+  if (TraceTailCalls) __ warn("Interpreter: wide_invokevirtual");
+  prepare_invoke(rbx, noreg, byte_no, true);
+  // rbx,: index
+  // rcx: receiver
+  // rdx: flags
+  invokevirtual_helper(rbx, rcx, rdx, byte_no, true);
+}
 
 void TemplateTable::invokespecial(int byte_no) {
   transition(vtos, vtos);
-  prepare_invoke(rbx, noreg, byte_no);
+  prepare_invoke(rbx, noreg, byte_no, false);
   // do the call
   __ verify_oop(rbx);
   __ profile_call(rax);
   __ jump_from_interpreted(rbx, rax);
 }
 
+void TemplateTable::wide_invokespecial(int byte_no) {
+  transition(vtos, vtos);
+  Label regular_call_continuation;
+  Label protection_domain_mismatch_cont;
+  //__ increment(rsi, 1);
+  if (TraceTailCalls) __ warn("Interpreter: wide_invokespecial");
+  prepare_invoke(rbx, noreg, byte_no, true);
+  // do the call
+  __ verify_oop(rbx);
+  __ profile_call(rax);
+  __ push(rax);
+  __ push(rdx);
+  jcc_protection_domain_mismatch(rax, rdx, rbx, protection_domain_mismatch_cont);
+  __ pop(rdx);
+  __ pop(rax);
+  tail_call(byte_no, regular_call_continuation);
+  __ jump_from_interpreted(rbx, rax, true);
+  // not a tail call
+  __ bind (regular_call_continuation);
+  __ jump_from_interpreted(rbx, rax, false);
+  // Tail call exception on protection domain mismatch
+  __ bind(protection_domain_mismatch_cont);
+  __ pop(rcx);
+  __ pop(rdx);
+  __ pop(rax);
+  __ restore_bcp();
+  __ call_VM(noreg, CAST_FROM_FN_PTR(address,
+                                     InterpreterRuntime::throw_TailCallException));
+}
 
 void TemplateTable::invokestatic(int byte_no) {
   transition(vtos, vtos);
-  prepare_invoke(rbx, noreg, byte_no);
+  prepare_invoke(rbx, noreg, byte_no, false);
   // do the call
   __ verify_oop(rbx);
   __ profile_call(rax);
   __ jump_from_interpreted(rbx, rax);
 }
 
+void TemplateTable::wide_invokestatic(int byte_no) {
+  transition(vtos, vtos);
+  Label regular_call_continuation;
+  Label regular_call_continuation_pd_mismatch;
+  Label protection_domain_mismatch_cont;
+  // Bcp points to wide, advance to invoke instruction.
+  //__ increment(rsi, 1);
+  if (TraceTailCalls) __ warn("Interpreter: wide_invokestatic");
+  prepare_invoke(rbx, noreg, byte_no, true);
+  // do the call
+  __ verify_oop(rbx);
+  __ profile_call(rax);
+  //__ null_check(rcx); //receiver
+  // Store clobbered registers.
+  __ push(rax);
+  __ push(rdx);
+  __ push(rcx);
+  __ push(rbx);
+  if (TailCallsStackCompression)
+    jcc_protection_domain_mismatch(rax, rdx, rbx, regular_call_continuation_pd_mismatch);
+  else  
+    jcc_protection_domain_mismatch(rax, rdx, rbx, protection_domain_mismatch_cont);
+  __ pop(rbx);
+  __ pop(rcx);
+  __ pop(rdx);
+  __ pop(rax);
+  tail_call(byte_no, regular_call_continuation);
+  __ jump_from_interpreted(rbx, rax, true);
+  // not a tail call
+  __ bind (regular_call_continuation_pd_mismatch);
+  __ pop(rbx);
+  __ pop(rcx);
+  __ pop(rdx);
+  __ pop(rax);
+  __ bind(regular_call_continuation);
+  if (TraceTailCalls) __ warn("wide_invokestatic: not a tail call");
+  __ jump_from_interpreted(rbx, rax, false);
+  // Tail call exception on protection domain mismatch
+  __ bind(protection_domain_mismatch_cont);
+  __ pop(rbx);
+  __ pop(rcx);
+  __ pop(rdx);
+  __ pop(rax);
+  __ restore_bcp();
+  __ call_VM(noreg, CAST_FROM_FN_PTR(address,
+                                     InterpreterRuntime::throw_TailCallException));
+}
 
 void TemplateTable::fast_invokevfinal(int byte_no) {
   transition(vtos, vtos);
@@ -3050,8 +3309,19 @@ void TemplateTable::fast_invokevfinal(in
 
 
 void TemplateTable::invokeinterface(int byte_no) {
-  transition(vtos, vtos);
-  prepare_invoke(rax, rbx, byte_no);
+  invokeinterface_helper(byte_no, false);
+}
+
+void TemplateTable::wide_invokeinterface(int byte_no) {
+  if (TraceTailCalls) __ warn("Interpreter: wide_invokeinterface");
+  invokeinterface_helper(byte_no, true);
+}
+
+void TemplateTable::invokeinterface_helper(int byte_no, bool is_tail_call) {
+  transition(vtos, vtos);
+
+
+  prepare_invoke(rax, rbx, byte_no, is_tail_call);
 
   // rax,: Interface
   // rbx,: index
@@ -3067,7 +3337,7 @@ void TemplateTable::invokeinterface(int 
   __ andl(rdi, (1 << ConstantPoolCacheEntry::methodInterface));
   __ jcc(Assembler::zero, notMethod);
 
-  invokevirtual_helper(rbx, rcx, rdx);
+  invokevirtual_helper(rbx, rcx, rdx, byte_no, is_tail_call);
   __ bind(notMethod);
 
   // Get receiver klass into rdx - also a null check
@@ -3095,10 +3365,30 @@ void TemplateTable::invokeinterface(int 
   __ testptr(rbx, rbx);
   __ jcc(Assembler::zero, no_such_method);
 
+  // Move parameters and remove frame if this is a tail-call.
+  Label regular_call_continuation;
+  Label regular_call_continuation_pd_mismatch;
+  Label protection_domain_mismatch_cont;
+  // If tail calling, pop the stack and move the parameters.
+  if (is_tail_call) {
+    // rax, rdx are clobbered by jcc_protection_domain_mismatch
+    __ push(rax);
+    __ push(rdx);
+    if (TailCallsStackCompression)
+      jcc_protection_domain_mismatch(rax, rdx, rbx, regular_call_continuation_pd_mismatch);
+    else
+      jcc_protection_domain_mismatch(rax, rdx, rbx, protection_domain_mismatch_cont);
+    __ pop(rdx);
+    __ pop(rax);
+    // The bcp in rsi has been clobbered but is needed in tail_call.
+    __ restore_bcp();
+    tail_call(byte_no, regular_call_continuation);
+  }
+
   // do the call
   // rcx: receiver
   // rbx,: methodOop
-  __ jump_from_interpreted(rbx, rdx);
+  __ jump_from_interpreted(rbx, rdx, is_tail_call);
   __ should_not_reach_here();
 
   // exception handling code follows...
@@ -3123,6 +3413,21 @@ void TemplateTable::invokeinterface(int 
                    InterpreterRuntime::throw_IncompatibleClassChangeError));
   // the call_VM checks for exception, so we should never return here.
   __ should_not_reach_here();
+
+  // Not a tail call.
+  __ bind (regular_call_continuation);
+  __ jump_from_interpreted(rbx, rax, false);
+  __ bind (regular_call_continuation_pd_mismatch);
+  __ pop(rdx);
+  __ pop(rax);
+  __ jump_from_interpreted(rbx, rax, false);
+  // Tail call exception on protection domain mismatch.
+  __ bind(protection_domain_mismatch_cont);
+  __ pop(rdx);
+  __ pop(rax);
+  __ restore_bcp();
+  __ call_VM(noreg, CAST_FROM_FN_PTR(address,
+                                     InterpreterRuntime::throw_TailCallException));
 }
 
 void TemplateTable::invokedynamic(int byte_no) {
@@ -3139,7 +3444,7 @@ void TemplateTable::invokedynamic(int by
     return;
   }
 
-  prepare_invoke(rax, rbx, byte_no);
+  prepare_invoke(rax, rbx, byte_no, false);
 
   // rax: CallSite object (f1)
   // rbx: unused (f2)
@@ -3173,7 +3478,7 @@ void TemplateTable::invokedynamic(int by
 
   // recompute return address
   __ restore_bcp();      // rsi must be correct for prepare_invoke
-  prepare_invoke(rax, rbx, -byte_no);  // smashes rcx, rdx
+  prepare_invoke(rax, rbx, -byte_no, false);  // smashes rcx, rdx
   // rax: CallSite object (f1)
   // rbx: unused (f2)
   // rdi: bootstrap MH
diff -r aa0c48844632 -r a7d54b98ca4a src/cpu/x86/vm/templateTable_x86_32.hpp
--- a/src/cpu/x86/vm/templateTable_x86_32.hpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/cpu/x86/vm/templateTable_x86_32.hpp	Wed Jun 03 16:27:17 2009 +0200
@@ -22,9 +22,21 @@
  *
  */
 
-  static void prepare_invoke(Register method, Register index, int byte_no);
+  static void prepare_invoke(Register method, Register index, int byte_no, bool is_tail_call);
+
   static void invokevirtual_helper(Register index, Register recv,
-                                   Register flags);
+                                   Register flags, int byte_no, bool is_tail_call = false);
+  
+  static void invokeinterface_helper(int byte_no, bool is_tail_call = false);
+
+  static void load_pool_holder_of_method(Register method, Register klass);
+  static void jcc_protection_domain_mismatch(Register temp, Register temp2, Register recv, Label& mismatch_cont);
+
+  static void tail_call(int byte_no, Label& regular_call_continuation);
+  // Tail call helper.
+  static void load_invoke_cp_cache_flags(int byte_no, Register Rcache,
+                                         Register Rindex, Register Flags);
+                                  
   static void volatile_barrier(Assembler::Membar_mask_bits order_constraint );
 
   // Helpers
diff -r aa0c48844632 -r a7d54b98ca4a src/cpu/x86/vm/vtableStubs_x86_32.cpp
--- a/src/cpu/x86/vm/vtableStubs_x86_32.cpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/cpu/x86/vm/vtableStubs_x86_32.cpp	Wed Jun 03 16:27:17 2009 +0200
@@ -45,9 +45,9 @@ extern "C" void bad_compiled_vtable_inde
 //   rsi, rdi
 // Note that rax and rdx are also used for return values.
 //
-VtableStub* VtableStubs::create_vtable_stub(int vtable_index) {
+VtableStub* VtableStubs::create_vtable_stub(int vtable_index, bool is_tail_call, bool is_sibling) {
   const int i486_code_length = VtableStub::pd_code_size_limit(true);
-  VtableStub* s = new(i486_code_length) VtableStub(true, vtable_index);
+  VtableStub* s = new(i486_code_length) VtableStub(true, vtable_index, is_tail_call, is_sibling);
   ResourceMark rm;
   CodeBuffer cb(s->entry_point(), i486_code_length);
   MacroAssembler* masm = new MacroAssembler(&cb);
@@ -64,9 +64,27 @@ VtableStub* VtableStubs::create_vtable_s
 
   // get receiver klass
   address npe_addr = __ pc();
+
   __ movptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
+
   // compute entry offset (in words)
   int entry_offset = instanceKlass::vtable_start_offset() + vtable_index*vtableEntry::size();
+
+  Label pd_mismatch;
+  // check protection domains match
+  if (is_tail_call) {
+    // Protection domain slot [esp+4] contains caller klass.
+    __ movl(rbx, Address(rsp, wordSize));
+    __ movl(rbx, Address(rbx, instanceKlass::protection_domain_offset()*wordSize));
+    // Get method_holder's pd. First get methodOop. Then do
+    // methodOop.constant_pool.class.protection_domain.
+    __ movptr(rdi, Address(rax, entry_offset*wordSize + vtableEntry::method_offset_in_bytes()));
+    __ movptr(rdi, Address(rdi, methodOopDesc::constants_offset()));
+    __ movptr(rdi, Address(rdi, constantPoolOopDesc::pool_holder_offset_in_bytes()));
+    __ cmpl(rbx, Address(rdi, instanceKlass::protection_domain_offset()*wordSize));
+    __ jcc(Assembler::notEqual, pd_mismatch);
+  }
+
 #ifndef PRODUCT
   if (DebugVtables) {
     Label L;
@@ -83,11 +101,21 @@ VtableStub* VtableStubs::create_vtable_s
 
   // load methodOop and target address
   __ movptr(method, Address(rax, entry_offset*wordSize + vtableEntry::method_offset_in_bytes()));
+  
+  ByteSize method_entry_offset = in_ByteSize(0);
+  if (is_tail_call && is_sibling) {
+    method_entry_offset = methodOopDesc::from_compiled_tail_call_offset();
+  } else if (is_tail_call) {
+    method_entry_offset = methodOopDesc::from_compiled_not_sibling_tail_call_offset();
+  } else {
+    method_entry_offset = methodOopDesc::from_compiled_offset();
+  }
+  
   if (DebugVtables) {
     Label L;
     __ cmpptr(method, (int32_t)NULL_WORD);
     __ jcc(Assembler::equal, L);
-    __ cmpptr(Address(method, methodOopDesc::from_compiled_offset()), (int32_t)NULL_WORD);
+    __ cmpptr(Address(method, method_entry_offset), (int32_t)NULL_WORD);
     __ jcc(Assembler::notZero, L);
     __ stop("Vtable entry is NULL");
     __ bind(L);
@@ -97,7 +125,20 @@ VtableStub* VtableStubs::create_vtable_s
   // method (rbx): methodOop
   // rcx: receiver
   address ame_addr = __ pc();
-  __ jmp( Address(method, methodOopDesc::from_compiled_offset()));
+  __ jmp( Address(method, method_entry_offset));
+  
+  if (is_tail_call) { 
+    __ bind(pd_mismatch); // Protection domain mismatch.
+    if (TraceTailCalls) __ warn ("vtable tail call pd mismatch");
+    if (TailCallsStackCompression) {
+      // Jump to normal method entry. TODO: Need new exception point.
+      __ movptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
+      __ movptr(method, Address(rax, entry_offset*wordSize + vtableEntry::method_offset_in_bytes()));
+      __ jmp(Address(method, methodOopDesc::from_compiled_offset()));
+    } else {
+      __ jump(RuntimeAddress(StubRoutines::throw_TailCallException_entry()));
+    }
+  }
 
   masm->flush();
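Summarizing the tail-call additions to this stub: compiled tail-call sites pass the caller's holder klass in a protection-domain slot on the stack, and methodOop gains dedicated entry offsets for the tail-call cases; a sketch of the dispatch the stub now performs (names as used in this patch):

    // entry offset chosen per (is_tail_call, is_sibling):
    //   tail call, sibling      -> methodOopDesc::from_compiled_tail_call_offset()
    //   tail call, not sibling  -> methodOopDesc::from_compiled_not_sibling_tail_call_offset()
    //   regular call            -> methodOopDesc::from_compiled_offset()
    // For tail calls the stub first compares the protection domain of the caller
    // klass (taken from the PD slot at [esp + wordSize]) against that of the
    // vtable-selected method's pool holder and, on mismatch, either re-dispatches
    // through the normal entry (TailCallsStackCompression) or jumps to
    // throw_TailCallException_entry.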
 
@@ -117,11 +158,11 @@ VtableStub* VtableStubs::create_vtable_s
 }
 
 
-VtableStub* VtableStubs::create_itable_stub(int itable_index) {
+VtableStub* VtableStubs::create_itable_stub(int itable_index, bool is_tail_call, bool is_sibling) {
   // Note well: pd_code_size_limit is the absolute minimum we can get away with.  If you
   //            add code here, bump the code stub size returned by pd_code_size_limit!
   const int i486_code_length = VtableStub::pd_code_size_limit(false);
-  VtableStub* s = new(i486_code_length) VtableStub(false, itable_index);
+  VtableStub* s = new(i486_code_length) VtableStub(false, itable_index, is_tail_call, is_sibling);
   ResourceMark rm;
   CodeBuffer cb(s->entry_point(), i486_code_length);
   MacroAssembler* masm = new MacroAssembler(&cb);
@@ -139,8 +180,15 @@ VtableStub* VtableStubs::create_itable_s
 
   assert(VtableStub::receiver_location() == rcx->as_VMReg(), "receiver expected in rcx");
 
+#ifdef COMPILER2
+  if (is_tail_call) {
+    __ push(rsi); // Opto tail calls store the base pointer in rsi.
+  }
+#endif
+
   // get receiver klass (also an implicit null-check)
   address npe_addr = __ pc();
+
   __ movptr(rsi, Address(rcx, oopDesc::klass_offset_in_bytes()));
 
   // Most registers are in use; we'll use rax, rbx, rsi, rdi
@@ -157,25 +205,63 @@ VtableStub* VtableStubs::create_itable_s
 
   // method (rbx): methodOop
   // rcx: receiver
+  ByteSize method_entry_offset = in_ByteSize(0);
+  if (is_tail_call && is_sibling) {
+    method_entry_offset = methodOopDesc::from_compiled_tail_call_offset();
+  } else if (is_tail_call) {
+    method_entry_offset = methodOopDesc::from_compiled_not_sibling_tail_call_offset();
+  } else {
+    method_entry_offset = methodOopDesc::from_compiled_offset();
+  }
 
 #ifdef ASSERT
   if (DebugVtables) {
       Label L1;
       __ cmpptr(method, (int32_t)NULL_WORD);
       __ jcc(Assembler::equal, L1);
-      __ cmpptr(Address(method, methodOopDesc::from_compiled_offset()), (int32_t)NULL_WORD);
+      __ cmpptr(Address(method, method_entry_offset), (int32_t)NULL_WORD);
       __ jcc(Assembler::notZero, L1);
       __ stop("methodOop is null");
       __ bind(L1);
     }
 #endif // ASSERT
 
+#ifdef COMPILER2
+  if (is_tail_call) {
+    // restore rsi (contains base pointer for tail calls).
+    __ pop(rsi);
+  }
+#endif
+
+  Label pd_mismatch;
+  // Check protection domains match. Clobbers: rdi, rax
+  if (is_tail_call) {
+    // Protection domain slot [esp+4] contains caller klass.
+    __ movl(rdi, Address(rsp, wordSize));
+    __ movl(rdi, Address(rdi, instanceKlass::protection_domain_offset()*wordSize));
+    // Get receiver klass.
+    __ movptr(rax, Address(method, methodOopDesc::constants_offset()));
+    __ movptr(rax, Address(rax, constantPoolOopDesc::pool_holder_offset_in_bytes())); 
+    __ cmpl(rdi, Address(rax, instanceKlass::protection_domain_offset()*wordSize));
+    __ jcc(Assembler::notEqual, pd_mismatch);
+  }
+
   address ame_addr = __ pc();
-  __ jmp(Address(method, methodOopDesc::from_compiled_offset()));
+  __ jmp(Address(method, method_entry_offset));
 
   __ bind(throw_icce);
   __ jump(RuntimeAddress(StubRoutines::throw_IncompatibleClassChangeError_entry()));
   masm->flush();
+  
+  if (is_tail_call) { 
+    __ bind(pd_mismatch); // Protection domain mismatch.
+    if (TraceTailCalls) __ warn("itable tail call pd mismatch");
+    if (TailCallsStackCompression) {
+      __ jmp(Address(method, methodOopDesc::from_compiled_offset())); 
+    } else {
+      __ jump(RuntimeAddress(StubRoutines::throw_TailCallException_entry()));
+    }
+  }
 
   if (PrintMiscellaneous && (WizardMode || Verbose)) {
     tty->print_cr("itable #%d at "PTR_FORMAT"[%d] left over: %d",
@@ -197,10 +283,12 @@ int VtableStub::pd_code_size_limit(bool 
 int VtableStub::pd_code_size_limit(bool is_vtable_stub) {
   if (is_vtable_stub) {
     // Vtable stub size
-    return (DebugVtables ? 210 : 16) + (CountCompiledCalls ? 6 : 0);
+    // Tail calls check that protection domains match.
+    // TODO: get the size right!
+    return (TailCalls ? 256 : 0) + (DebugVtables ? 210 : 16) + (CountCompiledCalls ? 6 : 0);
   } else {
     // Itable stub size
-    return (DebugVtables ? 256 : 66) + (CountCompiledCalls ? 6 : 0);
+    return (TailCalls ? 256+128 : 0) + (DebugVtables ? 144 : 64+16) + (CountCompiledCalls ? 6 : 0);
   }
   // In order to tune these parameters, run the JVM with VM options
   // +PrintMiscellaneous and +WizardMode to see information about
diff -r aa0c48844632 -r a7d54b98ca4a src/cpu/x86/vm/x86_32.ad
--- a/src/cpu/x86/vm/x86_32.ad	Thu May 14 10:57:58 2009 -0700
+++ b/src/cpu/x86/vm/x86_32.ad	Wed Jun 03 16:27:17 2009 +0200
@@ -272,11 +272,11 @@ static jlong *double_signflip_pool = dou
 //       from the start of the call to the point where the return address
 //       will point.
 int MachCallStaticJavaNode::ret_addr_offset() {
-  return 5 + (Compile::current()->in_24_bit_fp_mode() ? 6 : 0);  // 5 bytes from start of call to where return address points
+  return 5 + (Compile::current()->in_24_bit_fp_mode() ? 6 : 0) + (_is_tail_call ? 7+11 : 0);  // 5 bytes from start of call to where return address points
 }
 
 int MachCallDynamicJavaNode::ret_addr_offset() {
-  return 10 + (Compile::current()->in_24_bit_fp_mode() ? 6 : 0);  // 10 bytes from start of call to where return address points
+  return 10 + (Compile::current()->in_24_bit_fp_mode() ? 6 : 0) + (_is_tail_call ? 14+11 : 0);  // 10 bytes from start of call to where return address points
 }
 
 static int sizeof_FFree_Float_Stack_All = -1;
@@ -299,6 +299,12 @@ bool SafePointNode::needs_polling_addres
 // The address of the call instruction needs to be 4-byte aligned to
 // ensure that it does not span a cache line so that it can be patched.
 int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
+  if (_is_tail_call) {
+    // emit lea ebp [esp-framesize] before every call
+    current_offset += 7;
+    // mov [esp-4] ret_addr; subptr 4 esp 
+    current_offset += 11;
+  }
   if (Compile::current()->in_24_bit_fp_mode())
     current_offset += 6;    // skip fldcw in pre_call_FPU, if any
   current_offset += 1;      // skip call opcode byte
@@ -308,6 +314,14 @@ int CallStaticJavaDirectNode::compute_pa
 // The address of the call instruction needs to be 4-byte aligned to
 // ensure that it does not span a cache line so that it can be patched.
 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
+  if (_is_tail_call) {
+    // emit lea ebp [esp-framesize] before every call
+    current_offset += 7;
+    // mov [esp+0] noop
+    current_offset += 7;
+    // mov [esp-4] ret_addr; subptr 4 esp
+    current_offset += 11;
+  }
   if (Compile::current()->in_24_bit_fp_mode())
     current_offset += 6;    // skip fldcw in pre_call_FPU, if any
   current_offset += 5;      // skip MOV instruction
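A worked breakdown of the adjusted offsets above, using the per-instruction byte counts stated in the patch's comments:

    // Static Java call site, tail call:
    //   7  lea ebp, [esp - framesize]
    //   11 mov [esp-4], ret_addr ; subptr esp, 4
    //   5  call rel32
    //   => ret_addr_offset = 5 + (7 + 11) = 23   (+6 in 24-bit FP mode)
    // Dynamic (inline-cache) Java call site, tail call:
    //   7  lea ebp, [esp - framesize]
    //   7  mov [esp+0], noop
    //   11 mov [esp-4], ret_addr ; subptr esp, 4
    //   10 mov + call
    //   => ret_addr_offset = 10 + (14 + 11) = 35 (+6 in 24-bit FP mode)
    // compute_padding() adds the same 18 / 25 bytes so the call instruction
    // itself still ends up 4-byte aligned for patching.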
@@ -1158,12 +1172,36 @@ void emit_java_to_interp(CodeBuffer &cbu
   __ end_a_stub();
   // Update current stubs pointer and restore code_end.
 }
+void emit_java_to_interp_tail_call(CodeBuffer &cbuf ) {
+  // Stub is fixed up when the corresponding call is converted from calling
+  // compiled code to calling interpreted code.
+  // mov rbx,0
+  // jmp -1
+  // Note that the code buffer's inst_mark is always relative to insts.
+  // That's why we must use the macroassembler to generate a stub.
+  MacroAssembler _masm(&cbuf);
+  address call_pc = __ pc()-5;
+  address base =
+  __ start_a_stub(Compile::MAX_stubs_size);
+  if (base == NULL)  return;  // CodeBuffer::expand failed
+  // static stub relocation stores the instruction address of the call
+  __ relocate(static_stub_Relocation::spec(call_pc), RELOC_IMM32);
+  // static stub relocation also tags the methodOop in the code-stream.
+  __ movoop(rbx, (jobject)NULL);  // method is zapped till fixup time
+  // This is recognized as unresolved by relocs/nativeInst/ic code
+  __ jump(RuntimeAddress(__ pc()));
+
+  __ end_a_stub();
+  // Update current stubs pointer and restore code_end.
+}
+
 // size of call stub, compiled java to interpretor
 uint size_java_to_interp() {
   return 10;  // movl; jmp
 }
 // relocation entries for call stub, compiled java to interpretor
 uint reloc_java_to_interp() {
+  if (TailCalls) return 4 + 4; 
   return 4;  // 3 in emit_java_to_interp + 1 in Java_Static_Call
 }
 
@@ -1236,6 +1274,283 @@ uint size_deopt_handler() {
   // Note that this value is also credited (in output.cpp) to
   // the size of the code section.
   return 5 + NativeJump::instruction_size; // pushl(); jmp;
+}
+
+relocInfo::tailCallType tail_call_type_for_call(bool is_tail_call, bool is_sibling) {
+  if(!is_tail_call) return relocInfo::not_tail_call;
+  else if (is_tail_call && is_sibling) return relocInfo::sibling_tail_call_type;
+  else {
+    assert (is_tail_call && !is_sibling, "oops");
+    return relocInfo::not_sibling_tail_call_type;
+  }
+}
+
+void emit_tail_call_argument_move(MacroAssembler & _masm, int arg_slots) {
+  // Copy the args to tail call position using register rbx.
+  Register tmp = rbx;
+  // Don't need to move the protection domain slot.
+  for (int slot = 1+SharedRuntime::tail_call_protection_domain_slots(); slot <= arg_slots; slot++) {
+    Address src (rsp, VMRegImpl::stack_slot_size * (SharedRuntime::out_preserve_stack_slots()+slot));
+    // Need to account for the saved eip slot, hence slot+1 below.
+    Address dest(rsi, VMRegImpl::stack_slot_size * (SharedRuntime::out_preserve_stack_slots()+slot+1));
+    __ movl(tmp, src);
+    __ movl(dest, tmp);
+  }
+}
+
+void tail_call_leave(MacroAssembler & _masm) {
+  // Rsi holds rbp in opto tail calls.
+  __ mov(rsp, rsi);
+  __ pop(rbp);
+}
+
+uint size_verified_not_sib_tail_call_stub(int arg_slots) {
+     // TODO: get real size.
+     return (8 * arg_slots) + 512;
+}
+
+uint size_verified_tail_call_stub(int arg_slots) {
+     // TODO: get real size.
+     return (8 * arg_slots) + 256;
+}
+
+void emit_verified_not_sib_tail_call_stub(CodeBuffer& cbuf, int arg_slots, int VEP_offset, CodeOffsets & code_offsets) {
+  MacroAssembler _masm(&cbuf);
+  Label call_to_interpreter;
+  int stub_size = size_verified_not_sib_tail_call_stub(arg_slots);
+  // Generate code for a static tail call (we know that the klass and protection
+  // domain are correct).
+  address handler_base = __ start_a_stub(stub_size);
+  if (handler_base == NULL) {
+    assert(false, "static tail call stub overflow");
+    // not enough space left for the handler
+    return;
+  }
+#ifdef ASSERT
+  int offset = __ offset();
+#endif // ASSERT
+  __ align(CodeEntryAlignment);
+  code_offsets.set_value(CodeOffsets::Verified_Not_Sibling_Tail_Call_Entry, __ offset());
+  
+  if (TraceTailCalls) __ warn("Opto Compiled entry point: Verified_Not_Sibling_Tail_Call_Entry");
+  // Fast case: parent is interpreter. This means we can extend its stack frame.
+  // Assume: rax, rbx are scratch. rbx would hold methodOop, rax the IC_klass token
+  Register tmp = rax; // scratch
+  Register last_sp = rbx; // scratch
+  Register base_pointer = rsi; // opto specific
+  __ parent_is_not_interpreter_jcc(base_pointer, rbx, call_to_interpreter);
+  // Store old rbp
+  __ movl(tmp, Address(base_pointer, frame::link_offset*wordSize));
+  __ push(tmp);
+  // Store ret address.
+  __ movl(tmp, Address(base_pointer, frame::return_addr_offset * wordSize));
+  __ push(tmp);
+  
+  
+  // Get last_sp from parent frame.
+  __ movl(tmp, Address(base_pointer, frame::link_offset * wordSize)); // old rbp
+  __ movl(last_sp, Address(tmp, frame::interpreter_frame_last_sp_offset * wordSize));
+  // Shuffle arguments
+  for (int src_slot = arg_slots, dest_slot=-1; src_slot > 0; src_slot--, dest_slot--) {
+    // saved old_rbp, old_retaddr on top of stack => +2
+    Address src(rsp, VMRegImpl::stack_slot_size * (2+src_slot));
+    Address dest(last_sp, VMRegImpl::stack_slot_size * (dest_slot));
+    __ movl(tmp, src);
+    __ movl(dest, tmp);
+  }
+  // Set return address.
+  __ subl(last_sp, (1+arg_slots)*wordSize);
+  __ pop(tmp);
+  __ movl(Address(last_sp, 0), tmp);
+  // Set new rbp
+  __ pop(rbp);
+  // Set new rsp. Need to do this after the pop!
+  __ movl(rsp, last_sp);
+  // jump to VEP
+  address vep_entry = cbuf.insts()->start() + VEP_offset;
+  RelocationHolder rh = section_call_Relocation::spec(vep_entry, CodeBuffer::SECT_INSTS);
+  // Jump to vep.
+  __ jump(AddressLiteral((address)vep_entry, rh));
+  // Slow case: parent is not interpreted. Jump to interpreter entry of called
+  // function in order to lazily create an interpreted frame on the stack.
+  __ bind(call_to_interpreter);
+  code_offsets.set_value(CodeOffsets::Verified_Not_Sibling_Tail_Call_Set_Data_Entry, __ offset());
+  // Set methodoop.
+  __ movoop(rbx, (jobject)Universe::non_oop_word());
+  // Jump to C2I Entry Point
+  __ jump(RuntimeAddress((address)-1));
+  // TODO: adapt static_tail_call_stub_size
+  assert(__ offset() - offset <= stub_size, "overflow");
+  __ end_a_stub();
+}
+
+void emit_verified_tail_call_stub(Compile * C, CodeBuffer& cbuf, int arg_slots, int frame_size, CodeOffsets & code_offsets) {
+  MacroAssembler _masm(&cbuf);
+  int stub_size = size_verified_tail_call_stub(arg_slots);
+  // Generate code for a static tail call (we know that the klass and protection
+  // domain are correct).
+  address handler_base = __ start_a_stub(stub_size);
+  if (handler_base == NULL) {
+    // not enough space left for the handler
+    return;
+  }
+#ifdef ASSERT
+  int offset = __ offset();
+#endif // ASSERT
+
+  __ align(CodeEntryAlignment);
+  code_offsets.set_value(CodeOffsets::Verified_Tail_Call_Entry, __ offset());
+  if (TraceTailCalls) __ warn("Opto Compiled entry point: Verified_Tail_Call_Entry");
+  // Check that there is enough space on the stack.
+  if (C->need_stack_bang(frame_size)) {
+    MacroAssembler masm(&cbuf);
+    masm.generate_stack_overflow_check(frame_size);
+  }
+  // Move arguments.
+  emit_tail_call_argument_move(_masm, arg_slots);
+  // Remove tail calling caller's stack frame.
+  //tail_call_leave(_masm);
+  __ leal(rsp, Address(rsi, -frame_size));
+  // Compute target of jump. Verified entry point of current method.
+  address vep_entry = cbuf.insts()->start() + 
+    code_offsets.value(CodeOffsets::Frame_Complete);
+  RelocationHolder rh = section_call_Relocation::spec(vep_entry, CodeBuffer::SECT_INSTS);
+  // Jump to vep.
+  __ jump(AddressLiteral((address)vep_entry, rh));
+  
+  // TODO: adapt static_tail_call_stub_size
+  assert(__ offset() - offset <= stub_size, "opto overflow");
+  __ end_a_stub();
+}
+
+uint size_not_sib_tail_call_stub(int arg_slots) {
+     // TODO: get real size.
+     return 8*arg_slots + 512;
+}
+
+uint size_tail_call_stub(int arg_slots) {
+     // TODO: get real size.
+     return 8*arg_slots + 256;
+}
+
+void emit_not_sib_tail_call_stub(CodeBuffer& cbuf, int arg_slots, int VEP_offset, CodeOffsets& code_offsets) {
+  MacroAssembler _masm(&cbuf);
+  Label call_to_interpreter;
+  int stub_size = size_not_sib_tail_call_stub(arg_slots);
+  address handler_base = __ start_a_stub(stub_size);
+  if (handler_base == NULL) {
+    // not enough space left for the handler
+    assert(false, "static tail call stub overflow");
+    return;
+  }
+#ifdef ASSERT
+  int offset = __ offset();
+#endif // ASSERT
+  __ align(CodeEntryAlignment);
+  code_offsets.set_value(CodeOffsets::Not_Sibling_Tail_Call_Entry, __ offset());
+  if (TraceTailCalls) __ warn("Opto Compiled entry point: Not_Sibling_Tail_Call_Entry");
+  // Check icache
+  __ cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
+  __ jump_cc(Assembler::notEqual,
+               RuntimeAddress(SharedRuntime::get_ic_miss_stub()));  
+  // Fast case: parent is interpreter. This means we can extend its stack frame.
+  // Assume: rax, rbx are scratch here since rax is needed only for
+  // check_icache. rbx would hold methodOop, rax the IC_klass token
+  Register tmp = rax; // scratch
+  Register last_sp = rbx; // scratch
+  Register base_pointer = rsi;
+  __ parent_is_not_interpreter_jcc(base_pointer, tmp, call_to_interpreter);
+  // Store old rbp
+  __ movl(tmp, Address(base_pointer, frame::link_offset*wordSize));
+  __ push(tmp);
+  // Store ret address.
+  __ movl(tmp, Address(base_pointer, frame::return_addr_offset * wordSize));
+  __ push(tmp);
+  
+  // When debugging the return address pointer, remove the two __ a_long() lines
+  // in templateInterpreter_x86_32.cpp to get sensible assembler output.
+  //__ stop("static_not_sibling_call, parent is interpreted rax contains ret_entry");
+  
+  // Get last_sp from parent frame.
+  __ movl(tmp, Address(base_pointer, frame::link_offset * wordSize)); // old rbp
+  __ movl(last_sp, Address(tmp, frame::interpreter_frame_last_sp_offset * wordSize));
+  // Shuffle arguments
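+  // Copy the tail call's arguments from this stack into the area just below
+  // the interpreter caller's last_sp, i.e. extend its expression stack.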
+  for (int src_slot = arg_slots, dest_slot=-1; src_slot > 0; src_slot--, dest_slot--) {
+    // saved old_rbp, old_retaddr on top of stack => +2
+    Address src(rsp, VMRegImpl::stack_slot_size * (2+src_slot));
+    Address dest(last_sp, VMRegImpl::stack_slot_size * (dest_slot));
+    __ movl(tmp, src);
+    __ movl(dest, tmp);
+  }
+  // Set return address.
+  __ subl(last_sp, (1+arg_slots)*wordSize);
+  __ pop(tmp);
+  __ movl(Address(last_sp, 0), tmp);
+  // Set new rbp
+  __ pop(rbp);
+  // Set new rsp. Need to do this after the pop!
+  __ movl(rsp, last_sp);
+  // jump to VEP
+  address vep_entry = cbuf.insts()->start() + VEP_offset;
+  RelocationHolder rh = section_call_Relocation::spec(vep_entry, CodeBuffer::SECT_INSTS);
+  // Jump to vep.
+  __ jump(AddressLiteral((address)vep_entry, rh));
+  // Slow case: parent is not interpreted. Jump to interpreter entry of called
+  // function in order to lazily create an interpreted frame on the stack.
+  __ bind(call_to_interpreter);
+  code_offsets.set_value(CodeOffsets::Not_Sibling_Tail_Call_Set_Data_Entry, __ offset());
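+  // The methodOop and jump target below are placeholders; presumably they are
+  // patched once the call site is resolved.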
+  // Set methodOop.
+  __ movoop(rbx, (jobject)Universe::non_oop_word());
+  // Jump to C2I Entry Point
+  __ jump(RuntimeAddress((address)-1));
+  // TODO: adapt static_tail_call_stub_size
+  assert(__ offset() - offset <= stub_size, "overflow");
+  __ end_a_stub();
+}
+
+void emit_tail_call_stub(Compile * C, CodeBuffer& cbuf, int arg_slots, int frame_size, CodeOffsets & code_offsets) {
+  MacroAssembler _masm(&cbuf);
+  int stub_size = size_tail_call_stub(arg_slots);
+  // Generate code for tail call, check klass token. 
+  address handler_base = __ start_a_stub(stub_size);
+  if (handler_base == NULL) {
+    assert(false, "not enough space in tail call stub");
+    // not enough space left for the handler
+    return;
+  }
+#ifdef ASSERT
+  int offset = __ offset();
+#endif // ASSERT
+
+  __ align(CodeEntryAlignment);
+  code_offsets.set_value(CodeOffsets::Tail_Call_Entry, __ offset());
+  if (TraceTailCalls) __ warn("Opto Compiled entry point: Tail_Call_Entry");
+
+  // Check icache
+  __ cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
+  __ jump_cc(Assembler::notEqual,
+               RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
+  // Check for enough stack space.
+  if (C->need_stack_bang(frame_size)) {
+    MacroAssembler masm(&cbuf);
+    masm.generate_stack_overflow_check(frame_size);
+  }
+  // Move arguments.
+  emit_tail_call_argument_move(_masm, arg_slots);
+  // Remove tail calling caller's stack frame.
+  //tail_call_leave(_masm);
+  __ leal(rsp, Address(rsi, -frame_size));
+  
+  // Compute target of jump. Verified entry point of current method.
+  address vep_entry = cbuf.insts()->start() 
+    + code_offsets.value(CodeOffsets::Frame_Complete);
+  RelocationHolder rh = section_call_Relocation::spec(vep_entry, CodeBuffer::SECT_INSTS);
+  // Jump to vep.
+  __ jump(AddressLiteral((address)vep_entry, rh));
+  
+  // TODO: adapt static_tail_call_stub_size
+  assert(__ offset() - offset <= stub_size, "opto overflow");
+  __ end_a_stub();
 }
 
 // Emit deopt handler code.
@@ -1782,37 +2097,94 @@ encode %{
   enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
     // who we intended to call.
-    cbuf.set_inst_mark();
-    $$$emit8$primary;
-    if ( !_method ) {
-      emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.code_end()) - 4),
-                     runtime_call_Relocation::spec(), RELOC_IMM32 );
-    } else if(_optimized_virtual) {
-      emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.code_end()) - 4),
-                     opt_virtual_call_Relocation::spec(), RELOC_IMM32 );
-    } else {
-      emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.code_end()) - 4),
-                     static_call_Relocation::spec(), RELOC_IMM32 );
-    }
-    if( _method ) {  // Emit stub for static call
-      emit_java_to_interp(cbuf);
+    if(_is_tail_call) {
+      // load base pointer to rsi for tail calls.
+      Compile* C = ra_->C;
+      int framesize = C->frame_slots() << LogBytesPerInt;
+      framesize -= 2*wordSize; // EBP,RET
+      
+      cbuf.set_inst_mark();
+      emit_opcode(cbuf, 0x8D);      // LEA  ESI,[SP-framesize] 1byte
+      emit_rm(cbuf, 0x2, ESI_enc, 0x04); // 1 byte
+      emit_rm(cbuf, 0x0, 0x04, ESP_enc); // 1 byte
+      emit_d32(cbuf, framesize);
+    }
+    relocInfo::tailCallType rtype = tail_call_type_for_call(_is_tail_call, _is_sibling);
+    if (_is_tail_call) {
+      MacroAssembler masm(&cbuf);
+      Label return_address;
+      RelocationHolder rh = _optimized_virtual ?
+        opt_virtual_call_Relocation::spec(rtype) :
+        static_call_Relocation::spec(rtype);
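+      // Emulate a call: store the return_address label's address below rsp,
+      // adjust rsp so it becomes the top of stack, then jump to the callee.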
+      masm.mov_label(Address(rsp,-4), return_address);
+      masm.subl (rsp, wordSize); //jmp_literal
+      masm.tail_call_jmp_literal((address)_entry_point, rh);
+      masm.bind(return_address);
+      emit_java_to_interp_tail_call(cbuf);
+    } else {
+      cbuf.set_inst_mark();
+      $$$emit8$primary;
+      if ( !_method ) {
+        emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.code_end()) - 4),
+            runtime_call_Relocation::spec(), RELOC_IMM32 );
+      } else if(_optimized_virtual) {
+        emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.code_end()) - 4),
+            opt_virtual_call_Relocation::spec(rtype), RELOC_IMM32 );
+      } else {
+        emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.code_end()) - 4),
+            static_call_Relocation::spec(rtype), RELOC_IMM32 );
+      }
+      if( _method ) {  // Emit stub for static call
+        emit_java_to_interp(cbuf);
+      }
     }
   %}
 
   enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
+    if(_is_tail_call) {
+      // load base pointer to rsi for tail calls.
+      Compile* C = ra_->C;
+      int framesize = C->frame_slots() << LogBytesPerInt;
+      framesize -= 2*wordSize; // EBP,RET
+      
+      cbuf.set_inst_mark();
+      emit_opcode(cbuf, 0x8D);      // LEA  ESI,[SP-framesize] 1byte
+      emit_rm(cbuf, 0x2, ESI_enc, 0x04); // 1 byte
+      emit_rm(cbuf, 0x0, 0x04, ESP_enc); // 1 byte
+      emit_d32(cbuf, framesize);
+      // protection domain token
+      {
+        MacroAssembler _masm(&cbuf);
+        // 7bytes
+        int before_offset = __ offset();
+        __ movoop(Address(rsp, 0), (jobject)Universe::non_oop_word());
+        assert (__ offset() - before_offset == 7, "Must have size 7.");
+      }
+    }
     // !!!!!
     // Generate  "Mov EAX,0x00", placeholder instruction to load oop-info
     // emit_call_dynamic_prologue( cbuf );
     cbuf.set_inst_mark();
     emit_opcode(cbuf, 0xB8 + EAX_enc);        // mov    EAX,-1
     emit_d32_reloc(cbuf, (int)Universe::non_oop_word(), oop_Relocation::spec_for_immediate(), RELOC_IMM32);
+    relocInfo::tailCallType rtype = tail_call_type_for_call(_is_tail_call, _is_sibling);
     address  virtual_call_oop_addr = cbuf.inst_mark();
     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
     // who we intended to call.
-    cbuf.set_inst_mark();
-    $$$emit8$primary;
-    emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.code_end()) - 4),
-                virtual_call_Relocation::spec(virtual_call_oop_addr), RELOC_IMM32 );
+    if (_is_tail_call) {
+      RelocationHolder rh = virtual_call_Relocation::spec(virtual_call_oop_addr, NULL, rtype);
+      MacroAssembler masm(&cbuf);
+      Label return_address;
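+      // Emulate a call: store the return_address label's address below rsp,
+      // adjust rsp so it becomes the top of stack, then jump to the callee.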
+      masm.mov_label(Address(rsp,-4), return_address);
+      masm.subl (rsp, wordSize); //jmp_literal
+      masm.tail_call_jmp_literal((address)_entry_point, rh);
+      masm.bind(return_address);
+    } else { 
+      cbuf.set_inst_mark();
+      $$$emit8$primary;
+      emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.code_end()) - 4),
+          virtual_call_Relocation::spec(virtual_call_oop_addr, NULL, rtype), RELOC_IMM32 );
+    }
   %}
 
   enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
@@ -4819,8 +5191,8 @@ frame %{
   // a new frame.  The PROLOG must add this many slots to the stack.  The
   // EPILOG must remove this many slots.  Intel needs one slot for
   // return address and one for rbp, (must save rbp)
+  //in_preserve_stack_slots(2+VerifyStackAtCalls);
   in_preserve_stack_slots(2+VerifyStackAtCalls);
-
   // Number of outgoing stack slots killed above the out_preserve_stack_slots
   // for calls to C.  Supports the var-args backing area for register parms.
   varargs_C_out_slots_killed(0);
diff -r aa0c48844632 -r a7d54b98ca4a src/os/solaris/vm/os_solaris.cpp
--- a/src/os/solaris/vm/os_solaris.cpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/os/solaris/vm/os_solaris.cpp	Wed Jun 03 16:27:17 2009 +0200
@@ -3803,6 +3803,7 @@ int     set_lwp_priority (int ThreadID, 
     int maxClamped     = MIN2(iaLimits.maxPrio, (int)iaInfo->ia_uprilim);
     iaInfo->ia_upri    = scale_to_lwp_priority(iaLimits.minPrio, maxClamped, newPrio);
     iaInfo->ia_uprilim = IA_NOCHANGE;
+    iaInfo->ia_nice    = IA_NOCHANGE;
     iaInfo->ia_mode    = IA_NOCHANGE;
     if (ThreadPriorityVerbose) {
       tty->print_cr ("IA: [%d...%d] %d->%d\n",
diff -r aa0c48844632 -r a7d54b98ca4a src/os_cpu/linux_x86/vm/os_linux_x86.cpp
--- a/src/os_cpu/linux_x86/vm/os_linux_x86.cpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/os_cpu/linux_x86/vm/os_linux_x86.cpp	Wed Jun 03 16:27:17 2009 +0200
@@ -261,7 +261,7 @@ JVM_handle_linux_signal(int sig,
 
       // check if fault address is within thread stack
       if (addr < thread->stack_base() &&
-          addr >= thread->stack_base() - thread->stack_size()) {
+        addr >= thread->stack_base() - thread->stack_size()) {
         // stack overflow
         if (thread->in_stack_yellow_zone(addr)) {
           thread->disable_stack_yellow_zone();
diff -r aa0c48844632 -r a7d54b98ca4a src/share/tools/hsdis/hsdis-demo.c
--- a/src/share/tools/hsdis/hsdis-demo.c	Thu May 14 10:57:58 2009 -0700
+++ b/src/share/tools/hsdis/hsdis-demo.c	Wed Jun 03 16:27:17 2009 +0200
@@ -209,3 +209,4 @@ void disassemble(void* from, void* to) {
   if (res != to)
     printf("*** Result was %p!\n", res);
 }
+
diff -r aa0c48844632 -r a7d54b98ca4a src/share/tools/hsdis/hsdis.c
--- a/src/share/tools/hsdis/hsdis.c	Thu May 14 10:57:58 2009 -0700
+++ b/src/share/tools/hsdis/hsdis.c	Wed Jun 03 16:27:17 2009 +0200
@@ -29,6 +29,7 @@
 
 #include "hsdis.h"
 
+#include <stdint.h>
 #include <sysdep.h>
 #include <libiberty.h>
 #include <bfd.h>
diff -r aa0c48844632 -r a7d54b98ca4a src/share/tools/hsdis/hsdis.h
--- a/src/share/tools/hsdis/hsdis.h	Thu May 14 10:57:58 2009 -0700
+++ b/src/share/tools/hsdis/hsdis.h	Wed Jun 03 16:27:17 2009 +0200
@@ -65,3 +65,4 @@ typedef void* (*decode_instructions_ftyp
                                             decode_instructions_printf_callback_ftype printf_callback,
                                             void* printf_stream,
                                             const char* options);
+
diff -r aa0c48844632 -r a7d54b98ca4a src/share/vm/asm/codeBuffer.hpp
--- a/src/share/vm/asm/codeBuffer.hpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/share/vm/asm/codeBuffer.hpp	Wed Jun 03 16:27:17 2009 +0200
@@ -39,6 +39,12 @@ public:
                  Dtrace_trap = OSR_Entry,  // dtrace probes can never have an OSR entry so reuse it
                  Exceptions,     // Offset where exception handler lives
                  Deopt,          // Offset where deopt handler lives
+                 Verified_Tail_Call_Entry, // Offset for verified tail calls.
+                 Verified_Not_Sibling_Tail_Call_Entry,
+                 Tail_Call_Entry, // Offset for monomorphic tail calls.
+                 Not_Sibling_Tail_Call_Entry,
+                 Verified_Not_Sibling_Tail_Call_Set_Data_Entry,
+                 Not_Sibling_Tail_Call_Set_Data_Entry,
                  max_Entries };
 
   // special value to note codeBlobs where profile (forte) stack walking is
@@ -57,6 +63,12 @@ public:
     _values[OSR_Entry] = 0;
     _values[Exceptions] = -1;
     _values[Deopt] = -1;
+    _values[Verified_Tail_Call_Entry] = 0;
+    _values[Tail_Call_Entry] = 0;
+    _values[Not_Sibling_Tail_Call_Entry] = 0;
+    _values[Verified_Not_Sibling_Tail_Call_Entry] = 0;
+    _values[Verified_Not_Sibling_Tail_Call_Set_Data_Entry] = 0;
+    _values[Not_Sibling_Tail_Call_Set_Data_Entry] = 0;
   }
 
   int value(Entries e) { return _values[e]; }
diff -r aa0c48844632 -r a7d54b98ca4a src/share/vm/c1/c1_Compilation.cpp
--- a/src/share/vm/c1/c1_Compilation.cpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/share/vm/c1/c1_Compilation.cpp	Wed Jun 03 16:27:17 2009 +0200
@@ -218,7 +218,14 @@ void Compilation::emit_code_epilog(LIR_A
   CHECK_BAILOUT();
   assembler->emit_deopt_handler();
   CHECK_BAILOUT();
-
+  assembler->emit_static_tail_call_stub();
+  CHECK_BAILOUT();
+  assembler->emit_monomorphic_tail_call_stub();
+  CHECK_BAILOUT();
+  assembler->emit_not_sibling_monomorphic_tail_call_stub();
+  CHECK_BAILOUT();
+  assembler->emit_static_not_sibling_tail_call_stub();
+  CHECK_BAILOUT();
   // done
   masm()->flush();
 }
@@ -274,8 +281,13 @@ int Compilation::compile_java_method() {
 
   {
     PhaseTraceTime timeit(_t_emit_lir);
-
-    _frame_map = new FrameMap(method(), hir()->number_of_locks(), MAX2(4, hir()->max_stack()));
+    int tail_call_pd_slots = SharedRuntime::tail_call_protection_domain_slots(); 
+    // Really only 1 is needed; the stack frame code depends on
+    // reserved_argument_area_size being a correct maximum. The stack offsets
+    // for incoming arguments do not seem to be calculated correctly:
+    // FrameMap::java_calling_convention() misses a '*BytesPerWord'.
+    _frame_map = new FrameMap(method(), hir()->number_of_locks(), 
+                              MAX2(4, hir()->max_stack()+tail_call_pd_slots));
     emit_lir();
   }
   CHECK_BAILOUT_(no_frame_size);
diff -r aa0c48844632 -r a7d54b98ca4a src/share/vm/c1/c1_FrameMap.cpp
--- a/src/share/vm/c1/c1_FrameMap.cpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/share/vm/c1/c1_FrameMap.cpp	Wed Jun 03 16:27:17 2009 +0200
@@ -49,7 +49,7 @@ BasicTypeArray* FrameMap::signature_type
 }
 
 
-CallingConvention* FrameMap::java_calling_convention(const BasicTypeArray* signature, bool outgoing) {
+CallingConvention* FrameMap::java_calling_convention(const BasicTypeArray* signature, bool outgoing, bool is_tail_call) {
   // compute the size of the arguments first.  The signature array
   // that java_calling_convention takes includes a T_VOID after double
   // work items but our signatures do not.
@@ -76,12 +76,14 @@ CallingConvention* FrameMap::java_callin
     BasicType t = sig_bt[i];
     assert(t != T_VOID, "should be skipping these");
 
-    LIR_Opr opr = map_to_opr(t, regs + i, outgoing);
+    LIR_Opr opr = map_to_opr(t, regs + i, outgoing, is_tail_call);
     args->append(opr);
     if (opr->is_address()) {
       LIR_Address* addr = opr->as_address_ptr();
       assert(addr->disp() == (int)addr->disp(), "out of range value");
+      assert(MAX2(out_preserve, (intptr_t)addr->disp() / 4) == out_preserve, "c1 different from java_call_conv");
       out_preserve = MAX2(out_preserve, (intptr_t)addr->disp() / 4);
+      
     }
     i += type2size[t];
   }
@@ -90,7 +92,10 @@ CallingConvention* FrameMap::java_callin
 
   if (outgoing) {
     // update the space reserved for arguments.
-    update_reserved_argument_area_size(out_preserve);
+    // shouldn't that be out_preserve * BytesPerWord
+    // like in FrameMap::FrameMap()
+    //update_reserved_argument_area_size(out_preserve);
+    update_reserved_argument_area_size(out_preserve*BytesPerWord);
   }
   return new CallingConvention(args, out_preserve);
 }
@@ -161,7 +166,8 @@ FrameMap::FrameMap(ciMethod* method, int
   assert(monitors >= 0, "not set");
   _num_monitors = monitors;
   assert(reserved_argument_area_size >= 0, "not set");
-  _reserved_argument_area_size = MAX2(4, reserved_argument_area_size) * BytesPerWord;
+  int min_outgoing_arg_size = MAX2(MinOutgoingArgStackSlotSize, 4);
+  _reserved_argument_area_size = MAX2(min_outgoing_arg_size, reserved_argument_area_size) * BytesPerWord;
 
   _argcount = method->arg_size();
   _argument_locations = new intArray(_argcount, -1);
diff -r aa0c48844632 -r a7d54b98ca4a src/share/vm/c1/c1_FrameMap.hpp
--- a/src/share/vm/c1/c1_FrameMap.hpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/share/vm/c1/c1_FrameMap.hpp	Wed Jun 03 16:27:17 2009 +0200
@@ -144,7 +144,7 @@ class FrameMap : public CompilationResou
   // stack addresses are expressable in a simm13.
   bool validate_frame();
 
-  static LIR_Opr map_to_opr(BasicType type, VMRegPair* reg, bool incoming);
+  static LIR_Opr map_to_opr(BasicType type, VMRegPair* reg, bool incoming, bool is_tail_call=false);
 
  public:
   // Opr representing the stack_pointer on this platform
@@ -156,7 +156,7 @@ class FrameMap : public CompilationResou
   // for outgoing calls, these also update the reserved area to
   // include space for arguments and any ABI area.
   CallingConvention* c_calling_convention (const BasicTypeArray* signature);
-  CallingConvention* java_calling_convention (const BasicTypeArray* signature, bool outgoing);
+  CallingConvention* java_calling_convention (const BasicTypeArray* signature, bool outgoing, bool is_tail_call=false);
 
   // deopt support
   ByteSize sp_offset_for_orig_pc() { return sp_offset_for_monitor_base(_num_monitors); }
diff -r aa0c48844632 -r a7d54b98ca4a src/share/vm/c1/c1_GraphBuilder.cpp
--- a/src/share/vm/c1/c1_GraphBuilder.cpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/share/vm/c1/c1_GraphBuilder.cpp	Wed Jun 03 16:27:17 2009 +0200
@@ -1504,6 +1504,8 @@ Dependencies* GraphBuilder::dependency_r
 
 void GraphBuilder::invoke(Bytecodes::Code code) {
   bool will_link;
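+  // A wide prefix on an invoke bytecode marks the call as a tail call.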
+  bool is_tail_call = stream()->is_wide();
+
   ciMethod* target = stream()->get_method(will_link);
   // we have to make sure the argument size (incl. the receiver)
   // is correct for compilation (the call would fail later during
@@ -1723,7 +1725,7 @@ void GraphBuilder::invoke(Bytecodes::Cod
     profile_call(recv, target_klass);
   }
 
-  Invoke* result = new Invoke(code, result_type, recv, args, vtable_index, target);
+  Invoke* result = new Invoke(code, result_type, recv, args, vtable_index, target, is_tail_call);
   // push result
   append_split(result);
 
@@ -3338,7 +3340,7 @@ bool GraphBuilder::try_inline_full(ciMet
       !InlineSynchronizedMethods         ) INLINE_BAILOUT("callee is synchronized");
   if (!callee->holder()->is_initialized()) INLINE_BAILOUT("callee's klass not initialized yet");
   if (!callee->has_balanced_monitors())    INLINE_BAILOUT("callee's monitors do not match");
-
+  if (!stream()->is_wide() && callee->contains_tail_call()) INLINE_BAILOUT("callee contains a tail call but the call site is not a tail call");
   // Proper inlining of methods with jsrs requires a little more work.
   if (callee->has_jsrs()                 ) INLINE_BAILOUT("jsrs not handled properly by inliner yet");
 
diff -r aa0c48844632 -r a7d54b98ca4a src/share/vm/c1/c1_Instruction.cpp
--- a/src/share/vm/c1/c1_Instruction.cpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/share/vm/c1/c1_Instruction.cpp	Wed Jun 03 16:27:17 2009 +0200
@@ -334,7 +334,7 @@ void Intrinsic::state_values_do(void f(V
 
 
 Invoke::Invoke(Bytecodes::Code code, ValueType* result_type, Value recv, Values* args,
-               int vtable_index, ciMethod* target)
+               int vtable_index, ciMethod* target, bool is_tail_call)
   : StateSplit(result_type)
   , _code(code)
   , _recv(recv)
@@ -345,7 +345,7 @@ Invoke::Invoke(Bytecodes::Code code, Val
   set_flag(TargetIsLoadedFlag,   target->is_loaded());
   set_flag(TargetIsFinalFlag,    target_is_loaded() && target->is_final_method());
   set_flag(TargetIsStrictfpFlag, target_is_loaded() && target->is_strict());
-
+  set_flag(TailCallFlag, is_tail_call);
   assert(args != NULL, "args must exist");
 #ifdef ASSERT
   values_do(assert_value);
diff -r aa0c48844632 -r a7d54b98ca4a src/share/vm/c1/c1_Instruction.hpp
--- a/src/share/vm/c1/c1_Instruction.hpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/share/vm/c1/c1_Instruction.hpp	Wed Jun 03 16:27:17 2009 +0200
@@ -312,7 +312,8 @@ class Instruction: public CompilationRes
     NeedsPatchingFlag,
     ThrowIncompatibleClassChangeErrorFlag,
     ProfileMDOFlag,
-    InstructionLastFlag
+    TailCallFlag,
+    InstructionLastFlag
   };
 
  public:
@@ -1144,7 +1145,7 @@ LEAF(Invoke, StateSplit)
  public:
   // creation
   Invoke(Bytecodes::Code code, ValueType* result_type, Value recv, Values* args,
-         int vtable_index, ciMethod* target);
+         int vtable_index, ciMethod* target, bool is_tail_call=false);
 
   // accessors
   Bytecodes::Code code() const                   { return _code; }
@@ -1161,6 +1162,9 @@ LEAF(Invoke, StateSplit)
   bool target_is_loaded() const                  { return check_flag(TargetIsLoadedFlag); }
   // Returns false if target is not loaded
   bool target_is_strictfp() const                { return check_flag(TargetIsStrictfpFlag); }
+
+  // Is this a tail call?
+  bool is_tail_call() const                      { return check_flag(TailCallFlag); }
 
   // generic
   virtual bool can_trap() const                  { return true; }
diff -r aa0c48844632 -r a7d54b98ca4a src/share/vm/c1/c1_LIR.hpp
--- a/src/share/vm/c1/c1_LIR.hpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/share/vm/c1/c1_LIR.hpp	Wed Jun 03 16:27:17 2009 +0200
@@ -1032,25 +1032,28 @@ class LIR_OpJavaCall: public LIR_OpCall 
  private:
   ciMethod*       _method;
   LIR_Opr         _receiver;
-
+  bool            _is_tail_call;
  public:
   LIR_OpJavaCall(LIR_Code code, ciMethod* method,
                  LIR_Opr receiver, LIR_Opr result,
                  address addr, LIR_OprList* arguments,
-                 CodeEmitInfo* info)
+                 CodeEmitInfo* info, bool is_tail_call)
   : LIR_OpCall(code, addr, result, arguments, info)
   , _receiver(receiver)
-  , _method(method)          { assert(is_in_range(code, begin_opJavaCall, end_opJavaCall), "code check"); }
+  , _method(method)
+  , _is_tail_call(is_tail_call) { assert(is_in_range(code, begin_opJavaCall, end_opJavaCall), "code check"); }
 
   LIR_OpJavaCall(LIR_Code code, ciMethod* method,
                  LIR_Opr receiver, LIR_Opr result, intptr_t vtable_offset,
-                 LIR_OprList* arguments, CodeEmitInfo* info)
+                 LIR_OprList* arguments, CodeEmitInfo* info, bool is_tail_call)
   : LIR_OpCall(code, (address)vtable_offset, result, arguments, info)
   , _receiver(receiver)
-  , _method(method)          { assert(is_in_range(code, begin_opJavaCall, end_opJavaCall), "code check"); }
+  , _method(method)
+  , _is_tail_call(is_tail_call) { assert(is_in_range(code, begin_opJavaCall, end_opJavaCall), "code check"); }
 
   LIR_Opr receiver() const                       { return _receiver; }
   ciMethod* method() const                       { return _method;   }
+  bool is_tail_call() const                      { return _is_tail_call; }
 
   intptr_t vtable_offset() const {
     assert(_code == lir_virtual_call, "only have vtable for real vcall");
@@ -1751,20 +1754,20 @@ class LIR_List: public CompilationResour
   //---------- instructions -------------
   void call_opt_virtual(ciMethod* method, LIR_Opr receiver, LIR_Opr result,
                         address dest, LIR_OprList* arguments,
-                        CodeEmitInfo* info) {
-    append(new LIR_OpJavaCall(lir_optvirtual_call, method, receiver, result, dest, arguments, info));
+                        CodeEmitInfo* info, bool is_tail_call=false) {
+    append(new LIR_OpJavaCall(lir_optvirtual_call, method, receiver, result, dest, arguments, info, is_tail_call));
   }
   void call_static(ciMethod* method, LIR_Opr result,
-                   address dest, LIR_OprList* arguments, CodeEmitInfo* info) {
-    append(new LIR_OpJavaCall(lir_static_call, method, LIR_OprFact::illegalOpr, result, dest, arguments, info));
+                   address dest, LIR_OprList* arguments, CodeEmitInfo* info, bool is_tail_call=false) {
+    append(new LIR_OpJavaCall(lir_static_call, method, LIR_OprFact::illegalOpr, result, dest, arguments, info, is_tail_call));
   }
   void call_icvirtual(ciMethod* method, LIR_Opr receiver, LIR_Opr result,
-                      address dest, LIR_OprList* arguments, CodeEmitInfo* info) {
-    append(new LIR_OpJavaCall(lir_icvirtual_call, method, receiver, result, dest, arguments, info));
+                      address dest, LIR_OprList* arguments, CodeEmitInfo* info, bool is_tail_call=false) {
+    append(new LIR_OpJavaCall(lir_icvirtual_call, method, receiver, result, dest, arguments, info, is_tail_call));
   }
   void call_virtual(ciMethod* method, LIR_Opr receiver, LIR_Opr result,
-                    intptr_t vtable_offset, LIR_OprList* arguments, CodeEmitInfo* info) {
-    append(new LIR_OpJavaCall(lir_virtual_call, method, receiver, result, vtable_offset, arguments, info));
+                    intptr_t vtable_offset, LIR_OprList* arguments, CodeEmitInfo* info, bool is_tail_call=false) {
+    append(new LIR_OpJavaCall(lir_virtual_call, method, receiver, result, vtable_offset, arguments, info, is_tail_call));
   }
 
   void get_thread(LIR_Opr result)                { append(new LIR_Op0(lir_get_thread, result)); }
diff -r aa0c48844632 -r a7d54b98ca4a src/share/vm/c1/c1_LIRAssembler.cpp
--- a/src/share/vm/c1/c1_LIRAssembler.cpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/share/vm/c1/c1_LIRAssembler.cpp	Wed Jun 03 16:27:17 2009 +0200
@@ -411,10 +411,11 @@ void LIR_Assembler::emit_rtcall(LIR_OpRT
 
 void LIR_Assembler::emit_call(LIR_OpJavaCall* op) {
   verify_oop_map(op->info());
-
+  
   if (os::is_MP()) {
-    // must align calls sites, otherwise they can't be updated atomically on MP hardware
-    align_call(op->code());
+    // Must align calls sites, otherwise they can't be updated atomically on MP
+    // hardware.
+    align_call(op->code(), op->is_tail_call());
   }
 
   // emit the static call stub stuff out of line
@@ -428,7 +429,7 @@ void LIR_Assembler::emit_call(LIR_OpJava
     call(op->addr(), relocInfo::opt_virtual_call_type, op->info());
     break;
   case lir_icvirtual_call:
-    ic_call(op->addr(), op->info());
+    ic_call(op->addr(), op->info(), op->is_tail_call());
     break;
   case lir_virtual_call:
     vtable_call(op->vtable_offset(), op->info());
@@ -570,6 +571,7 @@ void LIR_Assembler::emit_op0(LIR_Op0* op
 
     case lir_std_entry:
       // init offsets
+      assert(_masm->offset() == 0, "emit_static_tail_call_stub depends on this constraint");
       offsets()->set_value(CodeOffsets::OSR_Entry, _masm->offset());
       _masm->align(CodeEntryAlignment);
       if (needs_icache(compilation()->method())) {
diff -r aa0c48844632 -r a7d54b98ca4a src/share/vm/c1/c1_LIRAssembler.hpp
--- a/src/share/vm/c1/c1_LIRAssembler.hpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/share/vm/c1/c1_LIRAssembler.hpp	Wed Jun 03 16:27:17 2009 +0200
@@ -129,6 +129,10 @@ class LIR_Assembler: public CompilationR
   // stubs
   void emit_slow_case_stubs();
   void emit_static_call_stub();
+  void emit_static_tail_call_stub();
+  void emit_static_not_sibling_tail_call_stub();
+  void emit_monomorphic_tail_call_stub();
+  void emit_not_sibling_monomorphic_tail_call_stub();
   void emit_code_stub(CodeStub* op);
   void add_call_info_here(CodeEmitInfo* info)                              { add_call_info(code_offset(), info); }
 
@@ -205,7 +209,10 @@ class LIR_Assembler: public CompilationR
   void comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr result, LIR_Op2* op);
   void cmove(LIR_Condition code, LIR_Opr left, LIR_Opr right, LIR_Opr result);
 
-  void ic_call(address destination, CodeEmitInfo* info);
+  // Set protection domain token for virtual tail calls.
+  void set_protection_domain_token();
+  void tail_call(address destination, RelocationHolder & rh);
+  void ic_call(address destination, CodeEmitInfo* info, bool is_tail_call);
   void vtable_call(int vtable_offset, CodeEmitInfo* info);
   void call(address entry, relocInfo::relocType rtype, CodeEmitInfo* info);
 
@@ -217,7 +224,7 @@ class LIR_Assembler: public CompilationR
   void monitor_address(int monitor_ix, LIR_Opr dst);
 
   void align_backward_branch_target();
-  void align_call(LIR_Code code);
+  void align_call(LIR_Code code, bool is_tail_call);
 
   void negate(LIR_Opr left, LIR_Opr dest);
   void leal(LIR_Opr left, LIR_Opr dest);
diff -r aa0c48844632 -r a7d54b98ca4a src/share/vm/c1/c1_LIRGenerator.cpp
--- a/src/share/vm/c1/c1_LIRGenerator.cpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/share/vm/c1/c1_LIRGenerator.cpp	Wed Jun 03 16:27:17 2009 +0200
@@ -2257,6 +2257,18 @@ void LIRGenerator::do_OsrEntry(OsrEntry*
   __ move(LIR_Assembler::osrBufferPointer(), result);
 }
 
+bool LIRGenerator::is_sibling_call(CallingConvention * callee_cc) {
+  // The size of the incoming argument area must accommodate the outgoing arguments.
+  // TODO: the following number 4 must match c1_FrameMap.cpp:FrameMap::FrameMap
+  // and c1_Compilation:compile_java_method.
+  //int incoming_args_slots = MAX2(4,
+  // frame_map()->incoming_arguments()->reserved_stack_slots());
+  int incoming_args_slots = frame_map()->incoming_arguments()->reserved_stack_slots();
+  int min_outgoing_arg_size = MAX2(MinOutgoingArgStackSlotSize, 4);
+  incoming_args_slots = MAX2(min_outgoing_arg_size, incoming_args_slots);
+  int outgoing_args_slots = callee_cc->reserved_stack_slots();
+  return outgoing_args_slots <= incoming_args_slots;
+}
 
 void LIRGenerator::invoke_load_arguments(Invoke* x, LIRItemList* args, const LIR_OprList* arg_list) {
   int i = x->has_receiver() ? 1 : 0;
@@ -2291,7 +2303,7 @@ void LIRGenerator::invoke_load_arguments
 
 
 // Visits all arguments, returns appropriate items without loading them
-LIRItemList* LIRGenerator::invoke_visit_arguments(Invoke* x) {
+LIRItemList* LIRGenerator::invoke_visit_arguments(Invoke* x, bool is_tail_call) {
   LIRItemList* argument_items = new LIRItemList();
   if (x->has_receiver()) {
     LIRItem* receiver = new LIRItem(x->receiver(), this);
@@ -2300,6 +2312,16 @@ LIRItemList* LIRGenerator::invoke_visit_
   int idx = x->has_receiver() ? 1 : 0;
   for (int i = 0; i < x->number_of_arguments(); i++) {
     LIRItem* param = new LIRItem(x->argument_at(i), this);
+    Instruction * instr = instruction_for_opr(param->result());
+    if(instr && is_tail_call) {
+      if (instr->as_Local()!=NULL) {
+        // Generate another move to a virtual register. This is done in order to
+        // prevent arguments sourcing from a stack slot being overwritten when
+        // tail calling. Related code: c1_LinearScan::handle_method_arguments.
+        LIR_Opr dest = new_register(param->result()->type());
+        param->load_item_force(dest);
+      }
+    }
     argument_items->append(param);
     idx += (param->type()->is_double_word() ? 2 : 1);
   }
@@ -2331,11 +2353,15 @@ LIRItemList* LIRGenerator::invoke_visit_
 // - if we keep the receiver locked while doing spill-save,
 //   we cannot spill it as it is spill-locked
 //
-void LIRGenerator::do_Invoke(Invoke* x) {
+void LIRGenerator::do_Invoke(Invoke* x) {  
   CallingConvention* cc = frame_map()->java_calling_convention(x->signature(), true);
+  bool is_tail = x->is_tail_call();
+  bool is_sibling = is_tail ? is_sibling_call(cc) : false;
+  //CallingConvention* cc_tail = frame_map()->java_calling_convention(x->signature(), true, is_sibling);
+  //cc = is_sibling ? cc_tail : cc;
 
   LIR_OprList* arg_list = cc->args();
-  LIRItemList* args = invoke_visit_arguments(x);
+  LIRItemList* args = invoke_visit_arguments(x, false);
   LIR_Opr receiver = LIR_OprFact::illegalOpr;
 
   // setup result register
@@ -2359,9 +2385,19 @@ void LIRGenerator::do_Invoke(Invoke* x) 
 
   switch (x->code()) {
     case Bytecodes::_invokestatic:
-      __ call_static(x->target(), result_register,
-                     SharedRuntime::get_resolve_static_call_stub(),
-                     arg_list, info);
+      if (is_tail && is_sibling) {
+        __ call_static(x->target(), result_register,
+                     SharedRuntime::get_resolve_static_tail_call_stub(),
+                       arg_list, info, is_tail); 
+      } else if (is_tail) {
+        // non sibling call
+        __ call_static(x->target(), result_register,
+                     SharedRuntime::get_resolve_not_sibling_static_tail_call_stub(),
+                       arg_list, info, is_tail);
+      } else {
+        __ call_static(x->target(), result_register,
+                       SharedRuntime::get_resolve_static_call_stub(),
+                       arg_list, info, is_tail);
+      }
       break;
     case Bytecodes::_invokespecial:
     case Bytecodes::_invokevirtual:
@@ -2369,13 +2405,32 @@ void LIRGenerator::do_Invoke(Invoke* x) 
       // for final target we still produce an inline cache, in order
       // to be able to call mixed mode
       if (x->code() == Bytecodes::_invokespecial || optimized) {
-        __ call_opt_virtual(x->target(), receiver, result_register,
+        if (is_tail && is_sibling) {
+          __ call_opt_virtual(x->target(), receiver, result_register,
+                            SharedRuntime::get_resolve_opt_virtual_tail_call_stub(),
+                            arg_list, info, is_tail);
+        } else if (is_tail) {
+          __ call_opt_virtual(x->target(), receiver, result_register,
+                            SharedRuntime::get_resolve_opt_not_sibling_virtual_tail_call_stub(),
+                            arg_list, info, is_tail);
+        } else
+          __ call_opt_virtual(x->target(), receiver, result_register,
                             SharedRuntime::get_resolve_opt_virtual_call_stub(),
+                            arg_list, info, is_tail);
+      } else if (x->vtable_index() < 0) {
+        if (is_tail && is_sibling) {
+          __ call_icvirtual(x->target(), receiver, result_register,
+                            SharedRuntime::get_resolve_virtual_tail_call_stub(),
+                            arg_list, info, is_tail);
+        } else if (is_tail) {
+          __ call_icvirtual(x->target(), receiver, result_register,
+                            SharedRuntime::get_resolve_not_sibling_virtual_tail_call_stub(),
+                            arg_list, info, is_tail);
+        } else {
+          __ call_icvirtual(x->target(), receiver, result_register,
+                            SharedRuntime::get_resolve_virtual_call_stub(),
                             arg_list, info);
-      } else if (x->vtable_index() < 0) {
-        __ call_icvirtual(x->target(), receiver, result_register,
-                          SharedRuntime::get_resolve_virtual_call_stub(),
-                          arg_list, info);
+        }
       } else {
         int entry_offset = instanceKlass::vtable_start_offset() + x->vtable_index() * vtableEntry::size();
         int vtable_offset = entry_offset * wordSize + vtableEntry::method_offset_in_bytes();
diff -r aa0c48844632 -r a7d54b98ca4a src/share/vm/c1/c1_LIRGenerator.hpp
--- a/src/share/vm/c1/c1_LIRGenerator.hpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/share/vm/c1/c1_LIRGenerator.hpp	Wed Jun 03 16:27:17 2009 +0200
@@ -30,6 +30,7 @@ class Invoke;
 class Invoke;
 class SwitchRange;
 class LIRItem;
+class CallingConvention;
 
 define_array(LIRItemArray, LIRItem*)
 define_stack(LIRItemList, LIRItemArray)
@@ -269,8 +270,10 @@ class LIRGenerator: public InstructionVi
 
   ciObject* get_jobject_constant(Value value);
 
-  LIRItemList* invoke_visit_arguments(Invoke* x);
+  LIRItemList* invoke_visit_arguments(Invoke* x, bool is_tail_call);
   void invoke_load_arguments(Invoke* x, LIRItemList* args, const LIR_OprList* arg_list);
+  // Tail call optimization support
+  bool is_sibling_call(CallingConvention * callee_cc);
 
   void trace_block_entry(BlockBegin* block);
 
diff -r aa0c48844632 -r a7d54b98ca4a src/share/vm/c1/c1_Runtime1.cpp
--- a/src/share/vm/c1/c1_Runtime1.cpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/share/vm/c1/c1_Runtime1.cpp	Wed Jun 03 16:27:17 2009 +0200
@@ -140,9 +140,18 @@ void Runtime1::setup_code_buffer(CodeBuf
                                         locs_buffer_size / sizeof(relocInfo));
   code->initialize_consts_size(desired_max_constant_size());
   // Call stubs + deopt/exception handler
+  int max_arg_count = 2 * 32;
+  int tail_call_stubs = 4;
+  int move_arg_size = 8; // SYNC: keep in sync with number in tail call stubs.
   code->initialize_stubs_size((call_stub_estimate * LIR_Assembler::call_stub_size) +
                               LIR_Assembler::exception_handler_size +
-                              LIR_Assembler::deopt_handler_size);
+                              LIR_Assembler::deopt_handler_size +
+                              LIR_Assembler::static_tail_call_stub_size+
+                              LIR_Assembler::static_not_sibling_tail_call_stub_size+
+                              LIR_Assembler::monomorphic_tail_call_stub_size+
+                              LIR_Assembler::monomorphic_not_sibling_tail_call_stub_size+
+                              (move_arg_size * max_arg_count * tail_call_stubs)); // TODO: make a decent estimate that
+                                         // takes parameter moving into account.
 }
 
 
diff -r aa0c48844632 -r a7d54b98ca4a src/share/vm/ci/ciMethod.cpp
--- a/src/share/vm/ci/ciMethod.cpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/share/vm/ci/ciMethod.cpp	Wed Jun 03 16:27:17 2009 +0200
@@ -49,6 +49,9 @@ ciMethod::ciMethod(methodHandle h_m) : c
   _handler_count      = h_m()->exception_table()->length() / 4;
   _uses_monitors      = h_m()->access_flags().has_monitor_bytecodes();
   _balanced_monitors  = !_uses_monitors || h_m()->access_flags().is_monitor_matching();
+  _contains_tail_call = h_m()->contains_tail_call();
+  _contains_tail_call_var_initialized = false;
+
   _is_compilable      = !h_m()->is_not_compilable();
   // Lazy fields, filled in on demand.  Require allocation.
   _code               = NULL;
@@ -124,6 +127,8 @@ ciMethod::ciMethod(ciInstanceKlass* hold
   _bcea = NULL;
   _method_blocks = NULL;
   _method_data = NULL;
+  _contains_tail_call = false;
+  _contains_tail_call_var_initialized = false;
 #ifdef COMPILER2
   _flow = NULL;
 #endif // COMPILER2
@@ -286,6 +291,47 @@ bool ciMethod::has_balanced_monitors() {
     }
     method->set_guaranteed_monitor_matching();
     _balanced_monitors = true;
+  }
+  return true;
+}
+
+// ------------------------------------------------------------------
+// ciMethod::contains_tail_call
+//
+// Does this method contain a tail call?
+bool ciMethod::contains_tail_call() {
+  check_is_loaded();
+
+  if (_contains_tail_call) return true;
+
+  if (_contains_tail_call_var_initialized)
+    return _contains_tail_call;
+
+  // Analyze the method to see if it contains a tail call.
+  VM_ENTRY_MARK;
+  methodHandle method(THREAD, get_methodOop());
+
+  // Check to see if a previous analysis already set the
+  // tail-call flag on the methodOop.
+  if (method->contains_tail_call()) {
+    _contains_tail_call = true;
+    return true;
+  }
+
+  {
+    EXCEPTION_MARK;
+    ResourceMark rm(THREAD);
+    ContainsTailCallInfo tci(method);
+    tci.compute_map(CATCH);
+    if (!tci.contains_tail_call()) {
+      _contains_tail_call = false;
+      // This flag only needs to be set here: if _contains_tail_call is true
+      // we already know that its value has been computed.
+      _contains_tail_call_var_initialized = true;
+      return false;
+    }
+    method->set_contains_tail_call(true);
+    _contains_tail_call = true;
   }
   return true;
 }
diff -r aa0c48844632 -r a7d54b98ca4a src/share/vm/ci/ciMethod.hpp
--- a/src/share/vm/ci/ciMethod.hpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/share/vm/ci/ciMethod.hpp	Wed Jun 03 16:27:17 2009 +0200
@@ -62,6 +62,8 @@ class ciMethod : public ciObject {
   bool _balanced_monitors;
   bool _is_compilable;
   bool _can_be_statically_bound;
+  bool _contains_tail_call;
+  bool _contains_tail_call_var_initialized;
 
   // Lazy fields, filled in on demand
   address              _code;
@@ -148,6 +150,7 @@ class ciMethod : public ciObject {
   bool          uses_monitors() const            { return _uses_monitors; } // this one should go away, it has a misleading name
   bool          has_monitor_bytecodes() const    { return _uses_monitors; }
   bool          has_balanced_monitors();
+  bool          contains_tail_call();
 
   MethodLivenessResult liveness_at_bci(int bci);
 
diff -r aa0c48844632 -r a7d54b98ca4a src/share/vm/ci/ciStreams.cpp
--- a/src/share/vm/ci/ciStreams.cpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/share/vm/ci/ciStreams.cpp	Wed Jun 03 16:27:17 2009 +0200
@@ -88,10 +88,9 @@ Bytecodes::Code ciBytecodeStream::wide()
 {
   // Get following bytecode; do not return wide
   Bytecodes::Code bc = (Bytecodes::Code)_pc[1];
-  _pc += 2;                     // Skip both bytecodes
-  _pc += 2;                     // Skip index always
-  if( bc == Bytecodes::_iinc )
-    _pc += 2;                   // Skip optional constant
+  Bytecodes::Prefix pfx = Bytecodes::allowed_prefix(bc);
+  int advance = Bytecodes::length_for(pfx, bc);
+  _pc += advance;
   _was_wide = _pc;              // Flag last wide bytecode found
   return bc;
 }
diff -r aa0c48844632 -r a7d54b98ca4a src/share/vm/ci/ciStreams.hpp
--- a/src/share/vm/ci/ciStreams.hpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/share/vm/ci/ciStreams.hpp	Wed Jun 03 16:27:17 2009 +0200
@@ -122,7 +122,7 @@ public:
     return check_java(_bc);
   }
 
-  bool is_wide() const { return ( _pc == _was_wide ); }
+  bool is_wide() const  { return ( _pc == _was_wide ); }
 
   // Get a byte index following this bytecode.
   // If prefixed with a wide bytecode, get a wide index.
@@ -131,12 +131,6 @@ public:
     return (_pc == _was_wide)   // was widened?
       ? Bytes::get_Java_u2(_bc_start+2) // yes, return wide index
       : _bc_start[1];           // no, return narrow index
-  }
-
-  // Get 2-byte index (getfield/putstatic/etc)
-  int get_index_big() const {
-    assert_index_size(2);
-    return Bytes::get_Java_u2(_bc_start+1);
   }
 
   // Get 2-byte index (or 4-byte, for invokedynamic)
@@ -148,6 +142,20 @@ public:
   int get_index_giant() const {
     assert_index_size(4);
     return Bytes::get_native_u4(_bc_start+1);
+  }
+
+  // Get 2-byte index (getfield/putstatic/etc)
+  int get_index_big() const { 
+    assert_index_size(is_wide() ? 2 : 1);
+    if (is_wide()) {
+      assert(_bc == Bytecodes::_invokestatic ||
+             _bc == Bytecodes::_invokespecial ||
+             _bc == Bytecodes::_invokevirtual ||
+             _bc == Bytecodes::_invokeinterface,
+             "Previously this worked without the is_wide() check, so it is assumed to be called only for wide invokes."); // Just to check.
+      return Bytes::get_Java_u2(_bc_start+2);
+    }
+    return Bytes::get_Java_u2(_bc_start+1); 
   }
 
   bool has_giant_index() const { return (cur_bc() == Bytecodes::_invokedynamic); }
diff -r aa0c48844632 -r a7d54b98ca4a src/share/vm/classfile/verifier.cpp
--- a/src/share/vm/classfile/verifier.cpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/share/vm/classfile/verifier.cpp	Wed Jun 03 16:27:17 2009 +0200
@@ -323,6 +323,7 @@ void ClassVerifier::verify_method(method
   bool no_control_flow = false; // Set to true when there is no direct control
                                 // flow from current instruction to the next
                                 // instruction in sequence
+  bool handlers_prohibited = false; // Set to true immediately after a tail-call.
   Bytecodes::Code opcode;
   while (!bcs.is_last_bytecode()) {
     opcode = bcs.raw_next();
@@ -330,6 +331,8 @@ void ClassVerifier::verify_method(method
 
     // Set current frame's offset to bci
     current_frame.set_offset(bci);
+
+    assert(handlers_prohibited == false, "should be set true only briefly");
 
     // Make sure every offset in stackmap table point to the beginning to
     // an instruction. Match current_frame to stackmap_table entry with
@@ -350,22 +353,46 @@ void ClassVerifier::verify_method(method
 #ifndef PRODUCT
       if (_verify_verbose) {
         current_frame.print();
-        tty->print_cr("offset = %d,  opcode = %s", bci, Bytecodes::name(opcode));
+        if (bcs.has_prefix())
+          tty->print_cr("offset = %d,  opcode = %s:%s",
+                        bci, Bytecodes::prefix_name(bcs.prefix()), Bytecodes::name(opcode));
+        else
+          tty->print_cr("offset = %d,  opcode = %s", bci, Bytecodes::name(opcode));
       }
 #endif
 
       // Make sure wide instruction is in correct format
-      if (bcs.is_wide()) {
+      if (bcs.prefix() == Bytecodes::Prefix_illegal) {
+        // BytecodeStream checks and decodes all prefixes.
+        verify_error(bci, "Bad wide instruction");
+        return;
+      }
+
+#ifdef ASSERT
+      switch (bcs.prefix()) {
+      case Bytecodes::Prefix_none:
+        break;
+      case Bytecodes::Prefix_wide_index:
         if (opcode != Bytecodes::_iinc   && opcode != Bytecodes::_iload  &&
             opcode != Bytecodes::_aload  && opcode != Bytecodes::_lload  &&
             opcode != Bytecodes::_istore && opcode != Bytecodes::_astore &&
             opcode != Bytecodes::_lstore && opcode != Bytecodes::_fload  &&
             opcode != Bytecodes::_dload  && opcode != Bytecodes::_fstore &&
             opcode != Bytecodes::_dstore) {
-          verify_error(bci, "Bad wide instruction");
-          return;
+          assert(false, "should have seen Prefix_unknown");
         }
+        break;
+      case Bytecodes::Prefix_tail_call:
+        assert(TailCalls, "");
+        if (opcode != Bytecodes::_invokevirtual   && opcode != Bytecodes::_invokespecial  &&
+            opcode != Bytecodes::_invokeinterface && opcode != Bytecodes::_invokestatic) {
+          assert(false, "should have seen Prefix_unknown");
+        }
+        break;
+      default:
+        ShouldNotReachHere();
       }
+#endif //ASSERT
 
       switch (opcode) {
         case Bytecodes::_nop :
@@ -1169,15 +1196,16 @@ void ClassVerifier::verify_method(method
         case Bytecodes::_invokevirtual :
         case Bytecodes::_invokespecial :
         case Bytecodes::_invokestatic :
-          verify_invoke_instructions(
-            &bcs, code_length, &current_frame,
-            &this_uninit, return_type, cp, CHECK_VERIFY(this));
-          no_control_flow = false; break;
         case Bytecodes::_invokeinterface :
         case Bytecodes::_invokedynamic :
           verify_invoke_instructions(
             &bcs, code_length, &current_frame,
             &this_uninit, return_type, cp, CHECK_VERIFY(this));
+          // Check for tail call.
+          if (bcs.prefix() == Bytecodes::Prefix_tail_call) {
+            verify_tail_call(&bcs, bci, CHECK_VERIFY(this));
+            handlers_prohibited = true;
+          }
           no_control_flow = false; break;
         case Bytecodes::_new :
         {
@@ -1279,8 +1307,10 @@ void ClassVerifier::verify_method(method
     // matches current_frame
     if (bci >= ex_min && bci < ex_max) {
       verify_exception_handler_targets(
-        bci, this_uninit, &current_frame, &stackmap_table, CHECK_VERIFY(this));
+        bci, this_uninit, handlers_prohibited, &current_frame, &stackmap_table, CHECK_VERIFY(this));
     }
+    handlers_prohibited = false; // reset for the next iteration of the loop
+
   } // end while
 
   // Make sure that control flow does not fall through end of the method
@@ -1416,8 +1446,9 @@ u2 ClassVerifier::verify_stackmap_table(
   return stackmap_index;
 }
 
-void ClassVerifier::verify_exception_handler_targets(u2 bci, bool this_uninit, StackMapFrame* current_frame,
-                                                     StackMapTable* stackmap_table, TRAPS) {
+void ClassVerifier::verify_exception_handler_targets(
+    u2 bci, bool this_uninit, bool handlers_prohibited,
+    StackMapFrame* current_frame, StackMapTable* stackmap_table, TRAPS) {
   constantPoolHandle cp (THREAD, _method->constants());
   typeArrayHandle exhandlers (THREAD, _method->exception_table());
   if (exhandlers() != NULL) {
@@ -1427,6 +1458,13 @@ void ClassVerifier::verify_exception_han
       u2 handler_pc = exhandlers->int_at(i++);
       int catch_type_index = exhandlers->int_at(i++);
       if(bci >= start_pc && bci < end_pc) {
+        if (handlers_prohibited) {
+          verify_error(bci,
+            "Tail-call covered by exception handler %d",
+            handler_pc);
+          return;
+        }
+
         u1 flags = current_frame->flags();
         if (this_uninit) {  flags |= FLAG_THIS_UNINIT; }
 
@@ -1724,8 +1762,9 @@ void ClassVerifier::verify_field_instruc
   }
 
   // Get referenced class type
-  VerificationType ref_class_type = cp_ref_index_to_type(
-    index, cp, CHECK_VERIFY(this));
+  symbolHandle ref_class_name = symbolHandle(THREAD,
+                                             cp->klass_name_at(cp->klass_ref_index_at(index)));
+  VerificationType ref_class_type = VerificationType::reference_type(ref_class_name);
   if (!ref_class_type.is_object()) {
     verify_error(
       "Expecting reference to class in class %s at constant pool index %d",
@@ -1794,8 +1833,6 @@ void ClassVerifier::verify_field_instruc
     check_protected: {
       if (_this_type == stack_object_type)
         break; // stack_object_type must be assignable to _current_class_type
-      symbolHandle ref_class_name = symbolHandle(THREAD,
-        cp->klass_name_at(cp->klass_ref_index_at(index)));
       if (!name_in_supers(ref_class_name(), current_class()))
         // stack_object_type must be assignable to _current_class_type since:
         // 1. stack_object_type must be assignable to ref_class.
@@ -1891,6 +1928,7 @@ void ClassVerifier::verify_invoke_instru
     RawBytecodeStream* bcs, u4 code_length, StackMapFrame* current_frame,
     bool *this_uninit, VerificationType return_type,
     constantPoolHandle cp, TRAPS) {
+
   // Make sure the constant pool item is the right type
   u2 index = bcs->get_index_big();
   Bytecodes::Code opcode = bcs->code();
@@ -1976,7 +2014,7 @@ void ClassVerifier::verify_invoke_instru
   // Check instruction operands
   u2 bci = bcs->bci();
   if (opcode == Bytecodes::_invokeinterface) {
-    address bcp = bcs->bcp();
+    address bcp = bcs->bcp() + (bcs->prefix() == Bytecodes::Prefix_tail_call ? 1 : 0);
     // 4905268: count operand in invokeinterface should be nargs+1, not nargs.
     // JSR202 spec: The count operand of an invokeinterface instruction is valid if it is
     // the difference between the size of the operand stack before and after the instruction
@@ -2002,8 +2040,12 @@ void ClassVerifier::verify_invoke_instru
   if (method_name->byte_at(0) == '<') {
     // Make sure <init> can only be invoked by invokespecial
     if (opcode != Bytecodes::_invokespecial ||
+        bcs->has_prefix() ||    // no tail-calls to <init>
         method_name() != vmSymbols::object_initializer_name()) {
-      verify_error(bci, "Illegal call to internal method");
+      if (bcs->has_prefix())
+        verify_error(bci, "Illegal tail-call to internal method");
+      else
+        verify_error(bci, "Illegal call to internal method");
       return;
     }
   } else if (opcode == Bytecodes::_invokespecial
@@ -2037,8 +2079,8 @@ void ClassVerifier::verify_invoke_instru
           current_frame->pop_stack(ref_class_type, CHECK_VERIFY(this));
         if (current_type() != stack_object_type) {
           assert(cp->cache() == NULL, "not rewritten yet");
-          symbolHandle ref_class_name = symbolHandle(THREAD,
-            cp->klass_name_at(cp->klass_ref_index_at(index)));
+          symbolHandle ref_class_name = symbolHandle(THREAD,
+                                                     cp->klass_name_at(cp->klass_ref_index_at(index)));
           // See the comments in verify_field_instructions() for
           // the rationale behind this.
           if (name_in_supers(ref_class_name(), current_class())) {
@@ -2083,6 +2125,97 @@ void ClassVerifier::verify_invoke_instru
     for (int i = 0; i < n; i++) {
       current_frame->push_stack(return_type[i], CHECK_VERIFY(this)); // push types backwards
     }
+  }
+}
+
+// Follow a chain of gotos. Check that gotos are followed by a xxxreturn or
+// pop/return instruction. Returns false if this check fails.
+static bool verify_tail_call_chain_of_gotos(methodHandle& method, RawBytecodeStream& bcs) {
+  assert(bcs.code() == Bytecodes::_goto, "Expected a goto instruction");
+  RawBytecodeStream lookahead_bcs(method);
+  // Target of goto must be within the method's boundary.
+  if (bcs.dest() > lookahead_bcs.end_bci() ||
+      bcs.dest() < 0) return false;
+  lookahead_bcs.set_start(bcs.dest());
+  // Verify target. Must either be
+  //  * further goto
+  //  * return
+  //  * pop followed by return
+  switch(lookahead_bcs.raw_next()) {
+    case Bytecodes::_pop:
+      // Pop of result because of void-return.
+      if (lookahead_bcs.raw_next()==Bytecodes::_return) {
+        return true;
+      }
+      break;
+    case Bytecodes::_goto:
+      // Follow a chain of gotos. Expect a return or a pop/return combination.
+      return verify_tail_call_chain_of_gotos(method, lookahead_bcs);
+      break;
+    case Bytecodes::_ireturn :
+    case Bytecodes::_lreturn :
+    case Bytecodes::_freturn :
+    case Bytecodes::_dreturn :
+    case Bytecodes::_areturn :
+    case Bytecodes::_return :
+      // Next iteration of main loop will verify compatibility of return value.
+      // Note:  This allows a certain amount of "widening" of the result.
+      // A void method can tail-call a non-void method, etc.
+      return true;
+      break;
+    default:
+      // Fail.
+      break;
+  }
+  return false;
+}
+
+void ClassVerifier::verify_tail_call(
+    RawBytecodeStream* bcs, u2 bci, TRAPS) {
+  assert(TailCalls, "BCS will produce tailcalls only if feature is enabled");
+
+  // Rules for tail call:
+  //  - Must be immediately followed by a return opcode. (Checked here.)
+  //  - Return values are consistent.  (Checked by return opcode.)
+  //  - No exception handlers.  (Checked by caller, via handlers_prohibited.)
+  //  - Caller method not synchronized.  (Checked here.)
+  //  - Caller holding no object locks.  (IllegalMonitorStateException test.)
+  //  - Callee accessible from caller.  (Checked as in non-tail case.)
+  RawBytecodeStream lookahead_bcs(_method);
+  lookahead_bcs.set_start(bcs->next_bci());
+  switch (lookahead_bcs.raw_next()) {
+  case Bytecodes::_pop:
+    // Pop of result because of void-return.
+    if (lookahead_bcs.raw_next()==Bytecodes::_return) {
+      break;
+    }
+  case Bytecodes::_goto:
+    // Follow a chain of gotos. Expect a return or a pop/return combination.
+    if (verify_tail_call_chain_of_gotos(_method, lookahead_bcs)==false) {
+      verify_error(bci, "Tail call must be followed by a return instruction");
+      return;
+    }
+    break;
+  case Bytecodes::_ireturn :
+  case Bytecodes::_lreturn :
+  case Bytecodes::_freturn :
+  case Bytecodes::_dreturn :
+  case Bytecodes::_areturn :
+  case Bytecodes::_return :
+    // Next iteration of main loop will verify compatibility of return value.
+    // Note:  This allows a certain amount of "widening" of the result.
+    // A void method can tail-call a non-void method, etc.
+
+    break;
+  default:
+    verify_error(bci, "Tail call must be followed by a return instruction");
+    return;
+  }
+
+  if (_method()->is_synchronized()) {
+    // An implicit exception handler...
+    verify_error(bci, "Tail call from synchronized method");
+    return;
   }
 }
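
The two new verifier routines above only implement the lookahead; the dispatch from the verifier's main bytecode loop lives in another hunk. A rough sketch of how that dispatch would look for a tail-call prefixed invoke (the surrounding names follow the existing verifier loop, but the exact placement of the has_prefix() test is an assumption, not the patch's code):

      case Bytecodes::_invokevirtual :
      case Bytecodes::_invokespecial :
      case Bytecodes::_invokestatic :
      case Bytecodes::_invokeinterface :
        if (bcs.has_prefix()) {
          // Tail-call prefix: check the follower instruction(s) and the
          // synchronized-method restriction before the normal invoke checks.
          verify_tail_call(&bcs, bci, CHECK_VERIFY(this));
        }
        verify_invoke_instructions(
          &bcs, code_length, &current_frame, &this_uninit,
          return_type, cp, CHECK_VERIFY(this));
        no_control_flow = false; break;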
 
diff -r aa0c48844632 -r a7d54b98ca4a src/share/vm/classfile/verifier.hpp
--- a/src/share/vm/classfile/verifier.hpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/share/vm/classfile/verifier.hpp	Wed Jun 03 16:27:17 2009 +0200
@@ -107,7 +107,7 @@ class ClassVerifier : public StackObj {
     StackMapTable* stackmap_table, bool no_control_flow, TRAPS);
 
   void verify_exception_handler_targets(
-    u2 bci, bool this_uninit, StackMapFrame* current_frame,
+    u2 bci, bool this_uninit, bool handlers_prohibited, StackMapFrame* current_frame,
     StackMapTable* stackmap_table, TRAPS);
 
   void verify_ldc(
@@ -131,6 +131,9 @@ class ClassVerifier : public StackObj {
     RawBytecodeStream* bcs, u4 code_length, StackMapFrame* current_frame,
     bool* this_uninit, VerificationType return_type,
     constantPoolHandle cp, TRAPS);
+
+  void verify_tail_call(
+    RawBytecodeStream* bcs, u2 bci, TRAPS);
 
   VerificationType get_newarray_type(u2 index, u2 bci, TRAPS);
   void verify_anewarray(
diff -r aa0c48844632 -r a7d54b98ca4a src/share/vm/classfile/vmSymbols.hpp
--- a/src/share/vm/classfile/vmSymbols.hpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/share/vm/classfile/vmSymbols.hpp	Wed Jun 03 16:27:17 2009 +0200
@@ -153,6 +153,8 @@
   template(java_lang_RuntimeException,                "java/lang/RuntimeException")               \
   template(java_io_IOException,                       "java/io/IOException")                      \
   template(java_security_PrivilegedActionException,   "java/security/PrivilegedActionException")  \
+  /* Tail call exception */ \
+  template(java_lang_TailCallException,               "java/lang/TailCallException") \
                                                                                                   \
   /* error klasses: at least all errors thrown by the VM have entries here */                     \
   template(java_lang_AbstractMethodError,             "java/lang/AbstractMethodError")            \
diff -r aa0c48844632 -r a7d54b98ca4a src/share/vm/code/codeBlob.cpp
--- a/src/share/vm/code/codeBlob.cpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/share/vm/code/codeBlob.cpp	Wed Jun 03 16:27:17 2009 +0200
@@ -375,6 +375,7 @@ DeoptimizationBlob::DeoptimizationBlob(
   int         unpack_offset,
   int         unpack_with_exception_offset,
   int         unpack_with_reexecution_offset,
+  int         stack_compression_offset,
   int         frame_size
 )
 : SingletonBlob("DeoptimizationBlob", cb, sizeof(DeoptimizationBlob), size, frame_size, oop_maps)
@@ -382,6 +383,7 @@ DeoptimizationBlob::DeoptimizationBlob(
   _unpack_offset           = unpack_offset;
   _unpack_with_exception   = unpack_with_exception_offset;
   _unpack_with_reexecution = unpack_with_reexecution_offset;
+  _stack_compression_offset = stack_compression_offset;
 #ifdef COMPILER1
   _unpack_with_exception_in_tls   = -1;
 #endif
@@ -394,6 +396,7 @@ DeoptimizationBlob* DeoptimizationBlob::
   int        unpack_offset,
   int        unpack_with_exception_offset,
   int        unpack_with_reexecution_offset,
+  int        stack_compression_offset,
   int        frame_size)
 {
   DeoptimizationBlob* blob = NULL;
@@ -407,6 +410,7 @@ DeoptimizationBlob* DeoptimizationBlob::
                                          unpack_offset,
                                          unpack_with_exception_offset,
                                          unpack_with_reexecution_offset,
+                                         stack_compression_offset,
                                          frame_size);
   }
 
diff -r aa0c48844632 -r a7d54b98ca4a src/share/vm/code/codeBlob.hpp
--- a/src/share/vm/code/codeBlob.hpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/share/vm/code/codeBlob.hpp	Wed Jun 03 16:27:17 2009 +0200
@@ -339,6 +339,7 @@ class DeoptimizationBlob: public Singlet
   int _unpack_offset;
   int _unpack_with_exception;
   int _unpack_with_reexecution;
+  int _stack_compression_offset;
 
   int _unpack_with_exception_in_tls;
 
@@ -350,6 +351,7 @@ class DeoptimizationBlob: public Singlet
     int         unpack_offset,
     int         unpack_with_exception_offset,
     int         unpack_with_reexecution_offset,
+    int         stack_compression_offset,
     int         frame_size
   );
 
@@ -363,6 +365,7 @@ class DeoptimizationBlob: public Singlet
     int         unpack_offset,
     int         unpack_with_exception_offset,
     int         unpack_with_reexecution_offset,
+    int         stack_compression_offset,
     int         frame_size
   );
 
@@ -389,6 +392,7 @@ class DeoptimizationBlob: public Singlet
   address unpack() const                         { return instructions_begin() + _unpack_offset;           }
   address unpack_with_exception() const          { return instructions_begin() + _unpack_with_exception;   }
   address unpack_with_reexecution() const        { return instructions_begin() + _unpack_with_reexecution; }
+  address stack_compression() const              { return instructions_begin() + _stack_compression_offset; }
 
   // Alternate entry point for C1 where the exception and issuing pc
   // are in JavaThread::_exception_oop and JavaThread::_exception_pc
diff -r aa0c48844632 -r a7d54b98ca4a src/share/vm/code/compiledIC.cpp
--- a/src/share/vm/code/compiledIC.cpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/share/vm/code/compiledIC.cpp	Wed Jun 03 16:27:17 2009 +0200
@@ -116,12 +116,37 @@ address CompiledIC::stub_address() const
   return _ic_call->destination();
 }
 
+// This is thread safe because the megamorphic call site is the only client; it
+// does not matter if a monomorphic call site sees a protection domain.
+void CompiledIC::set_protection_domain_token(oop protection_domain) {
+  // Verify, creation also verifies the object.
+  NativeMovConstReg* set_cache_oop = nativeMovConstReg_at(_first_set_oop_inst);   
+  NativeMovConstProtectionDomain * set_pd_oop = nativeMovConstPD_before(_first_set_oop_inst);
+  
+  // Set the protection domain.
+  set_pd_oop->set_data((intptr_t) protection_domain);
+  // Fix the relocation entry.
+  oop* oop_addr = set_pd_oop->oop_address();
+  bool is_fixed = false;
+  assert(_oops.code()!=NULL, "oops");
+  RelocIterator iter = RelocIterator(_oops.code(), (address)set_pd_oop, ((address)set_pd_oop)+1);
+  while (iter.next()) {
+    if (iter.type() == relocInfo::oop_type) {
+      oop_Relocation* r = iter.oop_reloc();
+      if (r->oop_addr() == oop_addr) {
+        r->fix_oop_relocation();
+        is_fixed = true;
+      }
+    }
+  }
+  assert (is_fixed, "Oop relocation fixed");
+}
 
 //-----------------------------------------------------------------------------
 // High-level access to an inline cache. Guaranteed to be MT-safe.
 
 
-void CompiledIC::set_to_megamorphic(CallInfo* call_info, Bytecodes::Code bytecode, TRAPS) {
+void CompiledIC::set_to_megamorphic(CallInfo* call_info, Bytecodes::Code bytecode, oop protection_domain, TRAPS) {
   methodHandle method = call_info->selected_method();
   bool is_invoke_interface = (bytecode == Bytecodes::_invokeinterface && !call_info->has_vtable_index());
   assert(CompiledIC_lock->is_locked() || SafepointSynchronize::is_at_safepoint(), "");
@@ -130,9 +155,13 @@ void CompiledIC::set_to_megamorphic(Call
   assert(is_call_to_compiled() || is_call_to_interpreted(), "going directly to megamorphic?");
 
   address entry;
+  if (call_info->is_tail_call()) {
+    assert(is_tail_call(), "Should match call_info->is_tail_call()");
+    set_protection_domain_token((oop)protection_domain);
+  }
   if (is_invoke_interface) {
     int index = klassItable::compute_itable_index(call_info->resolved_method()());
-    entry = VtableStubs::create_stub(false, index, method());
+    entry = VtableStubs::create_stub(false, index, method(), is_tail_call(), is_sibling());
     assert(entry != NULL, "entry not computed");
     klassOop k = call_info->resolved_method()->method_holder();
     assert(Klass::cast(k)->is_interface(), "sanity check");
@@ -140,7 +169,7 @@ void CompiledIC::set_to_megamorphic(Call
   } else {
     // Can be different than method->vtable_index(), due to package-private etc.
     int vtable_index = call_info->vtable_index();
-    entry = VtableStubs::create_stub(true, vtable_index, method());
+    entry = VtableStubs::create_stub(true, vtable_index, method(), is_tail_call(), is_sibling());
     InlineCacheBuffer::create_transition_stub(this, method(), entry);
   }
 
@@ -238,9 +267,34 @@ void CompiledIC::set_to_clean() {
 
   address entry;
   if (is_optimized()) {
-    entry = SharedRuntime::get_resolve_opt_virtual_call_stub();
+    switch (tail_call_type()) {
+    case relocInfo::not_tail_call:
+      entry = SharedRuntime::get_resolve_opt_virtual_call_stub();
+      break;
+    case relocInfo::sibling_tail_call_type:
+      entry = SharedRuntime::get_resolve_opt_virtual_tail_call_stub();
+      break;
+    case relocInfo::not_sibling_tail_call_type:
+      entry = SharedRuntime::get_resolve_opt_not_sibling_virtual_tail_call_stub();
+      break;
+    default: assert(false, "oops"); break;
+    }
+    
   } else {
-    entry = SharedRuntime::get_resolve_virtual_call_stub();
+    switch (tail_call_type()) {
+    case relocInfo::not_tail_call:
+      entry = SharedRuntime::get_resolve_virtual_call_stub();
+      break;
+    case relocInfo::sibling_tail_call_type:
+      entry = SharedRuntime::get_resolve_virtual_tail_call_stub();
+      break;
+    case relocInfo::not_sibling_tail_call_type:
+      entry = SharedRuntime::get_resolve_not_sibling_virtual_tail_call_stub();
+      break;
+    default:
+      assert(false,"oops");
+      break;
+    }
   }
 
   // A zombie transition will always be safe, since the oop has already been set to NULL, so
@@ -274,13 +328,17 @@ bool CompiledIC::is_clean() const {
   bool is_clean = false;
   address dest = ic_destination();
   is_clean = dest == SharedRuntime::get_resolve_opt_virtual_call_stub() ||
-             dest == SharedRuntime::get_resolve_virtual_call_stub();
+             dest == SharedRuntime::get_resolve_virtual_call_stub() ||
+             dest == SharedRuntime::get_resolve_virtual_tail_call_stub() ||
+             dest == SharedRuntime::get_resolve_not_sibling_virtual_tail_call_stub() ||
+             dest == SharedRuntime::get_resolve_opt_virtual_tail_call_stub() ||
+             dest == SharedRuntime::get_resolve_opt_not_sibling_virtual_tail_call_stub();
   assert(!is_clean || is_optimized() || cached_oop() == NULL, "sanity check");
   return is_clean;
 }
 
 
-void CompiledIC::set_to_monomorphic(const CompiledICInfo& info) {
+void CompiledIC::set_to_monomorphic(const CompiledICInfo& info, bool is_tail_call) {
   assert (CompiledIC_lock->is_locked() || SafepointSynchronize::is_at_safepoint(), "");
   // Updating a cache to the wrong entry can cause bugs that are very hard
   // to track down - if cache entry gets invalid - we just clean it. In
@@ -305,7 +363,7 @@ void CompiledIC::set_to_monomorphic(cons
       // At code generation time, this call has been emitted as static call
       // Call via stub
       assert(info.cached_oop().not_null() && info.cached_oop()->is_method(), "sanity check");
-      CompiledStaticCall* csc = compiledStaticCall_at(instruction_address());
+      CompiledStaticCall* csc = compiledStaticCall_at(instruction_address(), is_tail_call);
       methodHandle method (thread, (methodOop)info.cached_oop()());
       csc->set_to_interpreted(method, info.entry());
       if (TraceICs) {
@@ -370,6 +428,8 @@ void CompiledIC::compute_monomorphic_ent
                                            KlassHandle receiver_klass,
                                            bool is_optimized,
                                            bool static_bound,
+                                           bool is_tail_call,
+                                           bool is_sibling,
                                            CompiledICInfo& info,
                                            TRAPS) {
   info._is_optimized = is_optimized;
@@ -379,9 +439,41 @@ void CompiledIC::compute_monomorphic_ent
   if (method_code != NULL) {
     // Call to compiled code
     if (static_bound || is_optimized) {
-      entry      = method_code->verified_entry_point();
+      if (is_tail_call && is_sibling) {
+        if (TraceTailCalls){
+          tty->print("CompiledIC::compute_monomorphic_entry():");
+          method->print_value_string();
+          tty->print_cr(" to compiled: verified_tail_call_entry_point");
+        }
+        entry      = method_code->verified_tail_call_entry_point();
+      } else if (is_tail_call) {
+        if (TraceTailCalls){
+          tty->print("CompiledIC::compute_monomorphic_entry():");
+          method->print_value_string();
+          tty->print_cr(" to compiled: verified_not_sibling_tail_call_entry_point");
+        }
+        entry      = method_code->verified_not_sibling_tail_call_entry_point();
+      } else {
+        entry      = method_code->verified_entry_point();
+      }
     } else {
-      entry      = method_code->entry_point();
+      if (is_tail_call && is_sibling) {
+        if (TraceTailCalls){
+          tty->print("CompiledIC::compute_monomorphic_entry():");
+          method->print_value_string();
+          tty->print_cr(" to compiled: monomorphic_tail_call_entry_point");
+        }
+        entry      = method_code->tail_call_entry_point();
+      } else if (is_tail_call) {
+        if (TraceTailCalls){
+          tty->print("CompiledIC::compute_monomorphic_entry():");
+          method->print_value_string();
+          tty->print_cr(" to compiled: monomorphic_not_sibling_call_entry_point");
+        }
+        entry      = method_code->not_sibling_tail_call_entry_point();
+      } else {
+        entry      = method_code->entry_point();
+      }
     }
   }
   if (entry != NULL) {
@@ -429,36 +521,74 @@ void CompiledIC::compute_monomorphic_ent
 #endif // COMPILER2
     if (is_optimized) {
       // Use stub entry
-      info._entry      = method()->get_c2i_entry();
+      if (is_tail_call && is_sibling) {
+        if (TraceTailCalls){
+          tty->print("CompiledIC::compute_monomorphic_entry():");
+          method->print_value_string();
+          tty->print_cr(" to interpreter: get_c2i_verified_tail_call_entry_point");
+        }
+        info._entry      = method()->get_c2i_verified_tail_call_entry();
+      } else if(is_tail_call) {
+        if (TraceTailCalls){
+          tty->print("CompiledIC::compute_monomorphic_entry():");
+          method->print_value_string();
+          tty->print_cr(" to interpreter: get_c2i_verified_not_sibling_tail_call_entry_point");
+        }
+        info._entry      = method()->get_c2i_verified_not_sibling_tail_call_entry();
+      } else
+        info._entry      = method()->get_c2i_entry();
       info._cached_oop = method;
     } else {
       // Use mkh entry
       oop holder = oopFactory::new_compiledICHolder(method, receiver_klass, CHECK);
       info._cached_oop = Handle(THREAD, holder);
-      info._entry      = method()->get_c2i_unverified_entry();
+      if (is_tail_call && is_sibling) {
+        if (TraceTailCalls){
+          tty->print("CompiledIC::compute_monomorphic_entry():");
+          method->print_value_string();
+          tty->print_cr(" to interpreter: get_c2i_unverified_tail_call_entry_point");
+        }
+        info._entry      = method()->get_c2i_unverified_tail_call_entry();
+      } else if (is_tail_call) {
+        if (TraceTailCalls){
+          tty->print("CompiledIC::compute_monomorphic_entry():");
+          method->print_value_string();
+          tty->print_cr(" to interpreter: get_c2i_unverified_not_sibling_tail_call_entry_point");
+        }
+        info._entry      = method()->get_c2i_unverified_not_sibling_tail_call_entry();
+      } else
+        info._entry      = method()->get_c2i_unverified_entry();
     }
   }
 }
 
 
-inline static RelocIterator parse_ic(CodeBlob* code, address ic_call, oop* &_oop_addr, bool *is_optimized) {
+inline static RelocIterator parse_ic(CodeBlob* code, address ic_call, oop* &_oop_addr, bool *is_optimized, relocInfo::tailCallType* type, address& first_set_oop_addr) {
    address  first_oop = NULL;
+   first_set_oop_addr = NULL;
    // Mergers please note: Sun SC5.x CC insists on an lvalue for a reference parameter.
    CodeBlob *code1 = code;
-   return virtual_call_Relocation::parse_ic(code1, ic_call, first_oop, _oop_addr, is_optimized);
+   return virtual_call_Relocation::parse_ic(code1, ic_call, first_set_oop_addr, _oop_addr, is_optimized, type);
 }
 
 CompiledIC::CompiledIC(NativeCall* ic_call)
   : _ic_call(ic_call),
-    _oops(parse_ic(NULL, ic_call->instruction_address(), _oop_addr, &_is_optimized))
+    _oops(parse_ic(NULL, ic_call->instruction_address(), _oop_addr, &_is_optimized, &_tail_call_type, _first_set_oop_inst))
 {
 }
 
+static NativeCall* nativeCallOrJump_at(address addr, bool is_tail_call) {
+  NativeCall * res = is_tail_call ?
+    (NativeCall*) nativeJump_at(addr) :
+    nativeCall_at(addr);
+  return res;
+}
 
-CompiledIC::CompiledIC(Relocation* ic_reloc)
-  : _ic_call(nativeCall_at(ic_reloc->addr())),
-    _oops(parse_ic(ic_reloc->code(), ic_reloc->addr(), _oop_addr, &_is_optimized))
+CompiledIC::CompiledIC(Relocation* ic_reloc, bool is_tail_call)
+  : _ic_call(nativeCallOrJump_at(ic_reloc->addr(), is_tail_call)),
+    _oops(parse_ic(ic_reloc->code(), ic_reloc->addr(), _oop_addr, &_is_optimized, &_tail_call_type, _first_set_oop_inst))
 {
+  
   assert(ic_reloc->type() == relocInfo::virtual_call_type ||
          ic_reloc->type() == relocInfo::opt_virtual_call_type, "wrong reloc. info");
 }
@@ -474,8 +604,17 @@ void CompiledStaticCall::set_to_clean() 
   CodeBlob* cb = CodeCache::find_blob_unsafe(this);
   assert(cb != NULL && cb->is_nmethod(), "must be nmethod");
 #endif
-  set_destination_mt_safe(SharedRuntime::get_resolve_static_call_stub());
-
+  address addr = instruction_address();
+  relocInfo::tailCallType tail_call_type;
+  static_call_Relocation::parse_static_call(addr, tail_call_type);
+  
+  if (tail_call_type == relocInfo::not_tail_call)
+    set_destination_mt_safe(SharedRuntime::get_resolve_static_call_stub());
+  else if (tail_call_type == relocInfo::sibling_tail_call_type)
+    set_destination_mt_safe(SharedRuntime::get_resolve_static_tail_call_stub());
+  else if (tail_call_type == relocInfo::not_sibling_tail_call_type)
+    set_destination_mt_safe(SharedRuntime::get_resolve_not_sibling_static_tail_call_stub());
+  else assert(0, "Something went terribly wrong here.");
   // Do not reset stub here:  It is too expensive to call find_stub.
   // Instead, rely on caller (nmethod::clear_inline_caches) to clear
   // both the call and its stub.
@@ -486,6 +625,14 @@ bool CompiledStaticCall::is_clean() cons
   return destination() == SharedRuntime::get_resolve_static_call_stub();
 }
 
+bool CompiledStaticCall::is_clean_static_tail_call() const {
+  return destination() == SharedRuntime::get_resolve_static_tail_call_stub() ||
+    destination() == SharedRuntime::get_resolve_not_sibling_static_tail_call_stub() ||
+    destination() == SharedRuntime::get_resolve_opt_virtual_tail_call_stub() ||
+    destination() == SharedRuntime::get_resolve_opt_not_sibling_virtual_tail_call_stub();
+}
+
+
 bool CompiledStaticCall::is_call_to_compiled() const {
   return CodeCache::contains(destination());
 }
@@ -495,7 +642,13 @@ bool CompiledStaticCall::is_call_to_inte
   // It is a call to interpreted, if it calls to a stub. Hence, the destination
   // must be in the stub part of the nmethod that contains the call
   nmethod* nm = CodeCache::find_nmethod(instruction_address());
-  return nm->stub_contains(destination());
+  // Because of tail calls there is an additional stub destination, so we need
+  // to check that the destination is not the tail call stub.
+  address dest = destination();
+  bool is_stub = nm->stub_contains(dest) &&
+    dest != nm->verified_tail_call_entry_point() &&
+    dest != nm->verified_not_sibling_tail_call_entry_point();
+  return is_stub;
 }
 
 
@@ -532,7 +685,7 @@ void CompiledStaticCall::set(const Stati
   // to track down - if cache entry gets invalid - we just clean it. In
   // this way it is always the same code path that is responsible for
   // updating and resolving an inline cache
-  assert(is_clean(), "do not update a call entry - use clean");
+  assert(is_clean() || (TailCallsStackCompression && is_clean_static_tail_call()), "do not update a call entry - use clean");
 
   if (info._to_interpreter) {
     // Call to interpreted code
@@ -550,20 +703,76 @@ void CompiledStaticCall::set(const Stati
   }
 }
 
+void CompiledStaticCall::set_tail_call(const StaticCallInfo& info) {
+  assert (CompiledIC_lock->is_locked() || SafepointSynchronize::is_at_safepoint(), "mt unsafe call");
+  MutexLockerEx pl(Patching_lock, Mutex::_no_safepoint_check_flag);
+  // Updating a cache to the wrong entry can cause bugs that are very hard
+  // to track down - if cache entry gets invalid - we just clean it. In
+  // this way it is always the same code path that is responsible for
+  // updating and resolving an inline cache
+  assert(is_clean_static_tail_call(), "do not update a call entry - use clean");
+
+  if (info._to_interpreter) {
+    // Call to interpreted code
+    set_to_interpreted(info.callee(), info.entry());
+  } else {
+    if (TraceICs) {
+      ResourceMark rm;
+      tty->print_cr("CompiledStaticCall@" INTPTR_FORMAT ": set_to_static_tail_compiled " INTPTR_FORMAT,
+                    instruction_address(),
+                    info.entry());
+    }
+    // Call to compiled code
+    assert (CodeCache::contains(info.entry()), "wrong entry point");
+    set_destination_mt_safe(info.entry());
+  }
+}
+
 
 // Compute settings for a CompiledStaticCall. Since we might have to set
 // the stub when calling to the interpreter, we need to return arguments.
-void CompiledStaticCall::compute_entry(methodHandle m, StaticCallInfo& info) {
+void CompiledStaticCall::compute_entry(methodHandle m, StaticCallInfo& info, bool is_tail_call, bool is_sibling_call) {
   nmethod* m_code = m->code();
   info._callee = m;
   if (m_code != NULL) {
     info._to_interpreter = false;
-    info._entry  = m_code->verified_entry_point();
+    if (is_tail_call && is_sibling_call) {
+      if (TraceTailCalls) {
+        tty->print("CompiledStaticCall:compute_entry() ");
+        m->print_value_string();
+        tty->print_cr(" to compiled: verified_tail_call_entry_point");
+      }
+      info._entry = m_code->verified_tail_call_entry_point();
+    } else if (is_tail_call) {
+      if (TraceTailCalls){
+        tty->print("CompiledStaticCall:compute_entry() ");
+        m->print_value_string();
+        tty->print_cr(" to compiled: verified_not_sibling_tail_call_entry_point");
+      }
+      info._entry = m_code->verified_not_sibling_tail_call_entry_point();
+    } else {
+      info._entry = m_code->verified_entry_point();
+    }
   } else {
     // Callee is interpreted code.  In any case entering the interpreter
     // puts a converter-frame on the stack to save arguments.
     info._to_interpreter = true;
-    info._entry      = m()->get_c2i_entry();
+    if (is_tail_call && is_sibling_call) {
+      if (TraceTailCalls){
+        tty->print("CompiledStaticCall:compute_entry() ");
+        m->print_value_string();
+        tty->print_cr(" to interpreter: get_c2i_verified_tail_call_entry");
+      }
+      info._entry      = m()->get_c2i_verified_tail_call_entry();
+    } else if (is_tail_call) {
+      if (TraceTailCalls){
+        tty->print("CompiledStaticCall:compute_entry() ");
+        m->print_value_string();
+        tty->print_cr(" to interpreter: get_c2i_verified_not_sibling_tail_call_entry");
+      }
+      info._entry      = m()->get_c2i_verified_not_sibling_tail_call_entry();
+    } else
+      info._entry      = m()->get_c2i_entry();
   }
 }
 
@@ -607,9 +816,11 @@ address CompiledStaticCall::find_stub() 
 // Non-product mode code
 #ifndef PRODUCT
 
-void CompiledIC::verify() {
+void CompiledIC::verify(bool is_tail_call) {
   // make sure code pattern is actually a call imm32 instruction
-  _ic_call->verify();
+  if (is_tail_call)
+    ((NativeJump*)_ic_call)->verify();
+  else _ic_call->verify();
   if (os::is_MP()) {
     _ic_call->verify_alignment();
   }
@@ -640,6 +851,23 @@ void CompiledStaticCall::print() {
     tty->print("interpreted");
   }
   tty->cr();
+}
+
+void CompiledStaticCall::verify_static_tail_call() {
+  // Verify call
+  NativeJump* jmp = (NativeJump*) this;
+  jmp->verify();
+  if (os::is_MP()) {
+    verify_alignment();
+  }
+
+  // Verify stub
+  address stub = find_stub();
+  assert(stub != NULL, "no stub found for static call");
+  NativeMovConstReg* method_holder = nativeMovConstReg_at(stub);   // creation also verifies the object
+  NativeJump*        jump          = nativeJump_at(method_holder->next_instruction_address());
+  // Verify state
+  assert(is_clean_static_tail_call() || is_call_to_compiled()|| is_call_to_interpreted(), "sanity check");
 }
 
 void CompiledStaticCall::verify() {
@@ -654,7 +882,6 @@ void CompiledStaticCall::verify() {
   assert(stub != NULL, "no stub found for static call");
   NativeMovConstReg* method_holder = nativeMovConstReg_at(stub);   // creation also verifies the object
   NativeJump*        jump          = nativeJump_at(method_holder->next_instruction_address());
-
   // Verify state
   assert(is_clean() || is_call_to_compiled() || is_call_to_interpreted(), "sanity check");
 }
diff -r aa0c48844632 -r a7d54b98ca4a src/share/vm/code/compiledIC.hpp
--- a/src/share/vm/code/compiledIC.hpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/share/vm/code/compiledIC.hpp	Wed Jun 03 16:27:17 2009 +0200
@@ -76,15 +76,20 @@ class CompiledIC: public ResourceObj {
   oop*          _oop_addr;      // patchable oop cell for this IC
   RelocIterator _oops;          // iteration over any and all set-oop instructions
   bool          _is_optimized;  // an optimized virtual call (i.e., no compiled IC)
+  relocInfo::tailCallType _tail_call_type; // Whether a tail call and what kind.
+  address _first_set_oop_inst;
 
   CompiledIC(NativeCall* ic_call);
-  CompiledIC(Relocation* ic_reloc);    // Must be of virtual_call_type/opt_virtual_call_type
+  CompiledIC(Relocation* ic_reloc, bool is_tail_call);    // Must be of virtual_call_type/opt_virtual_call_type
 
   // low-level inline-cache manipulation. Cannot be accessed directly, since it might not be MT-safe
   // to change an inline-cache. These change the underlying inline-cache directly. They *never* make
   // changes to a transition stub.
   void set_ic_destination(address entry_point);
   void set_cached_oop(oop cache);
+
+  // Megamorphic tail calls need to set the protection domain token.
+  void set_protection_domain_token(oop protection_domain);
 
   // Reads the location of the transition stub. This will fail with an assertion, if no transition stub is
   // associated with the inline cache.
@@ -93,9 +98,9 @@ class CompiledIC: public ResourceObj {
 
  public:
   // conversion (machine PC to CompiledIC*)
-  friend CompiledIC* CompiledIC_before(address return_addr);
-  friend CompiledIC* CompiledIC_at(address call_site);
-  friend CompiledIC* CompiledIC_at(Relocation* call_site);
+  friend CompiledIC* CompiledIC_before(address return_addr, bool is_tail_call);
+  friend CompiledIC* CompiledIC_at(address call_site, bool is_tail_call);
+  friend CompiledIC* CompiledIC_at(Relocation* call_site, bool is_tail_call);
 
   // Return the cached_oop/destination associated with this inline cache. If the cache currently points
   // to a transition stub, it will read the values from the transition stub.
@@ -103,6 +108,9 @@ class CompiledIC: public ResourceObj {
   address ic_destination() const;
 
   bool is_optimized() const   { return _is_optimized; }
+  relocInfo::tailCallType tail_call_type() { return _tail_call_type; }
+  bool is_tail_call() { return _tail_call_type != relocInfo::not_tail_call; }
+  bool is_sibling() { return _tail_call_type == relocInfo::sibling_tail_call_type; }
 
   // State
   bool is_clean() const;
@@ -121,11 +129,12 @@ class CompiledIC: public ResourceObj {
   // They all takes a TRAP argument, since they can cause a GC if the inline-cache buffer is full.
   //
   void set_to_clean();  // Can only be called during a safepoint operation
-  void set_to_monomorphic(const CompiledICInfo& info);
-  void set_to_megamorphic(CallInfo* call_info, Bytecodes::Code bytecode, TRAPS);
+  void set_to_monomorphic(const CompiledICInfo& info, bool is_tail_call);
+  void set_to_megamorphic(CallInfo* call_info, Bytecodes::Code bytecode, oop protection_domain, TRAPS);
 
   static void compute_monomorphic_entry(methodHandle method, KlassHandle receiver_klass,
-                                        bool is_optimized, bool static_bound, CompiledICInfo& info, TRAPS);
+                                        bool is_optimized, bool static_bound, bool is_tail_call, 
+                                        bool is_sibling, CompiledICInfo& info, TRAPS);
 
   // Location
   address instruction_address() const { return _ic_call->instruction_address(); }
@@ -133,24 +142,28 @@ class CompiledIC: public ResourceObj {
   // Misc
   void print()             PRODUCT_RETURN;
   void print_compiled_ic() PRODUCT_RETURN;
-  void verify()            PRODUCT_RETURN;
+  void verify(bool is_tail_call)            PRODUCT_RETURN;
 };
 
-inline CompiledIC* CompiledIC_before(address return_addr) {
-  CompiledIC* c_ic = new CompiledIC(nativeCall_before(return_addr));
-  c_ic->verify();
+inline CompiledIC* CompiledIC_before(address return_addr, bool is_tail_call) {
+  CompiledIC* c_ic = is_tail_call ?
+    new CompiledIC((NativeCall*)nativeJump_before(return_addr)) :
+    new CompiledIC(nativeCall_before(return_addr));
+  c_ic->verify(is_tail_call);
   return c_ic;
 }
 
-inline CompiledIC* CompiledIC_at(address call_site) {
-  CompiledIC* c_ic = new CompiledIC(nativeCall_at(call_site));
-  c_ic->verify();
+inline CompiledIC* CompiledIC_at(address call_site, bool is_tail_call) {
+  CompiledIC* c_ic = is_tail_call ?
+     new CompiledIC((NativeCall*)nativeJump_at(call_site)) :
+     new CompiledIC(nativeCall_at(call_site));
+  c_ic->verify(is_tail_call);
   return c_ic;
 }
 
-inline CompiledIC* CompiledIC_at(Relocation* call_site) {
-  CompiledIC* c_ic = new CompiledIC(call_site);
-  c_ic->verify();
+inline CompiledIC* CompiledIC_at(Relocation* call_site, bool is_tail_call) {
+  CompiledIC* c_ic = new CompiledIC(call_site, is_tail_call);
+  c_ic->verify(is_tail_call);
   return c_ic;
 }
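
Callers of these helpers recover the flag from the instruction at the call site, since tail-call sites are emitted as jumps rather than calls (the same test ICStub::finalize uses in icBuffer.cpp further down). A minimal usage sketch, with call_site standing for the address of the inline-cache instruction:

      bool is_tail_call = NativeJump::is_jump_at(call_site);
      CompiledIC* ic = CompiledIC_at(call_site, is_tail_call);
      // The relocation record also tells us which flavour of tail call it is.
      bool is_sibling = ic->is_sibling();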
 
@@ -191,7 +204,7 @@ class CompiledStaticCall: public NativeC
 
   // Also used by CompiledIC
   void set_to_interpreted(methodHandle callee, address entry);
-  bool is_optimized_virtual();
+  //bool is_optimized_virtual(); seems to be dead?
 
  public:
   friend CompiledStaticCall* compiledStaticCall_before(address return_addr);
@@ -200,6 +213,8 @@ class CompiledStaticCall: public NativeC
 
   // State
   bool is_clean() const;
+  bool is_clean_static_tail_call() const;
+
   bool is_call_to_compiled() const;
   bool is_call_to_interpreted() const;
 
@@ -210,9 +225,11 @@ class CompiledStaticCall: public NativeC
   // Computation and setting is split up, since the actions are separate during
   // a OptoRuntime::resolve_xxx.
   void set(const StaticCallInfo& info);
+  // Convert this call to a jump (to the tail call stub).
+  void set_tail_call(const StaticCallInfo& info);
 
   // Compute entry point given a method
-  static void compute_entry(methodHandle m, StaticCallInfo& info);
+  static void compute_entry(methodHandle m, StaticCallInfo& info, bool is_tail_call=false, bool is_sibling_call=false);
 
   // Stub support
   address find_stub();
@@ -221,21 +238,31 @@ class CompiledStaticCall: public NativeC
   // Misc.
   void print()  PRODUCT_RETURN;
   void verify() PRODUCT_RETURN;
+  void verify_static_tail_call() PRODUCT_RETURN;
 };
 
 
-inline CompiledStaticCall* compiledStaticCall_before(address return_addr) {
-  CompiledStaticCall* st = (CompiledStaticCall*)nativeCall_before(return_addr);
-  st->verify();
+inline CompiledStaticCall* compiledStaticCall_before(address return_addr, bool is_tail_call) {
+  // Since a jmp and call instruction have the same prefix size we treat the
+  // jump instruction like a call instruction.
+  CompiledStaticCall* st = is_tail_call ?
+    (CompiledStaticCall*)nativeJump_before(return_addr) :
+    (CompiledStaticCall*)nativeCall_before(return_addr);
+  if (is_tail_call)
+    st->verify_static_tail_call();
+  else st->verify();
   return st;
 }
 
-inline CompiledStaticCall* compiledStaticCall_at(address native_call) {
+
+inline CompiledStaticCall* compiledStaticCall_at(address native_call, bool is_tail_call) {
   CompiledStaticCall* st = (CompiledStaticCall*)native_call;
-  st->verify();
+  if (is_tail_call)
+    st->verify_static_tail_call();
+  else st->verify();
   return st;
 }
 
-inline CompiledStaticCall* compiledStaticCall_at(Relocation* call_site) {
-  return compiledStaticCall_at(call_site->addr());
+inline CompiledStaticCall* compiledStaticCall_at(Relocation* call_site, bool is_tail_call) {
+  return compiledStaticCall_at(call_site->addr(), is_tail_call);
 }
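
Static-call sites work the same way; code that walks relocations derives the flag from the relocation record, as nmethod::cleanup_inline_caches does further down. Sketch, assuming iter is a RelocIterator positioned on a static_call relocation:

      bool is_tail_call = iter.tail_call_type() != relocInfo::not_tail_call;
      CompiledStaticCall* csc = compiledStaticCall_at(iter.reloc(), is_tail_call);
      if (csc->is_call_to_compiled()) {
        // destination is an nmethod entry, possibly one of the tail-call entries
      }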
diff -r aa0c48844632 -r a7d54b98ca4a src/share/vm/code/icBuffer.cpp
--- a/src/share/vm/code/icBuffer.cpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/share/vm/code/icBuffer.cpp	Wed Jun 03 16:27:17 2009 +0200
@@ -35,7 +35,9 @@ void ICStub::finalize() {
 void ICStub::finalize() {
   if (!is_empty()) {
     ResourceMark rm;
-    CompiledIC *ic = CompiledIC_at(ic_site());
+    // If it's a jump it's a tail call.
+    bool is_tail_call = NativeJump::is_jump_at(ic_site());
+    CompiledIC *ic = CompiledIC_at(ic_site(), is_tail_call);
     assert(CodeCache::find_nmethod(ic->instruction_address()) != NULL, "inline cache in non-nmethod?");
 
     assert(this == ICStub_from_destination_address(ic->stub_address()), "wrong owner of ic buffer");
diff -r aa0c48844632 -r a7d54b98ca4a src/share/vm/code/nmethod.cpp
--- a/src/share/vm/code/nmethod.cpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/share/vm/code/nmethod.cpp	Wed Jun 03 16:27:17 2009 +0200
@@ -1,3 +1,4 @@
+
 /*
  * Copyright 1997-2008 Sun Microsystems, Inc.  All Rights Reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
@@ -653,6 +654,7 @@ nmethod::nmethod(
 
 // For dtrace wrappers
 #ifdef HAVE_DTRACE_H
+
 nmethod::nmethod(
   methodOop method,
   int nmethod_size,
@@ -786,6 +788,12 @@ nmethod::nmethod(
     // Exception handler and deopt handler are in the stub section
     _exception_offset        = _stub_offset + offsets->value(CodeOffsets::Exceptions);
     _deoptimize_offset       = _stub_offset + offsets->value(CodeOffsets::Deopt);
+    _verified_tail_call_offset = _stub_offset + offsets->value(CodeOffsets::Verified_Tail_Call_Entry);
+    _verified_not_sibling_tail_call_offset = _stub_offset + offsets->value(CodeOffsets::Verified_Not_Sibling_Tail_Call_Entry);
+    _verified_not_sibling_tail_call_set_data_offset = _stub_offset + offsets->value(CodeOffsets::Verified_Not_Sibling_Tail_Call_Set_Data_Entry);
+    _tail_call_offset = _stub_offset + offsets->value(CodeOffsets::Tail_Call_Entry);
+    _not_sibling_tail_call_offset = _stub_offset + offsets->value(CodeOffsets::Not_Sibling_Tail_Call_Entry);
+    _not_sibling_tail_call_set_data_offset = _stub_offset + offsets->value(CodeOffsets::Not_Sibling_Tail_Call_Set_Data_Entry);
     _consts_offset           = instructions_offset() + code_buffer->total_offset_of(code_buffer->consts()->start());
     _scopes_data_offset      = data_offset();
     _scopes_pcs_offset       = _scopes_data_offset   + round_to(debug_info->data_size         (), oopSize);
@@ -797,6 +805,19 @@ nmethod::nmethod(
     _entry_point             = instructions_begin();
     _verified_entry_point    = instructions_begin() + offsets->value(CodeOffsets::Verified_Entry);
     _osr_entry_point         = instructions_begin() + offsets->value(CodeOffsets::OSR_Entry);
+    _verified_tail_call_entry_point = verified_tail_call_begin();
+    _verified_not_sibling_tail_call_entry_point = verified_not_sibling_tail_call_begin();
+    _tail_call_entry_point = tail_call_begin();
+    _not_sibling_tail_call_entry_point = not_sibling_tail_call_begin();
+
+#ifdef ASSERT
+    // Check entry point alignment: when making the nmethod not entrant or
+    // zombie the tail call entry points need to be aligned properly.
+    NativeJump::check_verified_entry_alignment(0, _verified_tail_call_entry_point);
+    NativeJump::check_verified_entry_alignment(0, _verified_not_sibling_tail_call_entry_point);
+    NativeJump::check_verified_entry_alignment(0, _tail_call_entry_point);
+    NativeJump::check_verified_entry_alignment(0, _not_sibling_tail_call_entry_point);
+#endif
     _exception_cache         = NULL;
     _pc_desc_cache.reset_to(scopes_pcs_begin());
 
@@ -970,6 +991,43 @@ ScopeDesc* nmethod::scope_desc_at(addres
 }
 
 
+// Sets methodoop and c2i entry in tail call stub at address move_addr.
+static void set_adapter_info_in_tail_call_stub_helper(CodeBlob * buffer, methodOop method, address move_addr, AdapterHandlerEntry* adapter) {
+  // Get instruction sequence:
+  // mov ebx, [oop]
+  // jmp [c2i]
+  NativeMovConstReg* method_holder = nativeMovConstReg_at(move_addr);  
+  NativeJump*        jump          = nativeJump_at(method_holder->next_instruction_address());
+  method_holder->set_data((intptr_t)method);
+  assert(method->is_perm(), "Must be in permanent gen.");
+  // Fix up the oop relocation for the embedded methodOop.
+  RelocIterator iter(buffer, (address)method_holder, ((address) method_holder)+1);
+  oop* oop_addr = (oop*) (((address)method_holder)+NativeMovConstReg::data_offset);
+  bool is_fixed = false;
+  while (iter.next()) {
+    if (iter.type() == relocInfo::oop_type) {
+      oop_Relocation* r = iter.oop_reloc();
+      if (r->oop_addr() == oop_addr) {
+        r->fix_oop_relocation();
+        is_fixed = true;
+      }
+    }
+  }
+  assert (is_fixed, "Oop relocation fixed");
+  jump->set_jump_destination(adapter->get_c2i_entry_skip_fixup());
+}
+
+void nmethod::set_adapter_info_in_tail_call_stubs(methodOop method, AdapterHandlerEntry* adapter) {
+  guarantee(is_java_method(), "only works for java methods");
+ 
+  // Verified not sibling tail call entry.
+  address move_addr = header_begin() + _verified_not_sibling_tail_call_set_data_offset;
+  set_adapter_info_in_tail_call_stub_helper(this, method, move_addr, adapter);
+  // Not sibling tail call entry.
+  move_addr = header_begin() + _not_sibling_tail_call_set_data_offset;
+  set_adapter_info_in_tail_call_stub_helper(this, method, move_addr, adapter);
+}
+
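
A sketch of the intended use of set_adapter_info_in_tail_call_stubs: once a method has both an nmethod and a C2I adapter, the not-sibling tail-call stubs must be given the methodOop and c2i entry that their mov/jmp sequence loads. How the caller obtains the adapter entry is not shown in this hunk, so it is left as a parameter here:

      // nm:      freshly installed nmethod for methodOop 'm'
      // adapter: the method's AdapterHandlerEntry, obtained by the caller
      nm->set_adapter_info_in_tail_call_stubs(m, adapter);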
 void nmethod::clear_inline_caches() {
   assert(SafepointSynchronize::is_at_safepoint(), "cleaning of IC's only allowed at safepoint");
   if (is_zombie()) {
@@ -1009,7 +1067,8 @@ void nmethod::cleanup_inline_caches() {
     switch(iter.type()) {
       case relocInfo::virtual_call_type:
       case relocInfo::opt_virtual_call_type: {
-        CompiledIC *ic = CompiledIC_at(iter.reloc());
+        bool is_tail_call = iter.tail_call_type() != relocInfo::not_tail_call;
+        CompiledIC *ic = CompiledIC_at(iter.reloc(), is_tail_call);
         // Ok, to lookup references to zombies here
         CodeBlob *cb = CodeCache::find_blob_unsafe(ic->ic_destination());
         if( cb != NULL && cb->is_nmethod() ) {
@@ -1020,7 +1079,8 @@ void nmethod::cleanup_inline_caches() {
         break;
       }
       case relocInfo::static_call_type: {
-        CompiledStaticCall *csc = compiledStaticCall_at(iter.reloc());
+        bool is_tail_call = iter.tail_call_type() != relocInfo::not_tail_call;
+        CompiledStaticCall *csc = compiledStaticCall_at(iter.reloc(), is_tail_call);
         CodeBlob *cb = CodeCache::find_blob_unsafe(csc->destination());
         if( cb != NULL && cb->is_nmethod() ) {
           nmethod* nm = (nmethod*)cb;
@@ -1173,6 +1233,18 @@ void nmethod::make_not_entrant_or_zombie
     // The caller can be calling the method statically or through an inline
     // cache call.
     if (!is_not_entrant()) {
+      // Patch the tail call entries before the verified entry point; otherwise
+      // we might reach a patched verified entry point with a popped frame, and
+      // get_handle_wrong_method_stub would see a wrong return address (that of
+      // the parent frame).
+      NativeJump::patch_verified_entry(0, verified_tail_call_entry_point(),
+                  SharedRuntime::get_handle_wrong_method_stub());
+      NativeJump::patch_verified_entry(0, verified_not_sibling_tail_call_entry_point(),
+                  SharedRuntime::get_handle_wrong_method_stub());
+      NativeJump::patch_verified_entry(0, tail_call_entry_point(),
+                  SharedRuntime::get_handle_wrong_method_stub());
+      NativeJump::patch_verified_entry(0, not_sibling_tail_call_entry_point(),
+                  SharedRuntime::get_handle_wrong_method_stub());
       NativeJump::patch_verified_entry(entry_point(), verified_entry_point(),
                   SharedRuntime::get_handle_wrong_method_stub());
       assert (NativeJump::instruction_size == nmethod::_zombie_instruction_size, "");
@@ -1456,7 +1528,8 @@ void nmethod::do_unloading(BoolObjectClo
     RelocIterator iter(this, low_boundary);
     while(iter.next()) {
       if (iter.type() == relocInfo::virtual_call_type) {
-        CompiledIC *ic = CompiledIC_at(iter.reloc());
+        bool is_tail_call = iter.tail_call_type() != relocInfo::not_tail_call;
+        CompiledIC *ic = CompiledIC_at(iter.reloc(), is_tail_call);
         oop ic_oop = ic->cached_oop();
         if (ic_oop != NULL && !is_alive->do_object_b(ic_oop)) {
           // The only exception is compiledICHolder oops which may
@@ -1510,7 +1583,8 @@ void nmethod::do_unloading(BoolObjectClo
     RelocIterator iter(this, low_boundary);
     while (iter.next()) {
       if (iter.type() == relocInfo::virtual_call_type) {
-         CompiledIC *ic = CompiledIC_at(iter.reloc());
+         bool is_tail_call = iter.tail_call_type() != relocInfo::not_tail_call;
+         CompiledIC *ic = CompiledIC_at(iter.reloc(), is_tail_call);
          oop ic_oop = ic->cached_oop();
          assert(ic_oop == NULL || is_alive->do_object_b(ic_oop),
                 "Found unmarked ic_oop in reachable nmethod");
@@ -1923,11 +1997,13 @@ void nmethod::verify_interrupt_point(add
   if (CompiledIC_lock->owner() == cur ||
       ((cur->is_VM_thread() || cur->is_ConcurrentGC_thread()) &&
        SafepointSynchronize::is_at_safepoint())) {
-    ic = CompiledIC_at(call_site);
+    bool is_tail_call = NativeJump::is_jump_at(call_site);
+    ic = CompiledIC_at(call_site, is_tail_call);
     CHECK_UNHANDLED_OOPS_ONLY(Thread::current()->clear_unhandled_oops());
   } else {
     MutexLocker ml_verify (CompiledIC_lock);
-    ic = CompiledIC_at(call_site);
+    bool is_tail_call = NativeJump::is_jump_at(call_site);
+    ic = CompiledIC_at(call_site, is_tail_call);
   }
   PcDesc* pd = pc_desc_at(ic->end_of_call());
   assert(pd != NULL, "PcDesc must exist");
@@ -2171,6 +2247,7 @@ const char* nmethod::reloc_string_for(u_
         case relocInfo::poll_type:             return "poll";
         case relocInfo::poll_return_type:      return "poll_return";
         case relocInfo::type_mask:             return "type_bit_mask";
+        case relocInfo::section_call_type:     return "section_call";
     }
   }
   return have_one ? "other" : NULL;
@@ -2220,8 +2297,9 @@ void nmethod::print_code_comment_on(outp
         st->print("method is native");
       } else {
         address bcp  = sd->method()->bcp_from(sd->bci());
-        Bytecodes::Code bc = Bytecodes::java_code_at(bcp);
-        st->print(";*%s", Bytecodes::name(bc));
+        Bytecodes::Prefix pfx;
+        Bytecodes::Code bc = Bytecodes::java_code_at(bcp, pfx);
+        st->print(";*%s%s", Bytecodes::prefix_name(pfx), Bytecodes::name(bc));
         switch (bc) {
         case Bytecodes::_invokevirtual:
         case Bytecodes::_invokespecial:
@@ -2302,12 +2380,13 @@ void nmethod::print_calls(outputStream* 
     case relocInfo::virtual_call_type:
     case relocInfo::opt_virtual_call_type: {
       VerifyMutexLocker mc(CompiledIC_lock);
-      CompiledIC_at(iter.reloc())->print();
+      bool is_tail_call = iter.tail_call_type()!=relocInfo::not_tail_call;
+      CompiledIC_at(iter.reloc(), is_tail_call)->print();
       break;
     }
     case relocInfo::static_call_type:
       st->print_cr("Static call at " INTPTR_FORMAT, iter.reloc()->addr());
-      compiledStaticCall_at(iter.reloc())->print();
+      compiledStaticCall_at(iter.reloc(), iter.tail_call_type()!=relocInfo::not_tail_call)->print();
       break;
     }
   }
diff -r aa0c48844632 -r a7d54b98ca4a src/share/vm/code/nmethod.hpp
--- a/src/share/vm/code/nmethod.hpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/share/vm/code/nmethod.hpp	Wed Jun 03 16:27:17 2009 +0200
@@ -151,7 +151,12 @@ class nmethod : public CodeBlob {
   int _handler_table_offset;
   int _nul_chk_table_offset;
   int _nmethod_end_offset;
-
+  int _verified_tail_call_offset;
+  int _verified_not_sibling_tail_call_offset;
+  int _verified_not_sibling_tail_call_set_data_offset;
+  int _tail_call_offset;
+  int _not_sibling_tail_call_offset;
+  int _not_sibling_tail_call_set_data_offset;
   // location in frame (offset for sp) that deopt can store the original
   // pc during a deopt.
   int _orig_pc_offset;
@@ -163,6 +168,14 @@ class nmethod : public CodeBlob {
   address _entry_point;                // entry point with class check
   address _verified_entry_point;       // entry point without class check
   address _osr_entry_point;            // entry point for on stack replacement
+
+  // Tail call entry points:
+  //   Without class and without protection domain check.
+  address _verified_tail_call_entry_point;
+  address _verified_not_sibling_tail_call_entry_point;
+  //   With class, without protection domain check.
+  address _tail_call_entry_point;
+  address _not_sibling_tail_call_entry_point;
 
   nmFlags flags;           // various flags to keep track of nmethod state
   bool _markedForDeoptimization;       // Used for stack deoptimization
@@ -338,7 +351,10 @@ class nmethod : public CodeBlob {
   address handler_table_end  () const             { return           header_begin() + _nul_chk_table_offset   ; }
   address nul_chk_table_begin() const             { return           header_begin() + _nul_chk_table_offset ; }
   address nul_chk_table_end  () const             { return           header_begin() + _nmethod_end_offset   ; }
-
+  address verified_tail_call_begin() const          { return           header_begin() + _verified_tail_call_offset; }
+  address verified_not_sibling_tail_call_begin() const          { return           header_begin() + _verified_not_sibling_tail_call_offset; }
+  address tail_call_begin() const     { return           header_begin() + _tail_call_offset; }
+  address not_sibling_tail_call_begin() const     { return           header_begin() + _not_sibling_tail_call_offset; }
   int code_size         () const                  { return      code_end         () -      code_begin         (); }
   int stub_size         () const                  { return      stub_end         () -      stub_begin         (); }
   int consts_size       () const                  { return      consts_end       () -      consts_begin       (); }
@@ -361,7 +377,10 @@ class nmethod : public CodeBlob {
   // entry points
   address entry_point() const                     { return _entry_point;             } // normal entry point
   address verified_entry_point() const            { return _verified_entry_point;    } // if klass is correct
-
+  address verified_tail_call_entry_point() const    { return _verified_tail_call_entry_point; } // if klass and pd are correct
+  address verified_not_sibling_tail_call_entry_point() const    { return _verified_not_sibling_tail_call_entry_point; } // if klass and pd are correct
+  address tail_call_entry_point() const { return _tail_call_entry_point; } // klass check only
+  address not_sibling_tail_call_entry_point() const { return _not_sibling_tail_call_entry_point; }
   // flag accessing and manipulation
   bool  is_in_use() const                         { return flags.state == alive; }
   bool  is_alive() const                          { return flags.state == alive || flags.state == not_entrant; }
@@ -432,6 +451,8 @@ class nmethod : public CodeBlob {
   // note: native wrappers cannot be deoptimized.
   bool can_be_deoptimized() const { return is_java_method(); }
 
+  // Tail call support
+  void set_adapter_info_in_tail_call_stubs(methodOop method, AdapterHandlerEntry* adapter);
   // Inline cache support
   void clear_inline_caches();
   void cleanup_inline_caches();
@@ -581,7 +602,7 @@ class nmethod : public CodeBlob {
   static int verified_entry_point_offset()        { return offset_of(nmethod, _verified_entry_point); }
   static int osr_entry_point_offset()             { return offset_of(nmethod, _osr_entry_point); }
   static int entry_bci_offset()                   { return offset_of(nmethod, _entry_bci); }
-
+  static int verified_tail_call_entry_point_offset() { return offset_of(nmethod, _verified_tail_call_entry_point); }
 };
 
 // Locks an nmethod so its code will not get removed, even if it is a zombie/not_entrant method
diff -r aa0c48844632 -r a7d54b98ca4a src/share/vm/code/relocInfo.cpp
--- a/src/share/vm/code/relocInfo.cpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/share/vm/code/relocInfo.cpp	Wed Jun 03 16:27:17 2009 +0200
@@ -106,6 +106,39 @@ void relocInfo::change_reloc_info_for_ad
   assert(found, "no relocInfo found for pc");
 }
 
+// TODO: arnold remove dead code
+#if 0
+void relocInfo::change_reloc_info_for_address(RelocIterator *itr, address pc, relocType old_type, relocType new_type, address target, int section) {
+  bool found = false;
+  assert(old_type == relocInfo::static_call_type, "only works for static call");
+  assert(new_type == relocInfo::section_call_type, "only works for inter section call");
+  while (itr->next() && !found) {
+    if (itr->addr() == pc) {
+      assert(itr->type()==old_type, "wrong relocInfo type found");
+      itr->current()->set_type(new_type);
+      // Set relocation info.
+      short* p = (short*)itr->data();
+      assert(itr->datalen() == 2, "Sanity check.");
+      assert(itr->code()->is_nmethod(), "oops");
+      nmethod * code = (nmethod*)itr->code();
+      int sindex = section;
+  
+      assert(sindex != CodeBuffer::SECT_NONE, "must belong somewhere");
+      assert(target != NULL, "sanity");
+      assert(sindex == CodeBuffer::SECT_STUBS, "assume section stub");
+      address base = code->stub_begin();
+      jint offset = Relocation::scaled_offset(target, base);
+      assert((uint)sindex < (uint)CodeBuffer::SECT_LIMIT, "sanity");
+      assert(CodeBuffer::SECT_LIMIT <= (1 << section_call_Relocation::section_width), "section_width++");
+      p = Relocation::add_jint(p, (offset << section_call_Relocation::section_width) | sindex);
+      
+      // TODO: end
+      found=true;
+    }
+  }
+  assert(found, "no relocInfo found for pc");
+}
+#endif
 
 void relocInfo::remove_reloc_info_for_address(RelocIterator *itr, address pc, relocType old_type) {
   change_reloc_info_for_address(itr, pc, old_type, none);
@@ -404,6 +437,31 @@ Relocation* RelocIterator::reloc() {
 }
 
 
+relocInfo::tailCallType RelocIterator::tail_call_type() {
+  assert(has_current(), "must have a reloc");
+
+  relocInfo::relocType type = reloc()->type();
+  assert(type==relocInfo::static_call_type || type==relocInfo::virtual_call_type ||
+         type==relocInfo::opt_virtual_call_type, "must be a call type");
+
+  relocInfo::tailCallType tail_call_type;
+  switch(type) {
+  case relocInfo::static_call_type: {
+    static_call_Relocation * r = static_call_reloc();
+    tail_call_type = r->tail_call_type(); }
+    break;
+  case relocInfo::opt_virtual_call_type: {
+    opt_virtual_call_Relocation* r = opt_virtual_call_reloc();
+    tail_call_type = r->tail_call_type(); }
+    break;
+  case relocInfo::virtual_call_type: {
+    virtual_call_Relocation * r = virtual_call_reloc();
+    tail_call_type = r->tail_call_type(); }
+    break;
+  default: guarantee(0, "should not get here");break;
+  }
+  return tail_call_type;
+}
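
tail_call_type() is the accessor that the nmethod code above keys on whenever it rebuilds a CompiledIC or CompiledStaticCall from relocation info. A typical consumer loop, sketched with nm standing for an nmethod*:

      RelocIterator iter(nm);
      while (iter.next()) {
        if (iter.type() == relocInfo::virtual_call_type ||
            iter.type() == relocInfo::opt_virtual_call_type) {
          bool is_tail_call = iter.tail_call_type() != relocInfo::not_tail_call;
          CompiledIC* ic = CompiledIC_at(iter.reloc(), is_tail_call);
          // inspect or clean 'ic' here
        }
      }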
 //////// Methods for flyweight Relocation types
 
 
@@ -579,18 +637,43 @@ void virtual_call_Relocation::pack_data_
   normalize_address(_oop_limit, dest);
   jint x0 = scaled_offset_null_special(_first_oop, point);
   jint x1 = scaled_offset_null_special(_oop_limit, point);
-  p = pack_2_ints_to(p, x0, x1);
+  // p = pack_2_ints_to(p, x0, x1);  -- we now also append a short, so the
+  // pack_2_ints encoding can no longer be used here.
+  p = add_jint(p, x0);
+  p = add_jint(p, x1);
+  p = add_short(p, (short)_tail_call_type);
   dest->set_locs_end((relocInfo*) p);
 }
 
 
 void virtual_call_Relocation::unpack_data() {
-  jint x0, x1; unpack_2_ints(x0, x1);
+  jint x0, x1;
+  short* p = data();
+  int dlen = datalen();
+  // short* p = unpack_2_ints(x0, x1);
+  x0 = relocInfo::jint_data_at(0, p, dlen);
+  x1 = relocInfo::jint_data_at(2, p, dlen);
+
   address point = addr();
   _first_oop = x0==0? NULL: address_from_scaled_offset(x0, point);
   _oop_limit = x1==0? NULL: address_from_scaled_offset(x1, point);
+  //_tail_call_type = (relocInfo::tailCallType)relocInfo::short_data_at(0, p,
+  //1);
+  _tail_call_type = (relocInfo::tailCallType)relocInfo::short_data_at(4, p, dlen);
 }
 
+// un-/pack tail_call_type data
+void opt_virtual_call_Relocation::pack_data_to(CodeSection* dest) {
+  short*  p     = (short*) dest->locs_end();
+  p = add_short(p, (short)_tail_call_type);
+  dest->set_locs_end((relocInfo*) p);
+}
+
+void opt_virtual_call_Relocation::unpack_data() {
+  assert(datalen()==1, "data length must be 1");
+  short* p = data();
+  _tail_call_type = (relocInfo::tailCallType)relocInfo::short_data_at(0, p, 1);
+}
 
 void static_stub_Relocation::pack_data_to(CodeSection* dest) {
   short* p = (short*) dest->locs_end();
@@ -702,6 +785,62 @@ void section_word_Relocation::unpack_dat
   _target  = address_from_scaled_offset(offset, base);
 }
 
+void static_call_Relocation::pack_data_to(CodeSection* dest) {
+  short* p = (short*) dest->locs_end();
+  p = add_short(p, (short)_tail_call_type);
+  dest->set_locs_end((relocInfo*) p);
+}
+
+void static_call_Relocation::unpack_data() {
+  short* p = data();
+  assert(datalen() == 1, "data length must be 1");
+  _tail_call_type = (relocInfo::tailCallType)relocInfo::short_data_at(0, p, 1);
+}
+
+void section_call_Relocation::pack_data_to(CodeSection* dest) {
+  short* p = (short*) dest->locs_end();
+  normalize_address(_target, dest, true);
+
+  // Check whether my target address is valid within this section.
+  // If not, strengthen the relocation type to point to another section.
+  int sindex = _section;
+  
+  assert(sindex != CodeBuffer::SECT_NONE, "must belong somewhere");
+  assert(_target != NULL, "sanity");
+
+  CodeSection* sect = dest->outer()->code_section(sindex);
+  guarantee(sect->allocates2(_target), "must be in correct section");
+  address base = sect->start();
+  jint offset = scaled_offset(_target, base);
+  assert((uint)sindex < (uint)CodeBuffer::SECT_LIMIT, "sanity");
+  assert(CodeBuffer::SECT_LIMIT <= (1 << section_width), "section_width++");
+  // Always emit a fixed-size (jint) record.
+  p = add_jint(p, (offset << section_width) | sindex);
+
+  dest->set_locs_end((relocInfo*) p);
+}
+
+void section_call_Relocation::unpack_data() {
+  jint    x      = unpack_1_int();
+  jint    offset = (x >> section_width);
+  int     sindex = (x & ((1<<section_width)-1));
+  address base   = binding()->section_start(sindex);
+
+  _section = sindex;
+  _target  = address_from_scaled_offset(offset, base);
+}
+
+void section_call_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) {
+  address target = _target;
+  if (target == NULL) {
+    if (addr_in_const()) {
+      target = new_addr_for(*(address*)addr(), src, dest);
+    } else {
+      target = new_addr_for(pd_get_address_from_code(), src, dest);
+    }
+  }
+  set_value(target);
+}
 
 void breakpoint_Relocation::pack_data_to(CodeSection* dest) {
   short* p = (short*) dest->locs_end();
@@ -775,9 +914,21 @@ void oop_Relocation::fix_oop_relocation(
   }
 }
 
-
+void static_call_Relocation::parse_static_call(address &static_call, relocInfo::tailCallType &tail_call_type) {
+  CodeBlob* code = CodeCache::find_blob(static_call);
+  RelocIterator iter(code, static_call, static_call + 1);
+  bool ret = iter.next();
+  assert(ret == true, "relocInfo must exist at this address");
+  assert(iter.addr() == static_call, "must find static call");
+  if (iter.type() == relocInfo::static_call_type) {
+    static_call_Relocation* r = iter.static_call_reloc();
+    tail_call_type = r->tail_call_type();
+  } else {
+    assert(0, "must be a static call");
+  }
+}
 RelocIterator virtual_call_Relocation::parse_ic(CodeBlob* &code, address &ic_call, address &first_oop,
-                                                oop* &oop_addr, bool *is_optimized) {
+                                                oop* &oop_addr, bool *is_optimized, relocInfo::tailCallType* tail_call_type) {
   assert(ic_call != NULL, "ic_call address must be set");
   assert(ic_call != NULL || first_oop != NULL, "must supply a non-null input");
   if (code == NULL) {
@@ -803,12 +954,15 @@ RelocIterator virtual_call_Relocation::p
       virtual_call_Relocation* r = iter.virtual_call_reloc();
       first_oop = r->first_oop();
       oop_limit = r->oop_limit();
+      *tail_call_type = r->tail_call_type();
       *is_optimized = false;
     } else {
       assert(iter.type() == relocInfo::opt_virtual_call_type, "must be a virtual call");
+      opt_virtual_call_Relocation* r = iter.opt_virtual_call_reloc();
       *is_optimized = true;
       oop_addr = NULL;
       first_oop = NULL;
+      *tail_call_type = r->tail_call_type();
       return iter;
     }
   }
@@ -865,22 +1019,27 @@ address virtual_call_Relocation::oop_lim
     return _oop_limit;
 }
 
-
+relocInfo::tailCallType virtual_call_Relocation::tail_call_type() {
+  return _tail_call_type;
+}
 
 void virtual_call_Relocation::clear_inline_cache() {
   // No stubs for ICs
   // Clean IC
   ResourceMark rm;
-  CompiledIC* icache = CompiledIC_at(this);
+  CompiledIC* icache = CompiledIC_at(this, _tail_call_type!=relocInfo::not_tail_call);
   icache->set_to_clean();
 }
 
+relocInfo::tailCallType opt_virtual_call_Relocation::tail_call_type() {
+  return _tail_call_type;
+}
 
 void opt_virtual_call_Relocation::clear_inline_cache() {
   // No stubs for ICs
   // Clean IC
   ResourceMark rm;
-  CompiledIC* icache = CompiledIC_at(this);
+  CompiledIC* icache = CompiledIC_at(this, _tail_call_type!=relocInfo::not_tail_call);
   icache->set_to_clean();
 }
 
@@ -893,6 +1052,9 @@ address opt_virtual_call_Relocation::sta
     if (iter.type() == relocInfo::static_stub_type) {
       if (iter.static_stub_reloc()->static_call() == static_call_addr) {
         return iter.addr();
+      } else if (iter.static_stub_reloc()->static_call()+NativeJump::tail_call_push_ret_offset == static_call_addr) {
+        // Tail call via jump.
+        return iter.addr();
       }
     }
   }
@@ -900,9 +1062,14 @@ address opt_virtual_call_Relocation::sta
 }
 
 
+relocInfo::tailCallType static_call_Relocation::tail_call_type() {
+  return _tail_call_type;
+}
+
 void static_call_Relocation::clear_inline_cache() {
   // Safe call site info
-  CompiledStaticCall* handler = compiledStaticCall_at(this);
+  bool is_tail_call = _tail_call_type!=relocInfo::not_tail_call;
+  CompiledStaticCall* handler = compiledStaticCall_at(this, is_tail_call);
   handler->set_to_clean();
 }
 
@@ -914,6 +1081,9 @@ address static_call_Relocation::static_s
   while (iter.next()) {
     if (iter.type() == relocInfo::static_stub_type) {
       if (iter.static_stub_reloc()->static_call() == static_call_addr) {
+        return iter.addr();
+      } else if (iter.static_stub_reloc()->static_call()+NativeJump::tail_call_push_ret_offset == static_call_addr) {
+        // Tail call via jump.
         return iter.addr();
       }
     }
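
The pack/unpack changes above store the tail-call kind as one extra 16-bit half-word behind the two scaled jint offsets, and read it back at half-word index 4. A minimal standalone sketch of that layout, in plain C++ rather than the HotSpot relocInfo API (the high-half-first ordering inside a jint is an assumption here):

#include <cstdint>
#include <cstdio>
#include <vector>

// Append a 32-bit value as two 16-bit halves (high half first -- an assumption;
// the real relocInfo packing may order the halves differently).
static void add_jint(std::vector<int16_t>& buf, int32_t v) {
  buf.push_back((int16_t)(v >> 16));
  buf.push_back((int16_t)(v & 0xFFFF));
}

// Rebuild a 32-bit value from two consecutive halves.
static int32_t jint_at(const std::vector<int16_t>& buf, int i) {
  return ((int32_t)buf[i] << 16) | (uint16_t)buf[i + 1];
}

int main() {
  std::vector<int16_t> data;
  add_jint(data, 0x12345678);  // x0: scaled offset of the first set-oop
  add_jint(data, 0);           // x1: oop limit (0 means "none")
  data.push_back(1);           // tail-call kind, e.g. sibling_tail_call_type
  std::printf("x0=%08x x1=%08x tail=%d\n",
              (unsigned)jint_at(data, 0), (unsigned)jint_at(data, 2), (int)data[4]);
  return 0;
}
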
diff -r aa0c48844632 -r a7d54b98ca4a src/share/vm/code/relocInfo.hpp
--- a/src/share/vm/code/relocInfo.hpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/share/vm/code/relocInfo.hpp	Wed Jun 03 16:27:17 2009 +0200
@@ -261,12 +261,17 @@ class relocInfo VALUE_OBJ_CLASS_SPEC {
     poll_type               = 10, // polling instruction for safepoints
     poll_return_type        = 11, // polling instruction for safepoints at return
     breakpoint_type         = 12, // an initialization barrier or safepoint
-    yet_unused_type         = 13, // Still unused
+    section_call_type       = 13, // call with a section-relative target
+    //yet_unused_type         = 13, // Still unused
     yet_unused_type_2       = 14, // Still unused
     data_prefix_tag         = 15, // tag for a prefix (carries data arguments)
     type_mask               = 15  // A mask which selects only the above values
   };
-
+  enum tailCallType {
+    not_tail_call              = 0,
+    sibling_tail_call_type     = 1,
+    not_sibling_tail_call_type = 2
+  };
  protected:
   unsigned short _value;
 
@@ -302,7 +307,7 @@ class relocInfo VALUE_OBJ_CLASS_SPEC {
     visitor(poll_return) \
     visitor(breakpoint) \
     visitor(section_word) \
-
+    visitor(section_call) \
 
  public:
   enum {
@@ -616,6 +621,8 @@ class RelocIterator : public StackObj {
   #undef EACH_TYPE
   // generic relocation accessor; switches on type to call the above
   Relocation* reloc();
+  // Get the tail call type of current relocation provided it is a call.
+  relocInfo::tailCallType tail_call_type();
 
   // CodeBlob's have relocation indexes for faster random access:
   static int locs_and_index_size(int code_size, int locs_size);
@@ -732,7 +739,7 @@ class Relocation VALUE_OBJ_CLASS_SPEC {
     }
     return p;
   }
-  void unpack_2_ints(jint& x0, jint& x1) {
+  short* unpack_2_ints(jint& x0, jint& x1) {
     int    dlen = datalen();
     short* dp  = data();
     if (dlen <= 2) {
@@ -743,6 +750,7 @@ class Relocation VALUE_OBJ_CLASS_SPEC {
       x0 = relocInfo::jint_data_at(0, dp, dlen);
       x1 = relocInfo::jint_data_at(2, dp, dlen);
     }
+    return dp;
   }
 
  protected:
@@ -821,6 +829,8 @@ class Relocation VALUE_OBJ_CLASS_SPEC {
   virtual void fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) { }
 
   void print();
+
+  friend class relocInfo;
 };
 
 
@@ -948,20 +958,24 @@ class virtual_call_Relocation : public C
   // "first_oop" points to the first associated set-oop.
   // The oop_limit helps find the last associated set-oop.
   // (See comments at the top of this file.)
-  static RelocationHolder spec(address first_oop, address oop_limit = NULL) {
+  static RelocationHolder spec(address first_oop, address oop_limit = NULL, 
+                               relocInfo::tailCallType tail_call_type = relocInfo::not_tail_call) {
     RelocationHolder rh = newHolder();
-    new(rh) virtual_call_Relocation(first_oop, oop_limit);
+    new(rh) virtual_call_Relocation(first_oop, oop_limit, tail_call_type);
     return rh;
   }
 
-  virtual_call_Relocation(address first_oop, address oop_limit) {
+  virtual_call_Relocation(address first_oop, address oop_limit, 
+                          relocInfo::tailCallType tail_call_type = relocInfo::not_tail_call) {
     _first_oop = first_oop; _oop_limit = oop_limit;
+    _tail_call_type = tail_call_type;
     assert(first_oop != NULL, "first oop address must be specified");
   }
 
  private:
   address _first_oop;               // location of first set-oop instruction
   address _oop_limit;               // search limit for set-oop instructions
+  relocInfo::tailCallType _tail_call_type;
 
   friend class RelocIterator;
   virtual_call_Relocation() { }
@@ -970,6 +984,7 @@ class virtual_call_Relocation : public C
  public:
   address first_oop();
   address oop_limit();
+  relocInfo::tailCallType tail_call_type();
 
   // data is packed as scaled offsets in "2_ints" format:  [f l] or [Ff Ll]
   // oop_limit is set to 0 if the limit falls somewhere within the call.
@@ -987,7 +1002,7 @@ class virtual_call_Relocation : public C
   // The returned iterator will enumerate over the oops and the ic_call,
   // as well as any other relocations that happen to be in that span of code.
   // Recognize relevant set_oops with:  oop_reloc()->oop_addr() == oop_addr.
-  static RelocIterator parse_ic(CodeBlob* &code, address &ic_call, address &first_oop, oop* &oop_addr, bool *is_optimized);
+  static RelocIterator parse_ic(CodeBlob* &code, address &ic_call, address &first_oop, oop* &oop_addr, bool *is_optimized, relocInfo::tailCallType* tail_call_type);
 };
 
 
@@ -995,21 +1010,27 @@ class opt_virtual_call_Relocation : publ
   relocInfo::relocType type() { return relocInfo::opt_virtual_call_type; }
 
  public:
-  static RelocationHolder spec() {
+  static RelocationHolder spec(relocInfo::tailCallType tail_call_type = relocInfo::not_tail_call) {
     RelocationHolder rh = newHolder();
-    new(rh) opt_virtual_call_Relocation();
+    new(rh) opt_virtual_call_Relocation(tail_call_type);
     return rh;
   }
 
  private:
   friend class RelocIterator;
-  opt_virtual_call_Relocation() { }
-
+  opt_virtual_call_Relocation(relocInfo::tailCallType tail_call_type = relocInfo::not_tail_call) {
+    _tail_call_type = tail_call_type;
+  }
+  relocInfo::tailCallType _tail_call_type;
  public:
   void clear_inline_cache();
-
+  relocInfo::tailCallType tail_call_type();
   // find the matching static_stub
   address static_stub();
+  
+  // un-/pack tail_call_type data
+  void pack_data_to(CodeSection* dest);
+  void unpack_data();
 };
 
 
@@ -1017,21 +1038,28 @@ class static_call_Relocation : public Ca
   relocInfo::relocType type() { return relocInfo::static_call_type; }
 
  public:
-  static RelocationHolder spec() {
+  static RelocationHolder spec(relocInfo::tailCallType tail_call_type = relocInfo::not_tail_call) {
     RelocationHolder rh = newHolder();
-    new(rh) static_call_Relocation();
+    new(rh) static_call_Relocation(tail_call_type);
     return rh;
   }
 
  private:
   friend class RelocIterator;
-  static_call_Relocation() { }
-
+  static_call_Relocation(relocInfo::tailCallType tail_call_type = relocInfo::not_tail_call) {
+    _tail_call_type = tail_call_type;
+  }
+  relocInfo::tailCallType _tail_call_type;
  public:
+  relocInfo::tailCallType tail_call_type();
   void clear_inline_cache();
-
+  // Added to accommodate the size needed for transformation to a section_call_Relocation.
+  void pack_data_to(CodeSection* dest);
+  void unpack_data();
   // find the matching static_stub
   address static_stub();
+
+  static void parse_static_call(address &static_call, relocInfo::tailCallType &tail_call_type);
 };
 
 class static_stub_Relocation : public Relocation {
@@ -1197,6 +1225,41 @@ class section_word_Relocation : public i
 };
 
 
+class section_call_Relocation : public CallRelocation {
+  relocInfo::relocType type() { return relocInfo::section_call_type; }
+ public:
+  static RelocationHolder spec(address target, int section) {
+    RelocationHolder rh = newHolder();
+    new(rh) section_call_Relocation(target, section);
+    return rh;
+  }
+
+  section_call_Relocation(address target, int section) {
+    assert(target != NULL, "must not be null");
+    assert(section >= 0, "must be a valid section");
+    _target  = target;
+    _section = section;
+  }
+
+  // Need to take section index into account.
+  void pack_data_to(CodeSection* dest);
+  void unpack_data();
+  void fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest);
+
+ protected:
+  address _target;                  // address in CodeBlob
+  int     _section;                 // section providing base address, if any
+
+ public:
+  // bit-width of LSB field in packed offset, if section >= 0
+  enum { section_width = 2 }; // must equal CodeBuffer::sect_bits
+
+ private:
+  friend class RelocIterator;
+  section_call_Relocation() { }
+};
+
+
 class poll_Relocation : public Relocation {
   bool          is_data()                      { return true; }
   relocInfo::relocType type() { return relocInfo::poll_type; }
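
The new section_call_Relocation above packs its target as a scaled offset shifted left by section_width, with the section index in the low bits; unpack_data() inverts this. A standalone sketch of that encoding (assumes section_width == 2, as declared in the class):

#include <cassert>
#include <cstdint>
#include <cstdio>

enum { section_width = 2 };  // four possible sections

static int32_t pack(int32_t scaled_offset, int section) {
  assert(section >= 0 && section < (1 << section_width));
  return (scaled_offset << section_width) | section;
}

static void unpack(int32_t x, int32_t& scaled_offset, int& section) {
  scaled_offset = x >> section_width;            // arithmetic shift keeps the sign
  section       = x & ((1 << section_width) - 1);
}

int main() {
  int32_t off; int sect;
  unpack(pack(1234, 3), off, sect);
  std::printf("offset=%d section=%d\n", off, sect);  // offset=1234 section=3
  return 0;
}
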
diff -r aa0c48844632 -r a7d54b98ca4a src/share/vm/code/vtableStubs.cpp
--- a/src/share/vm/code/vtableStubs.cpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/share/vm/code/vtableStubs.cpp	Wed Jun 03 16:27:17 2009 +0200
@@ -96,17 +96,17 @@ void VtableStubs::initialize() {
 }
 
 
-address VtableStubs::create_stub(bool is_vtable_stub, int vtable_index, methodOop method) {
+address VtableStubs::create_stub(bool is_vtable_stub, int vtable_index, methodOop method, bool is_tail_call, bool is_sibling) {
   assert(vtable_index >= 0, "must be positive");
 
-  VtableStub* s = ShareVtableStubs ? lookup(is_vtable_stub, vtable_index) : NULL;
+  VtableStub* s = ShareVtableStubs ? lookup(is_vtable_stub, vtable_index, is_tail_call, is_sibling) : NULL;
   if (s == NULL) {
     if (is_vtable_stub) {
-      s = create_vtable_stub(vtable_index);
+      s = create_vtable_stub(vtable_index, is_tail_call, is_sibling);
     } else {
-      s = create_itable_stub(vtable_index);
+      s = create_itable_stub(vtable_index, is_tail_call, is_sibling);
     }
-    enter(is_vtable_stub, vtable_index, s);
+    enter(is_vtable_stub, vtable_index, s, is_tail_call, is_sibling);
     if (PrintAdapterHandlers) {
       tty->print_cr("Decoding VtableStub %s[%d]@%d",
                     is_vtable_stub? "vtbl": "itbl", vtable_index, VtableStub::receiver_location());
@@ -117,26 +117,28 @@ address VtableStubs::create_stub(bool is
 }
 
 
-inline uint VtableStubs::hash(bool is_vtable_stub, int vtable_index){
+inline uint VtableStubs::hash(bool is_vtable_stub, int vtable_index, bool is_tail_call, bool is_sibling) {
   // Assumption: receiver_location < 4 in most cases.
   int hash = ((vtable_index << 2) ^ VtableStub::receiver_location()->value()) + vtable_index;
   return (is_vtable_stub ? ~hash : hash)  & mask;
 }
 
 
-VtableStub* VtableStubs::lookup(bool is_vtable_stub, int vtable_index) {
+VtableStub* VtableStubs::lookup(bool is_vtable_stub, int vtable_index, 
+                                bool is_tail_call, bool is_sibling) {
   MutexLocker ml(VtableStubs_lock);
-  unsigned hash = VtableStubs::hash(is_vtable_stub, vtable_index);
+  unsigned hash = VtableStubs::hash(is_vtable_stub, vtable_index, is_tail_call, is_sibling);
   VtableStub* s = _table[hash];
-  while( s && !s->matches(is_vtable_stub, vtable_index)) s = s->next();
+  while( s && !s->matches(is_vtable_stub, vtable_index, is_tail_call, is_sibling)) s = s->next();
   return s;
 }
 
 
-void VtableStubs::enter(bool is_vtable_stub, int vtable_index, VtableStub* s) {
+void VtableStubs::enter(bool is_vtable_stub, int vtable_index, VtableStub* s,
+                        bool is_tail_call, bool is_sibling) {
   MutexLocker ml(VtableStubs_lock);
-  assert(s->matches(is_vtable_stub, vtable_index), "bad vtable stub");
-  unsigned int h = VtableStubs::hash(is_vtable_stub, vtable_index);
+  assert(s->matches(is_vtable_stub, vtable_index, is_tail_call, is_sibling), "bad vtable stub");
+  unsigned int h = VtableStubs::hash(is_vtable_stub, vtable_index, is_tail_call, is_sibling);
   // enter s at the beginning of the corresponding list
   s->set_next(_table[h]);
   _table[h] = s;
@@ -147,7 +149,7 @@ bool VtableStubs::is_entry_point(address
 bool VtableStubs::is_entry_point(address pc) {
   MutexLocker ml(VtableStubs_lock);
   VtableStub* stub = (VtableStub*)(pc - VtableStub::entry_offset());
-  uint hash = VtableStubs::hash(stub->is_vtable_stub(), stub->index());
+  uint hash = VtableStubs::hash(stub->is_vtable_stub(), stub->index(), stub->is_tail_call(), stub->is_sibling());
   VtableStub* s;
   for (s = _table[hash]; s != NULL && s != stub; s = s->next()) {}
   return s == stub;
diff -r aa0c48844632 -r a7d54b98ca4a src/share/vm/code/vtableStubs.hpp
--- a/src/share/vm/code/vtableStubs.hpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/share/vm/code/vtableStubs.hpp	Wed Jun 03 16:27:17 2009 +0200
@@ -37,14 +37,18 @@ class VtableStub {
   const short    _index;             // vtable index
   short          _ame_offset;        // Where an AbstractMethodError might occur
   short          _npe_offset;        // Where a NullPointerException might occur
-  bool           _is_vtable_stub;    // True if vtable stub, false, is itable stub
+  bool           _is_vtable_stub;    // True if vtable stub,
+                                     // false if itable stub
+  bool           _is_tail_call;      // True if the stub is used at a tail call site
+  bool           _is_sibling;        // True if it is a sibling tail call
   /* code follows here */            // The vtableStub code
 
   void* operator new(size_t size, int code_size);
 
-  VtableStub(bool is_vtable_stub, int index)
+  VtableStub(bool is_vtable_stub, int index, bool is_tail_call, bool is_sibling)
         : _next(NULL), _is_vtable_stub(is_vtable_stub),
-          _index(index), _ame_offset(-1), _npe_offset(-1) {}
+          _index(index), _ame_offset(-1), _npe_offset(-1),
+          _is_tail_call(is_tail_call), _is_sibling(is_sibling) {}
   VtableStub* next() const                       { return _next; }
   int index() const                              { return _index; }
   static VMReg receiver_location()               { return _receiver_location; }
@@ -54,8 +58,9 @@ class VtableStub {
   address entry_point() const                    { return code_begin(); }
   static int entry_offset()                      { return sizeof(class VtableStub); }
 
-  bool matches(bool is_vtable_stub, int index) const {
-    return _index == index && _is_vtable_stub == is_vtable_stub;
+  bool matches(bool is_vtable_stub, int index, bool is_tail_call, bool is_sibling) const {
+    return _index == index && _is_vtable_stub == is_vtable_stub &&
+           _is_tail_call == is_tail_call && _is_sibling == is_sibling;
   }
   bool contains(address pc) const                { return code_begin() <= pc && pc < code_end(); }
 
@@ -83,6 +88,8 @@ class VtableStub {
   // Query
   bool is_itable_stub()                          { return !_is_vtable_stub; }
   bool is_vtable_stub()                          { return  _is_vtable_stub; }
+  bool is_tail_call()                            { return _is_tail_call; }
+  bool is_sibling()                              { return _is_sibling; }
   bool is_abstract_method_error(address epc)     { return epc == code_begin()+_ame_offset; }
   bool is_null_pointer_exception(address epc)    { return epc == code_begin()+_npe_offset; }
 
@@ -105,14 +112,14 @@ class VtableStubs : AllStatic {
   static VtableStub* _table[N];                  // table of existing stubs
   static int         _number_of_vtable_stubs;    // number of stubs created so far (for statistics)
 
-  static VtableStub* create_vtable_stub(int vtable_index);
-  static VtableStub* create_itable_stub(int vtable_index);
-  static VtableStub* lookup            (bool is_vtable_stub, int vtable_index);
-  static void        enter             (bool is_vtable_stub, int vtable_index, VtableStub* s);
-  static inline uint hash              (bool is_vtable_stub, int vtable_index);
+  static VtableStub* create_vtable_stub(int vtable_index, bool is_tail_call, bool is_sibling);
+  static VtableStub* create_itable_stub(int vtable_index, bool is_tail_call, bool is_sibling);
+  static VtableStub* lookup            (bool is_vtable_stub, int vtable_index, bool is_tail_call, bool is_sibling);
+  static void        enter             (bool is_vtable_stub, int vtable_index, VtableStub* s, bool is_tail_call, bool is_sibling);
+  static inline uint hash              (bool is_vtable_stub, int vtable_index, bool is_tail_call, bool is_sibling);
 
  public:
-  static address     create_stub(bool is_vtable_stub, int vtable_index, methodOop method); // return the entry point of a stub for this call
+  static address     create_stub(bool is_vtable_stub, int vtable_index, methodOop method, bool is_tail_call, bool is_sibling); // return the entry point of a stub for this call
   static bool        is_entry_point(address pc);                     // is pc a vtable stub entry point?
   static bool        contains(address pc);                           // is pc within any stub?
   static VtableStub* stub_containing(address pc);                    // stub containing pc or NULL
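
With the vtableStubs changes above, a cached stub is keyed by four values: stub kind, vtable index, tail-call flag and sibling flag. The hash itself does not mix in the two new flags, so variants that differ only there land in the same bucket and matches() separates them. A standalone sketch of that bucket walk with a hypothetical, simplified Stub type (not the real VtableStub class):

#include <cstdio>

// Hypothetical stand-in for a vtable/itable stub, keyed by the same four values.
struct Stub {
  bool  is_vtable_stub;
  int   index;
  bool  is_tail_call;
  bool  is_sibling;
  Stub* next;

  bool matches(bool v, int i, bool tc, bool sib) const {
    return index == i && is_vtable_stub == v && is_tail_call == tc && is_sibling == sib;
  }
};

// Walk one hash bucket; only matches() distinguishes the tail-call variants.
static Stub* lookup(Stub* bucket, bool v, int i, bool tc, bool sib) {
  Stub* s = bucket;
  while (s != nullptr && !s->matches(v, i, tc, sib)) s = s->next;
  return s;
}

int main() {
  Stub plain   = { true, 5, false, false, nullptr };
  Stub sibling = { true, 5, true,  true,  &plain };  // same index, tail-call variant
  std::printf("plain:   %p\n", (void*)lookup(&sibling, true, 5, false, false));
  std::printf("sibling: %p\n", (void*)lookup(&sibling, true, 5, true, true));
  return 0;
}
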
diff -r aa0c48844632 -r a7d54b98ca4a src/share/vm/compiler/disassembler.cpp
--- a/src/share/vm/compiler/disassembler.cpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/share/vm/compiler/disassembler.cpp	Wed Jun 03 16:27:17 2009 +0200
@@ -293,6 +293,10 @@ void decode_env::print_insn_labels() {
     if (p == nm->exception_begin())         st->print_cr("[Exception Handler]");
     if (p == nm->stub_begin())              st->print_cr("[Stub Code]");
     if (p == nm->consts_begin())            st->print_cr("[Constants]");
+    if (p == nm->verified_tail_call_begin())  st->print_cr("[Verified Tail Call Entry Point]");
+    if (p == nm->tail_call_begin()) st->print_cr("[Tail Call Entry Point]");
+    if (p == nm->verified_not_sibling_tail_call_begin()) st->print_cr("[Verified Not Sibling Tail Call Entry Point]");
+    if (p == nm->not_sibling_tail_call_begin()) st->print_cr("[Not Sibling Tail Call Entry Point]");
   }
   CodeBlob* cb = _code;
   if (cb != NULL) {
diff -r aa0c48844632 -r a7d54b98ca4a src/share/vm/compiler/disassembler.hpp
--- a/src/share/vm/compiler/disassembler.hpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/share/vm/compiler/disassembler.hpp	Wed Jun 03 16:27:17 2009 +0200
@@ -29,7 +29,7 @@ class decode_env;
 
 class Disassembler {
   friend class decode_env;
- private:
+public:
   // this is the type of the dll entry point:
   typedef void* (*decode_func)(void* start, void* end,
                                void* (*event_callback)(void*, const char*, void*),
@@ -37,6 +37,8 @@ class Disassembler {
                                int (*printf_callback)(void*, const char*, ...),
                                void* printf_stream,
                                const char* options);
+ private:
+
   // points to the library.
   static void*    _library;
   // bailout
diff -r aa0c48844632 -r a7d54b98ca4a src/share/vm/includeDB_core
--- a/src/share/vm/includeDB_core	Thu May 14 10:57:58 2009 -0700
+++ b/src/share/vm/includeDB_core	Wed Jun 03 16:27:17 2009 +0200
@@ -1125,11 +1125,14 @@ compiledIC.cpp                          
 compiledIC.cpp                          systemDictionary.hpp
 compiledIC.cpp                          vtableStubs.hpp
 
+compiledIC.hpp                          codeCache.hpp
 compiledIC.hpp                          compiledICHolderKlass.hpp
 compiledIC.hpp                          compiledICHolderOop.hpp
 compiledIC.hpp                          klassOop.hpp
 compiledIC.hpp                          linkResolver.hpp
 compiledIC.hpp                          nativeInst_<arch>.hpp
+compiledIC.hpp                          nmethod.hpp
+compiledIC.hpp                          relocInfo.hpp
 
 compiledICHolderKlass.cpp               collectedHeap.hpp
 compiledICHolderKlass.cpp               collectedHeap.inline.hpp
@@ -4448,6 +4451,7 @@ verifier.cpp                            
 verifier.cpp                            typeArrayOop.hpp
 verifier.cpp                            verifier.hpp
 verifier.cpp                            vmSymbols.hpp
+verifier.cpp                            bytecodes.hpp
 
 verifier.hpp                            exceptions.hpp
 verifier.hpp                            gcLocker.hpp
@@ -4505,6 +4509,7 @@ vframeArray.cpp                         
 vframeArray.cpp                         vframeArray.hpp
 vframeArray.cpp                         vframe_hp.hpp
 vframeArray.cpp                         vmSymbols.hpp
+vframeArray.cpp                         objectMonitor.inline.hpp
 
 vframeArray.hpp                         arrayOop.hpp
 vframeArray.hpp                         deoptimization.hpp
diff -r aa0c48844632 -r a7d54b98ca4a src/share/vm/interpreter/abstractInterpreter.hpp
--- a/src/share/vm/interpreter/abstractInterpreter.hpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/share/vm/interpreter/abstractInterpreter.hpp	Wed Jun 03 16:27:17 2009 +0200
@@ -74,7 +74,9 @@ class AbstractInterpreter: AllStatic {
   };
 
   enum SomeConstants {
-    number_of_result_handlers = 10                              // number of result handlers for native calls
+    number_of_result_handlers = 10,                              // number of result handlers for native calls
+    min_invoke_length = 3,   // invokevirtual/invokespecial/invokestatic
+    max_invoke_length = 6  // wide invokeinterface
   };
 
  protected:
@@ -92,6 +94,12 @@ class AbstractInterpreter: AllStatic {
 
   static address    _rethrow_exception_entry;                   // rethrows an activation in previous frame
 
+  // Tail calls in interpreter need to check whether parent frame is an
+  // interpreter frame. To support this we need the address range of interpreter
+  // code.
+  static address    _interpreter_code_begin;
+  static address    _interpreter_code_end;
+
   friend class      AbstractInterpreterGenerator;
   friend class              InterpreterGenerator;
   friend class      InterpreterMacroAssembler;
@@ -117,6 +125,10 @@ class AbstractInterpreter: AllStatic {
 
   static address    rethrow_exception_entry()                   { return _rethrow_exception_entry; }
 
+  // Tail call support
+  static address   interpreter_code_begin_address() { return (address)&_interpreter_code_begin; }
+  static address   interpreter_code_end_address() { return (address)&_interpreter_code_end; }
+ 
   // Activation size in words for a method that is just being called.
   // Parameters haven't been pushed so count them too.
   static int        size_top_interpreter_activation(methodOop method);
diff -r aa0c48844632 -r a7d54b98ca4a src/share/vm/interpreter/bytecode.hpp
--- a/src/share/vm/interpreter/bytecode.hpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/share/vm/interpreter/bytecode.hpp	Wed Jun 03 16:27:17 2009 +0200
@@ -175,10 +175,20 @@ inline Bytecode_tableswitch* Bytecode_ta
 
 class Bytecode_invoke: public ResourceObj {
  protected:
-  methodHandle _method;                          // method containing the bytecode
-  int          _bci;                             // position of the bytecode
+  methodHandle       _method;                    // method containing the bytecode
+  int                _bci;                       // position of the bytecode
+  Bytecodes::Prefix  _prefix;                    // prefix, if any
 
-  Bytecode_invoke(methodHandle method, int bci)  : _method(method), _bci(bci) {}
+  Bytecode_invoke(methodHandle method, int bci)
+    : _method(method), _bci(bci)
+  {
+    _prefix = Bytecodes::Prefix_none;
+    if (code() == Bytecodes::_wide) {
+      // unlike bytecode streams, note the prefix and skip over it:
+      _prefix = Bytecodes::Prefix_tail_call;
+      _bci += 1;
+    }
+  }
 
  public:
   void verify() const;
@@ -199,6 +209,7 @@ class Bytecode_invoke: public ResourceOb
   methodHandle static_target(TRAPS);             // "specified" method   (from constant pool)
 
   // Testers
+  bool is_tailcall() const                       { return _prefix == Bytecodes::Prefix_tail_call; }
   bool is_invokeinterface() const                { return adjusted_invoke_code() == Bytecodes::_invokeinterface; }
   bool is_invokevirtual() const                  { return adjusted_invoke_code() == Bytecodes::_invokevirtual; }
   bool is_invokestatic() const                   { return adjusted_invoke_code() == Bytecodes::_invokestatic; }
diff -r aa0c48844632 -r a7d54b98ca4a src/share/vm/interpreter/bytecodeStream.cpp
--- a/src/share/vm/interpreter/bytecodeStream.cpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/share/vm/interpreter/bytecodeStream.cpp	Wed Jun 03 16:27:17 2009 +0200
@@ -37,14 +37,24 @@ Bytecodes::Code RawBytecodeStream::raw_n
     _next_bci += l;
     assert(_bci < _next_bci, "length must be > 0");
     // set attributes
-    _is_wide = false;
+    _prefix = Bytecodes::Prefix_none;
     // check for special (uncommon) cases
     if (code == Bytecodes::_wide) {
       if (bcp + 1 >= end) {
         code = Bytecodes::_illegal;
       } else {
         code = (Bytecodes::Code)bcp[1];
-        _is_wide = true;
+        switch(code) {
+        case Bytecodes::_invokevirtual:
+        case Bytecodes::_invokeinterface:
+        case Bytecodes::_invokespecial:
+        case Bytecodes::_invokestatic:
+          _prefix = Bytecodes::Prefix_tail_call;
+          break;
+        default:
+          _prefix = Bytecodes::Prefix_wide_index;
+          break;
+        }
       }
     }
   }
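
raw_next() above decides what a _wide prefix means by peeking at the opcode that follows: the four invoke bytecodes turn it into a tail-call prefix, anything else keeps the traditional wide-index meaning. A standalone sketch of that classification using the class-file opcode numbers:

#include <cstdint>
#include <cstdio>

enum Prefix { Prefix_none, Prefix_wide_index, Prefix_tail_call };

// Classify the byte that follows a 0xc4 (_wide) prefix, mirroring raw_next().
static Prefix classify_wide(uint8_t next_opcode) {
  switch (next_opcode) {
  case 0xb6:  // invokevirtual
  case 0xb9:  // invokeinterface
  case 0xb7:  // invokespecial
  case 0xb8:  // invokestatic
    return Prefix_tail_call;
  default:
    return Prefix_wide_index;
  }
}

int main() {
  std::printf("wide+invokestatic -> %d (tail call)\n",  classify_wide(0xb8));
  std::printf("wide+iload        -> %d (wide index)\n", classify_wide(0x15));
  return 0;
}
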
diff -r aa0c48844632 -r a7d54b98ca4a src/share/vm/interpreter/bytecodeStream.hpp
--- a/src/share/vm/interpreter/bytecodeStream.hpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/share/vm/interpreter/bytecodeStream.hpp	Wed Jun 03 16:27:17 2009 +0200
@@ -47,16 +47,17 @@ class RawBytecodeStream: StackObj {
   int             _bci;                          // bci if current bytecode
   int             _next_bci;                     // bci of next bytecode
   int             _end_bci;                      // bci after the current iteration interval
-
+  //bool            _is_wide;
   // last bytecode read
   Bytecodes::Code _code;
-  bool            _is_wide;
+  Bytecodes::Prefix _prefix;
 
  public:
   // Construction
   RawBytecodeStream(methodHandle method) : _method(method) {
     set_interval(0, _method->code_size());
   }
+  //bool is_wide() { return _is_wide;}
 
   // Iteration control
   void set_interval(int beg_bci, int end_bci) {
@@ -84,12 +85,12 @@ class RawBytecodeStream: StackObj {
     code        = Bytecodes::code_or_bp_at(bcp);
 
     // set next bytecode position
-    int l = Bytecodes::length_for(code);
-    if (l > 0 && (_bci + l) <= _end_bci) {
+    int len = Bytecodes::length_for(code);
+    if (len > 0 && (_bci + len) <= _end_bci) {
       assert(code != Bytecodes::_wide && code != Bytecodes::_tableswitch
              && code != Bytecodes::_lookupswitch, "can't be special bytecode");
-      _is_wide = false;
-      _next_bci += l;
+      _prefix = Bytecodes::Prefix_none;
+      _next_bci += len;
       _code = code;
       return code;
     } else if (code == Bytecodes::_wide && _bci + 1 >= _end_bci) {
@@ -108,7 +109,11 @@ class RawBytecodeStream: StackObj {
   int             end_bci() const                { return _end_bci; }
 
   Bytecodes::Code code() const                   { return _code; }
-  bool            is_wide() const                { return _is_wide; }
+  Bytecodes::Prefix prefix() const               { return _prefix; }
+  bool            has_prefix() const             { return _prefix != Bytecodes::Prefix_none; }
+  int             prefix_length() const          { return has_prefix() ? 1 : 0; } // all prefixes are 1 byte
+  bool            is_wide_index() const          { return _prefix == Bytecodes::Prefix_wide_index; }
+  bool            is_tail_call() const           { return _prefix == Bytecodes::Prefix_tail_call;}
   int             instruction_size() const       { return (_next_bci - _bci); }
   bool            is_last_bytecode() const       { return _next_bci >= _end_bci; }
 
@@ -123,10 +128,10 @@ class RawBytecodeStream: StackObj {
   int             dest_w() const                 { return bci() + (int  )Bytes::get_Java_u4(bcp() + 1); }
 
   // Unsigned indices, widening
-  int             get_index() const              { assert_index_size(is_wide() ? 2 : 1);
-                                                   return (is_wide()) ? Bytes::get_Java_u2(bcp() + 2) : bcp()[1]; }
+  int             get_index() const              { assert_index_size(is_wide_index() ? 2 : 1);
+                                                   return (is_wide_index()) ? Bytes::get_Java_u2(bcp() + 2) : bcp()[1]; }
   int             get_index_big() const          { assert_index_size(2);
-                                                   return (int)Bytes::get_Java_u2(bcp() + 1);  }
+                                                   return (int)Bytes::get_Java_u2(bcp() + prefix_length() + 1);  }
   int             get_index_int() const          { return has_giant_index() ? get_index_giant() : get_index_big(); }
   int             get_index_giant() const        { assert_index_size(4); return Bytes::get_native_u4(bcp() + 1); }
   int             has_giant_index() const        { return (code() == Bytecodes::_invokedynamic); }
@@ -134,7 +139,7 @@ class RawBytecodeStream: StackObj {
  private:
   void assert_index_size(int required_size) const {
 #ifdef ASSERT
-    int isize = instruction_size() - (int)_is_wide - 1;
+    int isize = instruction_size() - (int)is_wide_index() - 1;
     if (isize == 2 && code() == Bytecodes::_iinc)
       isize = 1;
     else if (isize <= 2)
@@ -173,18 +178,23 @@ class BytecodeStream: public RawBytecode
       // note that we cannot advance before having the
       // tty bytecode otherwise the stepping is wrong!
       // (carefull: length_for(...) must be used first!)
-      int l = Bytecodes::length_for(code);
-      if (l == 0) l = Bytecodes::length_at(bcp);
-      _next_bci  += l;
+      int len = Bytecodes::length_for(code);
+      _prefix = Bytecodes::Prefix_none;
+      if (len == 0) {
+        len = Bytecodes::length_at(bcp);
+        if (code == Bytecodes::_wide) {
+          code = (Bytecodes::Code)bcp[1];
+          _prefix = Bytecodes::allowed_prefix(code);
+          assert(prefix_length() == 1, "");
+          if (_prefix == Bytecodes::Prefix_illegal) {
+            code = Bytecodes::_illegal;
+          }
+        }
+      }
+      _next_bci += len;
       assert(_bci < _next_bci, "length must be > 0");
-      // set attributes
-      _is_wide      = false;
-      // check for special (uncommon) cases
-      if (code == Bytecodes::_wide) {
-        code = (Bytecodes::Code)bcp[1];
-        _is_wide = true;
-      }
-      assert(Bytecodes::is_java_code(code), "sanity check");
+      if (code != Bytecodes::_illegal)
+        assert(Bytecodes::is_java_code(code), "sanity check");
     }
     _code = code;
     return _code;
diff -r aa0c48844632 -r a7d54b98ca4a src/share/vm/interpreter/bytecodeTracer.cpp
--- a/src/share/vm/interpreter/bytecodeTracer.cpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/share/vm/interpreter/bytecodeTracer.cpp	Wed Jun 03 16:27:17 2009 +0200
@@ -38,7 +38,7 @@ class BytecodePrinter: public BytecodeCl
   // operations on the pointer, except within a critical section.
   // (Also, ensure that occasional false positives are benign.)
   methodOop _current_method;
-  bool      _is_wide;
+  Bytecodes::Prefix _prefix;
   address   _next_pc;                // current decoding position
 
   void      align()                  { _next_pc = (address)round_to((intptr_t)_next_pc, sizeof(jint)); }
@@ -48,11 +48,12 @@ class BytecodePrinter: public BytecodeCl
 
   int       get_index()              { return *(address)_next_pc++; }
   int       get_big_index()          { int i=Bytes::get_Java_u2(_next_pc); _next_pc+=2; return i; }
+  int       get_index_special()      { return (is_wide_index()) ? get_big_index() : get_index(); }
   int       get_giant_index()        { int i=Bytes::get_native_u4(_next_pc); _next_pc+=4; return i; }
-  int       get_index_special()      { return (is_wide()) ? get_big_index() : get_index(); }
   methodOop method()                 { return _current_method; }
-  bool      is_wide()                { return _is_wide; }
-
+  bool      has_prefix() const       { return _prefix != Bytecodes::Prefix_none; }
+  int       prefix_length() const    { return has_prefix() ? 1 : 0; }
+  bool      is_wide_index() const    { return _prefix == Bytecodes::Prefix_wide_index; }
 
   bool      check_index(int i, bool in_cp_cache, int& cp_index, outputStream* st = tty);
   void      print_constant(int i, outputStream* st = tty);
@@ -62,7 +63,7 @@ class BytecodePrinter: public BytecodeCl
 
  public:
   BytecodePrinter() {
-    _is_wide = false;
+    _prefix = Bytecodes::Prefix_none;
   }
 
   // This method is called while executing the raw bytecodes, so none of
@@ -83,26 +84,32 @@ class BytecodePrinter: public BytecodeCl
       _current_method = method();
     }
     Bytecodes::Code code;
-    if (is_wide()) {
+    if (has_prefix()) {
       // bcp wasn't advanced if previous bytecode was _wide.
-      code = Bytecodes::code_at(bcp+1);
+      code = Bytecodes::code_at(bcp+prefix_length());
     } else {
       code = Bytecodes::code_at(bcp);
     }
     int bci = bcp - method->code_base();
     st->print("[%d] ", (int) Thread::current()->osthread()->thread_id());
+    const char* bcn = Bytecodes::name(code);
+    const char* pfn = Bytecodes::prefix_name(_prefix);
     if (Verbose) {
-      st->print("%8d  %4d  " INTPTR_FORMAT " " INTPTR_FORMAT " %s",
-           BytecodeCounter::counter_value(), bci, tos, tos2, Bytecodes::name(code));
+      st->print("%8d  %4d  " INTPTR_FORMAT " " INTPTR_FORMAT " %s%s",
+           BytecodeCounter::counter_value(), bci, tos, tos2, pfn, bcn);
     } else {
-      st->print("%8d  %4d  %s",
-           BytecodeCounter::counter_value(), bci, Bytecodes::name(code));
+      st->print("%8d  %4d  %s%s",
+           BytecodeCounter::counter_value(), bci,            pfn, bcn);
     }
-    _next_pc = is_wide() ? bcp+2 : bcp+1;
+    _next_pc = bcp + prefix_length() + 1;
     print_attributes(code, bci);
-    // Set is_wide for the next one, since the caller of this doesn't skip
+    // Set _prefix for the next one, since the caller of this doesn't skip
     // the next bytecode.
-    _is_wide = (code == Bytecodes::_wide);
+    _prefix = Bytecodes::Prefix_none;
+    if (code == Bytecodes::_wide) {
+      code = Bytecodes::code_at(bcp+1);
+      _prefix = Bytecodes::allowed_prefix(code);
+    }
   }
 
   // Used for methodOop::print_codes().  The input bcp comes from
@@ -111,19 +118,18 @@ class BytecodePrinter: public BytecodeCl
     _current_method = method();
     ResourceMark rm;
     Bytecodes::Code code = Bytecodes::code_at(bcp);
-    // Set is_wide
-    _is_wide = (code == Bytecodes::_wide);
-    if (is_wide()) {
+    // Set prefix
+    _prefix = Bytecodes::Prefix_none;
+    if (code == Bytecodes::_wide) {
       code = Bytecodes::code_at(bcp+1);
+      _prefix = Bytecodes::allowed_prefix(code);
     }
     int bci = bcp - method->code_base();
     // Print bytecode index and name
-    if (is_wide()) {
-      st->print("%d %s_w", bci, Bytecodes::name(code));
-    } else {
-      st->print("%d %s", bci, Bytecodes::name(code));
-    }
-    _next_pc = is_wide() ? bcp+2 : bcp+1;
+    const char* bcn = Bytecodes::name(code);
+    const char* pfn = Bytecodes::prefix_name(_prefix);
+    st->print("%d %s%s", bci, pfn, bcn);
+    _next_pc = bcp + prefix_length() + 1;
     print_attributes(code, bci, st);
     bytecode_epilog(bci, st);
   }
@@ -341,7 +347,7 @@ void BytecodePrinter::print_attributes(B
 
     case Bytecodes::_iinc:
       { int index = get_index_special();
-        jint offset = is_wide() ? get_short(): get_byte();
+        jint offset = is_wide_index() ? get_short(): get_byte();
         st->print_cr(" #%d " INT32_FORMAT, index, offset);
       }
       break;
diff -r aa0c48844632 -r a7d54b98ca4a src/share/vm/interpreter/bytecodes.cpp
--- a/src/share/vm/interpreter/bytecodes.cpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/share/vm/interpreter/bytecodes.cpp	Wed Jun 03 16:27:17 2009 +0200
@@ -40,6 +40,8 @@ BasicType       Bytecodes::_result_type 
 BasicType       Bytecodes::_result_type   [Bytecodes::number_of_codes];
 s_char          Bytecodes::_depth         [Bytecodes::number_of_codes];
 u_char          Bytecodes::_length        [Bytecodes::number_of_codes];
+u_char          Bytecodes::_wide_length   [Bytecodes::number_of_codes];
+s_char          Bytecodes::_allowed_prefix[Bytecodes::number_of_codes];
 bool            Bytecodes::_can_trap      [Bytecodes::number_of_codes];
 Bytecodes::Code Bytecodes::_java_code     [Bytecodes::number_of_codes];
 bool            Bytecodes::_can_rewrite   [Bytecodes::number_of_codes];
@@ -61,7 +63,7 @@ int Bytecodes::special_length_at(address
     if (end != NULL && bcp + 1 >= end) {
       return -1; // don't read past end of code buffer
     }
-    return wide_length_for(cast(*(bcp + 1)));
+    return _wide_length[*(bcp + 1)];
   case _tableswitch:
     { address aligned_bcp = (address)round_to((intptr_t)bcp + 1, jintSize);
       if (end != NULL && aligned_bcp + 3*jintSize >= end) {
@@ -92,6 +94,32 @@ int Bytecodes::special_length_at(address
   return 0;
 }
 
+const char* Bytecodes::_prefix_name[Bytecodes::number_of_prefixes] = {
+  "", // 'none' displays as empty string
+  // the others display with a trailing colon:
+  "wide_index:", "tail_call:"
+};
+const char* Bytecodes::prefix_name(Prefix pfx) {
+  if (0 <= pfx && pfx < number_of_prefixes)
+    return _prefix_name[pfx];
+  else
+    return "(unknown)";
+}
+
+const char* Bytecodes::name_for(Bytecodes::Prefix pfx, Bytecodes::Code code) {
+  const char* cn = name(code);
+  if (pfx == Prefix_none)
+    return cn;
+  else {
+    const char* pn = prefix_name(pfx);
+    char* buf = NEW_RESOURCE_ARRAY(char, strlen(pn) + strlen(cn) + 1);
+    strcpy(buf, pn);
+    strcat(buf, cn);
+    return buf;
+  }
+}
+
+
 // At a breakpoint instruction, this returns the breakpoint's length,
 // otherwise, it's the same as special_length_at().  This is used by
 // the RawByteCodeStream, which wants to see the actual bytecode
@@ -114,7 +142,6 @@ int Bytecodes::raw_special_length_at(add
 }
 
 
-
 void Bytecodes::def(Code code, const char* name, const char* format, const char* wide_format, BasicType result_type, int depth, bool can_trap) {
   def(code, name, format, wide_format, result_type, depth, can_trap, code);
 }
@@ -122,13 +149,39 @@ void Bytecodes::def(Code code, const cha
 
 void Bytecodes::def(Code code, const char* name, const char* format, const char* wide_format, BasicType result_type, int depth, bool can_trap, Code java_code) {
   assert(wide_format == NULL || format != NULL, "short form must exist if there's a wide form");
+
+  const char* wide_fmt = NULL; // wide_format (minus any leading 'T' marker) or NULL
+  Prefix allowed_prefix = Prefix_illegal;
+  if (wide_format != NULL) {
+    switch (wide_format[0]) {
+    case 'w':
+      allowed_prefix = Prefix_wide_index;
+      wide_fmt = wide_format;
+      break;
+
+    // certain new features allow the _wide prefix to be a semantic modifier:
+    case 'T':
+      assert(wide_format[1] == 'w', "");
+      if (TailCalls) {
+        allowed_prefix = Prefix_tail_call;
+        wide_fmt = &wide_format[1];
+      }
+      break;
+
+    default:
+      ShouldNotReachHere();
+    }
+  }
+
   _name          [code] = name;
   _format        [code] = format;
-  _wide_format   [code] = wide_format;
+  _wide_format   [code] = wide_fmt;
   _result_type   [code] = result_type;
   _depth         [code] = depth;
   _can_trap      [code] = can_trap;
-  _length        [code] = format != NULL ? (u_char)strlen(format) : 0;
+  _length        [code] = (u_char)( (format   != NULL) ? strlen(format)   : 0 );
+  _wide_length   [code] = (u_char)( (wide_fmt != NULL) ? strlen(wide_fmt) : 0 );
+  _allowed_prefix[code] = allowed_prefix;
   _java_code     [code] = java_code;
   if (java_code != code)  _can_rewrite[java_code] = true;
 }
@@ -158,6 +211,9 @@ void Bytecodes::initialize() {
 void Bytecodes::initialize() {
   if (_is_initialized) return;
   assert(number_of_codes <= 256, "too many bytecodes");
+
+  assert(0 == strcmp("wide_index:", prefix_name(Prefix_wide_index)),
+         "_prefix_name initialized correctly");     // spot-check only
 
   // initialize bytecode tables - didn't use static array initializers
   // (such as {}) so we can do additional consistency checks and init-
@@ -353,10 +409,10 @@ void Bytecodes::initialize() {
   def(_putstatic           , "putstatic"           , "bjj"  , NULL    , T_ILLEGAL, -1, true );
   def(_getfield            , "getfield"            , "bjj"  , NULL    , T_ILLEGAL,  0, true );
   def(_putfield            , "putfield"            , "bjj"  , NULL    , T_ILLEGAL, -2, true );
-  def(_invokevirtual       , "invokevirtual"       , "bjj"  , NULL    , T_ILLEGAL, -1, true);
-  def(_invokespecial       , "invokespecial"       , "bjj"  , NULL    , T_ILLEGAL, -1, true);
-  def(_invokestatic        , "invokestatic"        , "bjj"  , NULL    , T_ILLEGAL,  0, true);
-  def(_invokeinterface     , "invokeinterface"     , "bjj__", NULL    , T_ILLEGAL, -1, true);
+  def(_invokevirtual       , "invokevirtual"       , "bjj"  , "Twbjj"  , T_ILLEGAL, -1, true);  // wide prefix = tail call
+  def(_invokespecial       , "invokespecial"       , "bjj"  , "Twbjj"  , T_ILLEGAL, -1, true);  // wide prefix = tail call
+  def(_invokestatic        , "invokestatic"        , "bjj"  , "Twbjj"  , T_ILLEGAL,  0, true);  // wide prefix = tail call
+  def(_invokeinterface     , "invokeinterface"     , "bjj__", "Twbjj__", T_ILLEGAL, -1, true);  // wide prefix = tail call
   def(_invokedynamic       , "invokedynamic"       , "bjjjj", NULL    , T_ILLEGAL, -1, true );
   def(_new                 , "new"                 , "bii"  , NULL    , T_OBJECT ,  1, true );
   def(_newarray            , "newarray"            , "bc"   , NULL    , T_OBJECT ,  0, true );
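
def() above encodes prefix legality in the first character of the wide format string: a leading 'w' keeps the classic wide-index form, while a leading 'T' marks a tail-call form that is only accepted when tail calls are enabled. A standalone sketch of that classification (the TailCalls flag is modelled as a plain bool parameter here, and the format strings are illustrative):

#include <cstdio>

enum Prefix { Prefix_illegal = -1, Prefix_none = 0, Prefix_wide_index, Prefix_tail_call };

// Decide which prefix (if any) a bytecode accepts, from its wide format string.
static Prefix allowed_prefix(const char* wide_format, bool tail_calls_enabled) {
  if (wide_format == nullptr) return Prefix_illegal;   // no wide form at all
  switch (wide_format[0]) {
  case 'w': return Prefix_wide_index;                  // classic wide-index form
  case 'T': return tail_calls_enabled ? Prefix_tail_call : Prefix_illegal;
  default:  return Prefix_illegal;
  }
}

int main() {
  std::printf("%d %d %d\n",
              allowed_prefix("wbjj", true),    // any 'w...' form -> wide_index
              allowed_prefix("Twbjj", true),   // 'T...' form -> tail_call
              allowed_prefix(nullptr, true));  // no wide form -> illegal
  return 0;
}
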
diff -r aa0c48844632 -r a7d54b98ca4a src/share/vm/interpreter/bytecodes.hpp
--- a/src/share/vm/interpreter/bytecodes.hpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/share/vm/interpreter/bytecodes.hpp	Wed Jun 03 16:27:17 2009 +0200
@@ -280,6 +280,16 @@ class Bytecodes: AllStatic {
     number_of_codes
   };
 
+  // Internal codes for various kinds of bytecode prefixes.
+  enum Prefix {
+    Prefix_illegal = -1,        // stray _wide bytecode; an error
+    Prefix_none    = 0,
+    Prefix_wide_index,
+    Prefix_tail_call,
+    number_of_prefixes
+    // Note:  Keep this enum in sync. with Bytecodes::_prefix_name.
+  };
+
  private:
   static bool        _is_initialized;
   static const char* _name          [number_of_codes];
@@ -288,9 +298,12 @@ class Bytecodes: AllStatic {
   static BasicType   _result_type   [number_of_codes];
   static s_char      _depth         [number_of_codes];
   static u_char      _length        [number_of_codes];
+  static u_char      _wide_length   [number_of_codes];
+  static s_char      _allowed_prefix[number_of_codes];
   static bool        _can_trap      [number_of_codes];
   static Code        _java_code     [number_of_codes];
   static bool        _can_rewrite   [number_of_codes];
+  static const char* _prefix_name   [number_of_prefixes];
 
   static void        def(Code code, const char* name, const char* format, const char* wide_format, BasicType result_type, int depth, bool can_trap);
   static void        def(Code code, const char* name, const char* format, const char* wide_format, BasicType result_type, int depth, bool can_trap, Code java_code);
@@ -300,9 +313,9 @@ class Bytecodes: AllStatic {
  public:
   // Conversion
   static void        check          (Code code)    { assert(is_defined(code), "illegal code"); }
-  static void        wide_check     (Code code)    { assert(wide_is_defined(code), "illegal code"); }
   static Code        cast           (int  code)    { return (Code)code; }
 
+  static void        prefix_check(Prefix pfx, Code code) { assert(pfx == allowed_prefix(code), "illegal prefix"); }
 
    // Fetch a bytecode, hiding breakpoints as necessary:
    static Code       code_at(address bcp, methodOop method = NULL) {
@@ -310,6 +323,16 @@ class Bytecodes: AllStatic {
    }
    static Code       java_code_at(address bcp, methodOop method = NULL) {
           return java_code(code_at(bcp, method));
+   }
+   static Code       java_code_at(address bcp, Prefix& prefix_return) {
+          Code code = java_code(code_at(bcp));
+          if (code != _wide) {
+            prefix_return = Prefix_none;
+          } else {
+            code = java_code(code_at(bcp + 1));
+            prefix_return = allowed_prefix(code);
+          }
+          return code;
    }
 
    // Fetch a bytecode or a breakpoint:
@@ -323,27 +346,35 @@ class Bytecodes: AllStatic {
 
   // Bytecode attributes
   static bool        is_defined     (int  code)    { return 0 <= code && code < number_of_codes && _format[code] != NULL; }
-  static bool        wide_is_defined(int  code)    { return is_defined(code) && _wide_format[code] != NULL; }
   static const char* name           (Code code)    { check(code);      return _name          [code]; }
   static const char* format         (Code code)    { check(code);      return _format        [code]; }
   static const char* wide_format    (Code code)    { return _wide_format[code]; }
   static BasicType   result_type    (Code code)    { check(code);      return _result_type   [code]; }
   static int         depth          (Code code)    { check(code);      return _depth         [code]; }
   static int         length_for     (Code code)    { return _length[code]; }
+  static bool        can_have_prefix(Code code)    { return ( allowed_prefix(code) > Prefix_none );  }
+  static Prefix      allowed_prefix (Code code)    { check(code);      return (Prefix)_allowed_prefix[code]; }
   static bool        can_trap       (Code code)    { check(code);      return _can_trap      [code]; }
   static Code        java_code      (Code code)    { check(code);      return _java_code     [code]; }
   static bool        can_rewrite    (Code code)    { check(code);      return _can_rewrite   [code]; }
-  static int         wide_length_for(Code code)    {
-    if (!is_defined(code)) {
+  static int         length_for(Prefix pfx, Code code) {
+    if (pfx == Prefix_none)
+      return length_for(code);
+    else if (pfx == allowed_prefix(code))
+      return _wide_length[code];
+    else
+      // no other kind of prefix allowed on this code:
       return 0;
-    }
-    const char* wf = wide_format(code);
-    return (wf == NULL) ? 0 : (int)strlen(wf);
   }
+
+  static const char* prefix_name    (Prefix pfx);
+  static const char* name_for       (Prefix pfx, Code code); // may resource-allocate
+
   // if 'end' is provided, it indicates the end of the code buffer which
   // should not be read past when parsing.
   static int         special_length_at(address bcp, address end = NULL);
   static int         raw_special_length_at(address bcp, address end = NULL);
+
   static int         length_at      (address bcp)  { int l = length_for(code_at(bcp)); return l > 0 ? l : special_length_at(bcp); }
   static int         java_length_at (address bcp)  { int l = length_for(java_code_at(bcp)); return l > 0 ? l : special_length_at(bcp); }
   static bool        is_java_code   (Code code)    { return 0 <= code && code < number_of_java_codes; }
diff -r aa0c48844632 -r a7d54b98ca4a src/share/vm/interpreter/interpreter.cpp
--- a/src/share/vm/interpreter/interpreter.cpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/share/vm/interpreter/interpreter.cpp	Wed Jun 03 16:27:17 2009 +0200
@@ -130,6 +130,9 @@ address    AbstractInterpreter::_slow_si
 address    AbstractInterpreter::_slow_signature_handler;
 address    AbstractInterpreter::_entry_table            [AbstractInterpreter::number_of_method_entries];
 address    AbstractInterpreter::_native_abi_to_tosca    [AbstractInterpreter::number_of_result_handlers];
+
+address    AbstractInterpreter::_interpreter_code_begin                     = NULL;
+address    AbstractInterpreter::_interpreter_code_end                       = NULL;
 
 //------------------------------------------------------------------------------------------------------------------------
 // Generation of complete interpreter
@@ -361,11 +364,13 @@ address AbstractInterpreter::continuatio
       Thread *thread = Thread::current();
       ResourceMark rm(thread);
       methodHandle mh(thread, method);
-      type = Bytecode_invoke_at(mh, bci)->result_type(thread);
+      Bytecode_invoke* invoke = Bytecode_invoke_at(mh, bci);
+      type = invoke->result_type(thread);
       // since the cache entry might not be initialized:
       // (NOT needed for the old calling convension)
       if (!is_top_frame) {
-        int index = Bytes::get_native_u2(bcp+1);
+        // (no prefix here: the cp cache index directly follows the invoke opcode)
+        int index = Bytes::get_native_u2(bcp+1);
         method->constants()->cache()->entry_at(index)->set_parameter_size(callee_parameters);
       }
       break;
@@ -379,7 +384,33 @@ address AbstractInterpreter::continuatio
     case Bytecodes::_ldc2_w:
       type = constant_pool_type( method, Bytes::get_Java_u2(bcp+1) );
       break;
-
+    
+    case Bytecodes::_wide: {
+      Bytecodes::Code next = Bytecodes::java_code_at(bcp+1);
+      if (next == Bytecodes::_invokestatic ||
+          next == Bytecodes::_invokevirtual ||
+          next == Bytecodes::_invokespecial ||
+          next == Bytecodes::_invokeinterface) {
+        Thread *thread = Thread::current();
+        ResourceMark rm(thread);
+        methodHandle mh(thread, method);
+        Bytecode_invoke* invoke = Bytecode_invoke_at(mh, bci);
+        type = invoke->result_type(thread);
+        // since the cache entry might not be initialized:
+        // (NOT needed for the old calling convension)
+        if (!is_top_frame) {
+          int index = -1;
+          if (invoke->is_tailcall()) {
+            index = Bytes::get_native_u2(bcp+2);
+          } else {
+            index = Bytes::get_native_u2(bcp+1);
+          }
+          method->constants()->cache()->entry_at(index)->set_parameter_size(callee_parameters);
+        }
+      } else
+        type = Bytecodes::result_type(code);
+      break;
+    }
     default:
       type = Bytecodes::result_type(code);
       break;
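
In the _wide case added above, a tail-call invoke's constant-pool cache index starts one byte later (bcp+2 instead of bcp+1) because the prefix byte precedes the invoke opcode. A standalone sketch of that operand addressing; the index is read in native byte order, as Bytes::get_native_u2 does for rewritten cache indices:

#include <cstdint>
#include <cstdio>
#include <cstring>

// Read a u2 in native byte order (stand-in for Bytes::get_native_u2).
static uint16_t get_native_u2(const uint8_t* p) {
  uint16_t v;
  std::memcpy(&v, p, sizeof(v));
  return v;
}

int main() {
  // wide-prefixed (tail call) invokevirtual: 0xc4 = wide, 0xb6 = invokevirtual
  const uint8_t tail_call[] = { 0xc4, 0xb6, 0x07, 0x00 };
  // plain invokevirtual
  const uint8_t plain[]     = { 0xb6, 0x07, 0x00 };

  const uint8_t* bcp = tail_call;
  bool is_tail_call = (bcp[0] == 0xc4);
  // Index starts at bcp+2 when prefixed, bcp+1 otherwise -- mirroring the code above.
  std::printf("tail-call index: %u\n", get_native_u2(bcp + (is_tail_call ? 2 : 1)));

  bcp = plain;
  is_tail_call = (bcp[0] == 0xc4);
  std::printf("plain index:     %u\n", get_native_u2(bcp + (is_tail_call ? 2 : 1)));
  return 0;
}
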
diff -r aa0c48844632 -r a7d54b98ca4a src/share/vm/interpreter/interpreterRuntime.cpp
--- a/src/share/vm/interpreter/interpreterRuntime.cpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/share/vm/interpreter/interpreterRuntime.cpp	Wed Jun 03 16:27:17 2009 +0200
@@ -235,6 +235,23 @@ IRT_ENTRY(void, InterpreterRuntime::thro
   THROW_HANDLE(exception);
 IRT_END
 
+
+IRT_ENTRY(void, InterpreterRuntime::tail_call_handle_stack_overflow(JavaThread * thread)) 
+{// to get emacs to indent correctly
+  // Try to compress the stack here.
+  // Look at stack.
+  {
+    ResourceMark rm(thread);
+    if (StackCompressor::can_compress(thread)) {
+      thread->set_tail_call_do_stack_compression((address)1);
+      return;
+    }
+  }
+  // Throwing of exception is handled in calling function. See
+  // TemplateInterpreterGenerator::generate_tail_call_handle_stack_overflow_handler.
+  thread->set_tail_call_do_stack_compression((address)0);
+}// to get emacs to indent correctly
+IRT_END
 
 IRT_ENTRY(void, InterpreterRuntime::create_exception(JavaThread* thread, char* name, char* message))
   // lookup exception klass
@@ -448,6 +465,10 @@ IRT_END
 
 IRT_ENTRY(void, InterpreterRuntime::throw_IncompatibleClassChangeError(JavaThread* thread))
   THROW(vmSymbols::java_lang_IncompatibleClassChangeError());
+IRT_END
+
+IRT_ENTRY(void, InterpreterRuntime::throw_TailCallException(JavaThread* thread))
+  THROW(vmSymbols::java_lang_TailCallException());
 IRT_END
 
 
diff -r aa0c48844632 -r a7d54b98ca4a src/share/vm/interpreter/interpreterRuntime.hpp
--- a/src/share/vm/interpreter/interpreterRuntime.hpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/share/vm/interpreter/interpreterRuntime.hpp	Wed Jun 03 16:27:17 2009 +0200
@@ -33,7 +33,13 @@ class InterpreterRuntime: AllStatic {
   // Helper functions to access current interpreter state
   static frame     last_frame(JavaThread *thread)    { return thread->last_frame(); }
   static methodOop method(JavaThread *thread)        { return last_frame(thread).interpreter_frame_method(); }
-  static address   bcp(JavaThread *thread)           { return last_frame(thread).interpreter_frame_bcp(); }
+  static address   bcp(JavaThread *thread)           { 
+    Bytecodes::Prefix pfx;
+    address p = last_frame(thread).interpreter_frame_bcp();
+    Bytecodes::java_code_at(p, pfx);
+    if (pfx & Bytecodes::Prefix_tail_call) return (p+1);
+    return p;
+  }
   static void      set_bcp_and_mdp(address bcp, JavaThread*thread);
   static Bytecodes::Code code(JavaThread *thread)    {
     // pass method to avoid calling unsafe bcp_to_method (partial fix 4926272)
@@ -74,7 +80,10 @@ class InterpreterRuntime: AllStatic {
   static void    create_klass_exception(JavaThread* thread, char* name, oopDesc* obj);
   static address exception_handler_for_exception(JavaThread* thread, oopDesc* exception);
   static void    throw_pending_exception(JavaThread* thread);
+  static void    throw_TailCallException(JavaThread* thread);
 
+  static void    tail_call_handle_stack_overflow(JavaThread * thread);
+  
   // Statics & fields
   static void    resolve_get_put(JavaThread* thread, Bytecodes::Code bytecode);
 
diff -r aa0c48844632 -r a7d54b98ca4a src/share/vm/interpreter/linkResolver.hpp
--- a/src/share/vm/interpreter/linkResolver.hpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/share/vm/interpreter/linkResolver.hpp	Wed Jun 03 16:27:17 2009 +0200
@@ -69,7 +69,7 @@ class CallInfo: public LinkInfo {
   methodHandle _resolved_method;        // static target method
   methodHandle _selected_method;        // dynamic (actual) target method
   int          _vtable_index;           // vtable index of selected method
-
+  bool         _is_tail_call;
   void         set_static(   KlassHandle resolved_klass,                             methodHandle resolved_method                                                , TRAPS);
   void         set_interface(KlassHandle resolved_klass, KlassHandle selected_klass, methodHandle resolved_method, methodHandle selected_method                  , TRAPS);
   void         set_virtual(  KlassHandle resolved_klass, KlassHandle selected_klass, methodHandle resolved_method, methodHandle selected_method, int vtable_index, TRAPS);
@@ -84,6 +84,8 @@ class CallInfo: public LinkInfo {
   methodHandle selected_method() const           { return _selected_method; }
 
   BasicType    result_type() const               { return selected_method()->result_type(); }
+  bool         is_tail_call() const              { return _is_tail_call; }
+  void         set_tail_call(bool is_tail_call)  { _is_tail_call = is_tail_call; }
   bool         has_vtable_index() const          { return _vtable_index >= 0; }
   bool         is_statically_bound() const       { return _vtable_index == methodOopDesc::nonvirtual_vtable_index; }
   int          vtable_index() const {
diff -r aa0c48844632 -r a7d54b98ca4a src/share/vm/interpreter/oopMapCache.cpp
--- a/src/share/vm/interpreter/oopMapCache.cpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/share/vm/interpreter/oopMapCache.cpp	Wed Jun 03 16:27:17 2009 +0200
@@ -33,7 +33,7 @@ class OopMapCacheEntry: private Interpre
 
  protected:
   // Initialization
-  void fill(methodHandle method, int bci);
+  void fill(methodHandle method, int bci, bool report_parameter_exprs=false);
   // fills the bit mask for native calls
   void fill_for_native(methodHandle method);
   void set_mask(CellTypeState* vars, CellTypeState* stack, int stack_top);
@@ -77,7 +77,7 @@ class OopMapForCacheEntry: public Genera
   OopMapForCacheEntry(methodHandle method, int bci, OopMapCacheEntry *entry);
 
   // Computes stack map for (method,bci) and initialize entry
-  void compute_map(TRAPS);
+  void compute_map(bool report_parameter_exprs, TRAPS);
   int  size();
 };
 
@@ -89,14 +89,23 @@ OopMapForCacheEntry::OopMapForCacheEntry
 }
 
 
-void OopMapForCacheEntry::compute_map(TRAPS) {
+void OopMapForCacheEntry::compute_map(bool report_parameter_exprs, TRAPS) {
   assert(!method()->is_native(), "cannot compute oop map for native methods");
   // First check if it is a method where the stackmap is always empty
   if (method()->code_size() == 0 || method()->max_locals() + method()->max_stack() == 0) {
     _entry->set_mask_size(0);
   } else {
     ResourceMark rm;
-    GenerateOopMap::compute_map(CATCH);
+    //GenerateOopMap::compute_map(CATCH);
+    GenerateOopMap::compute_map(THREAD, report_parameter_exprs);
+    // Expanded CATCH macro to be able to append report_parameter_exprs
+    // parameter.
+    if (HAS_PENDING_EXCEPTION) {    
+      oop ex = PENDING_EXCEPTION;
+      CLEAR_PENDING_EXCEPTION;
+      ex->print();
+      ShouldNotReachHere();
+    }
     result_for_basicblock(_bci);
   }
 }
@@ -368,7 +377,7 @@ void OopMapCacheEntry::fill_for_native(m
 }
 
 
-void OopMapCacheEntry::fill(methodHandle method, int bci) {
+void OopMapCacheEntry::fill(methodHandle method, int bci, bool report_parameter_exprs) {
   HandleMark hm;
   // Flush entry to deallocate an existing entry
   flush();
@@ -378,10 +387,14 @@ void OopMapCacheEntry::fill(methodHandle
     // Native method activations have oops only among the parameters and one
     // extra oop following the parameters (the mirror for static native methods).
     fill_for_native(method);
+    // TODO: decide what to do here; for now this case is not supported.
+    if (report_parameter_exprs) ShouldNotReachHere();
   } else {
     EXCEPTION_MARK;
     OopMapForCacheEntry gen(method, bci, this);
-    gen.compute_map(CATCH);
+    //gen.compute_map(CATCH);
+    gen.compute_map(report_parameter_exprs, CATCH);
+    
   }
   #ifdef ASSERT
     verify();
@@ -633,11 +646,11 @@ void OopMapCache::lookup(methodHandle me
   return;
 }
 
-void OopMapCache::compute_one_oop_map(methodHandle method, int bci, InterpreterOopMap* entry) {
+void OopMapCache::compute_one_oop_map(methodHandle method, int bci, InterpreterOopMap* entry, bool report_parameter_exprs) {
   // Due to the invariants above it's tricky to allocate a temporary OopMapCacheEntry on the stack
   OopMapCacheEntry* tmp = NEW_C_HEAP_ARRAY(OopMapCacheEntry, 1);
   tmp->initialize();
-  tmp->fill(method, bci);
+  tmp->fill(method, bci, report_parameter_exprs);
   entry->resource_copy(tmp);
   FREE_C_HEAP_ARRAY(OopMapCacheEntry, tmp);
 }
diff -r aa0c48844632 -r a7d54b98ca4a src/share/vm/interpreter/oopMapCache.hpp
--- a/src/share/vm/interpreter/oopMapCache.hpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/share/vm/interpreter/oopMapCache.hpp	Wed Jun 03 16:27:17 2009 +0200
@@ -177,7 +177,7 @@ class OopMapCache : public CHeapObj {
   void lookup(methodHandle method, int bci, InterpreterOopMap* entry);
 
   // Compute an oop map without updating the cache or grabbing any locks (for debugging)
-  static void compute_one_oop_map(methodHandle method, int bci, InterpreterOopMap* entry);
+  static void compute_one_oop_map(methodHandle method, int bci, InterpreterOopMap* entry, bool report_parameter_exprs=false);
 
   // Helpers
   // Iterate over the entries in the cached OopMapCacheEntry's
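
The extra report_parameter_exprs parameter threaded through fill/compute_map above defaults to false, so existing callers are unchanged. A hypothetical debugging call site (method and bci assumed to be in scope) might pass it explicitly:

    // Sketch only: compute a one-off oop map that also reports the parameter
    // expressions at an invoke, as used by stack compression during deoptimization.
    InterpreterOopMap mask;
    OopMapCache::compute_one_oop_map(method, bci, &mask, /* report_parameter_exprs */ true);
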
diff -r aa0c48844632 -r a7d54b98ca4a src/share/vm/interpreter/rewriter.cpp
--- a/src/share/vm/interpreter/rewriter.cpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/share/vm/interpreter/rewriter.cpp	Wed Jun 03 16:27:17 2009 +0200
@@ -154,7 +154,7 @@ void Rewriter::scan_method(methodOop met
     // moves, the bytecodes will also move.
     No_Safepoint_Verifier nsv;
     Bytecodes::Code c;
-
+    bool is_wide = false;
     // Bytecodes and their length
     const address code_base = method->code_base();
     const int code_length = method->code_size();
@@ -164,6 +164,7 @@ void Rewriter::scan_method(methodOop met
       address bcp = code_base + bci;
       int prefix_length = 0;
       c = (Bytecodes::Code)(*bcp);
+      is_wide = false;
 
       // Since we have the code, see if we can get the length
       // directly. Some more complicated bytecodes will report
@@ -177,6 +178,7 @@ void Rewriter::scan_method(methodOop met
         // by 'wide'. We don't currently examine any of the bytecodes
         // modified by wide, but in case we do in the future...
         if (c == Bytecodes::_wide) {
+          is_wide = true;
           prefix_length = 1;
           c = (Bytecodes::Code)bcp[1];
         }
diff -r aa0c48844632 -r a7d54b98ca4a src/share/vm/interpreter/templateInterpreter.cpp
--- a/src/share/vm/interpreter/templateInterpreter.cpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/share/vm/interpreter/templateInterpreter.cpp	Wed Jun 03 16:27:17 2009 +0200
@@ -50,6 +50,10 @@ void TemplateInterpreter::initialize() {
     if (PrintInterpreter) print();
   }
 
+  // Needed for tail calls.
+  _interpreter_code_begin = _code->code_start();
+  _interpreter_code_end = _code->code_end();
+
   // initialize dispatch table
   _active_table = _normal_table;
 }
@@ -172,6 +176,8 @@ address    TemplateInterpreter::_throw_N
 address    TemplateInterpreter::_throw_NullPointerException_entry           = NULL;
 address    TemplateInterpreter::_throw_StackOverflowError_entry             = NULL;
 address    TemplateInterpreter::_throw_exception_entry                      = NULL;
+address    TemplateInterpreter::_tail_call_handle_stack_overflow_entry      = NULL;
+address    TemplateInterpreter::_tail_call_handle_stack_overflow_patch_address = NULL;
 
 #ifndef PRODUCT
 EntryPoint TemplateInterpreter::_trace_code;
@@ -183,8 +189,8 @@ EntryPoint TemplateInterpreter::_continu
 EntryPoint TemplateInterpreter::_continuation_entry;
 EntryPoint TemplateInterpreter::_safept_entry;
 
-address    TemplateInterpreter::_return_3_addrs_by_index[TemplateInterpreter::number_of_return_addrs];
-address    TemplateInterpreter::_return_5_addrs_by_index[TemplateInterpreter::number_of_return_addrs];
+address    TemplateInterpreter::_return_addr_tables[TemplateInterpreter::number_of_invoke_lengths]
+                                                   [TemplateInterpreter::number_of_return_addrs];
 address    TemplateInterpreter::_return_5_unbox_addrs_by_index[TemplateInterpreter::number_of_return_addrs];
 
 DispatchTable TemplateInterpreter::_active_table;
@@ -316,9 +322,12 @@ void TemplateInterpreterGenerator::gener
 
   for (int j = 0; j < number_of_states; j++) {
     const TosState states[] = {btos, ctos, stos, itos, ltos, ftos, dtos, atos, vtos};
+    for (int k = Interpreter::min_invoke_length; k <= Interpreter::max_invoke_length; k++) {
+      if (!TailCalls && k != 3 && k != 5)  continue;
+      int jx = Interpreter::TosState_as_index(states[j]);
+      Interpreter::return_addrs_by_index_table(k)[jx] = Interpreter::return_entry(states[j], k);
+    }
     int index = Interpreter::TosState_as_index(states[j]);
-    Interpreter::_return_3_addrs_by_index[index] = Interpreter::return_entry(states[j], 3);
-    Interpreter::_return_5_addrs_by_index[index] = Interpreter::return_entry(states[j], 5);
     if (EnableInvokeDynamic)
       Interpreter::_return_5_unbox_addrs_by_index[index] = Interpreter::return_unbox_entry(states[j], 5);
   }
@@ -366,6 +375,7 @@ void TemplateInterpreterGenerator::gener
     Interpreter::_throw_WrongMethodType_entry                = generate_WrongMethodType_handler();
     Interpreter::_throw_NullPointerException_entry           = generate_exception_handler("java/lang/NullPointerException"          , NULL       );
     Interpreter::_throw_StackOverflowError_entry             = generate_StackOverflowError_handler();
+    Interpreter::_tail_call_handle_stack_overflow_entry      = generate_tail_call_stack_overflow_handler();
   }
 
 
@@ -462,11 +472,13 @@ void TemplateInterpreterGenerator::set_e
     Template* t = TemplateTable::template_for(code);
     assert(t->is_valid(), "just checking");
     set_short_entry_points(t, bep, cep, sep, aep, iep, lep, fep, dep, vep);
-  }
-  if (Bytecodes::wide_is_defined(code)) {
-    Template* t = TemplateTable::template_for_wide(code);
-    assert(t->is_valid(), "just checking");
-    set_wide_entry_point(t, wep);
+
+    if (Bytecodes::can_have_prefix(code)) {
+      Bytecodes::Prefix pfx = Bytecodes::allowed_prefix(code);
+      Template* t = TemplateTable::template_for_prefix(pfx, code);
+      assert(t->is_valid(), "just checking");
+      set_wide_entry_point(t, wep);
+    }
   }
   // set entry points
   EntryPoint entry(bep, cep, sep, aep, iep, lep, fep, dep, vep);
@@ -513,7 +525,7 @@ void TemplateInterpreterGenerator::gener
 #endif // !PRODUCT
   int step;
   if (!t->does_dispatch()) {
-    step = t->is_wide() ? Bytecodes::wide_length_for(t->bytecode()) : Bytecodes::length_for(t->bytecode());
+    step = Bytecodes::length_for(t->prefix(), t->bytecode());
     if (tos_out == ilgl) tos_out = t->tos_out();
     // compute bytecode size
     assert(step > 0, "just checkin'");
@@ -543,6 +555,12 @@ void TemplateInterpreterGenerator::gener
 
 address TemplateInterpreter::return_entry(TosState state, int length) {
   guarantee(0 <= length && length < Interpreter::number_of_return_entries, "illegal length");
+  // Tail call support: we assume that the only client of return_entry is
+  // generate_return_entry_for in generate_all, and that a tail call can be
+  // distinguished from a regular call by its length (e.g. 4 instead of 3,
+  // or 6 instead of 5). The following check should catch other
+  // uses of return_entry.
+  assert(length >= 3, "unexpected return_entry length");
   return _return_entry[length].entry(state);
 }
 
diff -r aa0c48844632 -r a7d54b98ca4a src/share/vm/interpreter/templateInterpreter.hpp
--- a/src/share/vm/interpreter/templateInterpreter.hpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/share/vm/interpreter/templateInterpreter.hpp	Wed Jun 03 16:27:17 2009 +0200
@@ -85,7 +85,8 @@ class TemplateInterpreter: public Abstra
   enum MoreConstants {
     number_of_return_entries  = number_of_states,               // number of return entry points
     number_of_deopt_entries   = number_of_states,               // number of deoptimization entry points
-    number_of_return_addrs    = number_of_states                // number of return addresses
+    number_of_return_addrs    = number_of_states,               // number of return addresses
+    number_of_invoke_lengths  = 4                               // invoke lengths 3 through 6
   };
 
  protected:
@@ -99,7 +100,8 @@ class TemplateInterpreter: public Abstra
   static address    _throw_exception_entry;
 
   static address    _throw_StackOverflowError_entry;
-
+  static address    _tail_call_handle_stack_overflow_entry;     // Try to compress the stack, throw exception if not possible.
+  static address    _tail_call_handle_stack_overflow_patch_address; // Patch with deopt compress_entry_point 
   static address    _remove_activation_entry;                   // continuation address if an exception is not handled by current frame
 #ifdef HOTSWAP
   static address    _remove_activation_preserving_args_entry;   // continuation address when current frame is being popped
@@ -115,8 +117,7 @@ class TemplateInterpreter: public Abstra
   static EntryPoint _continuation_entry;
   static EntryPoint _safept_entry;
 
-  static address    _return_3_addrs_by_index[number_of_return_addrs];     // for invokevirtual   return entries
-  static address    _return_5_addrs_by_index[number_of_return_addrs];     // for invokeinterface return entries
+  static address    _return_addr_tables[number_of_invoke_lengths][number_of_return_addrs];
   static address    _return_5_unbox_addrs_by_index[number_of_return_addrs]; // for invokedynamic bootstrap methods
 
   static DispatchTable _active_table;                           // the active    dispatch table (used by the interpreter for dispatch)
@@ -144,7 +145,9 @@ class TemplateInterpreter: public Abstra
   static address    throw_WrongMethodType_entry()               { return _throw_WrongMethodType_entry; }
   static address    throw_NullPointerException_entry()          { return _throw_NullPointerException_entry; }
   static address    throw_StackOverflowError_entry()            { return _throw_StackOverflowError_entry; }
-
+  static address    tail_call_handle_stack_overflow_entry()     { return _tail_call_handle_stack_overflow_entry; }
+  static address    tail_call_handle_stack_overflow_patch_addr() { return _tail_call_handle_stack_overflow_patch_address; }
+
   // Code generation
 #ifndef PRODUCT
   static address    trace_code    (TosState state)              { return _trace_code.entry(state); }
@@ -157,8 +160,9 @@ class TemplateInterpreter: public Abstra
   static address*   normal_table()                              { return _normal_table.table_for(); }
 
   // Support for invokes
-  static address*   return_3_addrs_by_index_table()             { return _return_3_addrs_by_index; }
-  static address*   return_5_addrs_by_index_table()             { return _return_5_addrs_by_index; }
+  static address*   return_3_addrs_by_index_table()             { return return_addrs_by_index_table(3); }
+  static address*   return_5_addrs_by_index_table()             { return return_addrs_by_index_table(5); }
+  static address*   return_addrs_by_index_table(int inst_size)  { assert(inst_size >= 3 && inst_size <= 6, "wrong size"); return _return_addr_tables[inst_size - AbstractInterpreter::min_invoke_length]; }
   static address*   return_5_unbox_addrs_by_index_table()       { return _return_5_unbox_addrs_by_index; }
   static int        TosState_as_index(TosState state);          // computes index into return_3_entry_by_index table
 
@@ -166,7 +170,7 @@ class TemplateInterpreter: public Abstra
   static address    deopt_entry   (TosState state, int length);
   static address    return_unbox_entry(TosState state, int length);
 
-  // Safepoint support
+  // Safepoint support
   static void       notice_safepoints();                        // stops the thread when reaching a safepoint
   static void       ignore_safepoints();                        // ignores safepoints
 
diff -r aa0c48844632 -r a7d54b98ca4a src/share/vm/interpreter/templateInterpreterGenerator.hpp
--- a/src/share/vm/interpreter/templateInterpreterGenerator.hpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/share/vm/interpreter/templateInterpreterGenerator.hpp	Wed Jun 03 16:27:17 2009 +0200
@@ -40,6 +40,7 @@ class TemplateInterpreterGenerator: publ
   address generate_slow_signature_handler();
   address generate_error_exit(const char* msg);
   address generate_StackOverflowError_handler();
+  address generate_tail_call_stack_overflow_handler();
   address generate_exception_handler(const char* name, const char* message) {
     return generate_exception_handler_common(name, message, false);
   }
diff -r aa0c48844632 -r a7d54b98ca4a src/share/vm/interpreter/templateTable.cpp
--- a/src/share/vm/interpreter/templateTable.cpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/share/vm/interpreter/templateTable.cpp	Wed Jun 03 16:27:17 2009 +0200
@@ -37,19 +37,21 @@ void templateTable_init() {
 // Implementation of Template
 
 
-void Template::initialize(int flags, TosState tos_in, TosState tos_out, generator gen, int arg) {
-  _flags   = flags;
-  _tos_in  = tos_in;
-  _tos_out = tos_out;
-  _gen     = gen;
-  _arg     = arg;
-}
+void Template::initialize(Bytecodes::Code code, Bytecodes::Prefix pfx,
+                          int flags, TosState tos_in, TosState tos_out, generator gen, int arg) {
+  _code    = (jubyte) code;
+  _prefix  = (jubyte) pfx;
+  _flags   = (jubyte) flags;
+  _tos_in  =  tos_in;
+  _tos_out =  tos_out;
+  _gen     =          gen;
+  _arg     = (jshort) arg;
 
-
-Bytecodes::Code Template::bytecode() const {
-  int i = this - TemplateTable::_template_table;
-  if (i < 0 || i >= Bytecodes::number_of_codes) i = this - TemplateTable::_template_table_wide;
-  return Bytecodes::cast(i);
+  // Make sure we haven't lost any precision:
+  assert(_flags   == (int) flags, "");
+  assert(_tos_in  == (int) tos_in, "");
+  assert(_tos_out == (int) tos_out, "");
+  assert(_arg     == (int) arg, "");
 }
 
 
@@ -180,24 +182,30 @@ void TemplateTable::def(Bytecodes::Code 
   def(code, flags, in, out, (Template::generator)gen, 0);
 }
 
+#define PrefixShift BitsPerByte // kludge to make prefix be a pseudo-flag
 
 void TemplateTable::def(Bytecodes::Code code, int flags, TosState in, TosState out, void (*gen)(int arg), int arg) {
   // should factor out these constants
   const int ubcp = 1 << Template::uses_bcp_bit;
   const int disp = 1 << Template::does_dispatch_bit;
   const int clvm = 1 << Template::calls_vm_bit;
-  const int iswd = 1 << Template::wide_bit;
+
+  // extract prefix from flag bits:
+  Bytecodes::Prefix pfx = (Bytecodes::Prefix)(flags >> PrefixShift);
+  flags -= (int)pfx << PrefixShift; // erase fake flag bits
+
   // determine which table to use
-  bool is_wide = (flags & iswd) != 0;
+  bool is_wide = (pfx != Bytecodes::Prefix_none);
   // make sure that wide instructions have a vtos entry point
   // (since they are executed extremely rarely, it doesn't pay out to have an
   // extra set of 5 dispatch tables for the wide instructions - for simplicity
   // they all go with one table)
   assert(in == vtos || !is_wide, "wide instructions have vtos entry point only");
-  Template* t = is_wide ? template_for_wide(code) : template_for(code);
+  Template* t = is_wide ? template_for_prefix(pfx, code) : template_for(code);
   // setup entry
-  t->initialize(flags, in, out, gen, arg);
+  t->initialize(code, pfx, flags, in, out, gen, arg);
   assert(t->bytecode() == code, "just checkin'");
+  assert(t->prefix()   == pfx,  "just checkin'");
 }
 
 
@@ -253,9 +261,11 @@ void TemplateTable::initialize() {
   const int  ubcp = 1 << Template::uses_bcp_bit;
   const int  disp = 1 << Template::does_dispatch_bit;
   const int  clvm = 1 << Template::calls_vm_bit;
-  const int  iswd = 1 << Template::wide_bit;
+  // Various kinds of prefixes:
+  const int  tail = (int)(Bytecodes::Prefix_tail_call)  << PrefixShift;
+  const int  iswd = (int)(Bytecodes::Prefix_wide_index) << PrefixShift;
   //                                    interpr. templates
-  // Java spec bytecodes                ubcp|disp|clvm|iswd  in    out   generator             argument
+  // Java spec bytecodes                ubcp|disp|clvm|prfx  in    out   generator             argument
   def(Bytecodes::_nop                 , ____|____|____|____, vtos, vtos, nop                 ,  _           );
   def(Bytecodes::_aconst_null         , ____|____|____|____, vtos, atos, aconst_null         ,  _           );
   def(Bytecodes::_iconst_m1           , ____|____|____|____, vtos, itos, iconst              , -1           );
@@ -474,6 +484,13 @@ void TemplateTable::initialize() {
   def(Bytecodes::_ret                 , ubcp|disp|____|iswd, vtos, vtos, wide_ret            ,  _           );
   def(Bytecodes::_breakpoint          , ubcp|disp|clvm|____, vtos, vtos, _breakpoint         ,  _           );
 
+  if (TailCalls) {
+    def(Bytecodes::_invokevirtual     , ubcp|disp|clvm|tail, vtos, vtos, wide_invokevirtual  ,  2           );
+    def(Bytecodes::_invokespecial     , ubcp|disp|clvm|tail, vtos, vtos, wide_invokespecial  ,  1           );
+    def(Bytecodes::_invokestatic      , ubcp|disp|clvm|tail, vtos, vtos, wide_invokestatic   ,  1           );
+    def(Bytecodes::_invokeinterface   , ubcp|disp|clvm|tail, vtos, vtos, wide_invokeinterface,  1           );
+  }
+
   // JVM bytecodes
   def(Bytecodes::_fast_agetfield      , ubcp|____|____|____, atos, atos, fast_accessfield    ,  atos        );
   def(Bytecodes::_fast_bgetfield      , ubcp|____|____|____, atos, itos, fast_accessfield    ,  itos        );
@@ -536,6 +553,12 @@ void templateTable_init() {
 
 
 void TemplateTable::unimplemented_bc() {
-  _masm->unimplemented( Bytecodes::name(_desc->bytecode()));
+  char buf[1024];
+  buf[0] = '\0';
+  if (_desc->has_prefix()) {
+    sprintf(buf, "%s:", Bytecodes::prefix_name(_desc->prefix()));
+  }
+  strcat(buf, Bytecodes::name(_desc->bytecode()));
+  _masm->unimplemented(buf);
 }
 #endif /* !CC_INTERP */
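
The PrefixShift kludge above packs the bytecode prefix into the high byte of the flag word passed to def() and strips it back out before the template is initialized. A minimal standalone sketch of that encoding (the enum values here are illustrative, not the ones defined by the patch):

    enum Prefix { Prefix_none = 0, Prefix_wide_index = 1, Prefix_tail_call = 2 };  // illustrative values
    const int PrefixShift = 8;  // BitsPerByte in the patch

    static inline int    pack_flags(int flags, Prefix pfx) { return flags | ((int)pfx << PrefixShift); }
    static inline Prefix unpack_prefix(int flags)          { return (Prefix)(flags >> PrefixShift); }
    static inline int    strip_prefix(int flags)           { return flags & ((1 << PrefixShift) - 1); }
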
diff -r aa0c48844632 -r a7d54b98ca4a src/share/vm/interpreter/templateTable.hpp
--- a/src/share/vm/interpreter/templateTable.hpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/share/vm/interpreter/templateTable.hpp	Wed Jun 03 16:27:17 2009 +0200
@@ -47,14 +47,19 @@ class Template VALUE_OBJ_CLASS_SPEC {
   TosState  _tos_in;                             // tos cache state before template execution
   TosState  _tos_out;                            // tos cache state after  template execution
   generator _gen;                                // template code generator
-  int       _arg;                                // argument for template code generator
-
-  void      initialize(int flags, TosState tos_in, TosState tos_out, generator gen, int arg);
+  int       _arg;                                // argument for template code
+                                                 // generator
+  int       _prefix;                             // bytecode prefix (Bytecodes::Prefix)
+  int       _code;                               // bytecode (Bytecodes::Code)
+  void      initialize(Bytecodes::Code code, Bytecodes::Prefix pfx,
+                       int flags, TosState tos_in, TosState tos_out, generator gen, int arg);
 
   friend class TemplateTable;
 
  public:
-  Bytecodes::Code bytecode() const;
+  Bytecodes::Prefix prefix() const  { return (Bytecodes::Prefix)_prefix; }
+  bool has_prefix() const          { return (_prefix & Bytecodes::Prefix_wide_index) != 0 || (_prefix & Bytecodes::Prefix_tail_call) != 0; }
+  Bytecodes::Code bytecode() const { return (Bytecodes::Code)_code; }
   bool      is_valid() const                     { return _gen != NULL; }
   bool      uses_bcp() const                     { return (_flags & (1 << uses_bcp_bit     )) != 0; }
   bool      does_dispatch() const                { return (_flags & (1 << does_dispatch_bit)) != 0; }
@@ -244,8 +249,9 @@ class TemplateTable: AllStatic {
 
   static void _return(TosState state);
 
-  static void resolve_cache_and_index(int byte_no, Register cache, Register index);
+  static void resolve_cache_and_index(int byte_no, Register cache, Register index, bool is_tail_call);
   static void load_invoke_cp_cache_entry(int byte_no,
+                                         bool is_tail_call,
                                          Register method,
                                          Register itable_index,
                                          Register flags,
@@ -263,6 +269,15 @@ class TemplateTable: AllStatic {
   static void invokeinterface(int byte_no);
   static void invokedynamic(int byte_no);
   static void fast_invokevfinal(int byte_no);
+  // Tail calls
+  static void wide_invokevirtual(int byte_no);
+
+  static void wide_invokespecial(int byte_no);
+  static void wide_invokestatic(int byte_no);
+  static void wide_invokeinterface(int byte_no);
+#if 0
+  static void wide_fast_invokevfinal(int byte_no);
+#endif
 
   static void getfield_or_static(int byte_no, bool is_static);
   static void putfield_or_static(int byte_no, bool is_static);
@@ -323,7 +338,16 @@ class TemplateTable: AllStatic {
 
   // Templates
   static Template* template_for     (Bytecodes::Code code)  { Bytecodes::check     (code); return &_template_table     [code]; }
-  static Template* template_for_wide(Bytecodes::Code code)  { Bytecodes::wide_check(code); return &_template_table_wide[code]; }
+  //static Template* template_for_wide(Bytecodes::Code code)  {
+  //Bytecodes::wide_check(code); return &_template_table_wide[code]; }
+  static Template* template_for_prefix(Bytecodes::Prefix pfx, Bytecodes::Code code)  {
+    Bytecodes::prefix_check(pfx, code);
+    assert((pfx & Bytecodes::Prefix_wide_index) ||
+           (pfx & Bytecodes::Prefix_tail_call),
+           "only wide and tail call prefixes are supported");
+
+    return &_template_table_wide[code]; 
+  }
 
   // Platform specifics
   #include "incls/_templateTable_pd.hpp.incl"
diff -r aa0c48844632 -r a7d54b98ca4a src/share/vm/memory/space.cpp
--- a/src/share/vm/memory/space.cpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/share/vm/memory/space.cpp	Wed Jun 03 16:27:17 2009 +0200
@@ -790,6 +790,9 @@ size_t ContiguousSpace::block_size(const
   assert(MemRegion(bottom(), end()).contains(p), "p not in space");
   HeapWord* current_top = top();
   assert(p <= current_top, "p is not a block start");
+  if (!oop(p)->is_oop()) {
+    tty->print_cr("oop( " INTPTR_FORMAT "/" INTPTR_FORMAT " ) is not an oop.", p, oop(p));
+  }
   assert(p == current_top || oop(p)->is_oop(), "p is not a block start");
   if (p < current_top)
     return oop(p)->size();
diff -r aa0c48844632 -r a7d54b98ca4a src/share/vm/oops/generateOopMap.cpp
--- a/src/share/vm/oops/generateOopMap.cpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/share/vm/oops/generateOopMap.cpp	Wed Jun 03 16:27:17 2009 +0200
@@ -549,7 +549,7 @@ bool GenerateOopMap::jump_targets_do(Byt
         break;
       }
     case Bytecodes::_jsr:
-      assert(bcs->is_wide()==false, "sanity check");
+      assert(bcs->is_wide_index()==false, "sanity check");
       (*jmpFct)(this, bcs->dest(), data);
 
 
@@ -1322,7 +1322,14 @@ void GenerateOopMap::interp1(BytecodeStr
        break;
     }
   }
-
+  // Check for tail calls and remember if one was encountered.
+  if (itr->is_tail_call()) {
+    assert(itr->code() == Bytecodes::_invokevirtual ||
+           itr->code() == Bytecodes::_invokestatic ||
+           itr->code() == Bytecodes::_invokeinterface ||
+           itr->code() == Bytecodes::_invokespecial, "tail call must be an invoke bytecode");
+    _contains_tail_call = true;
+  }
   // abstract interpretation of current opcode
   switch(itr->code()) {
     case Bytecodes::_nop:                                           break;
@@ -1925,6 +1932,13 @@ void GenerateOopMap::do_method(int is_st
   int arg_length = cse.compute_for_parameters(is_static != 0, in);
   assert(arg_length<=MAXARGSIZE, "too many locals");
 
+  // Report results including the parameter expressions (used by stack
+  // compression during deoptimization).
+  if (_report_parameter_exprs && _report_result_for_send) {
+     fill_stackmap_for_opcodes(_itr_send, vars(), stack(), _stack_top);
+     _report_result_for_send = false;
+  }
+  
   // Pop arguments
   for (int i = arg_length - 1; i >= 0; i--) ppop1(in[i]);// Do args in reverse order.
 
@@ -2011,7 +2025,7 @@ GenerateOopMap::GenerateOopMap(methodHan
   _method = method;
   _max_locals=0;
   _init_vars = NULL;
-
+  _report_parameter_exprs = false; 
 #ifndef PRODUCT
   // If we are doing a detailed trace, include the regular trace information.
   if (TraceNewOopMapGenerationDetailed) {
@@ -2020,7 +2034,7 @@ GenerateOopMap::GenerateOopMap(methodHan
 #endif
 }
 
-void GenerateOopMap::compute_map(TRAPS) {
+void GenerateOopMap::compute_map(TRAPS, bool report_parameter_exprs) {
 #ifndef PRODUCT
   if (TimeOopMap2) {
     method()->print_short_name(tty);
@@ -2043,10 +2057,12 @@ void GenerateOopMap::compute_map(TRAPS) 
   _init_vars      = new GrowableArray<intptr_t>(5);  // There are seldom more than 5 init_vars
   _report_result  = false;
   _report_result_for_send = false;
+  _report_parameter_exprs = report_parameter_exprs;
   _new_var_map    = NULL;
   _ret_adr_tos    = new GrowableArray<intptr_t>(5);  // 5 seems like a good number;
   _did_rewriting  = false;
   _did_relocation = false;
+  _contains_tail_call = false;
 
   if (TraceNewOopMapGeneration) {
     tty->print("Method name: %s\n", method()->name()->as_C_string());
diff -r aa0c48844632 -r a7d54b98ca4a src/share/vm/oops/generateOopMap.hpp
--- a/src/share/vm/oops/generateOopMap.hpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/share/vm/oops/generateOopMap.hpp	Wed Jun 03 16:27:17 2009 +0200
@@ -298,7 +298,9 @@ class GenerateOopMap VALUE_OBJ_CLASS_SPE
   bool         _did_relocation;             // was relocation neccessary
   bool         _monitor_safe;               // The monitors in this method have been determined
                                             // to be safe.
-
+  bool         _contains_tail_call;         // This method contains a tail call.
+  bool         _report_parameter_exprs;     // Report parameter expressions.
+                                            // (Stack compression deoptimization.)
   // Working Cell type state
   int            _state_len;                // Size of states
   CellTypeState *_state;                    // list of states
@@ -456,7 +458,7 @@ class GenerateOopMap VALUE_OBJ_CLASS_SPE
   GenerateOopMap(methodHandle method);
 
   // Compute the map.
-  void compute_map(TRAPS);
+  void compute_map(TRAPS, bool report_parameter_exprs=false);
   void result_for_basicblock(int bci);    // Do a callback on fill_stackmap_for_opcodes for basicblock containing bci
 
   // Query
@@ -469,6 +471,8 @@ class GenerateOopMap VALUE_OBJ_CLASS_SPE
 
   static void print_time();
 
+  // Method contains tail call.
+  bool contains_tail_call()                        { return _contains_tail_call; }
   // Monitor query
   bool monitor_safe()                              { return _monitor_safe; }
 
@@ -557,3 +561,26 @@ class GeneratePairingInfo: public Genera
 
   // Call compute_map(CHECK) to generate info.
 };
+
+//
+// Subclass used by the compiler to query whether a method contains tail calls.
+//
+class ContainsTailCallInfo : public GenerateOopMap {
+ private:
+
+  virtual bool report_results() const     { return false; }
+  virtual bool report_init_vars() const   { return false; }
+  virtual bool allow_rewrites() const     { return false;  }
+  virtual bool possible_gc_point          (BytecodeStream *bcs)           { return false; }
+  virtual void fill_stackmap_prolog       (int nof_gc_points)             {}
+  virtual void fill_stackmap_epilog       ()                              {}
+  virtual void fill_stackmap_for_opcodes  (BytecodeStream *bcs,
+                                           CellTypeState* vars,
+                                           CellTypeState* stack,
+                                           int stack_top)                 {}
+  virtual void fill_init_vars             (GrowableArray<intptr_t> *init_vars) {}
+ public:
+  ContainsTailCallInfo(methodHandle method) : GenerateOopMap(method)       {};
+
+  // Call compute_map(CHECK) to generate info.
+};
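
A hypothetical use of the subclass, following the comment above ("Call compute_map(CHECK) to generate info") and the contains-tail-call flag this patch adds to methodOop:

    // Sketch only: record whether a method contains a tail call
    // (method is assumed to be a methodHandle in scope, TRAPS context assumed).
    ContainsTailCallInfo info(method);
    info.compute_map(CHECK);
    method->set_contains_tail_call(info.contains_tail_call());
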
diff -r aa0c48844632 -r a7d54b98ca4a src/share/vm/oops/instanceKlass.hpp
--- a/src/share/vm/oops/instanceKlass.hpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/share/vm/oops/instanceKlass.hpp	Wed Jun 03 16:27:17 2009 +0200
@@ -603,6 +603,7 @@ class instanceKlass: public Klass {
   int object_size() const             { return object_size(align_object_offset(vtable_length()) + align_object_offset(itable_length()) + static_field_size() + nonstatic_oop_map_size()); }
   static int vtable_start_offset()    { return header_size(); }
   static int vtable_length_offset()   { return oopDesc::header_size() + offset_of(instanceKlass, _vtable_len) / HeapWordSize; }
+  static int protection_domain_offset() { return oopDesc::header_size() + offset_of(instanceKlass, _protection_domain) / HeapWordSize; }
   static int object_size(int extra)   { return align_object_size(header_size() + extra); }
 
   intptr_t* start_of_vtable() const        { return ((intptr_t*)as_klassOop()) + vtable_start_offset(); }
diff -r aa0c48844632 -r a7d54b98ca4a src/share/vm/oops/methodKlass.cpp
--- a/src/share/vm/oops/methodKlass.cpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/share/vm/oops/methodKlass.cpp	Wed Jun 03 16:27:17 2009 +0200
@@ -72,6 +72,7 @@ methodOop methodKlass::allocate(constMet
   m->set_method_data(NULL);
   m->set_interpreter_throwout_count(0);
   m->set_vtable_index(methodOopDesc::garbage_vtable_index);
+  m->set_contains_tail_call(false);
 
   // Fix and bury in methodOop
   m->set_interpreter_entry(NULL); // sets i2i entry and from_int
diff -r aa0c48844632 -r a7d54b98ca4a src/share/vm/oops/methodOop.cpp
--- a/src/share/vm/oops/methodOop.cpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/share/vm/oops/methodOop.cpp	Wed Jun 03 16:27:17 2009 +0200
@@ -41,6 +41,26 @@ address methodOopDesc::get_c2i_unverifie
 address methodOopDesc::get_c2i_unverified_entry() {
   assert(_adapter != NULL, "must have");
   return _adapter->get_c2i_unverified_entry();
+}
+
+address methodOopDesc::get_c2i_verified_tail_call_entry() {
+  assert(_adapter != NULL, "must have");
+  return _adapter->get_c2i_verified_tail_call_entry();
+}
+
+address methodOopDesc::get_c2i_unverified_tail_call_entry() {
+  assert(_adapter != NULL, "must have");
+  return _adapter->get_c2i_unverified_tail_call_entry();
+}
+
+address methodOopDesc::get_c2i_verified_not_sibling_tail_call_entry() {
+  assert(_adapter != NULL, "must have");
+  return _adapter->get_c2i_verified_not_sibling_tail_call_entry();
+}
+
+address methodOopDesc::get_c2i_unverified_not_sibling_tail_call_entry() {
+  assert(_adapter != NULL, "must have");
+  return _adapter->get_c2i_unverified_not_sibling_tail_call_entry();
 }
 
 char* methodOopDesc::name_and_sig_as_C_string() {
@@ -143,7 +163,8 @@ void methodOopDesc::mask_for(int bci, In
   bool has_capability = myThread->is_VM_thread() ||
                         myThread->is_ConcurrentGC_thread() ||
                         myThread->is_GC_task_thread();
-
+// TODO (arnold): turn this check back on.
+#if 0
   if (!has_capability) {
     if (!VerifyStack && !VerifyLastFrame) {
       // verify stack calls this outside VM thread
@@ -154,6 +175,7 @@ void methodOopDesc::mask_for(int bci, In
       local_mask.print();
     }
   }
+#endif
 #endif
   instanceKlass::cast(method_holder())->mask_for(h_this, bci, mask);
   return;
@@ -619,8 +641,12 @@ void methodOopDesc::clear_code() {
   // Only should happen at allocate time.
   if (_adapter == NULL) {
     _from_compiled_entry    = NULL;
+    _from_compiled_tail_call_entry = NULL;
+    _from_compiled_not_sibling_tail_call_entry = NULL;
   } else {
     _from_compiled_entry    = _adapter->get_c2i_entry();
+    _from_compiled_tail_call_entry = _adapter->get_c2i_verified_tail_call_entry();
+    _from_compiled_not_sibling_tail_call_entry = _adapter->get_c2i_verified_not_sibling_tail_call_entry();
   }
   OrderAccess::storestore();
   _from_interpreted_entry = _i2i_entry;
@@ -642,6 +668,8 @@ void methodOopDesc::unlink_method() {
   backedge_counter()->reset();
   _adapter = NULL;
   _from_compiled_entry = NULL;
+  _from_compiled_tail_call_entry = NULL;
+  _from_compiled_not_sibling_tail_call_entry = NULL;
   assert(_method_data == NULL, "unexpected method data?");
   set_method_data(NULL);
   set_interpreter_throwout_count(0);
@@ -682,7 +710,7 @@ void methodOopDesc::link_method(methodHa
 
 }
 
-address methodOopDesc::make_adapters(methodHandle mh, TRAPS) {
+address methodOopDesc::make_adapters(methodHandle mh, TRAPS) {
   // Adapters for compiled code are made eagerly here.  They are fairly
   // small (generally < 100 bytes) and quick to make (and cached and shared)
   // so making them eagerly shouldn't be too expensive.
@@ -693,6 +721,10 @@ address methodOopDesc::make_adapters(met
 
   mh->set_adapter_entry(adapter);
   mh->_from_compiled_entry = adapter->get_c2i_entry();
+  mh->_from_compiled_tail_call_entry = adapter->get_c2i_verified_tail_call_entry();
+  mh->_from_compiled_not_sibling_tail_call_entry = 
+    adapter->get_c2i_verified_not_sibling_tail_call_entry();
+
   return adapter->get_c2i_entry();
 }
 
@@ -707,6 +739,19 @@ address methodOopDesc::verified_code_ent
   debug_only(No_Safepoint_Verifier nsv;)
   assert(_from_compiled_entry != NULL, "must be set");
   return _from_compiled_entry;
+}
+
+
+address methodOopDesc::verified_tail_call_code_entry() {
+  debug_only(No_Safepoint_Verifier nsv;)
+  assert(_from_compiled_tail_call_entry != NULL, "must be set");
+  return _from_compiled_tail_call_entry;
+}
+
+address methodOopDesc::verified_not_sibling_tail_call_code_entry() {
+  debug_only(No_Safepoint_Verifier nsv;)
+  assert(_from_compiled_not_sibling_tail_call_entry != NULL, "must be set");
+  return _from_compiled_not_sibling_tail_call_entry;
 }
 
 // Check that if an nmethod ref exists, it has a backlink to this or no backlink at all
@@ -736,13 +781,19 @@ void methodOopDesc::set_code(methodHandl
   if (comp_level > highest_tier_compile()) {
     set_highest_tier_compile(comp_level);
   }
+  // Set the methodOop and c2i entry point in the tail call stubs.
+  if (code->is_java_method())
+    code->set_adapter_info_in_tail_call_stubs(mh(), mh->adapter());
 
   OrderAccess::storestore();
   mh->_from_compiled_entry = code->verified_entry_point();
   OrderAccess::storestore();
+  mh->_from_compiled_tail_call_entry = code->verified_tail_call_entry_point();
+  OrderAccess::storestore();
+  mh->_from_compiled_not_sibling_tail_call_entry = code->verified_not_sibling_tail_call_entry_point();
   // Instantly compiled code can execute.
   mh->_from_interpreted_entry = mh->get_i2c_entry();
-
+  
 }
 
 
diff -r aa0c48844632 -r a7d54b98ca4a src/share/vm/oops/methodOop.hpp
--- a/src/share/vm/oops/methodOop.hpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/share/vm/oops/methodOop.hpp	Wed Jun 03 16:27:17 2009 +0200
@@ -128,6 +128,12 @@ class methodOopDesc : public oopDesc {
   nmethod* volatile _code;                       // Points to the corresponding piece of native code
   volatile address           _from_interpreted_entry; // Cache of _code ? _adapter->i2c_entry() : _i2i_entry
 
+  // Entry points for tail calls from compiled code.
+  volatile address _from_compiled_tail_call_entry;
+  volatile address _from_compiled_not_sibling_tail_call_entry;
+  // This should probably go into the access flags, but it is not clear
+  // whether a free bit is left.
+  bool _contains_tail_call;
  public:
 
   static const bool IsUnsafeConc         = false;
@@ -140,7 +146,10 @@ class methodOopDesc : public oopDesc {
 
   static address make_adapters(methodHandle mh, TRAPS);
   volatile address from_compiled_entry() const   { return (address)OrderAccess::load_ptr_acquire(&_from_compiled_entry); }
+  volatile address from_compiled_tail_call_entry() const { return (address)OrderAccess::load_ptr_acquire(&_from_compiled_tail_call_entry); }
+  volatile address from_compiled_not_sibling_tail_call_entry() const { return (address)OrderAccess::load_ptr_acquire(&_from_compiled_not_sibling_tail_call_entry); }
   volatile address from_interpreted_entry() const{ return (address)OrderAccess::load_ptr_acquire(&_from_interpreted_entry); }
+  
 
   // access flag
   AccessFlags access_flags() const               { return _access_flags;  }
@@ -302,6 +311,9 @@ class methodOopDesc : public oopDesc {
 
   // nmethod/verified compiler entry
   address verified_code_entry();
+  address verified_tail_call_code_entry();
+  address verified_not_sibling_tail_call_code_entry();
+
   bool check_code() const;      // Not inline to avoid circular ref
   nmethod* volatile code() const                 { assert( check_code(), "" ); return (nmethod *)OrderAccess::load_ptr_acquire(&_code); }
   void clear_code();            // Clear out any compiled code
@@ -310,6 +322,11 @@ class methodOopDesc : public oopDesc {
   address get_i2c_entry();
   address get_c2i_entry();
   address get_c2i_unverified_entry();
+  address get_c2i_verified_tail_call_entry();
+  address get_c2i_unverified_tail_call_entry();
+  address get_c2i_verified_not_sibling_tail_call_entry();
+  address get_c2i_unverified_not_sibling_tail_call_entry();
+
   AdapterHandlerEntry* adapter() {  return _adapter; }
   // setup entry points
   void link_method(methodHandle method, TRAPS);
@@ -459,6 +476,11 @@ class methodOopDesc : public oopDesc {
   bool guaranteed_monitor_matching() const       { return access_flags().is_monitor_matching(); }
   void set_guaranteed_monitor_matching()         { _access_flags.set_monitor_matching(); }
 
+  // Does this method contain a tail call? May return false because the info
+  // has not been computed yet, so only rely on a true value.
+  bool contains_tail_call() const { return _contains_tail_call; }
+  void set_contains_tail_call(bool does_contain) { _contains_tail_call = does_contain; }
+
   // returns true if the method is an accessor function (setter/getter).
   bool is_accessor() const;
 
@@ -487,6 +509,9 @@ class methodOopDesc : public oopDesc {
   static ByteSize size_of_locals_offset()        { return byte_offset_of(methodOopDesc, _max_locals        ); }
   static ByteSize size_of_parameters_offset()    { return byte_offset_of(methodOopDesc, _size_of_parameters); }
   static ByteSize from_compiled_offset()         { return byte_offset_of(methodOopDesc, _from_compiled_entry); }
+  static ByteSize from_compiled_tail_call_offset() { return byte_offset_of(methodOopDesc, _from_compiled_tail_call_entry); }
+  static ByteSize from_compiled_not_sibling_tail_call_offset() { return byte_offset_of(methodOopDesc, _from_compiled_not_sibling_tail_call_entry); }
+
   static ByteSize code_offset()                  { return byte_offset_of(methodOopDesc, _code); }
   static ByteSize invocation_counter_offset()    { return byte_offset_of(methodOopDesc, _invocation_counter); }
   static ByteSize backedge_counter_offset()      { return byte_offset_of(methodOopDesc, _backedge_counter); }
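
A hypothetical helper showing how the compiled entry points added above would be selected for a call site; the accessors are the ones declared in this file, the helper itself is not part of the patch:

    // Sketch only: pick the compiled-code entry for a normal call, a sibling
    // tail call, or a not-sibling tail call.
    static address compiled_entry_for(methodOopDesc* m, bool is_tail_call, bool is_sibling) {
      if (!is_tail_call) return m->from_compiled_entry();
      return is_sibling ? m->from_compiled_tail_call_entry()
                        : m->from_compiled_not_sibling_tail_call_entry();
    }
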
diff -r aa0c48844632 -r a7d54b98ca4a src/share/vm/opto/callGenerator.cpp
--- a/src/share/vm/opto/callGenerator.cpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/share/vm/opto/callGenerator.cpp	Wed Jun 03 16:27:17 2009 +0200
@@ -25,8 +25,10 @@
 #include "incls/_precompiled.incl"
 #include "incls/_callGenerator.cpp.incl"
 
-CallGenerator::CallGenerator(ciMethod* method) {
+CallGenerator::CallGenerator(ciMethod* method, bool is_tail_call, bool is_sibling) {
   _method = method;
+  _is_tail_call = is_tail_call;
+  _is_sibling = is_sibling;
 }
 
 // Utility function.
@@ -42,16 +44,16 @@ private:
   float _expected_uses;
 
 public:
-  ParseGenerator(ciMethod* method, float expected_uses, bool is_osr = false)
+  ParseGenerator(ciMethod* method, float expected_uses, bool is_osr = false, bool is_tail_call = false)
     : InlineCallGenerator(method)
   {
     _is_osr        = is_osr;
     _expected_uses = expected_uses;
-    assert(can_parse(method, is_osr), "parse must be possible");
+    assert(can_parse(method, true, is_osr, is_tail_call), "parse must be possible");
   }
 
   // Can we build either an OSR or a regular parser for this method?
-  static bool can_parse(ciMethod* method, int is_osr = false);
+  static bool can_parse(ciMethod* method, bool is_root, int is_osr = false, bool is_tail_call = false);
 
   virtual bool      is_parse() const           { return true; }
   virtual JVMState* generate(JVMState* jvms);
@@ -99,8 +101,8 @@ JVMState* ParseGenerator::generate(JVMSt
 // Internal class which handles all out-of-line calls w/o receiver type checks.
 class DirectCallGenerator : public CallGenerator {
 public:
-  DirectCallGenerator(ciMethod* method)
-    : CallGenerator(method)
+  DirectCallGenerator(ciMethod* method, bool is_tail_call, bool is_sibling)
+    : CallGenerator(method, is_tail_call, is_sibling)
   {
   }
   virtual JVMState* generate(JVMState* jvms);
@@ -109,14 +111,28 @@ JVMState* DirectCallGenerator::generate(
 JVMState* DirectCallGenerator::generate(JVMState* jvms) {
   GraphKit kit(jvms);
   bool is_static = method()->is_static();
-  address target = is_static ? SharedRuntime::get_resolve_static_call_stub()
-                             : SharedRuntime::get_resolve_opt_virtual_call_stub();
+  // Pick the resolve stub (later used by the MachCallStaticJavaNode).
+  address target = NULL;
+
+  if (!is_tail_call()) {
+    // Normal call
+    target = is_static ? SharedRuntime::get_resolve_static_call_stub()
+                       : SharedRuntime::get_resolve_opt_virtual_call_stub();
+  } else if (is_tail_call() && is_sibling()) {
+    // Tail call (sibling)
+    target = is_static ? SharedRuntime::get_resolve_static_tail_call_stub()
+                       : SharedRuntime::get_resolve_opt_virtual_tail_call_stub();
+  } else if (is_tail_call()) {
+    // Tail call (not sibling)
+    target = is_static ? SharedRuntime::get_resolve_not_sibling_static_tail_call_stub()
+                       : SharedRuntime::get_resolve_opt_not_sibling_virtual_tail_call_stub();
+  }
 
   if (kit.C->log() != NULL) {
     kit.C->log()->elem("direct_call bci='%d'", jvms->bci());
   }
 
-  CallStaticJavaNode *call = new (kit.C, tf()->domain()->cnt()) CallStaticJavaNode(tf(), target, method(), kit.bci());
+  CallStaticJavaNode *call = new (kit.C, tf()->domain()->cnt()) CallStaticJavaNode(tf(), target, method(), kit.bci(), is_tail_call(), is_sibling());
   if (!is_static) {
     // Make an explicit receiver null_check as part of this call.
     // Since we share a map with the caller, his JVMS gets adjusted.
@@ -139,8 +155,8 @@ private:
 private:
   int _vtable_index;
 public:
-  VirtualCallGenerator(ciMethod* method, int vtable_index)
-    : CallGenerator(method), _vtable_index(vtable_index)
+  VirtualCallGenerator(ciMethod* method, int vtable_index, bool is_tail_call, bool is_sibling)
+    : CallGenerator(method, is_tail_call, is_sibling), _vtable_index(vtable_index)
   {
     assert(vtable_index == methodOopDesc::invalid_vtable_index ||
            vtable_index >= 0, "either invalid or usable");
@@ -196,9 +212,23 @@ JVMState* VirtualCallGenerator::generate
   assert(!method()->is_private(), "virtual call should not be to private");
   assert(_vtable_index == methodOopDesc::invalid_vtable_index || !UseInlineCaches,
          "no vtable calls if +UseInlineCaches ");
-  address target = SharedRuntime::get_resolve_virtual_call_stub();
+  
+  address target = NULL;
+  if (!is_tail_call()) {
+    // Normal virtual call.
+    target = SharedRuntime::get_resolve_virtual_call_stub();
+  } else if (is_tail_call() && is_sibling()) {
+    // Tail call (sibling)
+    target = SharedRuntime::get_resolve_virtual_tail_call_stub();
+  } else if (is_tail_call()) {
+    // Tail call (not sibling)
+    target = SharedRuntime::get_resolve_not_sibling_virtual_tail_call_stub();
+  }
+#ifdef ASSERT
+  assert(target != NULL, "oops");
+#endif
   // Normal inline cache used for call
-  CallDynamicJavaNode *call = new (kit.C, tf()->domain()->cnt()) CallDynamicJavaNode(tf(), target, method(), _vtable_index, kit.bci());
+  CallDynamicJavaNode *call = new (kit.C, tf()->domain()->cnt()) CallDynamicJavaNode(tf(), target, method(), _vtable_index, kit.bci(), is_tail_call(), is_sibling());
   kit.set_arguments_for_java_call(call);
   kit.set_edges_for_java_call(call);
   Node* ret = kit.set_results_for_java_call(call);
@@ -211,10 +241,15 @@ JVMState* VirtualCallGenerator::generate
   return kit.transfer_exceptions_into_jvms();
 }
 
-bool ParseGenerator::can_parse(ciMethod* m, int entry_bci) {
+bool ParseGenerator::can_parse(ciMethod* m, bool is_root, int entry_bci, bool is_tail_call) {
   // Certain methods cannot be parsed at all:
   if (!m->can_be_compiled())              return false;
   if (!m->has_balanced_monitors())        return false;
+  // Cannot inline calls to methods that contain tail calls unless the call
+  // itself is a tail call.
+  if (!is_tail_call &&
+      !is_root &&
+      m->contains_tail_call())            return false;
   if (m->get_flow_analysis()->failing())  return false;
 
   // (Methods may bail out for other reasons, after the parser is run.
@@ -223,9 +258,9 @@ bool ParseGenerator::can_parse(ciMethod*
   return true;
 }
 
-CallGenerator* CallGenerator::for_inline(ciMethod* m, float expected_uses) {
-  if (!ParseGenerator::can_parse(m))  return NULL;
-  return new ParseGenerator(m, expected_uses);
+CallGenerator* CallGenerator::for_inline(ciMethod* m, float expected_uses, bool is_root, bool is_tail_call) {
+  if (!ParseGenerator::can_parse(m, is_root, is_root, is_tail_call))  return NULL;
+  return new ParseGenerator(m, expected_uses, false, is_tail_call);
 }
 
 // As a special case, the JVMS passed to this CallGenerator is
@@ -238,14 +273,14 @@ CallGenerator* CallGenerator::for_osr(ci
   return new ParseGenerator(m, expected_uses, true);
 }
 
-CallGenerator* CallGenerator::for_direct_call(ciMethod* m) {
+CallGenerator* CallGenerator::for_direct_call(ciMethod* m, bool is_tail_call, bool is_sibling) {
   assert(!m->is_abstract(), "for_direct_call mismatch");
-  return new DirectCallGenerator(m);
+  return new DirectCallGenerator(m, is_tail_call, is_sibling);
 }
 
-CallGenerator* CallGenerator::for_virtual_call(ciMethod* m, int vtable_index) {
+CallGenerator* CallGenerator::for_virtual_call(ciMethod* m, int vtable_index, bool is_tail_call, bool is_sibling) {
   assert(!m->is_static(), "for_virtual_call mismatch");
-  return new VirtualCallGenerator(m, vtable_index);
+  return new VirtualCallGenerator(m, vtable_index, is_tail_call, is_sibling);
 }
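
The stub selection in DirectCallGenerator::generate and VirtualCallGenerator::generate above follows the same three-way pattern. Condensed into a sketch for the static case (the SharedRuntime getters are the ones the patch references; the helper is illustrative):

    // Sketch only: normal call, sibling tail call, or not-sibling tail call.
    static address static_call_stub_for(bool is_tail_call, bool is_sibling) {
      if (!is_tail_call) return SharedRuntime::get_resolve_static_call_stub();
      if (is_sibling)    return SharedRuntime::get_resolve_static_tail_call_stub();
      return SharedRuntime::get_resolve_not_sibling_static_tail_call_stub();
    }
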
 
 
diff -r aa0c48844632 -r a7d54b98ca4a src/share/vm/opto/callGenerator.hpp
--- a/src/share/vm/opto/callGenerator.hpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/share/vm/opto/callGenerator.hpp	Wed Jun 03 16:27:17 2009 +0200
@@ -34,14 +34,16 @@ class CallGenerator : public ResourceObj
 
  private:
   ciMethod*             _method;                // The method being called.
-
+  bool                  _is_tail_call;
+  bool                  _is_sibling;
  protected:
-  CallGenerator(ciMethod* method);
+  CallGenerator(ciMethod* method, bool is_tail_call = false, bool is_sibling = false);
 
  public:
   // Accessors
   ciMethod*         method() const              { return _method; }
-
+  bool              is_tail_call() const        { return _is_tail_call; }
+  bool              is_sibling() const          { return _is_sibling; }
   // is_inline: At least some code implementing the method is copied here.
   virtual bool      is_inline() const           { return false; }
   // is_intrinsic: There's a method-specific way of generating the inline code.
@@ -87,13 +89,13 @@ class CallGenerator : public ResourceObj
   virtual JVMState* generate(JVMState* jvms) = 0;
 
   // How to generate a call site that is inlined:
-  static CallGenerator* for_inline(ciMethod* m, float expected_uses = -1);
+  static CallGenerator* for_inline(ciMethod* m, float expected_uses /* = -1*/, bool is_root, bool is_tail_call = false);
   // How to generate code for an on-stack replacement handler.
   static CallGenerator* for_osr(ciMethod* m, int osr_bci);
 
   // How to generate vanilla out-of-line call sites:
-  static CallGenerator* for_direct_call(ciMethod* m);   // static, special
-  static CallGenerator* for_virtual_call(ciMethod* m, int vtable_index);  // virtual, interface
+  static CallGenerator* for_direct_call(ciMethod* m, bool is_tail_call=false, bool is_sibling = false);   // static, special
+  static CallGenerator* for_virtual_call(ciMethod* m, int vtable_index, bool is_tail_call=false, bool is_sibling = false);  // virtual, interface
 
   // How to make a call but defer the decision whether to inline or not.
   static CallGenerator* for_warm_call(WarmCallInfo* ci,
diff -r aa0c48844632 -r a7d54b98ca4a src/share/vm/opto/callnode.hpp
--- a/src/share/vm/opto/callnode.hpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/share/vm/opto/callnode.hpp	Wed Jun 03 16:27:17 2009 +0200
@@ -526,12 +526,16 @@ protected:
   virtual uint size_of() const; // Size is bigger
 
   bool    _optimized_virtual;
+  bool    _is_tail_call;
+  bool    _is_sibling;
   ciMethod* _method;            // Method being direct called
 public:
   const int       _bci;         // Byte Code Index of call byte code
-  CallJavaNode(const TypeFunc* tf , address addr, ciMethod* method, int bci)
+  CallJavaNode(const TypeFunc* tf , address addr, ciMethod* method, int bci, 
+               bool is_tail_call=false, bool is_sibling=false)
     : CallNode(tf, addr, TypePtr::BOTTOM),
-      _method(method), _bci(bci), _optimized_virtual(false)
+      _method(method), _bci(bci), _optimized_virtual(false), 
+      _is_tail_call(is_tail_call), _is_sibling(is_sibling)
   {
     init_class_id(Class_CallJava);
   }
@@ -540,8 +544,11 @@ public:
   ciMethod* method() const                { return _method; }
   void  set_method(ciMethod *m)           { _method = m; }
   void  set_optimized_virtual(bool f)     { _optimized_virtual = f; }
+  void  set_is_tail_call(bool t)          { _is_tail_call = t; }
+  void  set_is_sibling(bool s)            { _is_sibling = s; }
   bool  is_optimized_virtual() const      { return _optimized_virtual; }
-
+  bool  is_tail_call() const              { return _is_tail_call; }
+  bool  is_sibling() const                { return _is_sibling; }
 #ifndef PRODUCT
   virtual void  dump_spec(outputStream *st) const;
 #endif
@@ -555,8 +562,8 @@ class CallStaticJavaNode : public CallJa
   virtual uint cmp( const Node &n ) const;
   virtual uint size_of() const; // Size is bigger
 public:
-  CallStaticJavaNode(const TypeFunc* tf, address addr, ciMethod* method, int bci)
-    : CallJavaNode(tf, addr, method, bci), _name(NULL) {
+  CallStaticJavaNode(const TypeFunc* tf, address addr, ciMethod* method, int bci, bool is_tail_call=false, bool is_sibling=false)
+    : CallJavaNode(tf, addr, method, bci, is_tail_call, is_sibling), _name(NULL) {
     init_class_id(Class_CallStaticJava);
   }
   CallStaticJavaNode(const TypeFunc* tf, address addr, const char* name, int bci,
@@ -584,7 +591,9 @@ class CallDynamicJavaNode : public CallJ
   virtual uint cmp( const Node &n ) const;
   virtual uint size_of() const; // Size is bigger
 public:
-  CallDynamicJavaNode( const TypeFunc *tf , address addr, ciMethod* method, int vtable_index, int bci ) : CallJavaNode(tf,addr,method,bci), _vtable_index(vtable_index) {
+  CallDynamicJavaNode( const TypeFunc *tf , address addr, ciMethod* method, int vtable_index, int bci, 
+                       bool is_tail_call = false, bool is_sibling=false) 
+  : CallJavaNode(tf,addr,method,bci, is_tail_call, is_sibling), _vtable_index(vtable_index) {
     init_class_id(Class_CallDynamicJava);
   }
 
diff -r aa0c48844632 -r a7d54b98ca4a src/share/vm/opto/compile.cpp
--- a/src/share/vm/opto/compile.cpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/share/vm/opto/compile.cpp	Wed Jun 03 16:27:17 2009 +0200
@@ -524,7 +524,7 @@ Compile::Compile( ciEnv* ci_env, C2Compi
       init_start(s);
       float past_uses = method()->interpreter_invocation_count();
       float expected_uses = past_uses;
-      cg = CallGenerator::for_inline(method(), expected_uses);
+      cg = CallGenerator::for_inline(method(), expected_uses, true);
     }
     if (failing())  return;
     if (cg == NULL) {
diff -r aa0c48844632 -r a7d54b98ca4a src/share/vm/opto/compile.hpp
--- a/src/share/vm/opto/compile.hpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/share/vm/opto/compile.hpp	Wed Jun 03 16:27:17 2009 +0200
@@ -473,7 +473,7 @@ class Compile : public Phase {
 
   // Decide how to build a call.
   // The profile factor is a discount to apply to this site's interp. profile.
-  CallGenerator*    call_generator(ciMethod* call_method, int vtable_index, bool call_is_virtual, JVMState* jvms, bool allow_inline, float profile_factor);
+  CallGenerator*    call_generator(ciMethod* call_method, int vtable_index, bool call_is_virtual, JVMState* jvms, bool allow_inline, float profile_factor, bool is_tail_call=false, bool is_sibling=false);
 
   // Report if there were too many traps at a current method and bci.
   // Report if a trap was recorded, and/or PerMethodTrapLimit was exceeded.
diff -r aa0c48844632 -r a7d54b98ca4a src/share/vm/opto/doCall.cpp
--- a/src/share/vm/opto/doCall.cpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/share/vm/opto/doCall.cpp	Wed Jun 03 16:27:17 2009 +0200
@@ -43,7 +43,7 @@ void trace_type_profile(ciMethod *method
 }
 #endif
 
-CallGenerator* Compile::call_generator(ciMethod* call_method, int vtable_index, bool call_is_virtual, JVMState* jvms, bool allow_inline, float prof_factor) {
+CallGenerator* Compile::call_generator(ciMethod* call_method, int vtable_index, bool call_is_virtual, JVMState* jvms, bool allow_inline, float prof_factor, bool is_tail_call, bool is_sibling) {
   CallGenerator* cg;
 
   // Dtrace currently doesn't work unless all calls are vanilla
@@ -98,7 +98,6 @@ CallGenerator* Compile::call_generator(c
     allow_inline = false;
   }
 
-  // Attempt to inline...
   if (allow_inline) {
     // The profile data is only partly attributable to this caller,
     // scale back the call site information.
@@ -127,7 +126,7 @@ CallGenerator* Compile::call_generator(c
       bool require_inline = (allow_inline && ci->is_hot());
 
       if (allow_inline) {
-        CallGenerator* cg = CallGenerator::for_inline(call_method, expected_uses);
+        CallGenerator* cg = CallGenerator::for_inline(call_method, expected_uses, false, is_tail_call);
         if (cg == NULL) {
           // Fall through.
         } else if (require_inline || !InlineWarmCalls) {
@@ -144,7 +143,7 @@ CallGenerator* Compile::call_generator(c
       // The major receiver's count >= TypeProfileMajorReceiverPercent of site_count.
       bool have_major_receiver = (100.*profile.receiver_prob(0) >= (float)TypeProfileMajorReceiverPercent);
       ciMethod* receiver_method = NULL;
-      if (have_major_receiver || profile.morphism() == 1 ||
+      if (have_major_receiver || (profile.morphism() == 1) ||
           (profile.morphism() == 2 && UseBimorphicInlining)) {
         // receiver_method = profile.method();
         // Profiles do not suggest methods now.  Look it up in the major receiver.
@@ -152,9 +151,14 @@ CallGenerator* Compile::call_generator(c
                                                       profile.receiver(0));
       }
       if (receiver_method != NULL) {
+        // If the called method contains a tail call, we only allow inlining if
+        // the current call is also a tail call.
+        bool allow_inline_bimorphic = allow_inline &&
+            (TailCalls == false || receiver_method->contains_tail_call() == false ||
+             is_tail_call);
         // The single majority receiver sufficiently outweighs the minority.
         CallGenerator* hit_cg = this->call_generator(receiver_method,
-              vtable_index, !call_is_virtual, jvms, allow_inline, prof_factor);
+              vtable_index, !call_is_virtual, jvms, allow_inline_bimorphic, prof_factor, is_tail_call, is_sibling);
         if (hit_cg != NULL) {
           // Look up second receiver.
           CallGenerator* next_hit_cg = NULL;
@@ -162,13 +166,13 @@ CallGenerator* Compile::call_generator(c
           if (profile.morphism() == 2 && UseBimorphicInlining) {
             next_receiver_method = call_method->resolve_invoke(jvms->method()->holder(),
                                                                profile.receiver(1));
-            if (next_receiver_method != NULL) {
+            if (next_receiver_method != NULL) { 
               next_hit_cg = this->call_generator(next_receiver_method,
                                   vtable_index, !call_is_virtual, jvms,
-                                  allow_inline, prof_factor);
+                                  allow_inline_bimorphic, prof_factor, is_tail_call, is_sibling);
               if (next_hit_cg != NULL && !next_hit_cg->is_inline() &&
                   have_major_receiver && UseOnlyInlinedBimorphic) {
-                  // Skip if we can't inline second receiver's method
+                  // Skip if we can't inline second receiver's method.
                   next_hit_cg = NULL;
               }
             }
@@ -198,7 +202,7 @@ CallGenerator* Compile::call_generator(c
           } else {
             // Generate virtual call for class check failure path
             // in case of polymorphic virtual call site.
-            miss_cg = CallGenerator::for_virtual_call(call_method, vtable_index);
+            miss_cg = CallGenerator::for_virtual_call(call_method, vtable_index, is_tail_call, is_sibling);
           }
           if (miss_cg != NULL) {
             if (next_hit_cg != NULL) {
@@ -221,11 +225,11 @@ CallGenerator* Compile::call_generator(c
   // There was no special inlining tactic, or it bailed out.
   // Use a more generic tactic, like a simple call.
   if (call_is_virtual) {
-    return CallGenerator::for_virtual_call(call_method, vtable_index);
+    return CallGenerator::for_virtual_call(call_method, vtable_index, is_tail_call, is_sibling);
   } else {
     // Class Hierarchy Analysis or Type Profile reveals a unique target,
     // or it is a static or special call.
-    return CallGenerator::for_direct_call(call_method);
+    return CallGenerator::for_direct_call(call_method, is_tail_call, is_sibling);
   }
 }
 
@@ -274,7 +278,8 @@ void Parse::do_call() {
   bool is_virtual = bc() == Bytecodes::_invokevirtual;
   bool is_virtual_or_interface = is_virtual || bc() == Bytecodes::_invokeinterface;
   bool has_receiver = is_virtual_or_interface || bc() == Bytecodes::_invokespecial;
-
+  bool is_tail_call = iter().is_wide();
+  bool is_sibling = false;
   // Find target being called
   bool             will_link;
   ciMethod*        dest_method   = iter().get_method(will_link);
@@ -283,7 +288,10 @@ void Parse::do_call() {
   ciInstanceKlass* klass = ciEnv::get_instance_klass_for_declared_method_holder(holder);
 
   int   nargs    = dest_method->arg_size();
-
+  if (TailCalls) {
+    // A sibling call requires the callee's arguments to fit into the caller's argument area.
+    if (nargs <= method()->arg_size()) is_sibling = true;
+  }
   // uncommon-trap when callee is unloaded, uninitialized or will not link
   // bailout when too many arguments for register representation
   if (!will_link || can_not_compile_call_site(dest_method, klass)) {
@@ -344,7 +352,7 @@ void Parse::do_call() {
   // Decide call tactic.
   // This call checks with CHA, the interpreter profile, intrinsics table, etc.
   // It decides whether inlining is desirable or not.
-  CallGenerator* cg = C->call_generator(call_method, vtable_index, call_is_virtual, jvms, try_inline, prof_factor());
+  CallGenerator* cg = C->call_generator(call_method, vtable_index, call_is_virtual, jvms, try_inline, prof_factor(), is_tail_call, is_sibling);
 
   // ---------------------
   // Round double arguments before call
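For orientation, a minimal sketch of how Parse::do_call() above derives the two new flags (illustrative only; CallShape and classify_call are not part of the patch): a wide-prefixed invoke is treated as a tail call, and under -XX:+TailCalls it additionally counts as a sibling call when the callee's arguments fit into the caller's argument area.

  // Illustrative sketch, not patch code: mirrors the flag computation in Parse::do_call().
  struct CallShape { bool is_tail_call; bool is_sibling; };

  static CallShape classify_call(bool wide_prefix, bool tail_calls_enabled,
                                 int callee_arg_size, int caller_arg_size) {
    CallShape s;
    s.is_tail_call = wide_prefix;                          // iter().is_wide()
    s.is_sibling   = tail_calls_enabled &&                 // only checked under TailCalls
                     (callee_arg_size <= caller_arg_size); // enough argument space
    return s;
  }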
diff -r aa0c48844632 -r a7d54b98ca4a src/share/vm/opto/machnode.hpp
--- a/src/share/vm/opto/machnode.hpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/share/vm/opto/machnode.hpp	Wed Jun 03 16:27:17 2009 +0200
@@ -661,7 +661,10 @@ public:
 public:
   ciMethod* _method;             // Method being direct called
   int        _bci;               // Byte Code index of call byte code
-  bool       _optimized_virtual; // Tells if node is a static call or an optimized virtual
+  bool       _optimized_virtual; // Tells if node is a static call or an
+                                 // optimized virtual
+  bool       _is_tail_call;
+  bool       _is_sibling;
   MachCallJavaNode() : MachCallNode() {
     init_class_id(Class_MachCallJava);
   }
diff -r aa0c48844632 -r a7d54b98ca4a src/share/vm/opto/matcher.cpp
--- a/src/share/vm/opto/matcher.cpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/share/vm/opto/matcher.cpp	Wed Jun 03 16:27:17 2009 +0200
@@ -244,6 +244,15 @@ void Matcher::match( ) {
 
     // Saved biased stack-slot register number
     _parm_regs[i].set_pair(reg2, reg1);
+  }
+
+  // Account for the tail_call_protection_domain slots. The slots will only be
+  // observed if the code above observes a stack register and the code in
+  // warp_incoming_stk_arg adjusts _in_arg_limit.
+  if (TailCalls) {
+    VMReg reg = VMRegImpl::stack2reg(SharedRuntime::tail_call_protection_domain_slots()); // Tail call slot.
+    assert( reg->is_stack(), "oops");  // Stack slot argument?
+    OptoReg::Name warped = warp_incoming_stk_arg(reg);
   }
 
   // Finally, make sure the incoming arguments take up an even number of
@@ -1009,6 +1018,8 @@ MachNode *Matcher::match_sfpt( SafePoint
       mcall_java->_method = method;
       mcall_java->_bci = call_java->_bci;
       mcall_java->_optimized_virtual = call_java->is_optimized_virtual();
+      mcall_java->_is_tail_call = call_java->is_tail_call();
+      mcall_java->_is_sibling = call_java->is_sibling();
       if( mcall_java->is_MachCallStaticJava() )
         mcall_java->as_MachCallStaticJava()->_name =
          call_java->as_CallStaticJava()->_name;
@@ -1120,6 +1131,15 @@ MachNode *Matcher::match_sfpt( SafePoint
     // Compute number of stack slots needed to restore stack in case of
     // Pascal-style argument popping.
     mcall->_argsize = out_arg_limit_per_call - begin_out_arg_area;
+  }
+
+  // Account for the tail_call_protection_domain slots. The slots will only be
+  // observed if the code above observes a stack register and the code in
+  // warp_outgoing_stk_arg adjusts out_arg_limit_per_call.
+  if (TailCalls && mcall && call->is_CallJava()) {
+    VMReg reg = VMRegImpl::stack2reg(SharedRuntime::tail_call_protection_domain_slots()); // Tail call slot.
+    assert( reg->is_stack(), "oops");  // Stack slot argument?
+    OptoReg::Name warped = warp_outgoing_stk_arg(reg, begin_out_arg_area, out_arg_limit_per_call);
   }
 
   // Compute the max stack slot killed by any call.  These will not be
diff -r aa0c48844632 -r a7d54b98ca4a src/share/vm/opto/output.cpp
--- a/src/share/vm/opto/output.cpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/share/vm/opto/output.cpp	Wed Jun 03 16:27:17 2009 +0200
@@ -29,6 +29,10 @@ extern uint reloc_java_to_interp();
 extern uint reloc_java_to_interp();
 extern uint size_exception_handler();
 extern uint size_deopt_handler();
+extern uint size_verified_tail_call_stub(int arg_slots);
+extern uint size_tail_call_stub(int arg_slots);
+extern uint size_verified_not_sib_tail_call_stub(int arg_slots);
+extern uint size_not_sib_tail_call_stub(int arg_slots);
 
 #ifndef PRODUCT
 #define DEBUG_ARG(x) , x
@@ -38,7 +42,10 @@ extern uint size_deopt_handler();
 
 extern int emit_exception_handler(CodeBuffer &cbuf);
 extern int emit_deopt_handler(CodeBuffer &cbuf);
-
+extern void emit_verified_tail_call_stub(Compile *C, CodeBuffer& cbuf, int arg_slots, int frame_size, CodeOffsets& code_offsets);
+extern void emit_tail_call_stub(Compile* C, CodeBuffer& cbuf, int arg_slots, int frame_size, CodeOffsets& code_offsets);
+extern void emit_verified_not_sib_tail_call_stub(CodeBuffer& cbuf, int arg_slots, int VEP_offset, CodeOffsets& code_offsets);
+extern void emit_not_sib_tail_call_stub(CodeBuffer& cbuf, int arg_slots, int VEP_offset, CodeOffsets& code_offsets);
 //------------------------------Output-----------------------------------------
 // Convert Nodes to instruction bits and pass off to the VM
 void Compile::Output() {
@@ -529,7 +536,7 @@ void Compile::Shorten_branches(Label *la
 
   // Relocation records
   reloc_size += 1;              // Relo entry for exception handler
-
+  reloc_size += 16;             // Relocation entries for the tail call stubs.
   // Adjust reloc_size to number of record of relocation info
   // Min is 2 bytes, max is probably 6 or 8, with a tax up to 25% for
   // a relocation index.
@@ -1018,6 +1025,19 @@ void Compile::Fill_buffer() {
   int  stub_req   = TraceJumps ? initial_stub_capacity * 10 : initial_stub_capacity;
   int  const_req  = initial_const_capacity;
   bool labels_not_set = true;
+  int arg_slots = 0;
+  { // Compute arg_slots. Needed for tail call stubs.
+    const TypeTuple *domain = tf()->domain();
+    uint             argcnt = domain->cnt() - TypeFunc::Parms;
+    BasicType *sig_bt        = NEW_RESOURCE_ARRAY( BasicType, argcnt );
+    VMRegPair *vm_parm_regs  = NEW_RESOURCE_ARRAY( VMRegPair, argcnt );
+    uint i;
+    for( i = 0; i<argcnt; i++ ) {
+      sig_bt[i] = domain->field_at(i+TypeFunc::Parms)->basic_type();
+    }
+    arg_slots = SharedRuntime::java_calling_convention( sig_bt, vm_parm_regs, argcnt, 3);
+    //tty->print_cr("Arg slots %d", arg_slots);
+  }
 
   int  pad_req    = NativeCall::instruction_size;
   // The extra spacing after the code is necessary on some platforms.
@@ -1067,13 +1087,17 @@ void Compile::Fill_buffer() {
   // nmethod and CodeBuffer count stubs & constants as part of method's code.
   int exception_handler_req = size_exception_handler();
   int deopt_handler_req = size_deopt_handler();
+  int tail_call_stubs_rq = size_verified_tail_call_stub(arg_slots) +
+    size_verified_not_sib_tail_call_stub(arg_slots) +
+    size_tail_call_stub(arg_slots) +
+    size_not_sib_tail_call_stub(arg_slots);
   exception_handler_req += MAX_stubs_size; // add marginal slop for handler
   deopt_handler_req += MAX_stubs_size; // add marginal slop for handler
   stub_req += MAX_stubs_size;   // ensure per-stub margin
   code_req += MAX_inst_size;    // ensure per-instruction margin
   if (StressCodeBuffers)
     code_req = const_req = stub_req = exception_handler_req = deopt_handler_req = 0x10;  // force expansion
-  int total_req = code_req + pad_req + stub_req + exception_handler_req + deopt_handler_req + const_req;
+  int total_req = code_req + pad_req + stub_req + exception_handler_req + deopt_handler_req + const_req + tail_call_stubs_rq;
   CodeBuffer* cb = code_buffer();
   cb->initialize(total_req, locs_req);
 
@@ -1417,6 +1441,12 @@ void Compile::Fill_buffer() {
     _code_offsets.set_value(CodeOffsets::Exceptions, emit_exception_handler(*cb));
     // Emit the deopt handler code.
     _code_offsets.set_value(CodeOffsets::Deopt, emit_deopt_handler(*cb));
+    // Emit tail call stubs.
+    int framesize = (C->frame_slots() << LogBytesPerInt) - 2*wordSize; // Exclude the two words for the return address and saved ebp.
+    emit_verified_tail_call_stub(C, *cb, arg_slots, framesize, _code_offsets);
+    emit_tail_call_stub(C, *cb, arg_slots, framesize, _code_offsets);
+    emit_verified_not_sib_tail_call_stub(*cb, arg_slots, _first_block_size, _code_offsets);
+    emit_not_sib_tail_call_stub(*cb, arg_slots, _first_block_size, _code_offsets);
   }
 
   // One last check for failed CodeBuffer::expand:
diff -r aa0c48844632 -r a7d54b98ca4a src/share/vm/prims/jvmtiClassFileReconstituter.cpp
--- a/src/share/vm/prims/jvmtiClassFileReconstituter.cpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/share/vm/prims/jvmtiClassFileReconstituter.cpp	Wed Jun 03 16:27:17 2009 +0200
@@ -642,7 +642,7 @@ void JvmtiClassFileReconstituter::copy_b
     assert(len > 0, "length must be > 0");
 
     // copy the bytecodes
-    *p = (unsigned char) (bs.is_wide()? Bytecodes::_wide : code);
+    *p = (unsigned char) (bs.is_wide_index() || bs.is_tail_call()? Bytecodes::_wide : code);
     if (len > 1) {
       memcpy(p+1, bcp+1, len-1);
     }
diff -r aa0c48844632 -r a7d54b98ca4a src/share/vm/prims/methodComparator.cpp
--- a/src/share/vm/prims/methodComparator.cpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/share/vm/prims/methodComparator.cpp	Wed Jun 03 16:27:17 2009 +0200
@@ -236,7 +236,7 @@ bool MethodComparator::args_same(Bytecod
   case Bytecodes::_lload  : // fall through
   case Bytecodes::_lstore : // fall through
   case Bytecodes::_ret    :
-    if (_s_old->is_wide() != _s_new->is_wide())
+    if (_s_old->is_wide_index() != _s_new->is_wide_index())
       return false;
     if (_s_old->get_index() != _s_new->get_index())
       return false;
@@ -282,9 +282,9 @@ bool MethodComparator::args_same(Bytecod
   }
 
   case Bytecodes::_iinc :
-    if (_s_old->is_wide() != _s_new->is_wide())
+    if (_s_old->is_wide_index() != _s_new->is_wide_index())
       return false;
-    if (! _s_old->is_wide()) {
+    if (! _s_old->is_wide_index()) {
       if (_s_old->get_index_big() != _s_new->get_index_big())
         return false;
     } else {
diff -r aa0c48844632 -r a7d54b98ca4a src/share/vm/runtime/deoptimization.cpp
--- a/src/share/vm/runtime/deoptimization.cpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/share/vm/runtime/deoptimization.cpp	Wed Jun 03 16:27:17 2009 +0200
@@ -375,7 +375,7 @@ Deoptimization::UnrollBlock* Deoptimizat
       tty->print_cr("Deoptimizing method containing inlining");
     }
   }
-
+  
   array->set_unroll_block(info);
   return info;
 }
@@ -430,10 +430,8 @@ void Deoptimization::cleanup_deopt_info(
   thread->dec_in_deopt_handler();
 }
 
-
 // Return BasicType of value being returned
 JRT_LEAF(BasicType, Deoptimization::unpack_frames(JavaThread* thread, int exec_mode))
-
   // We are already active int he special DeoptResourceMark any ResourceObj's we
   // allocate will be freed at the end of the routine.
 
@@ -444,7 +442,10 @@ JRT_LEAF(BasicType, Deoptimization::unpa
   HandleMark hm;
 
   frame stub_frame = thread->last_frame();
-
+  // Re-enable the yellow zone when re-executing after stack compression.
+  if (exec_mode == Unpack_stack_compression) {
+    thread->reguard_stack();
+  }
   // Since the frame to unpack is the top frame of this thread, the vframe_array_head
   // must point to the vframeArray for the unpack frame.
   vframeArray* array = thread->vframe_array_head();
@@ -564,7 +565,7 @@ JRT_LEAF(BasicType, Deoptimization::unpa
              (iframe->interpreter_frame_expression_stack_size() == (next_mask_expression_stack_size -
                                                                     top_frame_expression_stack_adjustment))) ||
             (is_top_frame && (exec_mode == Unpack_exception) && iframe->interpreter_frame_expression_stack_size() == 0) ||
-            (is_top_frame && (exec_mode == Unpack_uncommon_trap || exec_mode == Unpack_reexecute) &&
+            (is_top_frame && (exec_mode == Unpack_uncommon_trap || exec_mode == Unpack_reexecute || exec_mode == Unpack_stack_compression) &&
              (iframe->interpreter_frame_expression_stack_size() == mask.expression_stack_size() + cur_invoke_parameter_size))
             )) {
         ttyLocker ttyl;
@@ -599,7 +600,6 @@ JRT_LEAF(BasicType, Deoptimization::unpa
     }
   }
 #endif /* !PRODUCT */
-
 
   return bt;
 JRT_END
@@ -1130,9 +1130,7 @@ JRT_ENTRY(void, Deoptimization::uncommon
     Events::log("Uncommon trap occurred @" INTPTR_FORMAT " unloaded_class_index = %d", fr.pc(), (int) trap_request);
     vframe*  vf  = vframe::new_vframe(&fr, &reg_map, thread);
     compiledVFrame* cvf = compiledVFrame::cast(vf);
-
-    nmethod* nm = cvf->code();
-
+    nmethod * nm = cvf->code();
     ScopeDesc*      trap_scope  = cvf->scope();
     methodHandle    trap_method = trap_scope->method();
     int             trap_bci    = trap_scope->bci();
@@ -1845,3 +1843,510 @@ const char* Deoptimization::format_trap_
 }
 
 #endif // COMPILER2
+
+// Stack compression support for tail calls.
+
+StackCompressor::StackCompressor(JavaThread * th) {
+  _thread = th;
+  _stack_frames = new GrowableArray<FrameInfo*>(INITIAL_ARRAY_SIZE);
+  _methods = new GrowableArray<methodHandle>(INITIAL_ARRAY_SIZE);
+  _frame_cnt = 0;
+  _did_last_frame_tail_call = false;
+  _did_compress = false;
+  _saw_bottom_frame = false;
+  _bottom_most_frame = NULL;
+  _callee_parameters = 0;
+  _is_top_of_stack_frame = true;
+}
+
+// The stack can be compressed if we encounter two consecutive frames that
+// are tail calling.
+bool StackCompressor::can_compress(JavaThread * th) {
+  RegisterMap reg_map(th, false);
+  vframe * vf = th->last_java_vframe(&reg_map);
+  javaVFrame * jf = NULL;
+  bool frame_is_tail_calling = false;
+  bool compressed = false;
+  bool prev_frame_tail_called = false;
+  assert(vf != NULL, "we should really see some java frames");
+  while(vf!=NULL) {
+    if (vf->is_java_frame()) {
+      jf = javaVFrame::cast(vf);
+      frame_is_tail_calling = is_tail_call_frame(jf);
+      if (frame_is_tail_calling && prev_frame_tail_called) {
+        compressed = true;
+        break;
+      } else if (frame_is_tail_calling) {
+        prev_frame_tail_called = true;
+      } else {
+        prev_frame_tail_called = false;
+      }
+    }
+    vf = vf->sender();
+  }
+  return compressed;
+}
+
+vframeArray * StackCompressor::compress_frames() {
+  // Handles (expressions, locals) live only until vframeArray::allocate.
+  // That is okay since the region in which a safepoint can occur ends before
+  // that point (at the end of the JRT_BLOCK).
+  HandleMark hm(_thread);
+  // If we encounter interpreted frames, a mutex is grabbed in at least
+  // frame.locals(), so this code must be able to execute in VM state.  We
+  // therefore walk the stack storing the values in handles and then build the
+  // compressed vframeArray from those handles.
+  // Phase 1: walk/compress the stack (can be done in VM state); FrameInfo
+  //          objects are used for storage.
+  // Phase 2: allocate the vframeArray holding oops without blocking/safepointing.
+  // Phase 1:
+  frame caller;
+  frame deoptee;
+  frame stub_frame; 
+  JavaThread * thread = _thread; // for JRT_BLOCK
+  {
+    RegisterMap map(thread, true);
+    stub_frame = _thread->last_frame(); // Makes stack walkable as side effect
+    deoptee = stub_frame.sender(&map);
+  }
+  JRT_BLOCK { //nice formatting
+    RegisterMap reg_map(_thread, true);
+    vframe * vf = _thread->last_java_vframe(&reg_map);
+
+    while ( vf != NULL ) {
+      if (vf->is_java_frame()) {
+        javaVFrame * jf = javaVFrame::cast(vf);
+        // Push the frame if it cannot be omitted.
+        push_frame(jf);
+        _is_top_of_stack_frame = false;
+        _callee_parameters = jf->method()->size_of_parameters();
+      } else {
+        break; // Stop compression.
+      }
+      // Get parent.
+      vf = vf->sender();
+    }
+    while (_stack_frames->pop()->java_frame() != _bottom_most_frame) {
+      // Do nothing, just pop the frames.
+    }
+   
+    // Bottommost frame of the stack (e.g., main()).
+    caller = _bottom_most_frame->fr();
+#ifndef PRODUCT
+    if (TraceDeoptimization) {
+      tty->print_cr("Dumping compressed frames");
+      dump();
+    }
+#endif
+  } JRT_BLOCK_END
+  // If no frame could be removed, return NULL.
+  if (_did_compress == false) return NULL;
+  // Size (in words) of the stack region between the deoptee's sp and the caller's unextended sp.
+  int frame_size = caller.unextended_sp() - deoptee.sp();
+  // Phase 2: allocation with handles is done non-blockingly/without a safepoint.
+  No_Safepoint_Verifier nsv;
+  const RegisterMap* caller_reg_map=_bottom_most_frame->register_map();
+  vframeArray* array = vframeArray::allocate(_thread, frame_size,
+                              _stack_frames, caller_reg_map, caller, deoptee);
+  return array;
+}
+
+bool StackCompressor::is_tail_call_frame(javaVFrame * frame) {
+  Bytecode_invoke * invoke = Bytecode_invoke_at_check(frame->method(), frame->bci());
+  if (invoke!=NULL) {
+    if (invoke->is_tailcall()) 
+      return true;
+  }
+  return false;
+}
+
+// Fix the expression stack of interpreter frames by popping the parameter
+// expressions.
+static StackValueCollection * pop_callee_params_expressions(javaVFrame * frame,
+                                                            int callee_params) {
+  // Pop the params.
+  assert (frame->is_interpreted_frame() && callee_params > 0,
+    "Must be interpreted and have params to pop.");
+  int new_expr_size = frame->expressions()->size() - callee_params;
+  StackValueCollection * expressions = new StackValueCollection(new_expr_size);
+  
+  for (int i=0; i < new_expr_size; i++)
+    expressions->add(frame->expressions()->at(i));
+  
+  return expressions;
+}
+
+static StackValueCollection * top_interpreted_frame_with_parms(javaVFrame * frame) {
+  assert(frame->is_interpreted_frame(), "Must be interpreted frame");
+  InterpreterOopMap oop_mask;
+  interpretedVFrame * ivf = interpretedVFrame::cast(frame);
+  int length = ivf->fr().interpreter_frame_expression_stack_size();
+  int nof_locals = ivf->method()->max_locals();
+  StackValueCollection * result = new StackValueCollection(length);
+  // Get the oopmap describing oops and ints for the current bci.
+  methodHandle m_h(ivf->method());
+  // Compute an oopmap that includes parameters on the expression stack.
+  OopMapCache::compute_one_oop_map(m_h, ivf->bci(), &oop_mask, true);
+  // handle expressions
+  for(int i=0; i < length; i++) {
+    // Find stack location
+    intptr_t *addr = ivf->fr().interpreter_frame_expression_stack_at(i);
+    // Depending on oop/int put it in the right package
+    StackValue *sv;
+    if (oop_mask.is_oop(i + nof_locals)) {
+      // oop value
+      Handle h(*(oop *)addr);
+      sv = new StackValue(h);
+    } else {
+      // integer
+      sv = new StackValue(*addr);
+    }
+    assert(sv != NULL, "sanity check");
+    result->add(sv);
+  }
+  return result;
+}
+
+static StackValueCollection * top_compiled_frame_with_parms(javaVFrame * frame) {
+  assert(frame->is_compiled_frame(), "Must be compiled");
+  compiledVFrame * cvf = compiledVFrame::cast(frame);
+  StackValueCollection * result = cvf->expressions();
+  SimpleScopeDesc ssd(cvf->code(), cvf->fr().pc());
+  Bytecode_invoke* call = Bytecode_invoke_at(ssd.method(), ssd.bci());
+  bool is_static = call->is_invokestatic();
+  symbolOop signature = call->signature();
+  int arg_size=0;
+  BasicType * sig_bt = NEW_RESOURCE_ARRAY( BasicType, 256);
+  // Compute signature.
+  VMRegPair *_regs = SharedRuntime::find_callee_arguments(signature, is_static, &arg_size, sig_bt);
+  StackValue * sv = NULL;
+  // Add parameters.
+  bool prev_reg = false;
+  bool prev_stack = false;
+  bool do_shuffle = false;
+  intptr_t * value_addr = NULL;
+  for (int i=0; i < arg_size; i++) {
+    VMReg reg = _regs[i].first();
+    // Compute address on stack.
+    if (reg->is_stack()) {
+      value_addr = (intptr_t*) ((intptr_t*)cvf->fr().sp() + reg->reg2stack());
+      prev_reg = false;
+      prev_stack = true;
+    } else if( reg->is_reg()) {
+      value_addr = (intptr_t*) cvf->register_map()->location(reg);
+      prev_reg = true;
+      prev_stack = false;
+    } else if(prev_stack) {
+      value_addr++;
+      prev_reg = false;
+      prev_stack = false;
+      do_shuffle = true;
+    } else if (prev_reg) {
+      value_addr++; // save slot of rcx comes before rdx
+      prev_reg = false;
+      prev_stack = false;
+      do_shuffle = true;
+    } else assert(0, "should not get here");
+    // Create stack value.
+    if (sig_bt[i]== T_OBJECT) {
+      Handle h((oop) (*value_addr));
+      sv = new StackValue(h);
+    } else {
+      sv = new StackValue(*value_addr);
+    }
+    result->add(sv);
+    if (do_shuffle) {
+      jint tmp = result->int_at(i);
+      result->set_int_at(i, result->int_at(i-1));
+      result->set_int_at(i-1, tmp);
+      do_shuffle=false;
+    }
+  }
+  return result;
+}
+
+// The top frame needs to include the parameter expressions.
+static StackValueCollection * top_frame_expressions_including_params(javaVFrame* frame) {
+  if (frame->is_interpreted_frame()) return top_interpreted_frame_with_parms(frame);
+  else return top_compiled_frame_with_parms(frame);
+}
+
+// Compresses the stack by only pushing frames that are relevant.
+void StackCompressor::push_frame(javaVFrame * frame) {
+  bool frame_tail_calls = is_tail_call_frame(frame);
+  // Store the first (bottommost) vframe of a frame.
+  if (_saw_bottom_frame) {
+    _bottom_most_frame_expr_stack_size = frame->expressions()->size();
+    _bottom_most_frame = frame;
+  }
+  _saw_bottom_frame = frame->is_top();
+  // Only push the frame if one of the following holds:
+  // * the previously seen frame did not tail call
+  // * this frame does not do a tail call
+  // * this frame tail calls but its method has not been seen yet
+  methodHandle meth(_thread, frame->method());
+  if (_did_last_frame_tail_call == false ||
+      frame_tail_calls == false ||
+      _methods->contains(meth) == false) {
+
+    StackValueCollection * expressions = _is_top_of_stack_frame ?
+      top_frame_expressions_including_params(frame) :
+      frame->expressions();
+    // Interpreted frames leave the parameters on the expression stack. The
+    // rest of the deoptimization code assumes expression stack of
+    // compiledVFrames which have the parameters popped at call sites.
+    if (frame->is_interpreted_frame() && _callee_parameters > 0) {
+      expressions = pop_callee_params_expressions(frame, _callee_parameters);
+    }
+    FrameInfo * info = new FrameInfo(meth,
+                                     frame,
+                                     frame->bci(),
+                                     frame->locals(),
+                                     expressions,
+                                     frame->monitors());
+    {
+      //No_Safepoint_Verifier ns;
+      //frame->monitors();
+      // Handle'ize  monitor objects.
+    }
+    _stack_frames->push(info);
+    _methods->push(meth);
+    _frame_cnt++;
+    // Remember whether this frame performs a tail call.
+    _did_last_frame_tail_call = frame_tail_calls;
+  } else if (frame_tail_calls) {
+    _did_compress = true;
+    _did_last_frame_tail_call = true;
+  }
+}
+
+void StackCompressor::dump() {
+#ifndef PRODUCT
+  tty->print_cr("Compressed (%d frames)", _frame_cnt);
+  for (int i=0; i < _stack_frames->length(); i++) {
+    tty->print_cr("FrameInfo:");
+    _stack_frames->at(i)->print(tty);
+    tty->print_cr("javaVFrame info:");
+    _stack_frames->at(i)->java_frame()->print();
+  }
+  tty->print_cr("Bottom frame");
+  _bottom_most_frame->method()->print_value();
+  tty->print_cr("");
+#endif
+}
+
+JRT_BLOCK_ENTRY(Deoptimization::UnrollBlock*, Deoptimization::fetch_unroll_info_stack_compression(JavaThread* thread))
+{
+  thread->inc_in_deopt_handler();
+  // Allocate our special deoptimization ResourceMark
+  DeoptResourceMark* dmark = new DeoptResourceMark(thread);
+  assert(thread->deopt_mark() == NULL, "Pending deopt!");
+  thread->set_deopt_mark(dmark);
+  
+  // No safepoint starts in the compress_frames function.
+  StackCompressor * compressor = new StackCompressor(thread); 
+  vframeArray* array = compressor->compress_frames(); 
+  if (TraceTailCalls) tty->print_cr("Compressing the stack");
+  // If we could not compress, throw a StackOverflowError.
+  if (array == NULL) {
+    delete thread->deopt_mark();
+    thread->set_deopt_mark(NULL);
+    SharedRuntime::throw_StackOverflowError(thread);
+    thread->set_do_not_unlock_if_synchronized(false);
+    return NULL;
+  }
+
+  No_Safepoint_Verifier no_safepoint;
+
+  assert(thread->vframe_array_head() == NULL, "Pending deopt!");
+  thread->set_vframe_array_head(array);
+  
+  // Now that the vframeArray has been created if we have any deferred local
+  // writes added by jvmti then we can free up that structure as the data is now
+  // in the vframeArray.
+  if (thread->deferred_locals() != NULL) {
+    tty->print_cr("deferred_locals");
+    GrowableArray<jvmtiDeferredLocalVariableSet*>* list = thread->deferred_locals();
+    int i = 0;
+    do {
+      // Because of inlining we could have multiple vframes for a single frame
+      // and several of the vframes could have deferred writes. Find them all.
+      if (list->at(i)->id() == array->original().id()) {
+        jvmtiDeferredLocalVariableSet* dlv = list->at(i);
+        list->remove_at(i);
+        // individual jvmtiDeferredLocalVariableSet are CHeapObj's
+        delete dlv;
+      } else {
+        i++;
+      }
+    } while ( i < list->length() );
+    if (list->length() == 0) {
+      thread->set_deferred_locals(NULL);
+      // free the list and elements back to C heap.
+      delete list;
+    }
+
+  }
+
+  // Compute the caller frame based on the sender sp of stub_frame and stored
+  // frame sizes info.
+  frame stub_frame = thread->last_frame();
+  RegisterMap dummy_map(thread, false);
+  CodeBlob* cb = stub_frame.cb();
+  // Verify we have the right vframeArray
+  assert(cb->frame_size() >= 0, "Unexpected frame size");
+  intptr_t* unpack_sp = stub_frame.sp() + cb->frame_size();
+#ifdef ASSERT
+  assert(cb->is_deoptimization_stub() || cb->is_uncommon_trap_stub(), "just checking");
+  Events::log("fetch unroll sp " INTPTR_FORMAT, unpack_sp);
+#endif
+  // This is a guarantee instead of an assert because if vframe doesn't match
+  // we will unpack the wrong deoptimized frame and wind up in strange places
+  // where it will be very difficult to figure out what went wrong. Better
+  // to die an early death here than some very obscure death later when the
+  // trail is cold.
+  // Note: on ia64 this guarantee can be fooled by frames with no memory stack
+  // in that it will fail to detect a problem when there is one. This needs
+  // more work in tiger timeframe.
+  guarantee(array->unextended_sp() == unpack_sp || array->sp() == unpack_sp,
+      "vframe_array_head must contain the vframeArray to unpack");
+
+  int number_of_frames = array->frames();
+
+  // Compute the vframes' sizes.  Note that frame_sizes[] entries are ordered
+  // from outermost to innermost virtual activation, which is the reverse of the
+  // elements in the vframes array.
+  intptr_t* frame_sizes = NEW_C_HEAP_ARRAY(intptr_t, number_of_frames);
+  // +1 because we always have an interpreter return address for the final slot.
+  address* frame_pcs = NEW_C_HEAP_ARRAY(address, number_of_frames + 1);
+  int callee_parameters = 0;
+  int callee_locals = 0;
+  int popframe_extra_args = 0;
+  // Create an interpreter return address for the stub to use as its return
+  // address so the skeletal frames are perfectly walkable
+  frame_pcs[number_of_frames] = Interpreter::deopt_entry(vtos, 0);
+
+  // PopFrame requires that the preserved incoming arguments from the
+  // recently-popped topmost activation be put back on the expression stack of
+  // the caller for reexecution.
+  if (JvmtiExport::can_pop_frame() &&
+      thread->popframe_forcing_deopt_reexecution()) {
+    popframe_extra_args =
+      in_words(thread->popframe_preserved_args_size_in_words());
+    tty->print_cr("adding popframe_extra_args (%d) due to can_pop_frame",
+        popframe_extra_args);
+  }
+
+  //
+  // frame_sizes/frame_pcs[0] oldest frame (int or c2i)
+  // frame_sizes/frame_pcs[1] next oldest frame (int)
+  // frame_sizes/frame_pcs[n] youngest frame (int)
+  //
+  // Now a pc in frame_pcs is actually the return address to the frame's caller
+  // (a frame owns the space for the return address to it's caller).  Confusing
+  // ain't it.
+  //
+  // The vframe array can address vframes with indices running from
+  // 0.._frames-1. Index  0 is the youngest frame and _frame - 1 is the oldest
+  // (root) frame.  When we create the skeletal frames we need the oldest frame
+  // to be in the zero slot in the frame_sizes/frame_pcs so the assembly code
+  // can do a trivial walk.
+  // so things look a little strange in this loop.
+  //
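+  // Illustrative example (not in the original comment): with three compressed
+  // frames A (oldest, e.g. main) <- B <- C (youngest), the vframe array has
+  // element(0) == C and element(2) == A, while frame_sizes/frame_pcs receive
+  // A's data at index 0 and C's at index number_of_frames - 1.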
+  for (int index = 0; index < array->frames(); index++ ) {
+    // frame[number_of_frames - 1 ] = on_stack_size(youngest)
+    // frame[number_of_frames - 2 ] = on_stack_size(sender(youngest))
+    // frame[number_of_frames - 3 ] = on_stack_size(sender(sender(youngest)))
+    frame_sizes[number_of_frames - 1 - index] = 
+      BytesPerWord * array->element(index)->on_stack_size(callee_parameters,
+                                                          callee_locals,
+                                                          index == 0,
+                                                          popframe_extra_args);
+    // This pc doesn't have to be perfect just good enough to identify the frame
+    // as interpreted so the skeleton frame will be walkable
+    // The correct pc will be set when the skeleton frame is completely filled out
+    // The final pc we store in the loop is wrong and will be overwritten below
+    frame_pcs[number_of_frames - 1 - index ] = 
+      Interpreter::deopt_entry(vtos, 0) - frame::pc_return_offset;
+
+    callee_parameters = array->element(index)->method()->size_of_parameters();
+    callee_locals = array->element(index)->method()->max_locals();
+    popframe_extra_args = 0;
+  }
+
+  // Compute whether the root vframe returns a float or double value.
+  BasicType return_type;
+  {
+    HandleMark hm;
+    methodHandle method(thread, array->element(0)->method());
+    Bytecode_invoke* invoke = Bytecode_invoke_at_check(method, array->element(0)->bci());
+    return_type = (invoke != NULL) ? invoke->result_type(thread) : T_ILLEGAL;
+  }
+
+  // Compute information for handling adapters and adjusting the frame size of the caller.
+  int caller_adjustment = 0;
+  // Find the current pc for sender of the deoptee. Since the sender may have been deoptimized
+  // itself since the deoptee vframeArray was created we must get a fresh value of the pc rather
+  // than simply use array->sender.pc(). This requires us to walk the current set of frames
+  //
+  //frame deopt_sender = stub_frame.sender(&dummy_map); // First is the deoptee frame
+  //deopt_sender = deopt_sender.sender(&dummy_map);     // Now deoptee caller
+  frame deopt_sender = compressor->bottom_most_frame()->fr();
+
+  // Compute the amount the oldest interpreter frame will have to adjust
+  // its caller's stack by. If the caller is a compiled frame then
+  // we pretend that the callee has no parameters so that the
+  // extension counts for the full amount of locals and not just
+  // locals-parms. This is because without a c2i adapter the parm
+  // area as created by the compiled frame will not be usable by
+  // the interpreter. (Depending on the calling convention there
+  // may not even be enough space).
+
+  // QQQ I'd rather see this pushed down into last_frame_adjust
+  // and have it take the sender (aka caller).
+
+  if (deopt_sender.is_compiled_frame()) {
+    caller_adjustment = last_frame_adjust(0, callee_locals);
+  } else {
+    // The caller frame may need extending to accommodate
+    // locals of the first unpacked interpreted frame.
+    // Compute that adjustment. 
+    caller_adjustment = last_frame_adjust(0, callee_locals);
+  }
+
+
+  // Set the pc of the bottommost frame (main()'s frame in many cases).
+  frame_pcs[0] = deopt_sender.pc();
+  assert(CodeCache::find_blob_unsafe(frame_pcs[0]) != NULL, "bad pc");
+
+  UnrollBlock* info = new UnrollBlock(array->frame_size() * BytesPerWord,
+                                      caller_adjustment * BytesPerWord,
+                                      number_of_frames,
+                                      frame_sizes,
+                                      frame_pcs,
+                                      return_type);
+#if defined(IA32) || defined(AMD64)
+  // We need a way to pass fp to the unpacking code so the skeletal frames
+  // come out correct. This is only needed for x86 because of c2 using ebp
+  // as an allocatable register. So this update is useless (and harmless)
+  // on the other platforms. It would be nice to do this in a different
+  // way but even the old style deoptimization had a problem with deriving
+  // this value. NEEDS_CLEANUP
+  // Note: now that c1 is using c2's deopt blob we must do this on all
+  // x86 based platforms
+  intptr_t** fp_addr = (intptr_t**) (((address)info) + info->initial_fp_offset_in_bytes());
+  *fp_addr = array->sender().fp(); // was adapter_caller
+#endif /* IA32 || AMD64 */
+
+  if (array->frames() > 1) {
+    if (VerifyStack && TraceDeoptimization) {
+      tty->print_cr("Deoptimizing method containing inlining");
+    }
+  }
+
+  array->set_unroll_block(info);
+  return info;
+}
+JRT_END
+
diff -r aa0c48844632 -r a7d54b98ca4a src/share/vm/runtime/deoptimization.hpp
--- a/src/share/vm/runtime/deoptimization.hpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/share/vm/runtime/deoptimization.hpp	Wed Jun 03 16:27:17 2009 +0200
@@ -78,7 +78,8 @@ class Deoptimization : AllStatic {
     Unpack_deopt                = 0, // normal deoptimization, use pc computed in unpack_vframe_on_stack
     Unpack_exception            = 1, // exception is pending
     Unpack_uncommon_trap        = 2, // redo last byte code (C2 only)
-    Unpack_reexecute            = 3  // reexecute bytecode (C1 only)
+    Unpack_reexecute            = 3, // reexecute bytecode (C1 only)
+    Unpack_stack_compression    = 4  // extract stack compression (like reexecute)
   };
 
   // Checks all compiled methods. Invalid methods are deleted and
@@ -180,6 +181,8 @@ class Deoptimization : AllStatic {
   // @argument thread.     Thread where stub_frame resides.
   // @see OptoRuntime::deoptimization_fetch_unroll_info_C
   static UnrollBlock* fetch_unroll_info(JavaThread* thread);
+
+  static UnrollBlock* fetch_unroll_info_stack_compression(JavaThread* thread);
 
   //** Unpacks vframeArray onto execution stack
   // Called by assembly stub after execution has returned to
@@ -346,3 +349,45 @@ public:
   ~DeoptimizationMarker() { _is_active = false; }
   static bool is_active() { return _is_active; }
 };
+
+class javaVFrame;
+class FrameInfo;
+
+class StackCompressor : public ResourceObj {
+
+private:
+  enum { INITIAL_ARRAY_SIZE = 100 };
+  JavaThread*                     _thread;
+  GrowableArray<FrameInfo*> *     _stack_frames;
+  GrowableArray<methodHandle>*    _methods;
+  javaVFrame*                     _bottom_most_frame; // Frame at the bottom of the stack (e.g., main()).
+  int _frame_cnt;
+  // Parameters of callee. Used for adjusting the expression stack of interpreted frames.
+  int _callee_parameters;
+  int _bottom_most_frame_expr_stack_size;
+  bool _did_last_frame_tail_call;
+  bool _did_compress;
+  bool _saw_bottom_frame;
+  bool _is_top_of_stack_frame;
+public:
+  StackCompressor(JavaThread * th); 
+  
+  bool did_compress() const                  { return _did_compress; }
+  javaVFrame * bottom_most_frame() const        { return _bottom_most_frame; }
+  int bottom_most_frame_expr_stack_size() const { return _bottom_most_frame_expr_stack_size; }
+  vframeArray* compress_frames(); 
+  // The stack can be compressed if we encounter two consecutive frames
+  // that are tail calling.
+  static bool can_compress(JavaThread * th); 
+private:
+
+  static bool is_tail_call_frame(javaVFrame * frame); 
+
+  // Compresses the stack by only pushing frames that are relevant.
+  void push_frame(javaVFrame * frame); 
+
+public:
+  void dump(); 
+  
+};
+
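For readability, a condensed sketch of how the StackCompressor is meant to be driven (pieced together from SharedRuntime::tail_call_handle_stack_overflow and Deoptimization::fetch_unroll_info_stack_compression elsewhere in this patch; the real code splits this across a stub and the two runtime entries and also handles the deopt ResourceMark, JVMTI deferred locals and tracing):

  // Sketch only, assuming the entry points introduced by this patch.
  void on_tail_call_stack_overflow(JavaThread* thread) {
    if (!StackCompressor::can_compress(thread)) {
      SharedRuntime::throw_StackOverflowError(thread);     // ordinary overflow path
      return;
    }
    StackCompressor* compressor = new StackCompressor(thread);
    vframeArray* array = compressor->compress_frames();    // NULL if nothing was removed
    if (array == NULL) {
      SharedRuntime::throw_StackOverflowError(thread);
      return;
    }
    thread->set_vframe_array_head(array);
    // The compressed frames are then re-laid out via Unpack_stack_compression,
    // analogous to a normal deoptimization unroll.
  }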
diff -r aa0c48844632 -r a7d54b98ca4a src/share/vm/runtime/frame.cpp
--- a/src/share/vm/runtime/frame.cpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/share/vm/runtime/frame.cpp	Wed Jun 03 16:27:17 2009 +0200
@@ -326,7 +326,10 @@ frame frame::profile_find_Java_sender_fr
 
 // Interpreter frames
 
-
+void frame::interpreter_frame_set_osr(int turn_off_OSR)  {
+  assert(is_interpreted_frame(), "Not an interpreted frame");
+  *interpreter_frame_osr_addr() = turn_off_OSR;
+}
 void frame::interpreter_frame_set_locals(intptr_t* locs)  {
   assert(is_interpreted_frame(), "Not an interpreted frame");
   *interpreter_frame_locals_addr() = locs;
@@ -1206,6 +1209,7 @@ void frame::oops_do_internal(OopClosure*
   } else if (is_entry_frame())          { oops_entry_do      (f, map);
   } else if (CodeCache::contains(pc())) { oops_code_blob_do  (f, map);
   } else {
+    tty->print_cr("frame pc: " INTPTR_FORMAT " frame sp: " INTPTR_FORMAT, pc(), sp());
     ShouldNotReachHere();
   }
 }
diff -r aa0c48844632 -r a7d54b98ca4a src/share/vm/runtime/frame.hpp
--- a/src/share/vm/runtime/frame.hpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/share/vm/runtime/frame.hpp	Wed Jun 03 16:27:17 2009 +0200
@@ -186,6 +186,7 @@ class frame VALUE_OBJ_CLASS_SPEC {
   // Interpreter frames:
 
  private:
+  int32_t *  interpreter_frame_osr_addr() const;
   intptr_t** interpreter_frame_locals_addr() const;
   intptr_t*  interpreter_frame_bcx_addr() const;
   intptr_t*  interpreter_frame_mdx_addr() const;
@@ -218,6 +219,9 @@ class frame VALUE_OBJ_CLASS_SPEC {
   intptr_t interpreter_frame_bcx() const                  { return *interpreter_frame_bcx_addr(); }
   void interpreter_frame_set_bcx(intptr_t bcx);
 
+  // OSR disabling: used by tail calls. If a value other than 0 is passed, OSR
+  // is turned off for this frame.
+  void interpreter_frame_set_osr(int turn_off_osr);
   // byte code index
   jint interpreter_frame_bci() const;
   void interpreter_frame_set_bci(jint bci);
diff -r aa0c48844632 -r a7d54b98ca4a src/share/vm/runtime/globals.hpp
--- a/src/share/vm/runtime/globals.hpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/share/vm/runtime/globals.hpp	Wed Jun 03 16:27:17 2009 +0200
@@ -3302,6 +3302,15 @@ class CommandLineFlags {
           "Skip assert() and verify() which page-in unwanted shared "       \
           "objects. ")                                                      \
                                                                             \
+  product(bool, TailCalls, false,                                           \
+          "Recognize the tail call (wide) invocation instruction prefix.")  \
+  diagnostic(bool, TraceTailCalls, false,                                   \
+          "Trace tail calls.")                                              \
+  product(intx, MinOutgoingArgStackSlotSize, 0,                             \
+          "Stack slots every Java frame reserves for outgoing arguments.")  \
+  product(bool, TailCallsStackCompression, false,                           \
+          "A protection domain mismatch causes the stack to grow when tail calling.") \
+                                                                            \
   product(bool, AnonymousClasses, false,                                    \
           "support sun.misc.Unsafe.defineAnonymousClass")                   \
                                                                             \
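With these flags in place the feature would be switched on at launch roughly as java -XX:+TailCalls ... (adding -XX:+TailCallsStackCompression for the compressing stack-overflow path); TraceTailCalls is declared diagnostic, so it additionally requires -XX:+UnlockDiagnosticVMOptions.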
diff -r aa0c48844632 -r a7d54b98ca4a src/share/vm/runtime/sharedRuntime.cpp
--- a/src/share/vm/runtime/sharedRuntime.cpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/share/vm/runtime/sharedRuntime.cpp	Wed Jun 03 16:27:17 2009 +0200
@@ -489,6 +489,12 @@ address SharedRuntime::compute_compiled_
   return nm->instructions_begin() + t->pco();
 }
 
+
+JRT_ENTRY(void, SharedRuntime::throw_TailCallException(JavaThread* thread))
+  // Occurs in tail call vtable stub.
+  throw_and_post_jvmti_exception(thread, vmSymbols::java_lang_TailCallException());
+JRT_END
+
 JRT_ENTRY(void, SharedRuntime::throw_AbstractMethodError(JavaThread* thread))
   // These errors occur only at call sites
   throw_and_post_jvmti_exception(thread, vmSymbols::java_lang_AbstractMethodError());
@@ -525,6 +531,41 @@ JRT_ENTRY(void, SharedRuntime::throw_Sta
   throw_and_post_jvmti_exception(thread, exception);
 JRT_END
 
+JRT_BLOCK_ENTRY(void, SharedRuntime::tail_call_handle_stack_overflow(JavaThread * thread)) 
+{// to get emacs to indent correctly
+
+  // We don't want a safepoint to happen here if the stack is compressible.
+  // generate_throw_exception, which ends up here, does not generate a register
+  // map, so oops (e.g., in rcx, rdx) would not be updated.
+  // Try to compress the stack here.
+  JavaThread * java_thread = JavaThread::active();
+  assert (java_thread == thread, "oops");
+  // Look at stack.
+  {
+    No_Safepoint_Verifier no_safepoint;
+    ResourceMark rm(thread);
+    if (StackCompressor::can_compress(thread)) {
+      java_thread->set_tail_call_do_stack_compression((address)1);
+      return;
+    }
+  } 
+  JRT_BLOCK {
+    // Could not compress; throw a StackOverflowError.
+    // Inline throw_StackOverflowError
+    // We avoid using the normal exception construction in this case because
+    // it performs an upcall to Java, and we're already out of stack space.
+    klassOop k = SystemDictionary::StackOverflowError_klass();
+    oop exception_oop = instanceKlass::cast(k)->allocate_instance(CHECK);
+    Handle exception (thread, exception_oop);
+    if (StackTraceInThrowable) {
+      java_lang_Throwable::fill_in_stack_trace(exception);
+    }
+    java_thread->set_tail_call_do_stack_compression((address)0);
+    throw_and_post_jvmti_exception(thread, exception);
+  } JRT_BLOCK_END
+}// to get emacs to indent correctly
+JRT_BLOCK_END
+
 address SharedRuntime::continuation_for_implicit_exception(JavaThread* thread,
                                                            address pc,
                                                            SharedRuntime::ImplicitExceptionKind exception_kind)
@@ -539,7 +580,12 @@ address SharedRuntime::continuation_for_
     switch (exception_kind) {
       case IMPLICIT_NULL:           return Interpreter::throw_NullPointerException_entry();
       case IMPLICIT_DIVIDE_BY_ZERO: return Interpreter::throw_ArithmeticException_entry();
-      case STACK_OVERFLOW:          return Interpreter::throw_StackOverflowError_entry();
+      case STACK_OVERFLOW:
+                                    if (TailCallsStackCompression==false)
+                                      return Interpreter::throw_StackOverflowError_entry();
+                                    else
+                                      return Interpreter::tail_call_handle_stack_overflow_entry();
+
       default:                      ShouldNotReachHere();
     }
 #endif // !CC_INTERP
@@ -553,6 +599,10 @@ address SharedRuntime::continuation_for_
         // For stack overflow in deoptimization blob, cleanup thread.
         if (thread->deopt_mark() != NULL) {
           Deoptimization::cleanup_deopt_info(thread, NULL);
+        }
+        if (TailCallsStackCompression) {
+          assert (NULL != StubRoutines::tail_call_handle_stack_overflow_entry(),  "oops");
+          return StubRoutines::tail_call_handle_stack_overflow_entry();
         }
         return StubRoutines::throw_StackOverflowError_entry();
       }
@@ -767,9 +817,9 @@ Handle SharedRuntime::find_callee_info_h
   // Find caller and bci from vframe
   methodHandle caller (THREAD, vfst.method());
   int          bci    = vfst.bci();
-
   // Find bytecode
   Bytecode_invoke* bytecode = Bytecode_invoke_at(caller, bci);
+  callinfo.set_tail_call(bytecode->is_tailcall());
   bc = bytecode->adjusted_invoke_code();
   int bytecode_index = bytecode->index();
 
@@ -853,9 +903,11 @@ methodHandle SharedRuntime::find_callee_
 // Resolves a call.
 methodHandle SharedRuntime::resolve_helper(JavaThread *thread,
                                            bool is_virtual,
-                                           bool is_optimized, TRAPS) {
+                                           bool is_optimized,
+                                           bool is_tail_call,
+                                           bool is_sibling_call, TRAPS) {
   methodHandle callee_method;
-  callee_method = resolve_sub_helper(thread, is_virtual, is_optimized, THREAD);
+  callee_method = resolve_sub_helper(thread, is_virtual, is_optimized, is_tail_call, is_sibling_call, THREAD);
   if (JvmtiExport::can_hotswap_or_post_breakpoint()) {
     int retry_count = 0;
     while (!HAS_PENDING_EXCEPTION && callee_method->is_old() &&
@@ -872,17 +924,48 @@ methodHandle SharedRuntime::resolve_help
       guarantee((retry_count++ < 100),
                 "Could not resolve to latest version of redefined method");
       // method is redefined in the middle of resolve so re-try.
-      callee_method = resolve_sub_helper(thread, is_virtual, is_optimized, THREAD);
+      callee_method = resolve_sub_helper(thread, is_virtual, is_optimized, is_tail_call, is_sibling_call, THREAD);
     }
   }
   return callee_method;
+}
+
+instanceKlassHandle caller_klass_from_frame(JavaThread * thread, Thread * the_thread) {
+  vframeStream vfst(thread, true);
+  assert(!vfst.at_end(), "Java frame must exist");
+  methodHandle caller(the_thread, vfst.method());
+  KlassHandle caller_klass(the_thread, caller->method_holder());
+  assert(caller_klass->oop_is_instance(), "caller should be instanceoop");
+  instanceKlassHandle caller_instance_klass(the_thread, caller_klass());
+  return caller_instance_klass;
+}
+
+// Check whether the protection domains of caller and callee are equal.
+bool SharedRuntime::protection_domains_match(JavaThread * thread,
+                                             CallInfo& call_info,
+                                             TRAPS) {
+  methodHandle callee_method = call_info.selected_method();
+  KlassHandle callee_klass(callee_method->method_holder());
+  assert(callee_klass->oop_is_instance(), "callee should be instanceoop");
+
+  instanceKlassHandle callee_instance_klass(THREAD, callee_klass());
+  instanceKlassHandle caller_instance_klass(caller_klass_from_frame(thread, THREAD));
+  
+  // Check whether the protection domains match.
+  return caller_instance_klass->protection_domain() ==
+         callee_instance_klass->protection_domain();
 }
 
 // Resolves a call.  The compilers generate code for calls that go here
 // and are patched with the real destination of the call.
 methodHandle SharedRuntime::resolve_sub_helper(JavaThread *thread,
                                            bool is_virtual,
-                                           bool is_optimized, TRAPS) {
+                                           bool is_optimized,
+                                           bool is_tail_call,
+                                           bool is_sibling_call, TRAPS) {
 
   ResourceMark rm(thread);
   RegisterMap cbl_map(thread, false);
@@ -925,6 +1008,19 @@ methodHandle SharedRuntime::resolve_sub_
   }
 #endif
 
+  // Check whether the protection domains match. Currently we throw an exception
+  // if they don't. This might change to forcing is_tail_call to false in the future.
+  bool pd_match = false;
+  if (is_tail_call) {
+    methodHandle nullHandle;
+    pd_match = protection_domains_match(thread, call_info, CHECK_(nullHandle));
+    if (pd_match == false) {
+      if (TraceTailCalls) tty->print_cr("protection domains don't match");
+      if (!TailCallsStackCompression) 
+        THROW_0(vmSymbols::java_lang_TailCallException());
+    }
+  }
+
   // Compute entry points. This might require generation of C2I converter
   // frames, so we cannot be holding any locks here. Furthermore, the
   // computation of the entry points is independent of patching the call.  We
@@ -950,11 +1046,11 @@ methodHandle SharedRuntime::resolve_sub_
     bool static_bound = call_info.resolved_method()->can_be_statically_bound();
     KlassHandle h_klass(THREAD, receiver->klass());
     CompiledIC::compute_monomorphic_entry(callee_method, h_klass,
-                     is_optimized, static_bound, virtual_call_info,
-                     CHECK_(methodHandle()));
+                     is_optimized, static_bound, is_tail_call && pd_match, is_sibling_call,
+                     virtual_call_info, CHECK_(methodHandle()));
   } else {
     // static call
-    CompiledStaticCall::compute_entry(callee_method, static_call_info);
+    CompiledStaticCall::compute_entry(callee_method, static_call_info, is_tail_call && pd_match, is_sibling_call);
   }
 
   // grab lock, check for deoptimization and potentially patch caller
@@ -973,13 +1069,23 @@ methodHandle SharedRuntime::resolve_sub_
       }
 #endif
       if (is_virtual) {
-        CompiledIC* inline_cache = CompiledIC_before(caller_frame.pc());
+        CompiledIC* inline_cache = CompiledIC_before(caller_frame.pc(), is_tail_call);
         if (inline_cache->is_clean()) {
-          inline_cache->set_to_monomorphic(virtual_call_info);
+          inline_cache->set_to_monomorphic(virtual_call_info, is_tail_call);
         }
       } else {
-        CompiledStaticCall* ssc = compiledStaticCall_before(caller_frame.pc());
-        if (ssc->is_clean()) ssc->set(static_call_info);
+        if (is_tail_call) {
+          CompiledStaticCall * ssc = compiledStaticCall_before(caller_frame.pc(), is_tail_call);
+          if (ssc->is_clean_static_tail_call())
+            ssc->set_tail_call(static_call_info);
+          else assert(false, "static tail call site expected to be clean");
+        } else {
+          CompiledStaticCall* ssc = compiledStaticCall_before(caller_frame.pc(), false);
+          // Really should remove set_tail_call path since it does not buy us anything.
+          if (ssc->is_clean() || (TailCallsStackCompression && ssc->is_clean_static_tail_call())) {
+            ssc->set(static_call_info);
+          }
+        } 
       }
     }
 
@@ -988,14 +1094,54 @@ methodHandle SharedRuntime::resolve_sub_
   return callee_method;
 }
 
+// Compute entry point for the type of a call when we are in a
+// handle_wrong_method_xxx stub.
+static address get_entry_for_tail_call_type(methodOop callee, address caller_pc, bool want_c2i_entry) {
+  address c2i_entry = NULL, verified_entry = NULL;
+  assert(NativeCall::is_call_before(caller_pc) ||
+         NativeJump::is_jump_before(caller_pc), "must be a call or jump");
+  NativeCall *call_site = NativeJump::is_jump_before(caller_pc) ?
+    (NativeCall*) nativeJump_before(caller_pc) :   
+    nativeCall_before(caller_pc);
+  CodeBlob* cb = CodeCache::find_blob(caller_pc);
+  address call_addr = call_site->instruction_address();
+  RelocIterator iter(cb, call_site->instruction_address(), call_site->next_instruction_address());
+  iter.next();
+  assert(iter.has_current(), "must have a reloc at java call site");
+
+  switch (iter.tail_call_type()) {
+  case relocInfo::not_tail_call:
+    c2i_entry = callee->get_c2i_entry();
+    verified_entry = callee->verified_code_entry();
+    break;
+  case relocInfo::sibling_tail_call_type:
+    c2i_entry = callee->get_c2i_verified_tail_call_entry();
+    verified_entry = callee->verified_tail_call_code_entry();
+    break;
+  case relocInfo::not_sibling_tail_call_type:
+    c2i_entry = callee->get_c2i_verified_not_sibling_tail_call_entry();
+    verified_entry = callee->verified_not_sibling_tail_call_code_entry();
+    break;
+  default: assert(0, "unknown tail call type"); break;
+  }
+  if (want_c2i_entry) {
+    assert(c2i_entry!=NULL, "c2i_entry not null");
+    return c2i_entry;
+  }
+  assert(verified_entry!=NULL, "verified_entry not null");
+  return verified_entry;
+}
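
The helper above simply maps the call site's recorded tail call type to one of three (c2i entry, verified entry) pairs kept per method. A minimal standalone Java sketch of that dispatch follows; the strings stand in for the entry-point addresses and all names are made up for illustration.

public class TailCallEntrySketch {

  enum TailCallType { NOT_TAIL_CALL, SIBLING_TAIL_CALL, NOT_SIBLING_TAIL_CALL }

  // Mirrors the selection above: each tail call flavour has its own pair of
  // c2i and verified entry points; the boolean picks which one the stub wants.
  static String entryFor(TailCallType type, boolean wantC2iEntry) {
    switch (type) {
      case NOT_TAIL_CALL:
        return wantC2iEntry ? "c2i_entry" : "verified_code_entry";
      case SIBLING_TAIL_CALL:
        return wantC2iEntry ? "c2i_verified_tail_call_entry"
                            : "verified_tail_call_code_entry";
      case NOT_SIBLING_TAIL_CALL:
        return wantC2iEntry ? "c2i_verified_not_sibling_tail_call_entry"
                            : "verified_not_sibling_tail_call_code_entry";
      default:
        throw new AssertionError("unknown tail call type");
    }
  }

  public static void main(String[] args) {
    for (TailCallType t : TailCallType.values()) {
      System.out.println(t + " / c2i:      " + entryFor(t, true));
      System.out.println(t + " / verified: " + entryFor(t, false));
    }
  }
}
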
 
 // Inline caches exist only in compiled code
 JRT_BLOCK_ENTRY(address, SharedRuntime::handle_wrong_method_ic_miss(JavaThread* thread))
-#ifdef ASSERT
+
   RegisterMap reg_map(thread, false);
   frame stub_frame = thread->last_frame();
+#ifdef ASSERT
   assert(stub_frame.is_runtime_frame(), "sanity check");
+#endif
   frame caller_frame = stub_frame.sender(&reg_map);
+#ifdef ASSERT
   assert(!caller_frame.is_interpreted_frame() && !caller_frame.is_entry_frame(), "unexpected frame");
 #endif /* ASSERT */
 
@@ -1007,7 +1153,8 @@ JRT_BLOCK_ENTRY(address, SharedRuntime::
   JRT_BLOCK_END
   // return compiled code entry point after potential safepoints
   assert(callee_method->verified_code_entry() != NULL, " Jump to zero!");
-  return callee_method->verified_code_entry();
+  return get_entry_for_tail_call_type(callee_method(), caller_frame.pc(), false);
+  //return callee_method->verified_code_entry();
 JRT_END
 
 
@@ -1026,12 +1173,15 @@ JRT_BLOCK_ENTRY(address, SharedRuntime::
   frame stub_frame = thread->last_frame();
   assert(stub_frame.is_runtime_frame(), "sanity check");
   frame caller_frame = stub_frame.sender(&reg_map);
+  address pc = caller_frame.pc();
+
   if (caller_frame.is_interpreted_frame() || caller_frame.is_entry_frame() ) {
     methodOop callee = thread->callee_target();
     guarantee(callee != NULL && callee->is_method(), "bad handshake");
     thread->set_vm_result(callee);
     thread->set_callee_target(NULL);
-    return callee->get_c2i_entry();
+    
+    return get_entry_for_tail_call_type(callee, pc, true);
   }
 
   // Must be compiled to compiled path which is safe to stackwalk
@@ -1043,7 +1193,8 @@ JRT_BLOCK_ENTRY(address, SharedRuntime::
   JRT_BLOCK_END
   // return compiled code entry point after potential safepoints
   assert(callee_method->verified_code_entry() != NULL, " Jump to zero!");
-  return callee_method->verified_code_entry();
+  //return callee_method->verified_code_entry();
+  return get_entry_for_tail_call_type(callee_method(), pc, false);
 JRT_END
 
 
@@ -1051,7 +1202,7 @@ JRT_BLOCK_ENTRY(address, SharedRuntime::
 JRT_BLOCK_ENTRY(address, SharedRuntime::resolve_static_call_C(JavaThread *thread ))
   methodHandle callee_method;
   JRT_BLOCK
-    callee_method = SharedRuntime::resolve_helper(thread, false, false, CHECK_NULL);
+    callee_method = SharedRuntime::resolve_helper(thread, false, false, false, false, CHECK_NULL);
     thread->set_vm_result(callee_method());
   JRT_BLOCK_END
   // return compiled code entry point after potential safepoints
@@ -1059,12 +1210,34 @@ JRT_BLOCK_ENTRY(address, SharedRuntime::
   return callee_method->verified_code_entry();
 JRT_END
 
+// resolve a static tail call and patch code
+JRT_BLOCK_ENTRY(address, SharedRuntime::resolve_static_tail_call_C(JavaThread *thread ))
+  methodHandle callee_method;
+  JRT_BLOCK
+    callee_method = SharedRuntime::resolve_helper(thread, false, false, true, true, CHECK_NULL);
+    thread->set_vm_result(callee_method());
+  JRT_BLOCK_END
+  // return compiled code entry point after potential safepoints
+  assert(callee_method->verified_tail_call_code_entry() != NULL, " Jump to zero!");
+  return callee_method->verified_tail_call_code_entry();
+JRT_END
+
+// resolve a not-sibling static tail call and patch code
+JRT_BLOCK_ENTRY(address, SharedRuntime::resolve_not_sibling_static_tail_call_C(JavaThread *thread ))
+  methodHandle callee_method;
+  JRT_BLOCK
+    callee_method = SharedRuntime::resolve_helper(thread, false, false, true, false, CHECK_NULL);
+    thread->set_vm_result(callee_method());
+  JRT_BLOCK_END
+  // return compiled code entry point after potential safepoints
+  assert(callee_method->verified_not_sibling_tail_call_code_entry() != NULL, " Jump to zero!");
+  return callee_method->verified_not_sibling_tail_call_code_entry();
+JRT_END
 
 // resolve virtual call and update inline cache to monomorphic
 JRT_BLOCK_ENTRY(address, SharedRuntime::resolve_virtual_call_C(JavaThread *thread ))
   methodHandle callee_method;
   JRT_BLOCK
-    callee_method = SharedRuntime::resolve_helper(thread, true, false, CHECK_NULL);
+    callee_method = SharedRuntime::resolve_helper(thread, true, false, false, false, CHECK_NULL);
     thread->set_vm_result(callee_method());
   JRT_BLOCK_END
   // return compiled code entry point after potential safepoints
@@ -1072,13 +1245,60 @@ JRT_BLOCK_ENTRY(address, SharedRuntime::
   return callee_method->verified_code_entry();
 JRT_END
 
+// resolve virtual tail call and update inline cache to monomorphic
+JRT_BLOCK_ENTRY(address, SharedRuntime::resolve_virtual_tail_call_C(JavaThread *thread ))
+  methodHandle callee_method;
+  JRT_BLOCK
+    callee_method = SharedRuntime::resolve_helper(thread, true, false, true, true, CHECK_NULL);
+    thread->set_vm_result(callee_method());
+  JRT_BLOCK_END
+  // return compiled code entry point after potential safepoints
+  assert(callee_method->verified_tail_call_code_entry() != NULL, " Jump to zero!");
+  return callee_method->verified_tail_call_code_entry();
+JRT_END
+
+// resolve a not-sibling virtual tail call and update inline cache to monomorphic
+JRT_BLOCK_ENTRY(address, SharedRuntime::resolve_not_sibling_virtual_tail_call_C(JavaThread *thread ))
+  methodHandle callee_method;
+  JRT_BLOCK
+    callee_method = SharedRuntime::resolve_helper(thread, true, false, true, false, CHECK_NULL);
+    thread->set_vm_result(callee_method());
+  JRT_BLOCK_END
+  // return compiled code entry point after potential safepoints
+  assert(callee_method->verified_not_sibling_tail_call_code_entry() != NULL, " Jump to zero!");
+  return callee_method->verified_not_sibling_tail_call_code_entry();
+JRT_END
+
+// Resolve a virtual tail call that can be statically bound (e.g., always
+// monomorphic, so it has no inline cache).  Patch code to resolved target.
+JRT_BLOCK_ENTRY(address, SharedRuntime::resolve_opt_virtual_tail_call_C(JavaThread *thread ))
+  methodHandle callee_method;
+  JRT_BLOCK
+    callee_method = SharedRuntime::resolve_helper(thread, true, true, true, true, CHECK_NULL);
+    thread->set_vm_result(callee_method());
+  JRT_BLOCK_END
+  // return compiled code entry point after potential safepoints
+  assert(callee_method->verified_tail_call_code_entry() != NULL, " Jump to zero!");
+  return callee_method->verified_tail_call_code_entry();
+JRT_END
+
+// Same as above, but for a tail call that is not a sibling call.
+JRT_BLOCK_ENTRY(address, SharedRuntime::resolve_opt_not_sibling_virtual_tail_call_C(JavaThread *thread ))
+  methodHandle callee_method;
+  JRT_BLOCK
+    callee_method = SharedRuntime::resolve_helper(thread, true, true, true, false, CHECK_NULL);
+    thread->set_vm_result(callee_method());
+  JRT_BLOCK_END
+  // return compiled code entry point after potential safepoints
+  assert(callee_method->verified_not_sibling_tail_call_code_entry() != NULL, " Jump to zero!");
+  return callee_method->verified_not_sibling_tail_call_code_entry();
+JRT_END
 
 // Resolve a virtual call that can be statically bound (e.g., always
 // monomorphic, so it has no inline cache).  Patch code to resolved target.
 JRT_BLOCK_ENTRY(address, SharedRuntime::resolve_opt_virtual_call_C(JavaThread *thread))
   methodHandle callee_method;
   JRT_BLOCK
-    callee_method = SharedRuntime::resolve_helper(thread, true, true, CHECK_NULL);
+    callee_method = SharedRuntime::resolve_helper(thread, true, true, false, false, CHECK_NULL);
     thread->set_vm_result(callee_method());
   JRT_BLOCK_END
   // return compiled code entry point after potential safepoints
@@ -1162,7 +1382,8 @@ methodHandle SharedRuntime::handle_ic_mi
     CodeBlob* cb = caller_frame.cb();
     if (cb->is_nmethod() && ((nmethod*)cb)->is_in_use()) {
       // Not a non-entrant nmethod, so find inline_cache
-      CompiledIC* inline_cache = CompiledIC_before(caller_frame.pc());
+      bool is_tail_call = NativeJump::is_jump_before(caller_frame.pc());
+      CompiledIC* inline_cache = CompiledIC_before(caller_frame.pc(), is_tail_call);
       bool should_be_mono = false;
       if (inline_cache->is_optimized()) {
         if (TraceCallFixup) {
@@ -1200,15 +1421,26 @@ methodHandle SharedRuntime::handle_ic_mi
         // by using a new icBuffer.
         CompiledICInfo info;
         KlassHandle receiver_klass(THREAD, receiver()->klass());
+        bool is_tail_call = inline_cache->tail_call_type() != relocInfo::not_tail_call;
+        bool is_sibling   = inline_cache->tail_call_type() == relocInfo::sibling_tail_call_type;
         inline_cache->compute_monomorphic_entry(callee_method,
                                                 receiver_klass,
                                                 inline_cache->is_optimized(),
                                                 false,
+                                                is_tail_call,
+                                                is_sibling,
                                                 info, CHECK_(methodHandle()));
-        inline_cache->set_to_monomorphic(info);
+        inline_cache->set_to_monomorphic(info, is_tail_call);
       } else if (!inline_cache->is_megamorphic() && !inline_cache->is_clean()) {
         // Change to megamorphic
-        inline_cache->set_to_megamorphic(&call_info, bc, CHECK_(methodHandle()));
+        oop protection_domain = NULL;
+        if (call_info.is_tail_call()) {
+          instanceKlassHandle callerKlass = caller_klass_from_frame(thread, THREAD);
+          // The garbage collector does not like non-perm objects in the code
+          // cache, so we pass the klass instead of the protection domain.
+          protection_domain = callerKlass();
+        }
+        inline_cache->set_to_megamorphic(&call_info, bc, protection_domain, CHECK_(methodHandle()));
       } else {
         // Either clean or megamorphic
       }
@@ -1267,11 +1499,15 @@ methodHandle SharedRuntime::reresolve_ca
       if (NativeCall::is_call_before(pc)) {
         NativeCall *ncall = nativeCall_before(pc);
         call_addr = ncall->instruction_address();
+      } else if (NativeJump::is_jump_before(pc)) {
+        NativeJump *ncall = nativeJump_before(pc);
+        call_addr = ncall->instruction_address();
       }
     }
 
     // Check for static or virtual call
     bool is_static_call = false;
+    bool is_tail_call = false;
     nmethod* caller_nm = CodeCache::find_nmethod(pc);
     // Make sure nmethod doesn't get deoptimized and removed until
     // this is done with it.
@@ -1285,10 +1521,12 @@ methodHandle SharedRuntime::reresolve_ca
         assert(iter.addr() == call_addr, "must find call");
         if (iter.type() == relocInfo::static_call_type) {
           is_static_call = true;
+          is_tail_call = iter.tail_call_type() != relocInfo::not_tail_call;
         } else {
           assert(iter.type() == relocInfo::virtual_call_type ||
                  iter.type() == relocInfo::opt_virtual_call_type
                 , "unexpected relocInfo. type");
+          is_tail_call = iter.tail_call_type() != relocInfo::not_tail_call;
         }
       } else {
         assert(!UseInlineCaches, "relocation info. must exist for this address");
@@ -1308,11 +1546,11 @@ methodHandle SharedRuntime::reresolve_ca
       //
       if (caller_nm->is_in_use()) {
         if (is_static_call) {
-          CompiledStaticCall* ssc= compiledStaticCall_at(call_addr);
+          CompiledStaticCall* ssc= compiledStaticCall_at(call_addr, is_tail_call);
           ssc->set_to_clean();
         } else {
           // compiled, dispatched call (which used to call an interpreted method)
-          CompiledIC* inline_cache = CompiledIC_at(call_addr);
+          CompiledIC* inline_cache = CompiledIC_at(call_addr, is_tail_call);
           inline_cache->set_to_clean();
         }
       }
@@ -1347,6 +1585,8 @@ IRT_LEAF(void, SharedRuntime::fixup_call
   methodOop moop(method);
 
   address entry_point = moop->from_compiled_entry();
+  address tail_call_entry_point = moop->from_compiled_tail_call_entry();
+  address not_sibling_tail_call_entry_point = moop->from_compiled_not_sibling_tail_call_entry();
 
   // It's possible that deoptimization can occur at a call site which hasn't
   // been resolved yet, in which case this function will be called from
@@ -1360,7 +1600,9 @@ IRT_LEAF(void, SharedRuntime::fixup_call
   //
 
   CodeBlob* cb = CodeCache::find_blob(caller_pc);
-  if ( !cb->is_nmethod() || entry_point == moop->get_c2i_entry()) {
+  if ( !cb->is_nmethod() || entry_point == moop->get_c2i_entry() || 
+       tail_call_entry_point == moop->get_c2i_verified_tail_call_entry() ||
+       not_sibling_tail_call_entry_point == moop->get_c2i_verified_not_sibling_tail_call_entry()) {
     return;
   }
 
@@ -1376,11 +1618,19 @@ IRT_LEAF(void, SharedRuntime::fixup_call
   if (moop->code() == NULL) return;
 
   if (((nmethod*)cb)->is_in_use()) {
-
+     
     // Expect to find a native call there (unless it was no-inline cache vtable dispatch)
     MutexLockerEx ml_patch(Patching_lock, Mutex::_no_safepoint_check_flag);
-    if (NativeCall::is_call_before(caller_pc + frame::pc_return_offset)) {
-      NativeCall *call = nativeCall_before(caller_pc + frame::pc_return_offset);
+    address call_addr = caller_pc + frame::pc_return_offset;
+    bool is_a_call = NativeCall::is_call_before(call_addr);
+    NativeCall * call = NULL;
+    if (!is_a_call && NativeJump::is_jump_before(call_addr)) {
+      is_a_call = true;
+      call = (NativeCall*) nativeJump_before(call_addr);
+    } else if (is_a_call) {
+      call = nativeCall_before(call_addr);
+    }
+    if (is_a_call) {
       //
       // bug 6281185. We might get here after resolving a call site to a vanilla
       // virtual call. Because the resolvee uses the verified entry it may then
@@ -1400,8 +1650,38 @@ IRT_LEAF(void, SharedRuntime::fixup_call
            typ != relocInfo::static_stub_type) {
         return;
       }
+      // In the case of static_stub_type we get the info from the corresponding
+      // call site.
+      if (typ == relocInfo::static_stub_type) {
+        static_stub_Relocation* r = iter.static_stub_reloc();
+        NativeCall* call_site = nativeCall_at(r->static_call());
+        iter = RelocIterator(cb, call_site->instruction_address(), call_site->next_instruction_address());
+        iter.next();
+        assert(iter.has_current(), "must have a reloc at java call site");
+        typ = iter.reloc()->type();
+      }
+
+      // Get the tail call type.
+      relocInfo::tailCallType tail_call_type = iter.tail_call_type();
+
+      // Get the new entry point.
+      address new_entry_point = NULL;
+      switch (tail_call_type) {
+      case relocInfo::not_tail_call:
+        new_entry_point = entry_point; break;
+      case relocInfo::sibling_tail_call_type:
+        new_entry_point = tail_call_entry_point; break;
+      case relocInfo::not_sibling_tail_call_type:
+        new_entry_point = not_sibling_tail_call_entry_point; break;
+      default: guarantee(0, "should not get here"); break;
+      }
+
       address destination = call->destination();
-      if (destination != entry_point) {
+      if (destination != new_entry_point) {
         CodeBlob* callee = CodeCache::find_blob(destination);
         // callee == cb seems weird. It means calling interpreter thru stub.
         if (callee == cb || callee->is_adapter_blob()) {
@@ -1409,14 +1689,16 @@ IRT_LEAF(void, SharedRuntime::fixup_call
           if (TraceCallFixup) {
             tty->print("fixup callsite at " INTPTR_FORMAT " to compiled code for", caller_pc);
             moop->print_short_name(tty);
-            tty->print_cr(" to " INTPTR_FORMAT, entry_point);
+            tty->print_cr(" to " INTPTR_FORMAT, new_entry_point);
+            tty->print_cr("tail call type %d (0=no,1=sibling,2=general)", tail_call_type);
           }
-          call->set_destination_mt_safe(entry_point);
+          call->set_destination_mt_safe(new_entry_point);
         } else {
           if (TraceCallFixup) {
             tty->print("failed to fixup callsite at " INTPTR_FORMAT " to compiled code for", caller_pc);
             moop->print_short_name(tty);
-            tty->print_cr(" to " INTPTR_FORMAT, entry_point);
+            tty->print_cr(" to " INTPTR_FORMAT, new_entry_point);
+            tty->print_cr("tail call type %d (0=no,1=sibling,2=general)", tail_call_type);
           }
           // assert is too strong could also be resolve destinations.
           // assert(InlineCacheBuffer::contains(destination) || VtableStubs::contains(destination), "must be");
@@ -1425,7 +1707,7 @@ IRT_LEAF(void, SharedRuntime::fixup_call
           if (TraceCallFixup) {
             tty->print("already patched  callsite at " INTPTR_FORMAT " to compiled code for", caller_pc);
             moop->print_short_name(tty);
-            tty->print_cr(" to " INTPTR_FORMAT, entry_point);
+            tty->print_cr(" to " INTPTR_FORMAT, new_entry_point);
           }
       }
     }
@@ -1801,7 +2083,7 @@ void AdapterHandlerLibrary::initialize()
   _fingerprints->append(0/*the never-allowed 0 fingerprint*/);
   assert(_handlers->length() == AbstractMethodHandler, "in wrong slot");
   _handlers->append(new AdapterHandlerEntry(StubRoutines::throw_AbstractMethodError_entry(),
-                                            wrong_method, wrong_method));
+                                            wrong_method, wrong_method, wrong_method, wrong_method, wrong_method, wrong_method, wrong_method));
 }
 
 int AdapterHandlerLibrary::get_create_adapter_index(methodHandle method) {
@@ -1963,6 +2245,11 @@ void AdapterHandlerEntry::relocate(addre
     _i2c_entry += delta;
     _c2i_entry += delta;
     _c2i_unverified_entry += delta;
+    _c2i_verified_tail_call_entry += delta;
+    _c2i_unverified_tail_call_entry += delta;
+    _c2i_entry_skip_fixup += delta;
+    _c2i_verified_not_sibling_tail_call_entry += delta;
+    _c2i_unverified_not_sibling_tail_call_entry += delta;
 }
 
 // Create a native wrapper for this native method.  The wrapper converts the
@@ -2135,7 +2422,7 @@ VMReg SharedRuntime::name_for_receiver()
   return regs.first();
 }
 
-VMRegPair *SharedRuntime::find_callee_arguments(symbolOop sig, bool is_static, int* arg_size) {
+VMRegPair *SharedRuntime::find_callee_arguments(symbolOop sig, bool is_static, int* arg_size, BasicType *sig_bt) {
   // This method is returning a data structure allocating as a
   // ResourceObject, so do not put any ResourceMarks in here.
   char *s = sig->as_C_string();
@@ -2144,7 +2431,9 @@ VMRegPair *SharedRuntime::find_callee_ar
   char *t = s+len;
   while( *(--t) != ')' ) ;      // Find close paren
 
-  BasicType *sig_bt = NEW_RESOURCE_ARRAY( BasicType, 256 );
+  if (sig_bt == NULL)
+    sig_bt = NEW_RESOURCE_ARRAY( BasicType, 256 );
   VMRegPair *regs = NEW_RESOURCE_ARRAY( VMRegPair, 256 );
   int cnt = 0;
   if (!is_static) {
diff -r aa0c48844632 -r a7d54b98ca4a src/share/vm/runtime/sharedRuntime.hpp
--- a/src/share/vm/runtime/sharedRuntime.hpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/share/vm/runtime/sharedRuntime.hpp	Wed Jun 03 16:27:17 2009 +0200
@@ -33,9 +33,16 @@ class vframeStream;
 
 class SharedRuntime: AllStatic {
  private:
+  static bool protection_domains_match(JavaThread * thread,
+                                       CallInfo& call_info,
+                                       TRAPS);
+
   static methodHandle resolve_sub_helper(JavaThread *thread,
                                      bool is_virtual,
-                                     bool is_optimized, TRAPS);
+                                     bool is_optimized,
+                                     bool is_tail_call,
+                                     bool is_sibling_call,
+                                     TRAPS);
 
   // Shared stub locations
 
@@ -44,7 +51,12 @@ class SharedRuntime: AllStatic {
   static RuntimeStub* _resolve_opt_virtual_call_blob;
   static RuntimeStub* _resolve_virtual_call_blob;
   static RuntimeStub* _resolve_static_call_blob;
-
+  static RuntimeStub* _resolve_static_tail_call_blob;
+  static RuntimeStub* _resolve_not_sibling_static_tail_call_blob;
+  static RuntimeStub* _resolve_virtual_tail_call_blob;
+  static RuntimeStub* _resolve_not_sibling_virtual_tail_call_blob;
+  static RuntimeStub* _resolve_opt_virtual_tail_call_blob;
+  static RuntimeStub* _resolve_opt_not_sibling_virtual_tail_call_blob;
   static SafepointBlob* _polling_page_safepoint_handler_blob;
   static SafepointBlob* _polling_page_return_handler_blob;
 #ifdef COMPILER2
@@ -62,7 +74,8 @@ class SharedRuntime: AllStatic {
 
   // max bytes for each dtrace string parameter
   enum { max_dtrace_string_size = 256 };
-
+  // Tail call support: slots used on top of the stack for the protection domain token.
+  static int tail_call_protection_domain_slots();
   // The following arithmetic routines are used on platforms that do
   // not have machine instructions to implement their functionality.
   // Do not remove these.
@@ -119,6 +132,11 @@ class SharedRuntime: AllStatic {
   static void    throw_NullPointerException(JavaThread* thread);
   static void    throw_NullPointerException_at_call(JavaThread* thread);
   static void    throw_StackOverflowError(JavaThread* thread);
+  static void    throw_TailCallException(JavaThread* thread);
+  // Lazy tail call optimization. Compress the stack or throw a
+  // StackOverflowError.
+  static void tail_call_handle_stack_overflow(JavaThread * thread);
+
   static address continuation_for_implicit_exception(JavaThread* thread,
                                                      address faulting_pc,
                                                      ImplicitExceptionKind exception_kind);
@@ -154,6 +172,30 @@ class SharedRuntime: AllStatic {
     return _resolve_static_call_blob->instructions_begin();
   }
 
+  static address get_resolve_static_tail_call_stub() {
+    assert(_resolve_static_tail_call_blob != NULL, "oops");
+    return _resolve_static_tail_call_blob->instructions_begin();
+  }
+  static address get_resolve_not_sibling_static_tail_call_stub() {
+    assert(_resolve_not_sibling_static_tail_call_blob != NULL, "oops");
+    return _resolve_not_sibling_static_tail_call_blob->instructions_begin();
+  }
+  static address get_resolve_virtual_tail_call_stub() {
+    assert(_resolve_virtual_tail_call_blob != NULL, "oops");
+    return _resolve_virtual_tail_call_blob->instructions_begin();
+  }
+  static address get_resolve_not_sibling_virtual_tail_call_stub() {
+    assert(_resolve_not_sibling_virtual_tail_call_blob != NULL, "oops");
+    return _resolve_not_sibling_virtual_tail_call_blob->instructions_begin();
+  }
+  static address get_resolve_opt_virtual_tail_call_stub() {
+    assert(_resolve_opt_virtual_tail_call_blob != NULL, "oops");
+    return _resolve_opt_virtual_tail_call_blob->instructions_begin();
+  }
+  static address get_resolve_opt_not_sibling_virtual_tail_call_stub() {
+    assert(_resolve_opt_not_sibling_virtual_tail_call_blob != NULL, "oops");
+    return _resolve_opt_not_sibling_virtual_tail_call_blob->instructions_begin();
+  }
   static SafepointBlob* polling_page_return_handler_blob()     { return _polling_page_return_handler_blob; }
   static SafepointBlob* polling_page_safepoint_handler_blob()  { return _polling_page_safepoint_handler_blob; }
 
@@ -253,7 +295,10 @@ class SharedRuntime: AllStatic {
   // compiled code.
   static methodHandle resolve_helper(JavaThread *thread,
                                      bool is_virtual,
-                                     bool is_optimized, TRAPS);
+                                     bool is_optimized, 
+                                     bool is_tail_call,
+                                     bool is_sibling_call,
+                                     TRAPS);
 
   static void generate_stubs(void);
 
@@ -357,7 +402,9 @@ class SharedRuntime: AllStatic {
 
   // Convert a sig into a calling convention register layout
   // and find interesting things about it.
-  static VMRegPair* find_callee_arguments(symbolOop sig, bool is_static, int *arg_size);
+  // Pass a BasicType array of size 256 (!) if you are interested in the argument types.
+  // This is currently used by stack compression when tail calling.
+  static VMRegPair* find_callee_arguments(symbolOop sig, bool is_static, int *arg_size, BasicType * sig_bt=NULL);
   static VMReg     name_for_receiver();
 
   // "Top of Stack" slots that may be unused by the calling convention but must
@@ -407,8 +454,14 @@ class SharedRuntime: AllStatic {
 
   // Resolving of calls
   static address resolve_static_call_C     (JavaThread *thread);
+  static address resolve_static_tail_call_C     (JavaThread *thread);
+  static address resolve_not_sibling_static_tail_call_C (JavaThread *thread);
   static address resolve_virtual_call_C    (JavaThread *thread);
+  static address resolve_virtual_tail_call_C(JavaThread *thread);
+  static address resolve_not_sibling_virtual_tail_call_C(JavaThread *thread);
   static address resolve_opt_virtual_call_C(JavaThread *thread);
+  static address resolve_opt_virtual_tail_call_C(JavaThread* thread);
+  static address resolve_opt_not_sibling_virtual_tail_call_C(JavaThread* thread);
 
   // arraycopy, the non-leaf version.  (See StubRoutines for all the leaf calls.)
   static void slow_arraycopy_C(oopDesc* src,  jint src_pos,
@@ -533,21 +586,36 @@ class AdapterHandlerEntry : public CHeap
   address _i2c_entry;
   address _c2i_entry;
   address _c2i_unverified_entry;
+  address _c2i_verified_tail_call_entry;
+  address _c2i_unverified_tail_call_entry;
+  address _c2i_entry_skip_fixup;
+  address _c2i_verified_not_sibling_tail_call_entry;
+  address _c2i_unverified_not_sibling_tail_call_entry;
 
  public:
 
   // The name we give all buffer blobs
   static const char* name;
 
-  AdapterHandlerEntry(address i2c_entry, address c2i_entry, address c2i_unverified_entry):
+  AdapterHandlerEntry(address i2c_entry, address c2i_entry, address c2i_unverified_entry, address c2i_verified_tail_call_entry, address c2i_unverified_tail_call_entry, address c2i_entry_skip_fixup, address c2i_verified_not_sibling_tail_call_entry, address c2i_unverified_not_sibling_tail_call_entry):
     _i2c_entry(i2c_entry),
     _c2i_entry(c2i_entry),
-    _c2i_unverified_entry(c2i_unverified_entry) {
+    _c2i_unverified_entry(c2i_unverified_entry),
+    _c2i_verified_tail_call_entry(c2i_verified_tail_call_entry),
+    _c2i_unverified_tail_call_entry(c2i_unverified_tail_call_entry),
+    _c2i_entry_skip_fixup(c2i_entry_skip_fixup),
+    _c2i_verified_not_sibling_tail_call_entry(c2i_verified_not_sibling_tail_call_entry),
+    _c2i_unverified_not_sibling_tail_call_entry(c2i_unverified_not_sibling_tail_call_entry) {
   }
 
   address get_i2c_entry()            { return _i2c_entry; }
   address get_c2i_entry()            { return _c2i_entry; }
   address get_c2i_unverified_entry() { return _c2i_unverified_entry; }
+  address get_c2i_verified_tail_call_entry() { return _c2i_verified_tail_call_entry; }
+  address get_c2i_unverified_tail_call_entry() { return _c2i_unverified_tail_call_entry; }
+  address get_c2i_entry_skip_fixup() { return _c2i_entry_skip_fixup; }
+  address get_c2i_verified_not_sibling_tail_call_entry() { return _c2i_verified_not_sibling_tail_call_entry; }
+  address get_c2i_unverified_not_sibling_tail_call_entry() { return _c2i_unverified_not_sibling_tail_call_entry; }
 
   void relocate(address new_base);
 #ifndef PRODUCT
diff -r aa0c48844632 -r a7d54b98ca4a src/share/vm/runtime/stubRoutines.cpp
--- a/src/share/vm/runtime/stubRoutines.cpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/share/vm/runtime/stubRoutines.cpp	Wed Jun 03 16:27:17 2009 +0200
@@ -45,7 +45,9 @@ address StubRoutines::_throw_NullPointer
 address StubRoutines::_throw_NullPointerException_entry         = NULL;
 address StubRoutines::_throw_NullPointerException_at_call_entry = NULL;
 address StubRoutines::_throw_StackOverflowError_entry           = NULL;
+address StubRoutines::_throw_TailCallException_entry            = NULL;
 address StubRoutines::_handler_for_unsafe_access_entry          = NULL;
+address StubRoutines::_tail_call_handle_stack_overflow_entry    = NULL;
 jint    StubRoutines::_verify_oop_count                         = 0;
 address StubRoutines::_verify_oop_subroutine_entry              = NULL;
 address StubRoutines::_atomic_xchg_entry                        = NULL;
diff -r aa0c48844632 -r a7d54b98ca4a src/share/vm/runtime/stubRoutines.hpp
--- a/src/share/vm/runtime/stubRoutines.hpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/share/vm/runtime/stubRoutines.hpp	Wed Jun 03 16:27:17 2009 +0200
@@ -89,7 +89,9 @@ class StubRoutines: AllStatic {
   static address _throw_NullPointerException_entry;
   static address _throw_NullPointerException_at_call_entry;
   static address _throw_StackOverflowError_entry;
+  static address _throw_TailCallException_entry;
   static address _handler_for_unsafe_access_entry;
+  static address _tail_call_handle_stack_overflow_entry;
 
   static address _atomic_xchg_entry;
   static address _atomic_xchg_ptr_entry;
@@ -190,7 +192,8 @@ class StubRoutines: AllStatic {
   static address throw_NullPointerException_entry()        { return _throw_NullPointerException_entry; }
   static address throw_NullPointerException_at_call_entry(){ return _throw_NullPointerException_at_call_entry; }
   static address throw_StackOverflowError_entry()          { return _throw_StackOverflowError_entry; }
-
+  static address throw_TailCallException_entry()           { return _throw_TailCallException_entry; }
+  static address tail_call_handle_stack_overflow_entry()   { return _tail_call_handle_stack_overflow_entry; }
   // Exceptions during unsafe access - should throw Java exception rather
   // than crash.
   static address handler_for_unsafe_access()               { return _handler_for_unsafe_access_entry; }
diff -r aa0c48844632 -r a7d54b98ca4a src/share/vm/runtime/thread.cpp
--- a/src/share/vm/runtime/thread.cpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/share/vm/runtime/thread.cpp	Wed Jun 03 16:27:17 2009 +0200
@@ -1165,6 +1165,7 @@ void JavaThread::initialize() {
   _blocked_on_compilation = false;
   _jni_active_critical = 0;
   _do_not_unlock_if_synchronized = false;
+  _disable_osr_for_frame = false;
   _cached_monitor_info = NULL;
   _parker = Parker::Allocate(this) ;
 
diff -r aa0c48844632 -r a7d54b98ca4a src/share/vm/runtime/thread.hpp
--- a/src/share/vm/runtime/thread.hpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/share/vm/runtime/thread.hpp	Wed Jun 03 16:27:17 2009 +0200
@@ -727,6 +727,9 @@ class JavaThread: public Thread {
   //  is shortly thereafter set false
   volatile bool _is_attaching;
 
+  volatile bool _disable_osr_for_frame; // Tail calls sometimes want to disable
+                                        // OSR for one interpreter function
+                                        // (frame).
  public:
   // State of the stack guard pages for this thread.
   enum StackGuardState {
@@ -746,6 +749,9 @@ class JavaThread: public Thread {
   volatile address _exception_pc;                // PC where exception happened
   volatile address _exception_handler_pc;        // PC for handler of exception
   volatile int     _exception_stack_size;        // Size of frame where exception happened
+
+  // Tail call stack compression support.
+  volatile address _tail_call_do_stack_compression;
 
   // support for compilation
   bool    _is_compiling;                         // is true if a compilation is active inthis thread (one compilation per thread possible)
@@ -1093,6 +1099,10 @@ class JavaThread: public Thread {
   void set_exception_handler_pc(address a)       { _exception_handler_pc = a; }
   void set_exception_stack_size(int size)        { _exception_stack_size = size; }
 
+  // Tail call stack compression support
+  void set_tail_call_do_stack_compression(address a) { _tail_call_do_stack_compression = a; }
+  address tail_call_do_stack_compression() const { return _tail_call_do_stack_compression; }
+
   // Stack overflow support
   inline size_t stack_available(address cur_sp);
   address stack_yellow_zone_base()
@@ -1167,8 +1177,11 @@ class JavaThread: public Thread {
   static ByteSize exception_stack_size_offset()  { return byte_offset_of(JavaThread, _exception_stack_size); }
   static ByteSize stack_guard_state_offset()     { return byte_offset_of(JavaThread, _stack_guard_state   ); }
   static ByteSize suspend_flags_offset()         { return byte_offset_of(JavaThread, _suspend_flags       ); }
+  static ByteSize tail_call_do_stack_compression_offset() { return byte_offset_of(JavaThread, _tail_call_do_stack_compression); }
 
   static ByteSize do_not_unlock_if_synchronized_offset() { return byte_offset_of(JavaThread, _do_not_unlock_if_synchronized); }
+  
+  static ByteSize disable_osr_for_frame_offset() { return byte_offset_of(JavaThread, _disable_osr_for_frame); }
 
 #ifndef SERIALGC
   static ByteSize satb_mark_queue_offset()       { return byte_offset_of(JavaThread, _satb_mark_queue); }
diff -r aa0c48844632 -r a7d54b98ca4a src/share/vm/runtime/vframeArray.cpp
--- a/src/share/vm/runtime/vframeArray.cpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/share/vm/runtime/vframeArray.cpp	Wed Jun 03 16:27:17 2009 +0200
@@ -181,6 +181,10 @@ void vframeArrayElement::unpack_on_stack
   JvmtiThreadState *state = thread->jvmti_thread_state();
     if (JvmtiExport::can_pop_frame() &&
         (thread->has_pending_popframe() || thread->popframe_forcing_deopt_reexecution())) {
+      if (exec_mode == Deoptimization::Unpack_stack_compression) {
+        // TBD: popframe during stack compression is not expected/handled yet.
+        ShouldNotReachHere();
+      }
       if (thread->has_pending_popframe()) {
         // Pop top frame after deoptimization
 #ifndef CC_INTERP
@@ -222,6 +226,7 @@ void vframeArrayElement::unpack_on_stack
         break;
       case Deoptimization::Unpack_uncommon_trap:
       case Deoptimization::Unpack_reexecute:
+      case Deoptimization::Unpack_stack_compression:
         // redo last byte code
         pc  = Interpreter::deopt_entry(vtos, 0);
         use_next_mdp = false;
@@ -263,10 +268,25 @@ void vframeArrayElement::unpack_on_stack
     BasicObjectLock* src = _monitors->at(index);
     top->set_obj(src->obj());
     src->lock()->move_to(src->obj(), top->lock());
+    // Frame's maximum sp moved down the stack. Adjust the owner to point at the
+    // current frame's lightweight lock stack pointer.
+    if (exec_mode == Deoptimization::Unpack_stack_compression) {
+      if (src->lock()->displaced_header() != NULL) {
+        if (src->obj()->mark()->has_monitor()) {
+          ObjectMonitor * mon = src->obj()->mark()->monitor();
+          assert(mon!=NULL, "oops");
+          if (src->lock()->displaced_header()!=markOopDesc::unused_mark()) {
+            mon->set_owner(top->lock());
+          }
+        }
+      }
+    }
   }
   if (ProfileInterpreter) {
     iframe()->interpreter_frame_set_mdx(0); // clear out the mdp.
   }
+  iframe()->interpreter_frame_set_osr(0); // 0=default OSR state: do OSR if
+                                          // DoOnStackReplacement is on.
   iframe()->interpreter_frame_set_bcx((intptr_t)bcp); // cannot use bcp because frame is not initialized yet
   if (ProfileInterpreter) {
     methodDataOop mdo = method()->method_data();
@@ -416,6 +436,181 @@ int vframeArrayElement::on_stack_size(in
                                       is_top_frame);
 }
 
+// Tail call stack compression support
+
+#ifndef PRODUCT
+void FrameInfo::print(outputStream* st) {
+  st->print("FrameInfo method: ");
+  _method->name()->print_symbol_on(st);
+  st->print(" (bci: %i, ", _bci);
+  st->print("locals: %i, expressions: %i, monitors: %i\n", _locals->size(), _expressions->size(), _monitors->length());
+
+  st->print("  locals: ");
+  for (int i=0; i<_locals->size(); i++) {
+    StackValue* v = _locals->at(i);
+    switch (v->type()) {
+    case T_INT:
+      st->print("%08x ", v->get_int());
+      break;
+    case T_OBJECT:
+      //st->print("OBJ(%08x) ", v->get_int(T_OBJECT));
+      st->print("OBJ(%08x) ", v->get_obj()());
+      break;
+    case T_CONFLICT:
+      st->print("CONFLICT(0)");
+      break;
+    default:
+      tty->print_cr("type : %d", v->type());
+      ShouldNotReachHere();
+    }
+  }
+
+  st->print("\n  expressions: ");
+  for (int i=0; i<_expressions->size(); i++) {
+    StackValue* v = _expressions->at(i);
+    switch (v->type()) {
+    case T_INT:
+      st->print("%08x ", v->get_int());
+      break;
+    case T_OBJECT:
+      //st->print("OBJ(%08x) ", v->get_int(T_OBJECT));
+      st->print("OBJ(%08x) ", v->get_obj()());
+      break;
+    case T_CONFLICT:
+      st->print("CONFLICT(0)");
+      break;
+    default:
+      ShouldNotReachHere();
+    }
+  }
+  st->print("\n  monitors: ");
+  for (int i=0; i<_monitors->length(); i++) {
+    MonitorInfo* v = _monitors->at(i);
+    st->print("%08x ", v->owner());
+  }
+  st->print("\n");
+}
+#endif /* PRODUCT */
+
+#ifdef ASSERT
+void FrameInfo::verify() {
+  // TODO add useful assertions
+}
+#endif /* ASSERT */
+
+// Copy a StackValueCollection replacing handles by their oop pointers.
+static StackValueCollection * copy_stack_value_collection(StackValueCollection * svc) {
+  StackValueCollection * result = new StackValueCollection(svc->size());
+  for(int index = 0; index < svc->size(); index++) {
+    StackValue* value = svc->at(index);
+    switch(value->type()) {
+      case T_OBJECT:
+        // preserve object type
+        result->add( new StackValue((intptr_t) (value->get_obj()()), T_OBJECT ));
+        break;
+      case T_CONFLICT:
+        // A dead stack element.  Will be initialized to null/zero.
+        // This can occur when the compiler emits a state in which stack
+        // elements are known to be dead (because of an imminent exception).
+        result->add( new StackValue());
+        break;
+      case T_INT:
+        result->add( new StackValue(value->get_int()));
+        break;
+      default:
+        ShouldNotReachHere();
+    }
+  }
+  return result;
+}
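+
+// For orientation, the copy above can be modelled by the following standalone
+// Java sketch (names are illustrative stand-ins, not VM identifiers): object
+// slots keep their reference, dead conflict slots become zero-initialized,
+// int slots are copied verbatim.
+//
+//   import java.util.ArrayList;
+//   import java.util.Arrays;
+//   import java.util.List;
+//
+//   public class StackValueCopySketch {
+//     static final int T_INT = 0, T_OBJECT = 1, T_CONFLICT = 2;
+//
+//     static class Slot {
+//       final int type; final int intValue; final Object objValue;
+//       Slot(int type, int intValue, Object objValue) {
+//         this.type = type; this.intValue = intValue; this.objValue = objValue;
+//       }
+//     }
+//
+//     // Mirrors copy_stack_value_collection above.
+//     static List<Slot> copy(List<Slot> src) {
+//       List<Slot> result = new ArrayList<Slot>(src.size());
+//       for (Slot s : src) {
+//         switch (s.type) {
+//           case T_OBJECT:   result.add(new Slot(T_OBJECT, 0, s.objValue)); break;
+//           case T_CONFLICT: result.add(new Slot(T_INT, 0, null));          break;
+//           case T_INT:      result.add(new Slot(T_INT, s.intValue, null)); break;
+//           default: throw new IllegalStateException("unexpected slot type");
+//         }
+//       }
+//       return result;
+//     }
+//
+//     public static void main(String[] args) {
+//       List<Slot> frame = Arrays.asList(new Slot(T_INT, 42, null),
+//                                        new Slot(T_OBJECT, 0, "receiver"),
+//                                        new Slot(T_CONFLICT, 0, null));
+//       System.out.println(copy(frame).size());  // prints 3
+//     }
+//   }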
+
+void vframeArrayElement::fill_in(FrameInfo* frame_info) {
+
+  // copy the info from the FrameInfo object - it has to be in the correct format (object locals and expressions as ints)
+  _method = frame_info->method()();
+  _bci    = frame_info->bci();
+  _locals = copy_stack_value_collection(frame_info->locals());
+  _expressions = copy_stack_value_collection(frame_info->expressions());
+
+  // reacquire the monitors of the stack frame
+  GrowableArray<MonitorInfo*>* list = frame_info->monitors();
+  if (list->is_empty()) {
+    _monitors = NULL;
+  } else {
+    int index;
+    JavaThread* thread = JavaThread::current();
+    assert(frame_info->java_frame()->thread() == thread, "frame must belong to the current thread");
+    // Allocate monitor chunk
+    _monitors = new MonitorChunk(list->length());
+    thread->add_monitor_chunk(_monitors);
+
+    // Migrate the BasicLocks from the stack to the monitor chunk
+    for (index = 0; index < list->length(); index++) {
+      MonitorInfo* monitor = list->at(index);
+      assert(monitor->owner()->is_oop(), "must be a real oop");
+      assert(monitor->owner() == NULL || (!monitor->owner()->is_unlocked() && !monitor->owner()->has_bias_pattern()), "object must be null or locked, and unbiased");
+      BasicObjectLock* dest = _monitors->at(index);
+      dest->set_obj(monitor->owner());
+      monitor->lock()->move_to(monitor->owner(), dest->lock());
+    }
+  }
+}
+
+vframeArray* vframeArray::allocate(JavaThread* thread, int frame_size, GrowableArray<FrameInfo*>* frame_infos,
+                                   const RegisterMap * reg_map, frame sender, frame self) {
+
+  // Allocate the vframeArray
+  vframeArray * result = (vframeArray*) AllocateHeap(sizeof(vframeArray) + // fixed part
+                                                     sizeof(vframeArrayElement) * (frame_infos->length() - 1), // variable part
+                                                     "vframeArray::allocate");
+  result->_frames = frame_infos->length();
+  result->_owner_thread = thread;
+  result->_sender = sender;
+  result->_caller = self;
+  result->_original = self;
+  result->set_unroll_block(NULL); // initialize it
+
+  result->_frame_size = frame_size;
+  
+  for(int i = 0; i < frame_infos->length(); i++) {
+    result->element(i)->fill_in(frame_infos->at(i));
+  }
+
+  // Copy registers for callee-saved registers
+  if (reg_map != NULL) {
+    for(int i = 0; i < RegisterMap::reg_count; i++) {
+#ifdef AMD64
+      // The register map has one entry for every int (32-bit value), so
+      // 64-bit physical registers have two entries in the map, one for
+      // each half.  Ignore the high halves of 64-bit registers, just like
+      // frame::oopmapreg_to_location does.
+      //
+      // [phh] FIXME: this is a temporary hack!  This code *should* work
+      // correctly w/o this hack, possibly by changing RegisterMap::pd_location
+      // in frame_amd64.cpp and the values of the phantom high half registers
+      // in amd64.ad.
+      //      if (VMReg::Name(i) < SharedInfo::stack0 && is_even(i)) {
+        intptr_t* src = (intptr_t*) reg_map->location(VMRegImpl::as_VMReg(i));
+        result->_callee_registers[i] = src != NULL ? *src : NULL_WORD;
+        //      } else {
+        //      jint* src = (jint*) reg_map->location(VMReg::Name(i));
+        //      _callee_registers[i] = src != NULL ? *src : NULL_WORD;
+        //      }
+#else
+      jint* src = (jint*) reg_map->location(VMRegImpl::as_VMReg(i));
+      result->_callee_registers[i] = src != NULL ? *src : NULL_WORD;
+#endif
+      if (src == NULL) {
+        result->set_location_valid(i, false);
+      } else {
+        result->set_location_valid(i, true);
+        jint* dst = (jint*) result->register_location(i);
+        *dst = *src;
+      }
+    }
+  }
+  return result;
+}
 
 
 vframeArray* vframeArray::allocate(JavaThread* thread, int frame_size, GrowableArray<compiledVFrame*>* chunk,
@@ -502,7 +697,6 @@ void vframeArray::unpack_to_stack(frame 
   }
 
   frame caller_frame = me;
-
   // Do the unpacking of interpreter frames; the frame at index 0 represents the top activation, so it has no callee
 
   // Unpack the frames from the oldest (frames() -1) to the youngest (0)
@@ -574,7 +768,7 @@ void vframeArray::print_on_2(outputStrea
 }
 
 void vframeArrayElement::print(outputStream* st) {
-  st->print_cr(" - interpreter_frame -> sp: ", INTPTR_FORMAT, iframe()->sp());
+  st->print_cr(" - interpreter_frame -> sp: " INTPTR_FORMAT, iframe()->sp());
 }
 
 void vframeArray::print_value_on(outputStream* st) const {
diff -r aa0c48844632 -r a7d54b98ca4a src/share/vm/runtime/vframeArray.hpp
--- a/src/share/vm/runtime/vframeArray.hpp	Thu May 14 10:57:58 2009 -0700
+++ b/src/share/vm/runtime/vframeArray.hpp	Wed Jun 03 16:27:17 2009 +0200
@@ -33,6 +33,47 @@ class MonitorStackClosure;
 class MonitorStackClosure;
 class MonitorArrayElement;
 class StackValueCollection;
+
+// Represents the current state of one stack frame. Used for stack compression.
+class FrameInfo : public ResourceObj {
+private:
+  methodHandle                 _method;
+  int                          _bci;
+  StackValueCollection*        _locals;
+  StackValueCollection*        _expressions;
+  GrowableArray<MonitorInfo*>* _monitors;
+  javaVFrame*                  _jframe;
+public:
+
+  FrameInfo(methodHandle method, javaVFrame * jframe, int bci, StackValueCollection* locals, StackValueCollection* expressions, GrowableArray<MonitorInfo*>* monitors) {
+    _method = method;
+    _bci = bci;
+    _locals = locals;
+    _expressions = expressions;
+    _monitors = monitors;
+    _jframe = jframe;
+  }
+
+  methodHandle method() const                   { return _method; }
+
+  javaVFrame * java_frame() const               { return _jframe; }
+  
+  int bci() const                               { return _bci; }
+
+  StackValueCollection* locals() const          { return _locals; }
+
+  StackValueCollection* expressions() const     { return _expressions; }
+
+  GrowableArray<MonitorInfo*>* monitors() const { return _monitors; }
+
+#ifndef PRODUCT
+  void print(outputStream* st);
+#endif /* PRODUCT */
+
+#ifdef ASSERT
+  void verify();
+#endif /* ASSERT */
+};
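+
+// FrameInfo is the per-frame snapshot that stack compression captures before
+// the physical frames are discarded and rebuilt as fresh interpreter frames.
+// A rough, illustrative-only Java sketch of that capture/rebuild cycle (all
+// names are made up; the real work happens in vframeArrayElement::fill_in and
+// vframeArray::unpack_to_stack):
+//
+//   import java.util.ArrayDeque;
+//   import java.util.Arrays;
+//   import java.util.Deque;
+//   import java.util.List;
+//
+//   public class StackCompressionSketch {
+//     static class FrameSnapshot {
+//       final String method; final int bci; final List<Object> locals;
+//       FrameSnapshot(String method, int bci, List<Object> locals) {
+//         this.method = method; this.bci = bci; this.locals = locals;
+//       }
+//     }
+//
+//     public static void main(String[] args) {
+//       // 1. Capture the interpreter-visible state of the frames that are
+//       //    about to be thrown away (youngest pushed last).
+//       Deque<FrameSnapshot> snapshots = new ArrayDeque<FrameSnapshot>();
+//       snapshots.push(new FrameSnapshot("TestMega.tailCaller", 12, Arrays.<Object>asList("this", 1)));
+//       snapshots.push(new FrameSnapshot("TestMega.tailCallee", 4,  Arrays.<Object>asList("this", 2)));
+//
+//       // 2. Rebuild, oldest frame first, youngest last, mirroring
+//       //    vframeArray::unpack_to_stack which unpacks frames() - 1 down to 0.
+//       while (!snapshots.isEmpty()) {
+//         FrameSnapshot f = snapshots.pollLast();
+//         System.out.println("re-create interpreter frame for " + f.method + " at bci " + f.bci);
+//       }
+//     }
+//   }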
 
 // A vframeArrayElement is an element of a vframeArray. Each element
 // represent an interpreter frame which will eventually be created.
@@ -66,6 +107,7 @@ class vframeArrayElement : public _Value
   StackValueCollection* expressions(void) const        { return _expressions; }
 
   void fill_in(compiledVFrame* vf);
+  void fill_in(FrameInfo* frame_info);
 
   // Formerly part of deoptimizedVFrame
 
@@ -145,6 +187,10 @@ class vframeArray: public CHeapObj {
   static vframeArray* allocate(JavaThread* thread, int frame_size, GrowableArray<compiledVFrame*>* chunk,
                                RegisterMap* reg_map, frame sender, frame caller, frame self);
 
+  // Tail call stack compression.
+  static vframeArray* allocate(JavaThread* thread, int frame_size, GrowableArray<FrameInfo*>* chunk,
+                               const RegisterMap* reg_map, frame caller, frame self); 
+  //void fill_in_compression(JavaThread* thread, int frame_size, GrowableArray<javaVFrame*>* chunk, const RegisterMap *reg_map);
 
   vframeArrayElement* element(int index)        { assert(is_within_bounds(index), "Bad index"); return &_elements[index]; }
 
diff -r aa0c48844632 -r a7d54b98ca4a test/tailcalltests/CompilerTestDouble.java
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/tailcalltests/CompilerTestDouble.java	Wed Jun 03 16:27:17 2009 +0200
@@ -0,0 +1,56 @@
+ /* @test
+ * @run main/othervm -XX:+TailCalls -Xcomp  CompilerTestDouble
+ * @summary Tests tail call involving doubles in compiler.
+ */
+public class CompilerTestDouble {
+
+  private String foo;
+  private int i;
+
+  public CompilerTestDouble() {
+    foo = "Hello World";
+    i = 50;
+  }
+
+  public double test2tailcall(double d, int a, int c) {
+    if (c<=0) return d;
+    return testtailcall(a+c, c*2, c-1, d);
+  }
+
+
+  public double testtailcall(int a, int b, int c, double d) {
+    return test2tailcall(d+1, a+b, c-1);
+  }
+  
+  public double test2(double d, int a, int c) {
+    if (c<=0) return d;
+    return test(a+c, c*2, c-1, d);
+  }
+
+
+  public double test(int a, int b, int c, double d) {
+    return test2(d+1, a+b, c-1);
+  }
+
+
+  public double printFoobartailcall() {
+    synchronized(this) {
+      System.out.println(foo);
+    }
+    return testtailcall(1,2,5000,(double)4.0);
+  }
+
+  public double printFoobar() {
+    synchronized(this) {
+      System.out.println(foo);
+    }
+    return test(1,2,5000,(double)4.0);
+  }
+
+  public static void main(String args[]) {
+    CompilerTestDouble a = new CompilerTestDouble();
+    double v = a.printFoobar();
+    double vtailcall = a.printFoobartailcall();
+    if (v!=vtailcall) System.exit(1);
+  }
+}
diff -r aa0c48844632 -r a7d54b98ca4a test/tailcalltests/CompilerTestMegamorphic.java
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/tailcalltests/CompilerTestMegamorphic.java	Wed Jun 03 16:27:17 2009 +0200
@@ -0,0 +1,90 @@
+/* @test
+ * @run main/othervm -XX:+TailCalls -Xcomp  CompilerTestMegamorphic
+ * @summary Tests megamorphic tail call in compiler.
+ */
+public class CompilerTestMegamorphic {
+  
+  static void assertEqual(int a, int b) {
+    if (a!=b) {
+      System.out.println("Error " +a + " != " + b);
+      System.exit(1);
+    }
+  }
+
+  static class TestMegaSub extends TestMega {
+    int field;
+
+    public TestMegaSub() {
+      field = 1;
+    }
+
+    public int tailCallee(int cnt, int val, TestMega target, TestMega base, TestMega sub, Integer a) {
+      TestMega t = null;
+      if (cnt==0)
+        return val;
+
+      if (cnt%3==0) t = base;
+      else t = sub;
+      assertEqual(val, a);
+      return goto target.tailCaller(cnt-1, val+1, t, base, sub, a+1);
+    }
+
+    public int tailCaller(int cnt, int val, TestMega target, TestMega base, TestMega sub, Integer a) {
+      TestMega t = null;
+      if (cnt==0)
+        return val;
+
+      if (cnt%3==0) t = base;
+      else t = sub;
+      assertEqual(val, a);
+      return goto target.tailCallee(cnt-1, val+1, t, base, sub, a+1);
+    }
+
+  }
+  static class TestMega {
+    int field;
+
+    public TestMega() {
+      field = 1;
+    }
+
+    public int tailCallee(int cnt, int val, TestMega target, TestMega base, TestMega sub, Integer a) {
+      TestMega t = null;
+      if (cnt==0)
+        return val;
+
+      if (cnt%5==0) t = base;
+      else t = sub;
+      
+      assertEqual(val, a);
+      return goto target.tailCaller(cnt-1, val+1, t, base, sub, a+1);
+    }
+
+    public int tailCaller(int cnt, int val, TestMega target, TestMega base, TestMega sub, Integer a) {
+      TestMega t = null;
+      if (cnt==0)
+        return val;
+
+      if (cnt%5==0) t = base;
+      else t = sub;
+
+      assertEqual(val, a);
+      return goto target.tailCallee(cnt-1, val+1, t, base, sub, a+1);
+    }
+
+  }  
+  public static void main(String args[]) {
+    int down = 5000;
+    TestMega a = new TestMega();
+    TestMegaSub b = new TestMegaSub();
+
+    int result = a.tailCaller(down, 0, b, a, b, 0);
+
+    if (result==down) {
+      System.out.println("Success");
+    }else {
+      System.out.println("Failure");
+    }
+  }
+
+}
diff -r aa0c48844632 -r a7d54b98ca4a test/tailcalltests/CompilerTestMegamorphicC2I.java
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/tailcalltests/CompilerTestMegamorphicC2I.java	Wed Jun 03 16:27:17 2009 +0200
@@ -0,0 +1,91 @@
+/* @test
+ * @run main/othervm -XX:CompileOnly=.tailCallee -XX:+TailCalls -Xcomp  CompilerTestMegamorphicC2I
+ * @summary Tests compiled-to-interpreted megamorphic tail call in compiler.
+ */
+
+public class CompilerTestMegamorphicC2I {
+  
+  static void assertEqual(int a, int b) {
+    if (a!=b) {
+      System.out.println("Error " +a + " != " + b);
+      System.exit(1);
+    }
+  }
+
+  static class TestMegaSub extends TestMega {
+    int field;
+
+    public TestMegaSub() {
+      field = 1;
+    }
+
+    public int tailCallee(int cnt, int val, TestMega target, TestMega base, TestMega sub, Integer a) {
+      TestMega t = null;
+      if (cnt==0)
+        return val;
+
+      if (cnt%3==0) t = base;
+      else t = sub;
+      assertEqual(val, a);
+      return goto target.tailCaller(cnt-1, val+1, t, base, sub, a+1);
+    }
+
+    public int tailCaller(int cnt, int val, TestMega target, TestMega base, TestMega sub, Integer a) {
+      TestMega t = null;
+      if (cnt==0)
+        return val;
+
+      if (cnt%3==0) t = base;
+      else t = sub;
+      assertEqual(val, a);
+      return goto target.tailCallee(cnt-1, val+1, t, base, sub, a+1);
+    }
+
+  }
+  static class TestMega {
+    int field;
+
+    public TestMega() {
+      field = 1;
+    }
+
+    public int tailCallee(int cnt, int val, TestMega target, TestMega base, TestMega sub, Integer a) {
+      TestMega t = null;
+      if (cnt==0)
+        return val;
+
+      if (cnt%5==0) t = base;
+      else t = sub;
+      
+      assertEqual(val, a);
+      return goto target.tailCaller(cnt-1, val+1, t, base, sub, a+1);
+    }
+
+    public int tailCaller(int cnt, int val, TestMega target, TestMega base, TestMega sub, Integer a) {
+      TestMega t = null;
+      if (cnt==0)
+        return val;
+
+      if (cnt%5==0) t = base;
+      else t = sub;
+
+      assertEqual(val, a);
+      return goto target.tailCallee(cnt-1, val+1, t, base, sub, a+1);
+    }
+
+  }  
+  public static void main(String args[]) {
+    int down = 5000;
+    TestMega a = new TestMega();
+    TestMegaSub b = new TestMegaSub();
+
+    int result = a.tailCaller(down, 0, b, a, b, 0);
+
+    if (result==down) {
+      System.out.println("Success");
+    }else {
+      System.out.println("Failure");
+    }
+  }
+
+}
diff -r aa0c48844632 -r a7d54b98ca4a test/tailcalltests/CompilerTestMegamorphicInterface.java
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/tailcalltests/CompilerTestMegamorphicInterface.java	Wed Jun 03 16:27:17 2009 +0200
@@ -0,0 +1,95 @@
+/* @test
+ * @run main/othervm -XX:+TailCalls -Xcomp  CompilerTestMegamorphicInterface
+ * @summary Tests megamorphic interface tail call in compiler.
+ */
+
+public class CompilerTestMegamorphicInterface {
+  static void assertEqual(int a, int b) {
+    if (a!=b) {
+      System.out.println("Error " +a + " != " + b);
+      System.exit(1);
+    }
+  }
+
+  static interface IFace {
+    public int tailCallee(int cnt, int val, IFace target, IFace base, IFace sub, Integer a);
+    public int tailCaller(int cnt, int val, IFace target, IFace base, IFace sub, Integer a);
+  }
+
+  static class TestMegaSub implements IFace {
+    int field;
+
+    public TestMegaSub() {
+      field = 1;
+    }
+
+    public int tailCallee(int cnt, int val, IFace target, IFace base, IFace sub, Integer a) {
+      IFace t = null;
+      if (cnt==0)
+        return val;
+
+      if (cnt%3==0) t = base;
+      else t = sub;
+      assertEqual(val, a);
+      return goto target.tailCaller(cnt-1, val+1, t, base, sub, a+1);
+    }
+
+    public int tailCaller(int cnt, int val, IFace target, IFace base, IFace sub, Integer a) {
+      IFace t = null;
+      if (cnt==0)
+        return val;
+
+      if (cnt%3==0) t = base;
+      else t = sub;
+      assertEqual(val, a);
+      return goto target.tailCallee(cnt-1, val+1, t, base, sub, a+1);
+    }
+
+  }
+  static class TestMega implements IFace {
+    int field;
+
+    public TestMega() {
+      field = 1;
+    }
+
+    public int tailCallee(int cnt, int val, IFace target, IFace base, IFace sub, Integer a) {
+      IFace t = null;
+      if (cnt==0)
+        return val;
+
+      if (cnt%5==0) t = base;
+      else t = sub;
+      
+      assertEqual(val, a);
+      return goto target.tailCaller(cnt-1, val+1, t, base, sub, a+1);
+    }
+
+    public int tailCaller(int cnt, int val, IFace target, IFace base, IFace sub, Integer a) {
+      IFace t = null;
+      if (cnt==0)
+        return val;
+
+      if (cnt%5==0) t = base;
+      else t = sub;
+
+      assertEqual(val, a);
+      return goto target.tailCallee(cnt-1, val+1, t, base, sub, a+1);
+    }
+
+  }  
+  public static void main(String args[]) {
+    int down = 5000;
+    IFace a = new TestMega();
+    IFace b = new TestMegaSub();
+
+    int result = a.tailCaller(down, 0, b, a, b, 0);
+
+    if (result==down) {
+      System.out.println("Success");
+    } else {
+      System.out.println("Failure");
+      System.exit(1);
+    }
+  }
+
+}
diff -r aa0c48844632 -r a7d54b98ca4a test/tailcalltests/CompilerTestMegamorphicInterfaceC2I.java
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/tailcalltests/CompilerTestMegamorphicInterfaceC2I.java	Wed Jun 03 16:27:17 2009 +0200
@@ -0,0 +1,95 @@
+/* @test
+ * @run main/othervm -XX:CompileOnly=.tailCaller -XX:+TailCalls -Xcomp  CompilerTestMegamorphicInterfaceC2I
+ * @summary Tests megamorphic interface tail call in compiler.
+ */
+
+public class CompilerTestMegamorphicInterfaceC2I {
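+  // Note (annotation, not in the original test): -XX:CompileOnly=.tailCaller compiles only the
+  // tailCaller methods, so each tail call is expected to cross from compiled code into the
+  // interpreter (C2I transition).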
+  static void assertEqual(int a, int b) {
+    if (a!=b) {
+      System.out.println("Error " +a + " != " + b);
+      System.exit(1);
+    }
+  }
+
+  static interface IFace {
+    public int tailCallee(int cnt, int val, IFace target, IFace base, IFace sub, Integer a);
+    public int tailCaller(int cnt, int val, IFace target, IFace base, IFace sub, Integer a);
+  }
+
+  static class TestMegaSub implements IFace {
+    int field;
+
+    public TestMegaSub() {
+      field = 1;
+    }
+
+    public int tailCallee(int cnt, int val, IFace target, IFace base, IFace sub, Integer a) {
+      IFace t = null;
+      if (cnt==0)
+        return val;
+
+      if (cnt%3==0) t = base;
+      else t = sub;
+      assertEqual(val, a);
+      return goto target.tailCaller(cnt-1, val+1, t, base, sub, a+1);
+    }
+
+    public int tailCaller(int cnt, int val, IFace target, IFace base, IFace sub, Integer a) {
+      IFace t = null;
+      if (cnt==0)
+        return val;
+
+      if (cnt%3==0) t = base;
+      else t = sub;
+      assertEqual(val, a);
+      return goto target.tailCallee(cnt-1, val+1, t, base, sub, a+1);
+    }
+
+  }
+  static class TestMega implements IFace {
+    int field;
+
+    public TestMega() {
+      field = 1;
+    }
+
+    public int tailCallee(int cnt, int val, IFace target, IFace base, IFace sub, Integer a) {
+      IFace t = null;
+      if (cnt==0)
+        return val;
+
+      if (cnt%5==0) t = base;
+      else t = sub;
+      
+      assertEqual(val, a);
+      return goto target.tailCaller(cnt-1, val+1, t, base, sub, a+1);
+    }
+
+    public int tailCaller(int cnt, int val, IFace target, IFace base, IFace sub, Integer a) {
+      IFace t = null;
+      if (cnt==0)
+        return val;
+
+      if (cnt%5==0) t = base;
+      else t = sub;
+
+      assertEqual(val, a);
+      return goto target.tailCallee(cnt-1, val+1, t, base, sub, a+1);
+    }
+
+  }  
+  public static void main(String args[]) {
+    int down = 5000;
+    IFace a = new TestMega();
+    IFace b = new TestMegaSub();
+
+    int result = a.tailCaller(down, 0, b, a, b, 0);
+
+    if (result==down) {
+      System.out.println("Success");
+    } else {
+      System.out.println("Failure");
+      System.exit(1);
+    }
+  }
+
+}
diff -r aa0c48844632 -r a7d54b98ca4a test/tailcalltests/CompilerTestMegamorphicInterfaceNotSib.java
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/tailcalltests/CompilerTestMegamorphicInterfaceNotSib.java	Wed Jun 03 16:27:17 2009 +0200
@@ -0,0 +1,95 @@
+/* @test
+ * @run main/othervm -XX:+TailCalls -Xcomp  CompilerTestMegamorphicInterfaceNotSib
+ * @summary Tests non-sibling megamorphic interface tail call in compiler.
+ */
+
+public class CompilerTestMegamorphicInterfaceNotSib {
+  static void assertEqual(int a, int b) {
+    if (a!=b) {
+      System.out.println("Error " +a + " != " + b);
+      System.exit(1);
+    }
+  }
+
+  static interface IFace {
+    public int tailCallee(int cnt, int val, IFace target, IFace base, IFace sub, Integer a, Integer b, Integer c);
+    public int tailCaller(int cnt, int val, IFace target, IFace base, IFace sub, Integer a);
+  }
+
+  static class TestMegaSub implements IFace {
+    int field;
+
+    public TestMegaSub() {
+      field = 1;
+    }
+
+    public int tailCallee(int cnt, int val, IFace target, IFace base, IFace sub, Integer a, Integer b, Integer c) {
+      IFace t = null;
+      if (cnt==0)
+        return val;
+
+      if (cnt%3==0) t = base;
+      else t = sub;
+      assertEqual(val, a);
+      return goto target.tailCaller(cnt-1, val+1, t, base, sub, a+1);
+    }
+
+    public int tailCaller(int cnt, int val, IFace target, IFace base, IFace sub, Integer a) {
+      IFace t = null;
+      if (cnt==0)
+        return val;
+
+      if (cnt%3==0) t = base;
+      else t = sub;
+      assertEqual(val, a);
+      return goto target.tailCallee(cnt-1, val+1, t, base, sub, a+1, a+1, a+1);
+    }
+
+  }
+  static class TestMega implements IFace {
+    int field;
+
+    public TestMega() {
+      field = 1;
+    }
+
+    public int tailCallee(int cnt, int val, IFace target, IFace base, IFace sub, Integer a, Integer b, Integer c) {
+      IFace t = null;
+      if (cnt==0)
+        return val;
+
+      if (cnt%5==0) t = base;
+      else t = sub;
+      
+      assertEqual(val, a);
+      return goto target.tailCaller(cnt-1, val+1, t, base, sub, b+1);
+    }
+
+    public int tailCaller(int cnt, int val, IFace target, IFace base, IFace sub, Integer a) {
+      IFace t = null;
+      if (cnt==0)
+        return val;
+
+      if (cnt%5==0) t = base;
+      else t = sub;
+
+      assertEqual(val, a);
+      return goto target.tailCallee(cnt-1, val+1, t, base, sub, a+1, a+1, a+1);
+    }
+
+  }  
+  public static void main(String args[]) {
+    int down = 5000;
+    IFace a = new TestMega();
+    IFace b = new TestMegaSub();
+
+    int result = a.tailCaller(down, 0, b, a, b, 0);
+
+    if (result==down) {
+      System.out.println("Success");
+    } else {
+      System.out.println("Failure");
+      System.exit(1);
+    }
+  }
+
+}
diff -r aa0c48844632 -r a7d54b98ca4a test/tailcalltests/CompilerTestMegamorphicInterfaceNotSibC2I.java
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/tailcalltests/CompilerTestMegamorphicInterfaceNotSibC2I.java	Wed Jun 03 16:27:17 2009 +0200
@@ -0,0 +1,95 @@
+/* @test
+ * @run main/othervm -XX:CompileOnly=.tailCaller -XX:+TailCalls -Xcomp  CompilerTestMegamorphicInterfaceNotSibC2I
+ * @summary Tests non-sibling megamorphic interface tail call in compiler.
+ */
+
+public class CompilerTestMegamorphicInterfaceNotSibC2I {
+  static void assertEqual(int a, int b) {
+    if (a!=b) {
+      System.out.println("Error " +a + " != " + b);
+      System.exit(1);
+    }
+  }
+
+  static interface IFace {
+    public int tailCallee(int cnt, int val, IFace target, IFace base, IFace sub, Integer a, Integer b, Integer c);
+    public int tailCaller(int cnt, int val, IFace target, IFace base, IFace sub, Integer a);
+  }
+
+  static class TestMegaSub implements IFace {
+    int field;
+
+    public TestMegaSub() {
+      field = 1;
+    }
+
+    public int tailCallee(int cnt, int val, IFace target, IFace base, IFace sub, Integer a, Integer b, Integer c) {
+      IFace t = null;
+      if (cnt==0)
+        return val;
+
+      if (cnt%3==0) t = base;
+      else t = sub;
+      assertEqual(val, a);
+      return goto target.tailCaller(cnt-1, val+1, t, base, sub, a+1);
+    }
+
+    public int tailCaller(int cnt, int val, IFace target, IFace base, IFace sub, Integer a) {
+      IFace t = null;
+      if (cnt==0)
+        return val;
+
+      if (cnt%3==0) t = base;
+      else t = sub;
+      assertEqual(val, a);
+      return goto target.tailCallee(cnt-1, val+1, t, base, sub, a+1, a+1, a+1);
+    }
+
+  }
+  static class TestMega implements IFace {
+    int field;
+
+    public TestMega() {
+      field = 1;
+    }
+
+    public int tailCallee(int cnt, int val, IFace target, IFace base, IFace sub, Integer a, Integer b, Integer c) {
+      IFace t = null;
+      if (cnt==0)
+        return val;
+
+      if (cnt%5==0) t = base;
+      else t = sub;
+      
+      assertEqual(val, a);
+      return goto target.tailCaller(cnt-1, val+1, t, base, sub, b+1);
+    }
+
+    public int tailCaller(int cnt, int val, IFace target, IFace base, IFace sub, Integer a) {
+      IFace t = null;
+      if (cnt==0)
+        return val;
+
+      if (cnt%5==0) t = base;
+      else t = sub;
+
+      assertEqual(val, a);
+      return goto target.tailCallee(cnt-1, val+1, t, base, sub, a+1, a+1, a+1);
+    }
+
+  }  
+  public static void main(String args[]) {
+    int down = 5000;
+    IFace a = new TestMega();
+    IFace b = new TestMegaSub();
+
+    int result = a.tailCaller(down, 0, b, a, b, 0);
+
+    if (result==down) {
+      System.out.println("Success");
+    } else {
+      System.out.println("Failure");
+      System.exit(1);
+    }
+  }
+
+}
diff -r aa0c48844632 -r a7d54b98ca4a test/tailcalltests/CompilerTestMegamorphicNotSib.java
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/tailcalltests/CompilerTestMegamorphicNotSib.java	Wed Jun 03 16:27:17 2009 +0200
@@ -0,0 +1,93 @@
+/* @test
+ * @run main/othervm -XX:+TailCalls -Xcomp  CompilerTestMegamorphicNotSib
+ * @summary Tests non-sibling megamorphic tail call in compiler.
+ */
+
+public class CompilerTestMegamorphicNotSib {
+  
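+  // Annotation: "NotSib" here means the callee takes more arguments than the caller, so this is
+  // not a "sibling" tail call (the callee presumably needs a larger outgoing argument area).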
+  static void assertEqual(int a, int b) {
+    if (a!=b) {
+      System.out.println("Error " +a + " != " + b);
+      System.exit(1);
+    }
+  }
+
+  static class TestMegaSub extends TestMega {
+    int field;
+
+    public TestMegaSub() {
+      field = 1;
+    }
+
+    public int tailCallee(int cnt, int val, TestMega target, TestMega base, TestMega sub, Integer a, Integer b, Integer c) {
+      TestMega t = null;
+      Integer tmp = cnt%3==0 ? a: b;
+      if (cnt==0)
+        return val;
+
+      if (cnt%3==0) t = base;
+      else t = sub;
+      assertEqual(val, a);
+      return goto target.tailCaller(cnt-1, val+1, t, base, sub, tmp+1);
+    }
+
+    public int tailCaller(int cnt, int val, TestMega target, TestMega base, TestMega sub, Integer a) {
+      TestMega t = null;
+      if (cnt==0)
+        return val;
+
+      if (cnt%3==0) t = base;
+      else t = sub;
+      assertEqual(val, a);
+      return goto target.tailCallee(cnt-1, val+1, t, base, sub, a+1, a+1, a+1);
+    }
+
+  }
+  static class TestMega {
+    int field;
+
+    public TestMega() {
+      field = 1;
+    }
+
+    public int tailCallee(int cnt, int val, TestMega target, TestMega base, TestMega sub, Integer a, Integer b, Integer c) {
+      TestMega t = null;
+      Integer tmp = cnt%3==0 ? a: b;
+      if (cnt==0)
+        return val;
+
+      if (cnt%5==0) t = base;
+      else t = sub;
+      
+      assertEqual(val, a);
+      return goto target.tailCaller(cnt-1, val+1, t, base, sub, tmp+1);
+    }
+
+    public int tailCaller(int cnt, int val, TestMega target, TestMega base, TestMega sub, Integer a) {
+      TestMega t = null;
+      if (cnt==0)
+        return val;
+
+      if (cnt%5==0) t = base;
+      else t = sub;
+
+      assertEqual(val, a);
+      return goto target.tailCallee(cnt-1, val+1, t, base, sub, a+1, a+1, a+1);
+    }
+
+  }  
+  public static void main(String args[]) {
+    int down = 5000;
+    TestMega a = new TestMega();
+    TestMegaSub b = new TestMegaSub();
+
+    int result = a.tailCaller(down, 0, b, a, b, 0);
+
+    if (result==down) {
+      System.out.println("Success");
+    } else {
+      System.out.println("Failure");
+      System.exit(1);
+    }
+  }
+
+}
diff -r aa0c48844632 -r a7d54b98ca4a test/tailcalltests/CompilerTestMegamorphicNotSibC2I.java
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/tailcalltests/CompilerTestMegamorphicNotSibC2I.java	Wed Jun 03 16:27:17 2009 +0200
@@ -0,0 +1,93 @@
+/* @test
+ * @run main/othervm -XX:CompileOnly=.tailCallee -XX:+TailCalls -Xcomp  CompilerTestMegamorphicNotSibC2I
+ * @summary Tests non-sibling megamorphic tail call in compiler.
+ */
+
+public class CompilerTestMegamorphicNotSibC2I {
+  
+  static void assertEqual(int a, int b) {
+    if (a!=b) {
+      System.out.println("Error " +a + " != " + b);
+      System.exit(1);
+    }
+  }
+
+  static class TestMegaSub extends TestMega {
+    int field;
+
+    public TestMegaSub() {
+      field = 1;
+    }
+
+    public int tailCallee(int cnt, int val, TestMega target, TestMega base, TestMega sub, Integer a, Integer b, Integer c) {
+      TestMega t = null;
+      Integer tmp = cnt%3==0 ? a: b;
+      if (cnt==0)
+        return val;
+
+      if (cnt%3==0) t = base;
+      else t = sub;
+      assertEqual(val, a);
+      return goto target.tailCaller(cnt-1, val+1, t, base, sub, tmp+1);
+    }
+
+    public int tailCaller(int cnt, int val, TestMega target, TestMega base, TestMega sub, Integer a) {
+      TestMega t = null;
+      if (cnt==0)
+        return val;
+
+      if (cnt%3==0) t = base;
+      else t = sub;
+      assertEqual(val, a);
+      return goto target.tailCallee(cnt-1, val+1, t, base, sub, a+1, a+1, a+1);
+    }
+
+  }
+  static class TestMega {
+    int field;
+
+    public TestMega() {
+      field = 1;
+    }
+
+    public int tailCallee(int cnt, int val, TestMega target, TestMega base, TestMega sub, Integer a, Integer b, Integer c) {
+      TestMega t = null;
+      Integer tmp = cnt%3==0 ? a: b;
+      if (cnt==0)
+        return val;
+
+      if (cnt%5==0) t = base;
+      else t = sub;
+      
+      assertEqual(val, a);
+      return goto target.tailCaller(cnt-1, val+1, t, base, sub, tmp+1);
+    }
+
+    public int tailCaller(int cnt, int val, TestMega target, TestMega base, TestMega sub, Integer a) {
+      TestMega t = null;
+      if (cnt==0)
+        return val;
+
+      if (cnt%5==0) t = base;
+      else t = sub;
+
+      assertEqual(val, a);
+      return goto target.tailCallee(cnt-1, val+1, t, base, sub, a+1, a+1, a+1);
+    }
+
+  }  
+  public static void main(String args[]) {
+    int down = 5000;
+    TestMega a = new TestMega();
+    TestMegaSub b = new TestMegaSub();
+
+    int result = a.tailCaller(down, 0, b, a, b, 0);
+
+    if (result==down) {
+      System.out.println("Success");
+    } else {
+      System.out.println("Failure");
+      System.exit(1);
+    }
+  }
+
+}
diff -r aa0c48844632 -r a7d54b98ca4a test/tailcalltests/CompilerTestMonomorphic1.java
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/tailcalltests/CompilerTestMonomorphic1.java	Wed Jun 03 16:27:17 2009 +0200
@@ -0,0 +1,54 @@
+/* @test
+ * @run main/othervm -XX:+TailCalls -Xcomp  CompilerTestMonomorphic1
+ * @summary Tests monomorphic tail call in compiler.
+ */
+public class CompilerTestMonomorphic1 {
+  
+  static class TestMono {
+    private int a;
+
+    public TestMono() {
+      a =1;
+    }
+
+    public int tailCaller(int arg) {
+      if (arg==5)
+        return goto tailCaller(arg+1);
+      return goto tailCallee(arg+1);
+    }
+
+    public int tailCallee(int arg) {
+      return arg+a;
+    }
+
+
+  }
+
+  static class TestSub extends TestMono{
+    private int a;
+
+    public TestSub() {
+      a =1;
+    }
+
+    public int tailCaller(int arg) {
+      return goto tailCallee(arg+1);
+    }
+
+    public int tailCallee(int arg) {
+      return arg+a;
+    }
+  }
+
+  public static void main(String args[]) {
+    TestMono a = new TestMono();
+    TestSub b = new TestSub();
+    int result = a.tailCaller(5);
+    if (result==8) {
+      System.out.println("Success");
+    } else {
+      System.out.println("Failure");
+      System.exit(1);
+    }
+  }
+}
diff -r aa0c48844632 -r a7d54b98ca4a test/tailcalltests/CompilerTestMonomorphic2.java
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/tailcalltests/CompilerTestMonomorphic2.java	Wed Jun 03 16:27:17 2009 +0200
@@ -0,0 +1,66 @@
+/* @test
+ * @run main/othervm -XX:+TailCalls -Xcomp  CompilerTestMonomorphic2
+ * @summary Tests monomorphic tail call in compiler.
+ */
+public class CompilerTestMonomorphic2 {
+  static class TestMono2 {
+    private int a;
+
+    public TestMono2() {
+      a =1;
+    }
+
+    public int tailCaller(int arg1, int cnt, Integer a, Integer b, Double c, Double d) {
+      if (cnt==0) {
+        return arg1;
+      } else {
+        return goto tailCallee(arg1+1, cnt-1, a+1, b+1, c+1.0, d+1.0);
+      }    
+    }
+
+    public int tailCallee(int arg1, int cnt, Integer a, Integer b, Double c, Double d) {
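+      // This check fires on the first tailCallee invocation (main passes cnt=50000) and
+      // verifies that the boxed arguments survived the tail call intact.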
+     if (cnt==49999) {
+       System.out.println("Checking.");
+       if (a.intValue()!=b.intValue() || c.doubleValue()!=d.doubleValue()) {
+         System.exit(1);
+       }
+     }
+      if (cnt==0) {
+        return arg1;
+      } else {
+        return goto tailCaller(arg1+1, cnt-1, a+1, b+1, c+1.0, d+1.0);
+      } 
+    }
+  }
+
+  static class TestSub2 extends TestMono2{
+    private int b;
+
+    public TestSub2() {
+      b = 2;
+    }
+
+    public int tailCaller(int arg, int cnt, Integer a, Integer b, Double c, Double d) {
+      return arg+b+cnt;
+    }
+    public int tailCallee(int arg, int cnt, Integer a, Integer b, Double c, Double d) {
+      return arg+b+cnt;
+    }
+
+  }
+
+  
+  
+  public static void main(String args[]) {
+    int correctResult = 50000;
+    TestMono2 a = new TestMono2();
+    TestSub2 b = new TestSub2();
+    int result = a.tailCaller(0, correctResult, 2, 2, 3.0, 3.0);
+    if (result==correctResult) {
+      System.out.println("Success");
+    } else {
+      System.out.println("Failure");
+      System.exit(1);
+    }
+  }
+}
diff -r aa0c48844632 -r a7d54b98ca4a test/tailcalltests/CompilerTestMonomorphic2C2I.java
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/tailcalltests/CompilerTestMonomorphic2C2I.java	Wed Jun 03 16:27:17 2009 +0200
@@ -0,0 +1,66 @@
+/* @test
+ * @run main/othervm -XX:CompileOnly=.tailCaller -XX:+TailCalls -Xcomp  CompilerTestMonomorphic2C2I
+ * @summary Tests monomorphic tail call in compiler.
+ */
+public class CompilerTestMonomorphic2C2I {
+  static class TestMono2 {
+    private int a;
+
+    public TestMono2() {
+      a =1;
+    }
+
+    public int tailCaller(int arg1, int cnt, Integer a, Integer b, Double c, Double d) {
+      if (cnt==0) {
+        return arg1;
+      } else {
+        return goto tailCallee(arg1+1, cnt-1, a+1, b+1, c+1.0, d+1.0);
+      }    
+    }
+
+    public int tailCallee(int arg1, int cnt, Integer a, Integer b, Double c, Double d) {
+     if (cnt==49999) {
+       System.out.println("Checking.");
+       if (a.intValue()!=b.intValue() || c.doubleValue()!=d.doubleValue()) {
+         System.exit(1);
+       }
+     }
+      if (cnt==0) {
+        return arg1;
+      } else {
+        return goto tailCaller(arg1+1, cnt-1, a+1, b+1, c+1.0, d+1.0);
+      } 
+    }
+  }
+
+  static class TestSub2 extends TestMono2{
+    private int b;
+
+    public TestSub2() {
+      b = 2;
+    }
+
+    public int tailCaller(int arg, int cnt, Integer a, Integer b, Double c, Double d) {
+      return arg+b+cnt;
+    }
+    public int tailCallee(int arg, int cnt, Integer a, Integer b, Double c, Double d) {
+      return arg+b+cnt;
+    }
+
+  }
+
+  
+  
+  public static void main(String args[]) {
+    int correctResult = 50000;
+    TestMono2 a = new TestMono2();
+    TestSub2 b = new TestSub2();
+    int result = a.tailCaller(0, correctResult, 2, 2, 3.0, 3.0);
+    if (result==correctResult) {
+      System.out.println("Success");
+    } else {
+      System.out.println("Failure");
+      System.exit(1);
+    }
+  }
+}
diff -r aa0c48844632 -r a7d54b98ca4a test/tailcalltests/CompilerTestMonomorphic2NotSib.java
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/tailcalltests/CompilerTestMonomorphic2NotSib.java	Wed Jun 03 16:27:17 2009 +0200
@@ -0,0 +1,66 @@
+/* @test
+ * @run main/othervm -XX:+TailCalls -Xcomp  CompilerTestMonomorphic2NotSib
+ * @summary Tests monomorphic tail call in compiler.
+ */
+public class CompilerTestMonomorphic2NotSib {
+  static class TestMono2 {
+    private int a;
+
+    public TestMono2() {
+      a =1;
+    }
+
+    public int tailCaller(int arg1, int cnt, Integer a, Integer b, Double c, Double d) {
+      if (cnt==0) {
+        return arg1;
+      } else {
+        return goto tailCallee(arg1+1, cnt-1, a+1, b+1, c+1.0, d+1.0, a+1, a+1);
+      }    
+    }
+
+    public int tailCallee(int arg1, int cnt, Integer a, Integer b, Double c, Double d, Integer e, Integer f) {
+     if (cnt==49999) {
+       System.out.println("Checking.");
+       if (a.intValue()!=b.intValue() || c.doubleValue()!=d.doubleValue()) {
+         System.exit(1);
+       }
+     }
+      if (cnt==0) {
+        return arg1;
+      } else {
+        return goto tailCaller(arg1+1, cnt-1, a+1, b+1, c+1.0, d+1.0);
+      } 
+    }
+  }
+
+  static class TestSub2 extends TestMono2{
+    private int b;
+
+    public TestSub2() {
+      b = 2;
+    }
+
+    public int tailCaller(int arg, int cnt, Integer a, Integer b, Double c, Double d) {
+      return arg+b+cnt;
+    }
+    public int tailCallee(int arg, int cnt, Integer a, Integer b, Double c, Double d, Integer e, Integer f) {
+      return arg+b+cnt;
+    }
+
+  }
+
+  
+  
+  public static void main(String args[]) {
+    int correctResult = 50000;
+    TestMono2 a = new TestMono2();
+    TestSub2 b = new TestSub2();
+    int result = a.tailCaller(0, correctResult, 2, 2, 3.0, 3.0);
+    if (result==correctResult) {
+      System.out.println("Success");
+    } else {
+      System.out.println("Failure");
+      System.exit(1);
+    }
+  }
+}
diff -r aa0c48844632 -r a7d54b98ca4a test/tailcalltests/CompilerTestMonomorphic2NotSibC2I.java
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/tailcalltests/CompilerTestMonomorphic2NotSibC2I.java	Wed Jun 03 16:27:17 2009 +0200
@@ -0,0 +1,66 @@
+/* @test
+ * @run main/othervm -XX:CompileOnly=.tailCallee -XX:+TailCalls -Xcomp  CompilerTestMonomorphic2NotSibC2I
+ * @summary Tests monomorphic tail call in compiler.
+ */
+public class CompilerTestMonomorphic2NotSibC2I {
+  static class TestMono2 {
+    private int a;
+
+    public TestMono2() {
+      a =1;
+    }
+
+    public int tailCaller(int arg1, int cnt, Integer a, Integer b, Double c, Double d) {
+      if (cnt==0) {
+        return arg1;
+      } else {
+        return goto tailCallee(arg1+1, cnt-1, a+1, b+1, c+1.0, d+1.0, a+1, a+1);
+      }    
+    }
+
+    public int tailCallee(int arg1, int cnt, Integer a, Integer b, Double c, Double d, Integer e, Integer f) {
+     if (cnt==49999) {
+       System.out.println("Checking.");
+       if (a.intValue()!=b.intValue() || c.doubleValue()!=d.doubleValue()) {
+         System.exit(1);
+       }
+     }
+      if (cnt==0) {
+        return arg1;
+      } else {
+        return goto tailCaller(arg1+1, cnt-1, a+1, b+1, c+1.0, d+1.0);
+      } 
+    }
+  }
+
+  static class TestSub2 extends TestMono2{
+    private int b;
+
+    public TestSub2() {
+      b = 2;
+    }
+
+    public int tailCaller(int arg, int cnt, Integer a, Integer b, Double c, Double d) {
+      return arg+b+cnt;
+    }
+    public int tailCallee(int arg, int cnt, Integer a, Integer b, Double c, Double d, Integer e, Integer f) {
+      return arg+b+cnt;
+    }
+
+  }
+
+  
+  
+  public static void main(String args[]) {
+    int correctResult = 50000;
+    TestMono2 a = new TestMono2();
+    TestSub2 b = new TestSub2();
+    int result = a.tailCaller(0, correctResult, 2, 2, 3.0, 3.0);
+    if (result==correctResult) {
+      System.out.println("Success");
+    } else {
+      System.out.println("Failure");
+      System.exit(1);
+    }
+  }
+}
diff -r aa0c48844632 -r a7d54b98ca4a test/tailcalltests/CompilerTestStatic.java
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/tailcalltests/CompilerTestStatic.java	Wed Jun 03 16:27:17 2009 +0200
@@ -0,0 +1,43 @@
+/* @test
+ * @run main/othervm -XX:+TailCalls -Xcomp  CompilerTestStatic
+ * @summary Tests static tail call in compiler.
+ */
+public class CompilerTestStatic {
+
+
+  public static int tailcallee(Integer i, int a, int b, int c, int d, int e, int f) {
+    int tmp = a + b + c + d + e + f + i;
+    if (a==0) return tmp;
+    System.out.println("tailcallee: " + tmp);
+    return goto tailcaller(a,b,c,d,e,f, i);
+  }
+
+
+  public static int tailcaller(int a, int b , int c, int d, int e, int f, Integer i) {
+    int tmp = a + b + c + d + e + f;
+    System.out.println("tailcaller: " + tmp);
+    return goto tailcallee(new Integer(i+1),a-1,b,c,d,e,f);
+  }
+
+  public static void main(String args[]) {
+    int counter = 100;
+    Integer number = new Integer(1);
+    int num = (counter * number)+1;
+    int result = tailcaller(counter,2,3,4,5,6, number);
+    if (result==2+3+4+5+6+num) {
+      System.out.println("success");
+    } else {
+      System.out.println("failure");
+      System.exit(1);
+    }
+    // try the compiled path
+    number = new Integer(1);
+    result = tailcaller(counter,2,3,4,5,6, number);
+    if (result==2+3+4+5+6+num) {
+      System.out.println("success");
+    } else {
+      System.out.println("failure");
+      System.exit(1);
+    }
+  }
+}
diff -r aa0c48844632 -r a7d54b98ca4a test/tailcalltests/CompilerTestStaticC2I.java
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/tailcalltests/CompilerTestStaticC2I.java	Wed Jun 03 16:27:17 2009 +0200
@@ -0,0 +1,44 @@
+
+/* @test
+ * @run main/othervm -XX:CompileOnly=.tailcallee -XX:+TailCalls -Xcomp  CompilerTestStaticC2I
+ * @summary Tests static sibling tail call in compiler.
+ */
+public class CompilerTestStaticC2I {
+
+
+  public static int tailcallee(Integer i, int a, int b, int c, int d, int e, int f) {
+    int tmp = a + b + c + d + e + f + i;
+    if (a==0) return tmp;
+    System.out.println("tailcallee: " + tmp);
+    return goto tailcaller(a,b,c,d,e,f, i);
+  }
+
+
+  public static int tailcaller(int a, int b , int c, int d, int e, int f, Integer i) {
+    int tmp = a + b + c + d + e + f;
+    System.out.println("tailcaller: " + tmp);
+    return goto tailcallee(new Integer(i+1),a-1,b,c,d,e,f);
+  }
+
+  public static void main(String args[]) {
+    int counter = 100;
+    Integer number = new Integer(1);
+    int num = (counter * number)+1;
+    int result = tailcaller(counter,2,3,4,5,6, number);
+    if (result==2+3+4+5+6+num) {
+      System.out.println("success");
+    } else {
+      System.out.println("failure");
+      System.exit(1);
+    }
+    // try the compiled path
+    number = new Integer(1);
+    result = tailcaller(counter,2,3,4,5,6, number);
+    if (result==2+3+4+5+6+num) {
+      System.out.println("success");
+    } else {
+      System.out.println("failure");
+      System.exit(1);
+    }
+  }
+}
diff -r aa0c48844632 -r a7d54b98ca4a test/tailcalltests/CompilerTestStaticNotSib.java
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/tailcalltests/CompilerTestStaticNotSib.java	Wed Jun 03 16:27:17 2009 +0200
@@ -0,0 +1,44 @@
+
+/* @test
+ * @run main/othervm -XX:+TailCalls -Xcomp  CompilerTestStaticNotSib
+ * @summary Tests static non-sibling tail call in compiler.
+ */
+public class CompilerTestStaticNotSib {
+
+
+  public static int tailcallee(Integer i, int a, int b, int c, int d, int e, int f, int f2, int f4) {
+    int tmp = a + b + c + d + e + f + i;
+    if (a==0) return tmp;
+    System.out.println("tailcallee: " + tmp);
+    return goto tailcaller(a,b,c,d,e,f, i);
+  }
+
+
+  public static int tailcaller(int a, int b , int c, int d, int e, int f, Integer i) {
+    int tmp = a + b + c + d + e + f;
+    System.out.println("tailcaller: " + tmp);
+    return goto tailcallee(new Integer(i+1),a-1,b,c,d,e,f, f+2, f+3);
+  }
+
+  public static void main(String args[]) {
+    int counter = 100;
+    Integer number = new Integer(1);
+    int num = (counter * number)+1;
+    int result = tailcaller(counter,2,3,4,5,6, number);
+    if (result==2+3+4+5+6+num) {
+      System.out.println("success");
+    } else {
+      System.out.println("failure");
+      System.exit(1);
+    }
+    // try the compiled path
+    number = new Integer(1);
+    result = tailcaller(counter,2,3,4,5,6, number);
+    if (result==2+3+4+5+6+num) {
+      System.out.println("success");
+    } else {
+      System.out.println("failure");
+      System.exit(1);
+    }
+  }
+}
diff -r aa0c48844632 -r a7d54b98ca4a test/tailcalltests/CompilerTestStaticNotSibC2I.java
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/tailcalltests/CompilerTestStaticNotSibC2I.java	Wed Jun 03 16:27:17 2009 +0200
@@ -0,0 +1,44 @@
+
+/* @test
+ * @run main/othervm -XX:CompileOnly=.tailcallee -XX:+TailCalls -Xcomp  CompilerTestStaticNotSibC2I
+ * @summary Tests static non-sibling tail call in compiler.
+ */
+public class CompilerTestStaticNotSibC2I {
+
+
+  public static int tailcallee(Integer i, int a, int b, int c, int d, int e, int f, int f2, int f4) {
+    int tmp = a + b + c + d + e + f + i;
+    if (a==0) return tmp;
+    System.out.println("tailcallee: " + tmp);
+    return goto tailcaller(a,b,c,d,e,f, i);
+  }
+
+
+  public static int tailcaller(int a, int b , int c, int d, int e, int f, Integer i) {
+    int tmp = a + b + c + d + e + f;
+    System.out.println("tailcaller: " + tmp);
+    return goto tailcallee(new Integer(i+1),a-1,b,c,d,e,f, f+2, f+3);
+  }
+
+  public static void main(String args[]) {
+    int counter = 100;
+    Integer number = new Integer(1);
+    int num = (counter * number)+1;
+    int result = tailcaller(counter,2,3,4,5,6, number);
+    if (result==2+3+4+5+6+num) {
+      System.out.println("success");
+    } else {
+      System.out.println("failure");
+      System.exit(1);
+    }
+    // try the compiled path
+    number = new Integer(1);
+    result = tailcaller(counter,2,3,4,5,6, number);
+    if (result==2+3+4+5+6+num) {
+      System.out.println("success");
+    } else {
+      System.out.println("failure");
+      System.exit(1);
+    }
+  }
+}
diff -r aa0c48844632 -r a7d54b98ca4a test/tailcalltests/CompilerTestVirtualOpt.java
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/tailcalltests/CompilerTestVirtualOpt.java	Wed Jun 03 16:27:17 2009 +0200
@@ -0,0 +1,48 @@
+/* @test
+ * @run main/othervm -XX:+TailCalls -Xcomp  CompilerTestVirtualOpt
+ * @summary Tests optimized virtual tail call in compiler.
+ */
+public class CompilerTestVirtualOpt{
+  static class TestMono2 {
+    private int a;
+
+    public TestMono2() {
+      a =1;
+    }
+
+    public int tailCaller(int arg1, int cnt, Integer a, Integer b, Double c, Double d) {
+      if (cnt==0) {
+        return arg1;
+      } else {
+        return goto tailCallee(arg1+1, cnt-1, a+1, b+1, c+1.0, d+1.0);
+      }    
+    }
+
+    public int tailCallee(int arg1, int cnt, Integer a, Integer b, Double c, Double d) {
+     if (cnt==49999) {
+       System.out.println("Checking.");
+       if (a.intValue()!=b.intValue() || c.doubleValue()!=d.doubleValue()) {
+         System.exit(1);
+       }
+     }
+      if (cnt==0) {
+        return arg1;
+      } else {
+        return goto tailCaller(arg1+1, cnt-1, a+1, b+1, c+1.0, d+1.0);
+      } 
+    }
+  }
+
+  
+  public static void main(String args[]) {
+    int correctResult = 50000;
+    TestMono2 a = new TestMono2();
+    int result = a.tailCaller(0, correctResult, 2, 2, 3.0, 3.0);
+    if (result==correctResult) {
+      System.out.println("Success");
+    } else {
+      System.out.println("Failure");
+      System.exit(1);
+    }
+  }
+}
diff -r aa0c48844632 -r a7d54b98ca4a test/tailcalltests/CompilerTestVirtualOptC2I.java
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/tailcalltests/CompilerTestVirtualOptC2I.java	Wed Jun 03 16:27:17 2009 +0200
@@ -0,0 +1,48 @@
+/* @test
+ * @run main/othervm -XX:CompileOnly=.tailCallee -XX:+TailCalls -Xcomp  CompilerTestVirtualOptC2I
+ * @summary Tests optimized virtual tail call in compiler.
+ */
+public class CompilerTestVirtualOptC2I {
+  static class TestMono2 {
+    private int a;
+
+    public TestMono2() {
+      a =1;
+    }
+
+    public int tailCaller(int arg1, int cnt, Integer a, Integer b, Double c, Double d) {
+      if (cnt==0) {
+        return arg1;
+      } else {
+        return goto tailCallee(arg1+1, cnt-1, a+1, b+1, c+1.0, d+1.0);
+      }    
+    }
+
+    public int tailCallee(int arg1, int cnt, Integer a, Integer b, Double c, Double d) {
+     if (cnt==49999) {
+       System.out.println("Checking.");
+       if (a.intValue()!=b.intValue() || c.doubleValue()!=d.doubleValue()) {
+         System.exit(1);
+       }
+     }
+      if (cnt==0) {
+        return arg1;
+      } else {
+        return goto tailCaller(arg1+1, cnt-1, a+1, b+1, c+1.0, d+1.0);
+      } 
+    }
+  }
+
+  
+  public static void main(String args[]) {
+    int correctResult = 50000;
+    TestMono2 a = new TestMono2();
+    int result = a.tailCaller(0, correctResult, 2, 2, 3.0, 3.0);
+    if (result==correctResult) {
+      System.out.println("Success");
+    } else {
+      System.out.println("Failure");
+      System.exit(1);
+    }
+  }
+}
diff -r aa0c48844632 -r a7d54b98ca4a test/tailcalltests/CompilerTestVirtualOptNotSib.java
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/tailcalltests/CompilerTestVirtualOptNotSib.java	Wed Jun 03 16:27:17 2009 +0200
@@ -0,0 +1,49 @@
+/* @test
+ * @run main/othervm -XX:+TailCalls -Xcomp  CompilerTestVirtualOptNotSib
+ * @summary Tests optimized virtual tail call in compiler.
+ */
+public class CompilerTestVirtualOptNotSib{
+  static class TestMono2 {
+    private int a;
+
+    public TestMono2() {
+      a =1;
+    }
+
+    public int tailCaller(int arg1, int cnt, Integer a, Integer b, Double c, Double d) {
+      if (cnt==0) {
+        return arg1;
+      } else {
+        return goto tailCallee(arg1+1, cnt-1, a+1, b+1, c+1.0, d+1.0, a+1, a+1);
+      }    
+    }
+
+    public int tailCallee(int arg1, int cnt, Integer a, Integer b, Double c, Double d, Integer e, Integer f) {
+     if (cnt==49999) {
+       System.out.println("Checking.");
+       if (a.intValue()!=b.intValue() || c.doubleValue()!=d.doubleValue() ||
+           e.intValue() != f.intValue()) {
+         System.exit(1);
+       }
+     }
+      if (cnt==0) {
+        return arg1;
+      } else {
+        return goto tailCaller(arg1+1, cnt-1, a+1, b+1, c+1.0, d+1.0);
+      } 
+    }
+  }
+
+  
+  public static void main(String args[]) {
+    int correctResult = 50000;
+    TestMono2 a = new TestMono2();
+    int result = a.tailCaller(0, correctResult, 2, 2, 3.0, 3.0);
+    if (result==correctResult) {
+      System.out.println("Success");
+    } else {
+      System.out.println("Failure");
+      System.exit(1);
+    }
+  }
+}
diff -r aa0c48844632 -r a7d54b98ca4a test/tailcalltests/CompilerTestVirtualOptNotSibC2I.java
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/tailcalltests/CompilerTestVirtualOptNotSibC2I.java	Wed Jun 03 16:27:17 2009 +0200
@@ -0,0 +1,49 @@
+/* @test
+ * @run main/othervm -XX:CompileOnly=.tailCaller -XX:+TailCalls -Xcomp  CompilerTestVirtualOptNotSibC2I
+ * @summary Tests optimized virtual tail call in compiler.
+ */
+public class CompilerTestVirtualOptNotSibC2I {
+  static class TestMono2 {
+    private int a;
+
+    public TestMono2() {
+      a =1;
+    }
+
+    public int tailCaller(int arg1, int cnt, Integer a, Integer b, Double c, Double d) {
+      if (cnt==0) {
+        return arg1;
+      } else {
+        return goto tailCallee(arg1+1, cnt-1, a+1, b+1, c+1.0, d+1.0, a+1, a+1);
+      }    
+    }
+
+    public int tailCallee(int arg1, int cnt, Integer a, Integer b, Double c, Double d, Integer e, Integer f) {
+     if (cnt==49999) {
+       System.out.println("Checking.");
+       if (a.intValue()!=b.intValue() || c.doubleValue()!=d.doubleValue() ||
+           e.intValue() != f.intValue()) {
+         System.exit(1);
+       }
+     }
+      if (cnt==0) {
+        return arg1;
+      } else {
+        return goto tailCaller(arg1+1, cnt-1, a+1, b+1, c+1.0, d+1.0);
+      } 
+    }
+  }
+
+  
+  public static void main(String args[]) {
+    int correctResult = 50000;
+    TestMono2 a = new TestMono2();
+    int result = a.tailCaller(0, correctResult, 2, 2, 3.0, 3.0);
+    if (result==correctResult) {
+      System.out.println("Success");
+    } else {
+      System.out.println("Failure");
+      System.exit(1);
+    }
+  }
+}
diff -r aa0c48844632 -r a7d54b98ca4a test/tailcalltests/CompilerVoidTest.java
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/tailcalltests/CompilerVoidTest.java	Wed Jun 03 16:27:17 2009 +0200
@@ -0,0 +1,25 @@
+/* @test
+ * @run main/othervm -XX:+TailCalls -Xcomp  CompilerVoidTest
+ * @summary Tests void tail call in compiler.
+ */
+public class CompilerVoidTest {
+
+
+  public static void tailcallee(int cnt) {
+    int tmp = 3*cnt;
+    if (cnt==0) return;
+    else goto tailcaller(cnt-1);
+  }
+
+  public static void tailcaller(int cnt) {
+    int tmp = 2*cnt;
+    System.out.println("cnt:"+tmp);
+    if (cnt==0) return;
+    else goto tailcallee(cnt-1);
+  }
+
+  public static void main(String args[]) {
+    tailcaller(10000);
+    System.out.println("Success");
+  }
+}
diff -r aa0c48844632 -r a7d54b98ca4a test/tailcalltests/ContainsTailcall.java
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/tailcalltests/ContainsTailcall.java	Wed Jun 03 16:27:17 2009 +0200
@@ -0,0 +1,15 @@
+/* @test
+ * @run main/othervm/fail -Xint ContainsTailcall
+ * @summary Are we really emitting tail calls? If so, this should fail
+ *          because -XX:+TailCalls is not enabled.
+ */
+public class ContainsTailcall {
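+  // Annotation: the jtreg "/fail" action inverts the result. This run is expected to fail
+  // because the class contains tail call bytecode but -XX:+TailCalls is not enabled.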
+  public static int tailcall(int a) {
+    if (a==0) return 0;
+    else return goto tailcall(a);
+  }
+
+  public static void main (String args[]) {
+    int a = tailcall(10);
+  }
+}
diff -r aa0c48844632 -r a7d54b98ca4a test/tailcalltests/ExceptionTest1.java
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/tailcalltests/ExceptionTest1.java	Wed Jun 03 16:27:17 2009 +0200
@@ -0,0 +1,23 @@
+/* @test
+ * @run main/othervm/fail -Xint ExceptionTest1
+ * @summary Tail call inside a try block with an exception handler should fail.
+ */
+public class ExceptionTest1 {
+  
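+  // Annotation: the tail call below sits inside a try block; presumably the caller frame must
+  // stay alive for the handler, so the VM is expected to reject it (hence the "/fail" run).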
+  public int test(int a, int b) {
+    if (a==0) return b;
+    else {
+      try {
+        return goto test(a-1, b+1);
+      } catch (Exception e) {
+        System.out.println(e);
+      }
+    }
+    return 1;
+  }
+
+  public static void main(String args[]) {
+    new ExceptionTest1().test(10,1);
+    System.exit(1); // Should not get here.
+  }
+}
diff -r aa0c48844632 -r a7d54b98ca4a test/tailcalltests/InterpreterTest1.java
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/tailcalltests/InterpreterTest1.java	Wed Jun 03 16:27:17 2009 +0200
@@ -0,0 +1,27 @@
+/* @test
+ * @run main/othervm -XX:+TailCalls -Xint InterpreterTest1
+ * @summary Tests static tail call in interpreter.
+ */
+public class InterpreterTest1 {
+  
+  static int tailcaller(int rec, int a, int b, int c, int d) {
+    if (rec==0) return a + 2*b + 3*c + 4*d;
+    else
+      return goto tailcaller(rec-1, a+1, b, c, d);
+  }
+
+  public static void main (String args[]) {
+    int result = 0;
+    int expected = 100001;
+    int a = 0;
+    int b = 1;
+    int c = 2;
+    int d = 3;
+
+    result = tailcaller(expected, 0, b, c, d);
+
+    if (result != expected + 2*b + 3*c + 4*d) {
+      System.exit(1);
+    }
+  }
+}
diff -r aa0c48844632 -r a7d54b98ca4a test/tailcalltests/InterpreterTest2.java
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/tailcalltests/InterpreterTest2.java	Wed Jun 03 16:27:17 2009 +0200
@@ -0,0 +1,26 @@
+/* @test
+ * @run main/othervm -XX:+TailCalls -Xint InterpreterTest2
+ */
+public class InterpreterTest2 {
+  
+  static double tailcaller(int rec, int a, int b, int c, double d) {
+    if (rec==0) return a + 2*b + 3*c + 4*d;
+    else
+      return goto tailcaller(rec-1, a+1, b, c, d);
+  }
+
+  public static void main (String args[]) {
+    double result = 0.0;
+    int expected = 100001;
+    int a = 0;
+    int b = 1;
+    int c = 2;
+    double d = 3.0;
+
+    result = tailcaller(expected, 0, b, c, d);
+
+    if (result != expected + 2*b + 3*c + 4*d) {
+      System.exit(1);
+    }
+  }
+}
diff -r aa0c48844632 -r a7d54b98ca4a test/tailcalltests/InterpreterTest3.java
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/tailcalltests/InterpreterTest3.java	Wed Jun 03 16:27:17 2009 +0200
@@ -0,0 +1,33 @@
+/* @test
+ * @run main/othervm -XX:+TailCalls -Xint InterpreterTest3
+ */
+public class InterpreterTest3 {
+  static double tailcallee(int rec, int a, int b, double d, int c) {
+    System.out.println("tailcallee");
+    if (rec==0) return a + 2*b + 3*c + 4*d;
+    else
+      return goto tailcaller(rec-1, a+1, b, c, d);
+  }
+
+  static double tailcaller(int rec, int a, int b, int c, double d) {
+    System.out.println("tailcaller");
+    if (rec==0) return a + 2*b + 3*c + 4*d;
+    else
+      return goto tailcallee(rec-1, a+1, b, d, c);
+  }
+
+  public static void main (String args[]) {
+    double result = 0.0;
+    int expected = 1;
+    int a = 0;
+    int b = 1;
+    int c = 2;
+    double d = 3.0;
+
+    result = tailcaller(expected, 0, b, c, d);
+
+    if (result != expected + 2*b + 3*c + 4*d) {
+      System.exit(1);
+    }
+  }
+}
diff -r aa0c48844632 -r a7d54b98ca4a test/tailcalltests/InterpreterTestArgumentMove.java
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/tailcalltests/InterpreterTestArgumentMove.java	Wed Jun 03 16:27:17 2009 +0200
@@ -0,0 +1,39 @@
+/* @test
+ * @run main/othervm -XX:+TailCalls -Xint InterpreterTestArgumentMove
+ * @summary Tests moving of arguments during tail call in interpreter.
+ */
+public class InterpreterTestArgumentMove {
+ public static void assertEqual(int a, int e) {
+   if (a!=e) {
+     System.out.println("Error: "+ a + " not equal " + e);
+     System.exit(1);
+   }
+ }
+
+ public static int tailcaller(int a, int b, int c, int d, int e, int f, int g, int h, int i, int j, int k, int l, int m, int n) {
+   if (a == 99) {
+     assertEqual(a, 99);
+     assertEqual(b, 99);
+     assertEqual(c, 11);
+     assertEqual(d, 10);
+     assertEqual(e, 9);
+     assertEqual(f, 8);
+     assertEqual(g, 7);
+     assertEqual(h, 6);
+     assertEqual(i, 5);
+     assertEqual(j, 4);
+     assertEqual(k, 3);
+     assertEqual(l, 2);
+     assertEqual(m, 1);
+     assertEqual(n, 0);
+   }
+   if (a<=0)
+     return c+1;
+   return tailcaller(a-1,b-1,n,m,l,k, j, i, h, g, f, e, d, c); 
+  }
+
+  public static void main(String args[]) {
+    int a = tailcaller(100, 100, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+    assertEqual(a, 1);
+  }
+}
diff -r aa0c48844632 -r a7d54b98ca4a test/tailcalltests/InterpreterTestInvokeInterface.java
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/tailcalltests/InterpreterTestInvokeInterface.java	Wed Jun 03 16:27:17 2009 +0200
@@ -0,0 +1,32 @@
+/* @test
+ * @run main/othervm -XX:+TailCalls -Xint InterpreterTestInvokeInterface
+ * @summary Test invokeinterface in interpreter.
+ */
+public class InterpreterTestInvokeInterface {
+  static interface IFace {
+    int tailcaller(int rec, int a, int b, int c, int d);
+  }
+  static class Test implements IFace {
+    public int tailcaller(int rec, int a, int b, int c, int d) {
+      if (rec==0) return a + 2*b + 3*c + 4*d;
+      else {
+        IFace i = this;
+        return goto i.tailcaller(rec-1, a+1, b, c, d);
+      }
+    }
+  }
+  public static void main (String args[]) {
+    int result = 0;
+    int expected = 100001;
+    int a = 0;
+    int b = 1;
+    int c = 2;
+    int d = 3;
+    Test t = new Test();
+    result = t.tailcaller(expected, 0, b, c, d);
+
+    if (result != expected + 2*b + 3*c + 4*d) {
+      System.exit(1);
+    }
+  }
+}
diff -r aa0c48844632 -r a7d54b98ca4a test/tailcalltests/InterpreterTestInvokeSpecial.java
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/tailcalltests/InterpreterTestInvokeSpecial.java	Wed Jun 03 16:27:17 2009 +0200
@@ -0,0 +1,35 @@
+/* @test
+ * @run main/othervm -XX:+TailCalls -Xint InterpreterTestInvokeSpecial
+ * @summary Test invokespecial in interpreter.
+ */
+public class InterpreterTestInvokeSpecial {
+ 
+  static class Test {
+    public int tailcaller(int rec, int a, int b, int c, Integer d) {
+      if (rec==0) return a + 2*b + 3*c + 4*d;
+      else
+        return goto tailcaller(rec-1, a+1, b, c, d);
+    }
+  }
+
+  static class TestSub extends Test {
+    public int tailcaller(int rec, int a, int b, int c, Integer d) {
+      return goto super.tailcaller(rec, a, b, c, d);
+    }
+  }
+
+  public static void main (String args[]) {
+    int result = 0;
+    int expected = 100001;
+    int a = 0;
+    int b = 1;
+    int c = 2;
+    int d = 3;
+    Test t = new TestSub();
+    result = t.tailcaller(expected, 0, b, c, d);
+
+    if (result != expected + 2*b + 3*c + 4*d) {
+      System.exit(1);
+    }
+  }
+}
diff -r aa0c48844632 -r a7d54b98ca4a test/tailcalltests/InterpreterTestInvokeVirtual.java
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/tailcalltests/InterpreterTestInvokeVirtual.java	Wed Jun 03 16:27:17 2009 +0200
@@ -0,0 +1,28 @@
+/* @test
+ * @run main/othervm -XX:+TailCalls -Xint InterpreterTestInvokeVirtual
+ * @summary Test invokevirtual in interpreter.
+ */
+public class InterpreterTestInvokeVirtual {
+ 
+  static class Test {
+    public int tailcaller(int rec, int a, int b, int c, int d) {
+      if (rec==0) return a + 2*b + 3*c + 4*d;
+      else
+        return goto tailcaller(rec-1, a+1, b, c, d);
+    }
+  }
+  public static void main (String args[]) {
+    int result = 0;
+    int expected = 100001;
+    int a = 0;
+    int b = 1;
+    int c = 2;
+    int d = 3;
+    Test t = new Test();
+    result = t.tailcaller(expected, 0, b, c, d);
+
+    if (result != expected + 2*b + 3*c + 4*d) {
+      System.exit(1);
+    }
+  }
+}
diff -r aa0c48844632 -r a7d54b98ca4a test/tailcalltests/InterpreterTestObjects1.java
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/tailcalltests/InterpreterTestObjects1.java	Wed Jun 03 16:27:17 2009 +0200
@@ -0,0 +1,31 @@
+/* @test
+ * @run main/othervm -XX:+TailCalls -Xint InterpreterTestObjects1
+ */
+public class InterpreterTestObjects1 {
+  static double tailcallee(int rec, int a, int b, Double d, int c) {
+    if (rec==0) return a + 2*b + 3*c + 4*d;
+    else
+      return goto tailcaller(rec-1, a+1, b, c, d);
+  }
+
+  static double tailcaller(int rec, int a, int b, Integer c, Double d) {
+    if (rec==0) return a + 2*b + 3*c + 4*d;
+    else
+      return goto tailcallee(rec-1, a+1, b, d, c);
+  }
+
+  public static void main (String args[]) {
+    double result = 0.0;
+    int expected = 100000;
+    int a = 0;
+    int b = 1;
+    int c = 2;
+    double d = 3.0;
+
+    result = tailcaller(expected, 0, b, c, d);
+
+    if (result != expected + 2*b + 3*c + 4*d) {
+      System.exit(1);
+    }
+  }
+}
diff -r aa0c48844632 -r a7d54b98ca4a test/tailcalltests/InterpreterTestObjects2.java
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/tailcalltests/InterpreterTestObjects2.java	Wed Jun 03 16:27:17 2009 +0200
@@ -0,0 +1,33 @@
+/* @test
+ * @run main/othervm -XX:+TailCalls -Xint InterpreterTestObjects2
+ */
+public class InterpreterTestObjects2 {
+  static double tailcallee(int rec, int a, int b, Double d, int c) {
+    if (rec==0) return a + 2*b + 3*c + 4*d;
+    else
+      return goto tailcaller(rec-1, a+1, b, c, d, 3);
+  }
+
+  static double tailcaller(int rec, int a, int b, Integer c, Double d, int e) {
+    if (e != 3) System.exit(1);
+    if (rec==0) return a + 2*b + 3*c + 4*d;
+    else
+      return goto tailcallee(rec-1, a+1, b, d, c);
+  }
+
+  public static void main (String args[]) {
+    double result = 0.0;
+    int expected = 100000;
+    int a = 0;
+    int b = 1;
+    int c = 2;