diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp index bf98f23fae581..2e4eb1da55ac7 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp @@ -8360,7 +8360,6 @@ SDValue LoongArchTargetLowering::LowerFormalArguments( SelectionDAG &DAG, SmallVectorImpl &InVals) const { MachineFunction &MF = DAG.getMachineFunction(); - auto *LoongArchFI = MF.getInfo(); switch (CallConv) { default: @@ -8425,8 +8424,6 @@ SDValue LoongArchTargetLowering::LowerFormalArguments( continue; } InVals.push_back(ArgValue); - if (Ins[InsIdx].Flags.isByVal()) - LoongArchFI->addIncomingByValArgs(ArgValue); } if (IsVarArg) { @@ -8435,6 +8432,7 @@ SDValue LoongArchTargetLowering::LowerFormalArguments( const TargetRegisterClass *RC = &LoongArch::GPRRegClass; MachineFrameInfo &MFI = MF.getFrameInfo(); MachineRegisterInfo &RegInfo = MF.getRegInfo(); + auto *LoongArchFI = MF.getInfo(); // Offset of the first variable argument from stack pointer, and size of // the vararg save area. For now, the varargs save area is either zero or @@ -8484,8 +8482,6 @@ SDValue LoongArchTargetLowering::LowerFormalArguments( LoongArchFI->setVarArgsSaveSize(VarArgsSaveSize); } - LoongArchFI->setArgumentStackSize(CCInfo.getStackSize()); - // All stores are grouped in one node to allow the matching between // the size of Ins and InVals. This only happens for vararg functions. if (!OutChains.empty()) { @@ -8542,11 +8538,9 @@ bool LoongArchTargetLowering::isEligibleForTailCallOptimization( auto &Outs = CLI.Outs; auto &Caller = MF.getFunction(); auto CallerCC = Caller.getCallingConv(); - auto *LoongArchFI = MF.getInfo(); - // If the stack arguments for this call do not fit into our own save area then - // the call cannot be made tail. - if (CCInfo.getStackSize() > LoongArchFI->getArgumentStackSize()) + // Do not tail call opt if the stack is used to pass parameters. 
+ if (CCInfo.getStackSize() != 0) return false; // Do not tail call opt if any parameters need to be passed indirectly. @@ -8558,19 +8552,13 @@ bool LoongArchTargetLowering::isEligibleForTailCallOptimization( // semantics. auto IsCallerStructRet = Caller.hasStructRetAttr(); auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet(); - if (IsCallerStructRet != IsCalleeStructRet) + if (IsCallerStructRet || IsCalleeStructRet) return false; - // Do not tail call opt if caller's and callee's byval arguments do not match. - for (unsigned i = 0, j = 0; i < Outs.size(); ++i) { - if (!Outs[i].Flags.isByVal()) - continue; - if (j >= LoongArchFI->getIncomingByValArgsSize()) - return false; - if (LoongArchFI->getIncomingByValArgs(j).getValueType() != Outs[i].ArgVT) + // Do not tail call opt if either the callee or caller has a byval argument. + for (auto &Arg : Outs) + if (Arg.Flags.isByVal()) return false; - ++j; - } // The callee has to preserve all registers the caller needs to preserve. const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo(); @@ -8580,14 +8568,6 @@ bool LoongArchTargetLowering::isEligibleForTailCallOptimization( if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved)) return false; } - - // If the callee takes no arguments then go on to check the results of the - // call. - const MachineRegisterInfo &MRI = MF.getRegInfo(); - const SmallVectorImpl &OutVals = CLI.OutVals; - if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals)) - return false; - return true; } @@ -8615,7 +8595,6 @@ LoongArchTargetLowering::LowerCall(CallLoweringInfo &CLI, bool &IsTailCall = CLI.IsTailCall; MachineFunction &MF = DAG.getMachineFunction(); - auto *LoongArchFI = MF.getInfo(); // Analyze the operands of the call, assigning locations to each operand. SmallVector ArgLocs; @@ -8641,7 +8620,7 @@ LoongArchTargetLowering::LowerCall(CallLoweringInfo &CLI, // Create local copies for byval args. 
SmallVector ByValArgs; - for (unsigned i = 0, j = 0, e = Outs.size(); i != e; ++i) { + for (unsigned i = 0, e = Outs.size(); i != e; ++i) { ISD::ArgFlagsTy Flags = Outs[i].Flags; if (!Flags.isByVal()) continue; @@ -8649,39 +8628,22 @@ LoongArchTargetLowering::LowerCall(CallLoweringInfo &CLI, SDValue Arg = OutVals[i]; unsigned Size = Flags.getByValSize(); Align Alignment = Flags.getNonZeroByValAlign(); + + int FI = + MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false); + SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); SDValue SizeNode = DAG.getConstant(Size, DL, GRLenVT); - SDValue Dst; - if (IsTailCall) { - SDValue CallerArg = LoongArchFI->getIncomingByValArgs(j++); - if (isa(Arg) || isa(Arg) || - isa(Arg)) - Dst = CallerArg; - } else { - int FI = - MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false); - Dst = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); - } - if (Dst) { - Chain = - DAG.getMemcpy(Chain, DL, Dst, Arg, SizeNode, Alignment, - /*IsVolatile=*/false, - /*AlwaysInline=*/false, /*CI=*/nullptr, std::nullopt, - MachinePointerInfo(), MachinePointerInfo()); - ByValArgs.push_back(Dst); - } + Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment, + /*IsVolatile=*/false, + /*AlwaysInline=*/false, /*CI=*/nullptr, std::nullopt, + MachinePointerInfo(), MachinePointerInfo()); + ByValArgs.push_back(FIPtr); } if (!IsTailCall) Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL); - // During a tail call, stores to the argument area must happen after all of - // the function's incoming arguments have been loaded because they may alias. - // This is done by folding in a TokenFactor from LowerFormalArguments, but - // there's no point in doing so repeatedly so this tracks whether that's - // happened yet. - bool AfterFormalArgLoads = false; - // Copy argument values to their designated locations. 
SmallVector> RegsToPass; SmallVector MemOpChains; @@ -8776,44 +8738,27 @@ LoongArchTargetLowering::LowerCall(CallLoweringInfo &CLI, } // Use local copy if it is a byval arg. - if (Flags.isByVal()) { - if (!IsTailCall || (isa(ArgValue) || - isa(ArgValue) || - isa(ArgValue))) - ArgValue = ByValArgs[j++]; - } + if (Flags.isByVal()) + ArgValue = ByValArgs[j++]; if (VA.isRegLoc()) { // Queue up the argument copies and emit them at the end. RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue)); } else { assert(VA.isMemLoc() && "Argument not register or memory"); - SDValue DstAddr; - MachinePointerInfo DstInfo; - int32_t Offset = VA.getLocMemOffset(); + assert(!IsTailCall && "Tail call not allowed if stack is used " + "for passing parameters"); // Work out the address of the stack slot. if (!StackPtr.getNode()) StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT); - - if (IsTailCall) { - unsigned OpSize = divideCeil(VA.getValVT().getSizeInBits(), 8); - int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true); - DstAddr = DAG.getFrameIndex(FI, PtrVT); - DstInfo = MachinePointerInfo::getFixedStack(MF, FI); - if (!AfterFormalArgLoads) { - Chain = DAG.getStackArgumentTokenFactor(Chain); - AfterFormalArgLoads = true; - } - } else { - SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL); - DstAddr = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff); - DstInfo = MachinePointerInfo::getStack(MF, Offset); - } + SDValue Address = + DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, + DAG.getIntPtrConstant(VA.getLocMemOffset(), DL)); // Emit the store. 
MemOpChains.push_back( - DAG.getStore(Chain, DL, ArgValue, DstAddr, DstInfo)); + DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo())); } } diff --git a/llvm/lib/Target/LoongArch/LoongArchMachineFunctionInfo.h b/llvm/lib/Target/LoongArch/LoongArchMachineFunctionInfo.h index 4159b97bcf598..904985c189dba 100644 --- a/llvm/lib/Target/LoongArch/LoongArchMachineFunctionInfo.h +++ b/llvm/lib/Target/LoongArch/LoongArchMachineFunctionInfo.h @@ -32,17 +32,10 @@ class LoongArchMachineFunctionInfo : public MachineFunctionInfo { /// Size of stack frame to save callee saved registers unsigned CalleeSavedStackSize = 0; - /// Amount of bytes on stack consumed by the arguments being passed on - /// the stack - unsigned ArgumentStackSize = 0; - /// FrameIndex of the spill slot when there is no scavenged register in /// insertIndirectBranch. int BranchRelaxationSpillFrameIndex = -1; - /// Incoming ByVal arguments - SmallVector IncomingByValArgs; - /// Registers that have been sign extended from i32. 
SmallVector SExt32Registers; @@ -70,9 +63,6 @@ class LoongArchMachineFunctionInfo : public MachineFunctionInfo { unsigned getCalleeSavedStackSize() const { return CalleeSavedStackSize; } void setCalleeSavedStackSize(unsigned Size) { CalleeSavedStackSize = Size; } - unsigned getArgumentStackSize() const { return ArgumentStackSize; } - void setArgumentStackSize(unsigned size) { ArgumentStackSize = size; } - int getBranchRelaxationSpillFrameIndex() { return BranchRelaxationSpillFrameIndex; } @@ -80,10 +70,6 @@ class LoongArchMachineFunctionInfo : public MachineFunctionInfo { BranchRelaxationSpillFrameIndex = Index; } - void addIncomingByValArgs(SDValue Val) { IncomingByValArgs.push_back(Val); } - SDValue getIncomingByValArgs(int Idx) { return IncomingByValArgs[Idx]; } - unsigned getIncomingByValArgsSize() const { return IncomingByValArgs.size(); } - void addSExt32Register(Register Reg) { SExt32Registers.push_back(Reg); } bool isSExt32Register(Register Reg) const { diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index fed7faa309579..c2a81b4101061 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -24310,7 +24310,6 @@ SDValue RISCVTargetLowering::LowerFormalArguments( SelectionDAG &DAG, SmallVectorImpl &InVals) const { MachineFunction &MF = DAG.getMachineFunction(); - RISCVMachineFunctionInfo *RVFI = MF.getInfo(); switch (CallConv) { default: @@ -24438,9 +24437,6 @@ SDValue RISCVTargetLowering::LowerFormalArguments( continue; } InVals.push_back(ArgValue); - - if (Ins[InsIdx].Flags.isByVal()) - RVFI->addIncomingByValArgs(ArgValue); } if (any_of(ArgLocs, @@ -24453,6 +24449,7 @@ SDValue RISCVTargetLowering::LowerFormalArguments( const TargetRegisterClass *RC = &RISCV::GPRRegClass; MachineFrameInfo &MFI = MF.getFrameInfo(); MachineRegisterInfo &RegInfo = MF.getRegInfo(); + RISCVMachineFunctionInfo *RVFI = MF.getInfo(); // Size of the vararg save area. 
For now, the varargs save area is either // zero or large enough to hold a0-a7. @@ -24470,7 +24467,7 @@ SDValue RISCVTargetLowering::LowerFormalArguments( // If saving an odd number of registers then create an extra stack slot to // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures - // offsets to even-numbered registers remain 2*XLEN-aligned. + // offsets to even-numbered registers remain 2*XLEN-aligned. if (Idx % 2) { MFI.CreateFixedObject( XLenInBytes, VaArgOffset - static_cast(XLenInBytes), true); @@ -24500,12 +24497,9 @@ SDValue RISCVTargetLowering::LowerFormalArguments( RVFI->setVarArgsSaveSize(VarArgsSaveSize); } - RVFI->setArgumentStackSize(CCInfo.getStackSize()); - // All stores are grouped in one node to allow the matching between - // the size of Ins and InVals. + // the size of Ins and InVals. This only happens for vararg functions. if (!OutChains.empty()) { - assert(IsVarArg && "Only variadic functions should have OutChains"); OutChains.push_back(Chain); Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains); } @@ -24524,7 +24518,6 @@ bool RISCVTargetLowering::isEligibleForTailCallOptimization( auto &Outs = CLI.Outs; auto &Caller = MF.getFunction(); auto CallerCC = Caller.getCallingConv(); - auto *RVFI = MF.getInfo(); // Exception-handling functions need a special set of instructions to // indicate a return to the hardware. Tail-calling another function would @@ -24534,29 +24527,29 @@ bool RISCVTargetLowering::isEligibleForTailCallOptimization( if (Caller.hasFnAttribute("interrupt")) return false; - // If the stack arguments for this call do not fit into our own save area then - // the call cannot be made tail. - if (CCInfo.getStackSize() > RVFI->getArgumentStackSize()) + // Do not tail call opt if the stack is used to pass parameters. + if (CCInfo.getStackSize() != 0) return false; + // Do not tail call opt if any parameters need to be passed indirectly. 
+ // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are + // passed indirectly. So the address of the value will be passed in a + // register, or if not available, then the address is put on the stack. In + // order to pass indirectly, space on the stack often needs to be allocated + // in order to store the value. In this case the CCInfo.getStackSize() + // != 0 check is not enough and we need to check if any CCValAssign ArgLocs + // are passed CCValAssign::Indirect. + for (auto &VA : ArgLocs) + if (VA.getLocInfo() == CCValAssign::Indirect) + return false; + // Do not tail call opt if either caller or callee uses struct return // semantics. auto IsCallerStructRet = Caller.hasStructRetAttr(); auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet(); - if (IsCallerStructRet != IsCalleeStructRet) + if (IsCallerStructRet || IsCalleeStructRet) return false; - // Do not tail call opt if caller's and callee's byval arguments do not match. - for (unsigned i = 0, j = 0, e = Outs.size(); i != e; ++i) { - if (!Outs[i].Flags.isByVal()) - continue; - if (j >= RVFI->getIncomingByValArgsSize()) - return false; - if (RVFI->getIncomingByValArgs(j).getValueType() != Outs[i].ArgVT) - return false; - ++j; - } - // The callee has to preserve all registers the caller needs to preserve. const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo(); const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC); @@ -24566,12 +24559,12 @@ bool RISCVTargetLowering::isEligibleForTailCallOptimization( return false; } - // If the callee takes no arguments then go on to check the results of the - // call. - const MachineRegisterInfo &MRI = MF.getRegInfo(); - const SmallVectorImpl &OutVals = CLI.OutVals; - if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals)) - return false; + // Byval parameters hand the function a pointer directly into the stack area + // we want to reuse during a tail call. 
Working around this *is* possible + // but less efficient and uglier in LowerCall. + for (auto &Arg : Outs) + if (Arg.Flags.isByVal()) + return false; return true; } @@ -24600,7 +24593,6 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI, const CallBase *CB = CLI.CB; MachineFunction &MF = DAG.getMachineFunction(); - RISCVMachineFunctionInfo *RVFI = MF.getInfo(); MachineFunction::CallSiteInfo CSInfo; // Set type id for call site info. @@ -24634,7 +24626,7 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI, // Create local copies for byval args SmallVector ByValArgs; - for (unsigned i = 0, j = 0, e = Outs.size(); i != e; ++i) { + for (unsigned i = 0, e = Outs.size(); i != e; ++i) { ISD::ArgFlagsTy Flags = Outs[i].Flags; if (!Flags.isByVal()) continue; @@ -24643,39 +24635,21 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI, unsigned Size = Flags.getByValSize(); Align Alignment = Flags.getNonZeroByValAlign(); + int FI = + MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false); + SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT); - SDValue Dst; - if (IsTailCall) { - SDValue CallerArg = RVFI->getIncomingByValArgs(j++); - if (isa(Arg) || isa(Arg) || - isa(Arg)) - Dst = CallerArg; - } else { - int FI = - MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false); - Dst = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); - } - if (Dst) { - Chain = - DAG.getMemcpy(Chain, DL, Dst, Arg, SizeNode, Alignment, - /*IsVolatile=*/false, - /*AlwaysInline=*/false, /*CI=*/nullptr, std::nullopt, - MachinePointerInfo(), MachinePointerInfo()); - ByValArgs.push_back(Dst); - } + Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment, + /*IsVolatile=*/false, + /*AlwaysInline=*/false, /*CI*/ nullptr, IsTailCall, + MachinePointerInfo(), MachinePointerInfo()); + ByValArgs.push_back(FIPtr); } if (!IsTailCall) Chain = 
DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL); - // During a tail call, stores to the argument area must happen after all of - // the function's incoming arguments have been loaded because they may alias. - // This is done by folding in a TokenFactor from LowerFormalArguments, but - // there's no point in doing so repeatedly so this tracks whether that's - // happened yet. - bool AfterFormalArgLoads = false; - // Copy argument values to their designated locations. SmallVector, 8> RegsToPass; SmallVector MemOpChains; @@ -24777,12 +24751,8 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI, } // Use local copy if it is a byval arg. - if (Flags.isByVal()) { - if (!IsTailCall || (isa(ArgValue) || - isa(ArgValue) || - isa(ArgValue))) - ArgValue = ByValArgs[j++]; - } + if (Flags.isByVal()) + ArgValue = ByValArgs[j++]; if (VA.isRegLoc()) { // Queue up the argument copies and emit them at the end. @@ -24793,32 +24763,20 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI, CSInfo.ArgRegPairs.emplace_back(VA.getLocReg(), i); } else { assert(VA.isMemLoc() && "Argument not register or memory"); - SDValue DstAddr; - MachinePointerInfo DstInfo; - int32_t Offset = VA.getLocMemOffset(); + assert(!IsTailCall && "Tail call not allowed if stack is used " + "for passing parameters"); // Work out the address of the stack slot. 
if (!StackPtr.getNode()) StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT); - - if (IsTailCall) { - unsigned OpSize = divideCeil(VA.getValVT().getSizeInBits(), 8); - int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true); - DstAddr = DAG.getFrameIndex(FI, PtrVT); - DstInfo = MachinePointerInfo::getFixedStack(MF, FI); - if (!AfterFormalArgLoads) { - Chain = DAG.getStackArgumentTokenFactor(Chain); - AfterFormalArgLoads = true; - } - } else { - SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL); - DstAddr = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff); - DstInfo = MachinePointerInfo::getStack(MF, Offset); - } + SDValue Address = + DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, + DAG.getIntPtrConstant(VA.getLocMemOffset(), DL)); // Emit the store. MemOpChains.push_back( - DAG.getStore(Chain, DL, ArgValue, DstAddr, DstInfo)); + DAG.getStore(Chain, DL, ArgValue, Address, + MachinePointerInfo::getStack(MF, VA.getLocMemOffset()))); } } diff --git a/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h b/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h index e23f162a317ef..f9be80feae211 100644 --- a/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h +++ b/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h @@ -65,14 +65,6 @@ class RISCVMachineFunctionInfo : public MachineFunctionInfo { uint64_t RVVPadding = 0; /// Size of stack frame to save callee saved registers unsigned CalleeSavedStackSize = 0; - - /// amount of bytes on stack consumed by the arguments being passed on the - /// stack - unsigned ArgumentStackSize = 0; - - /// Incoming ByVal arguments - SmallVector IncomingByValArgs; - /// Is there any vector argument or return? 
bool IsVectorCall = false; @@ -150,13 +142,6 @@ class RISCVMachineFunctionInfo : public MachineFunctionInfo { unsigned getCalleeSavedStackSize() const { return CalleeSavedStackSize; } void setCalleeSavedStackSize(unsigned Size) { CalleeSavedStackSize = Size; } - unsigned getArgumentStackSize() const { return ArgumentStackSize; } - void setArgumentStackSize(unsigned size) { ArgumentStackSize = size; } - - void addIncomingByValArgs(SDValue Val) { IncomingByValArgs.push_back(Val); } - SDValue getIncomingByValArgs(unsigned Idx) { return IncomingByValArgs[Idx]; } - unsigned getIncomingByValArgsSize() const { return IncomingByValArgs.size(); } - enum class PushPopKind { None = 0, StdExtZcmp, VendorXqccmp }; PushPopKind getPushPopKind(const MachineFunction &MF) const; diff --git a/llvm/test/CodeGen/LoongArch/issue187832.ll b/llvm/test/CodeGen/LoongArch/issue187832.ll deleted file mode 100644 index b483a7640e171..0000000000000 --- a/llvm/test/CodeGen/LoongArch/issue187832.ll +++ /dev/null @@ -1,48 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc -mtriple=loongarch32 %s -o - | FileCheck %s --check-prefix=LA32 -; RUN: llc -mtriple=loongarch64 %s -o - | FileCheck %s --check-prefix=LA64 - -%Box = type { i32, i32, i32, i8, [3 x i8], i32, i8, [1 x i8], i16, i16, i8, [5 x i8], { i64, ptr }, { i64, ptr }, { i64, ptr } } - -define void @test(ptr byval(%Box) %0) nounwind { -; LA32-LABEL: test: -; LA32: # %bb.0: -; LA32-NEXT: addi.w $sp, $sp, -112 -; LA32-NEXT: st.w $ra, $sp, 108 # 4-byte Folded Spill -; LA32-NEXT: st.w $fp, $sp, 104 # 4-byte Folded Spill -; LA32-NEXT: addi.w $fp, $sp, 24 -; LA32-NEXT: ori $a2, $zero, 80 -; LA32-NEXT: move $a0, $fp -; LA32-NEXT: move $a1, $zero -; LA32-NEXT: bl memcpy -; LA32-NEXT: st.w $zero, $sp, 8 -; LA32-NEXT: st.w $zero, $sp, 4 -; LA32-NEXT: st.w $zero, $sp, 0 -; LA32-NEXT: move $a0, $zero -; LA32-NEXT: move $a1, $zero -; LA32-NEXT: move $a2, $zero -; LA32-NEXT: move $a3, 
$fp -; LA32-NEXT: move $a4, $zero -; LA32-NEXT: move $a5, $zero -; LA32-NEXT: move $a6, $zero -; LA32-NEXT: move $a7, $zero -; LA32-NEXT: jirl $ra, $zero, 0 -; LA32-NEXT: ld.w $fp, $sp, 104 # 4-byte Folded Reload -; LA32-NEXT: ld.w $ra, $sp, 108 # 4-byte Folded Reload -; LA32-NEXT: addi.w $sp, $sp, 112 -; LA32-NEXT: ret -; -; LA64-LABEL: test: -; LA64: # %bb.0: -; LA64-NEXT: movgr2fr.d $fa0, $zero -; LA64-NEXT: move $a0, $zero -; LA64-NEXT: move $a1, $zero -; LA64-NEXT: move $a2, $zero -; LA64-NEXT: move $a3, $zero -; LA64-NEXT: move $a4, $zero -; LA64-NEXT: move $a5, $zero -; LA64-NEXT: move $a6, $zero -; LA64-NEXT: jr $a0 - tail call void null(ptr null, double 0.000000e+00, ptr byval(%Box) null, { i64, ptr } zeroinitializer, i32 0, i64 0, i1 false) - ret void -} diff --git a/llvm/test/CodeGen/LoongArch/musttail.ll b/llvm/test/CodeGen/LoongArch/musttail.ll deleted file mode 100644 index 23369ddd81fca..0000000000000 --- a/llvm/test/CodeGen/LoongArch/musttail.ll +++ /dev/null @@ -1,567 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc -mtriple=loongarch32 %s -o - | FileCheck %s --check-prefix=LA32 -; RUN: llc -mtriple=loongarch64 %s -o - | FileCheck %s --check-prefix=LA64 - -declare i32 @many_args_callee(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i32 %8, i32 %9) - -define i32 @many_args_tail(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i32 %8, i32 %9) { -; LA32-LABEL: many_args_tail: -; LA32: # %bb.0: -; LA32-NEXT: ori $a0, $zero, 8 -; LA32-NEXT: st.w $a0, $sp, 0 -; LA32-NEXT: ori $a0, $zero, 9 -; LA32-NEXT: ori $a1, $zero, 1 -; LA32-NEXT: ori $a2, $zero, 2 -; LA32-NEXT: ori $a3, $zero, 3 -; LA32-NEXT: ori $a4, $zero, 4 -; LA32-NEXT: ori $a5, $zero, 5 -; LA32-NEXT: ori $a6, $zero, 6 -; LA32-NEXT: ori $a7, $zero, 7 -; LA32-NEXT: st.w $a0, $sp, 4 -; LA32-NEXT: move $a0, $zero -; LA32-NEXT: b many_args_callee -; -; LA64-LABEL: many_args_tail: -; LA64: # %bb.0: 
-; LA64-NEXT: ori $a0, $zero, 8 -; LA64-NEXT: st.d $a0, $sp, 0 -; LA64-NEXT: ori $a0, $zero, 9 -; LA64-NEXT: ori $a1, $zero, 1 -; LA64-NEXT: ori $a2, $zero, 2 -; LA64-NEXT: ori $a3, $zero, 3 -; LA64-NEXT: ori $a4, $zero, 4 -; LA64-NEXT: ori $a5, $zero, 5 -; LA64-NEXT: ori $a6, $zero, 6 -; LA64-NEXT: ori $a7, $zero, 7 -; LA64-NEXT: st.d $a0, $sp, 8 -; LA64-NEXT: move $a0, $zero -; LA64-NEXT: pcaddu18i $t8, %call36(many_args_callee) -; LA64-NEXT: jr $t8 - %ret = tail call i32 @many_args_callee(i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9) - ret i32 %ret -} - -define i32 @many_args_musttail(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i32 %8, i32 %9) { -; LA32-LABEL: many_args_musttail: -; LA32: # %bb.0: -; LA32-NEXT: ori $a0, $zero, 8 -; LA32-NEXT: st.w $a0, $sp, 0 -; LA32-NEXT: ori $a0, $zero, 9 -; LA32-NEXT: ori $a1, $zero, 1 -; LA32-NEXT: ori $a2, $zero, 2 -; LA32-NEXT: ori $a3, $zero, 3 -; LA32-NEXT: ori $a4, $zero, 4 -; LA32-NEXT: ori $a5, $zero, 5 -; LA32-NEXT: ori $a6, $zero, 6 -; LA32-NEXT: ori $a7, $zero, 7 -; LA32-NEXT: st.w $a0, $sp, 4 -; LA32-NEXT: move $a0, $zero -; LA32-NEXT: b many_args_callee -; -; LA64-LABEL: many_args_musttail: -; LA64: # %bb.0: -; LA64-NEXT: ori $a0, $zero, 8 -; LA64-NEXT: st.d $a0, $sp, 0 -; LA64-NEXT: ori $a0, $zero, 9 -; LA64-NEXT: ori $a1, $zero, 1 -; LA64-NEXT: ori $a2, $zero, 2 -; LA64-NEXT: ori $a3, $zero, 3 -; LA64-NEXT: ori $a4, $zero, 4 -; LA64-NEXT: ori $a5, $zero, 5 -; LA64-NEXT: ori $a6, $zero, 6 -; LA64-NEXT: ori $a7, $zero, 7 -; LA64-NEXT: st.d $a0, $sp, 8 -; LA64-NEXT: move $a0, $zero -; LA64-NEXT: pcaddu18i $t8, %call36(many_args_callee) -; LA64-NEXT: jr $t8 - %ret = musttail call i32 @many_args_callee(i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9) - ret i32 %ret -} - -; This function has more arguments than it's tail-callee. 
This isn't valid for -; the musttail attribute, but can still be tail-called as a non-guaranteed -; optimisation, because the outgoing arguments to @many_args_callee fit in the -; stack space allocated by the caller of @more_args_tail. -define i32 @more_args_tail(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i32 %8, i32 %9) { -; LA32-LABEL: more_args_tail: -; LA32: # %bb.0: -; LA32-NEXT: ori $a0, $zero, 8 -; LA32-NEXT: st.w $a0, $sp, 0 -; LA32-NEXT: ori $a0, $zero, 9 -; LA32-NEXT: ori $a1, $zero, 1 -; LA32-NEXT: ori $a2, $zero, 2 -; LA32-NEXT: ori $a3, $zero, 3 -; LA32-NEXT: ori $a4, $zero, 4 -; LA32-NEXT: ori $a5, $zero, 5 -; LA32-NEXT: ori $a6, $zero, 6 -; LA32-NEXT: ori $a7, $zero, 7 -; LA32-NEXT: st.w $a0, $sp, 4 -; LA32-NEXT: move $a0, $zero -; LA32-NEXT: b many_args_callee -; -; LA64-LABEL: more_args_tail: -; LA64: # %bb.0: -; LA64-NEXT: ori $a0, $zero, 8 -; LA64-NEXT: st.d $a0, $sp, 0 -; LA64-NEXT: ori $a0, $zero, 9 -; LA64-NEXT: ori $a1, $zero, 1 -; LA64-NEXT: ori $a2, $zero, 2 -; LA64-NEXT: ori $a3, $zero, 3 -; LA64-NEXT: ori $a4, $zero, 4 -; LA64-NEXT: ori $a5, $zero, 5 -; LA64-NEXT: ori $a6, $zero, 6 -; LA64-NEXT: ori $a7, $zero, 7 -; LA64-NEXT: st.d $a0, $sp, 8 -; LA64-NEXT: move $a0, $zero -; LA64-NEXT: pcaddu18i $t8, %call36(many_args_callee) -; LA64-NEXT: jr $t8 - %ret = tail call i32 @many_args_callee(i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9) - ret i32 %ret -} - -; Again, this isn't valid for musttail, but can be tail-called in practice -; because the stack size if the same. 
-define i32 @different_args_tail_32bit(i64 %0, i64 %1, i64 %2, i64 %3, i64 %4) nounwind { -; LA32-LABEL: different_args_tail_32bit: -; LA32: # %bb.0: -; LA32-NEXT: ori $a0, $zero, 8 -; LA32-NEXT: st.w $a0, $sp, 0 -; LA32-NEXT: ori $a0, $zero, 9 -; LA32-NEXT: ori $a1, $zero, 1 -; LA32-NEXT: ori $a2, $zero, 2 -; LA32-NEXT: ori $a3, $zero, 3 -; LA32-NEXT: ori $a4, $zero, 4 -; LA32-NEXT: ori $a5, $zero, 5 -; LA32-NEXT: ori $a6, $zero, 6 -; LA32-NEXT: ori $a7, $zero, 7 -; LA32-NEXT: st.w $a0, $sp, 4 -; LA32-NEXT: move $a0, $zero -; LA32-NEXT: b many_args_callee -; -; LA64-LABEL: different_args_tail_32bit: -; LA64: # %bb.0: -; LA64-NEXT: addi.d $sp, $sp, -32 -; LA64-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill -; LA64-NEXT: ori $a0, $zero, 9 -; LA64-NEXT: st.d $a0, $sp, 8 -; LA64-NEXT: ori $a0, $zero, 8 -; LA64-NEXT: ori $a1, $zero, 1 -; LA64-NEXT: ori $a2, $zero, 2 -; LA64-NEXT: ori $a3, $zero, 3 -; LA64-NEXT: ori $a4, $zero, 4 -; LA64-NEXT: ori $a5, $zero, 5 -; LA64-NEXT: ori $a6, $zero, 6 -; LA64-NEXT: ori $a7, $zero, 7 -; LA64-NEXT: st.d $a0, $sp, 0 -; LA64-NEXT: move $a0, $zero -; LA64-NEXT: pcaddu18i $ra, %call36(many_args_callee) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload -; LA64-NEXT: addi.d $sp, $sp, 32 -; LA64-NEXT: ret - %ret = tail call i32 @many_args_callee(i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9) - ret i32 %ret -} - -define i32 @different_args_tail_64bit(i128 %0, i128 %1, i128 %2, i128 %3, i128 %4) nounwind { -; LA32-LABEL: different_args_tail_64bit: -; LA32: # %bb.0: -; LA32-NEXT: addi.w $sp, $sp, -16 -; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -; LA32-NEXT: ori $a0, $zero, 9 -; LA32-NEXT: st.w $a0, $sp, 4 -; LA32-NEXT: ori $a0, $zero, 8 -; LA32-NEXT: ori $a1, $zero, 1 -; LA32-NEXT: ori $a2, $zero, 2 -; LA32-NEXT: ori $a3, $zero, 3 -; LA32-NEXT: ori $a4, $zero, 4 -; LA32-NEXT: ori $a5, $zero, 5 -; LA32-NEXT: ori $a6, $zero, 6 -; LA32-NEXT: ori $a7, $zero, 7 -; LA32-NEXT: 
st.w $a0, $sp, 0 -; LA32-NEXT: move $a0, $zero -; LA32-NEXT: bl many_args_callee -; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -; LA32-NEXT: addi.w $sp, $sp, 16 -; LA32-NEXT: ret -; -; LA64-LABEL: different_args_tail_64bit: -; LA64: # %bb.0: -; LA64-NEXT: ori $a0, $zero, 8 -; LA64-NEXT: st.d $a0, $sp, 0 -; LA64-NEXT: ori $a0, $zero, 9 -; LA64-NEXT: ori $a1, $zero, 1 -; LA64-NEXT: ori $a2, $zero, 2 -; LA64-NEXT: ori $a3, $zero, 3 -; LA64-NEXT: ori $a4, $zero, 4 -; LA64-NEXT: ori $a5, $zero, 5 -; LA64-NEXT: ori $a6, $zero, 6 -; LA64-NEXT: ori $a7, $zero, 7 -; LA64-NEXT: st.d $a0, $sp, 8 -; LA64-NEXT: move $a0, $zero -; LA64-NEXT: pcaddu18i $t8, %call36(many_args_callee) -; LA64-NEXT: jr $t8 - %ret = tail call i32 @many_args_callee(i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9) - ret i32 %ret -} - -; Here, the caller requires less stack space for it's arguments than the -; callee, so it would not ba valid to do a tail-call. -define i32 @fewer_args_tail(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4) nounwind { -; LA32-LABEL: fewer_args_tail: -; LA32: # %bb.0: -; LA32-NEXT: addi.w $sp, $sp, -16 -; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -; LA32-NEXT: ori $a0, $zero, 9 -; LA32-NEXT: st.w $a0, $sp, 4 -; LA32-NEXT: ori $a0, $zero, 8 -; LA32-NEXT: ori $a1, $zero, 1 -; LA32-NEXT: ori $a2, $zero, 2 -; LA32-NEXT: ori $a3, $zero, 3 -; LA32-NEXT: ori $a4, $zero, 4 -; LA32-NEXT: ori $a5, $zero, 5 -; LA32-NEXT: ori $a6, $zero, 6 -; LA32-NEXT: ori $a7, $zero, 7 -; LA32-NEXT: st.w $a0, $sp, 0 -; LA32-NEXT: move $a0, $zero -; LA32-NEXT: bl many_args_callee -; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -; LA32-NEXT: addi.w $sp, $sp, 16 -; LA32-NEXT: ret -; -; LA64-LABEL: fewer_args_tail: -; LA64: # %bb.0: -; LA64-NEXT: addi.d $sp, $sp, -32 -; LA64-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill -; LA64-NEXT: ori $a0, $zero, 9 -; LA64-NEXT: st.d $a0, $sp, 8 -; LA64-NEXT: ori $a0, $zero, 8 -; LA64-NEXT: ori $a1, $zero, 1 -; LA64-NEXT: ori $a2, 
$zero, 2 -; LA64-NEXT: ori $a3, $zero, 3 -; LA64-NEXT: ori $a4, $zero, 4 -; LA64-NEXT: ori $a5, $zero, 5 -; LA64-NEXT: ori $a6, $zero, 6 -; LA64-NEXT: ori $a7, $zero, 7 -; LA64-NEXT: st.d $a0, $sp, 0 -; LA64-NEXT: move $a0, $zero -; LA64-NEXT: pcaddu18i $ra, %call36(many_args_callee) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload -; LA64-NEXT: addi.d $sp, $sp, 32 -; LA64-NEXT: ret - %ret = tail call i32 @many_args_callee(i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9) - ret i32 %ret -} - -declare void @foo(i32, i32, i32, i32, i32, i32, i32, i32, i32) - -define void @bar(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i32 %8) nounwind { -; LA32-LABEL: bar: -; LA32: # %bb.0: # %entry -; LA32-NEXT: addi.w $sp, $sp, -48 -; LA32-NEXT: st.w $ra, $sp, 44 # 4-byte Folded Spill -; LA32-NEXT: st.w $fp, $sp, 40 # 4-byte Folded Spill -; LA32-NEXT: st.w $s0, $sp, 36 # 4-byte Folded Spill -; LA32-NEXT: st.w $s1, $sp, 32 # 4-byte Folded Spill -; LA32-NEXT: st.w $s2, $sp, 28 # 4-byte Folded Spill -; LA32-NEXT: st.w $s3, $sp, 24 # 4-byte Folded Spill -; LA32-NEXT: st.w $s4, $sp, 20 # 4-byte Folded Spill -; LA32-NEXT: st.w $s5, $sp, 16 # 4-byte Folded Spill -; LA32-NEXT: st.w $s6, $sp, 12 # 4-byte Folded Spill -; LA32-NEXT: move $fp, $a7 -; LA32-NEXT: move $s0, $a6 -; LA32-NEXT: move $s1, $a5 -; LA32-NEXT: move $s2, $a4 -; LA32-NEXT: move $s3, $a3 -; LA32-NEXT: move $s4, $a2 -; LA32-NEXT: move $s5, $a1 -; LA32-NEXT: move $s6, $a0 -; LA32-NEXT: ori $a0, $zero, 1 -; LA32-NEXT: st.w $a0, $sp, 0 -; LA32-NEXT: move $a0, $s6 -; LA32-NEXT: bl foo -; LA32-NEXT: ori $a0, $zero, 2 -; LA32-NEXT: st.w $a0, $sp, 48 -; LA32-NEXT: move $a0, $s6 -; LA32-NEXT: move $a1, $s5 -; LA32-NEXT: move $a2, $s4 -; LA32-NEXT: move $a3, $s3 -; LA32-NEXT: move $a4, $s2 -; LA32-NEXT: move $a5, $s1 -; LA32-NEXT: move $a6, $s0 -; LA32-NEXT: move $a7, $fp -; LA32-NEXT: ld.w $s6, $sp, 12 # 4-byte Folded Reload -; LA32-NEXT: ld.w $s5, $sp, 16 
# 4-byte Folded Reload -; LA32-NEXT: ld.w $s4, $sp, 20 # 4-byte Folded Reload -; LA32-NEXT: ld.w $s3, $sp, 24 # 4-byte Folded Reload -; LA32-NEXT: ld.w $s2, $sp, 28 # 4-byte Folded Reload -; LA32-NEXT: ld.w $s1, $sp, 32 # 4-byte Folded Reload -; LA32-NEXT: ld.w $s0, $sp, 36 # 4-byte Folded Reload -; LA32-NEXT: ld.w $fp, $sp, 40 # 4-byte Folded Reload -; LA32-NEXT: ld.w $ra, $sp, 44 # 4-byte Folded Reload -; LA32-NEXT: addi.w $sp, $sp, 48 -; LA32-NEXT: b foo -; -; LA64-LABEL: bar: -; LA64: # %bb.0: # %entry -; LA64-NEXT: addi.d $sp, $sp, -96 -; LA64-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill -; LA64-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill -; LA64-NEXT: st.d $s0, $sp, 72 # 8-byte Folded Spill -; LA64-NEXT: st.d $s1, $sp, 64 # 8-byte Folded Spill -; LA64-NEXT: st.d $s2, $sp, 56 # 8-byte Folded Spill -; LA64-NEXT: st.d $s3, $sp, 48 # 8-byte Folded Spill -; LA64-NEXT: st.d $s4, $sp, 40 # 8-byte Folded Spill -; LA64-NEXT: st.d $s5, $sp, 32 # 8-byte Folded Spill -; LA64-NEXT: st.d $s6, $sp, 24 # 8-byte Folded Spill -; LA64-NEXT: move $fp, $a7 -; LA64-NEXT: move $s0, $a6 -; LA64-NEXT: move $s1, $a5 -; LA64-NEXT: move $s2, $a4 -; LA64-NEXT: move $s3, $a3 -; LA64-NEXT: move $s4, $a2 -; LA64-NEXT: move $s5, $a1 -; LA64-NEXT: move $s6, $a0 -; LA64-NEXT: ori $a0, $zero, 1 -; LA64-NEXT: st.d $a0, $sp, 0 -; LA64-NEXT: move $a0, $s6 -; LA64-NEXT: pcaddu18i $ra, %call36(foo) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: ori $a0, $zero, 2 -; LA64-NEXT: st.d $a0, $sp, 96 -; LA64-NEXT: move $a0, $s6 -; LA64-NEXT: move $a1, $s5 -; LA64-NEXT: move $a2, $s4 -; LA64-NEXT: move $a3, $s3 -; LA64-NEXT: move $a4, $s2 -; LA64-NEXT: move $a5, $s1 -; LA64-NEXT: move $a6, $s0 -; LA64-NEXT: move $a7, $fp -; LA64-NEXT: ld.d $s6, $sp, 24 # 8-byte Folded Reload -; LA64-NEXT: ld.d $s5, $sp, 32 # 8-byte Folded Reload -; LA64-NEXT: ld.d $s4, $sp, 40 # 8-byte Folded Reload -; LA64-NEXT: ld.d $s3, $sp, 48 # 8-byte Folded Reload -; LA64-NEXT: ld.d $s2, $sp, 56 # 8-byte Folded Reload -; LA64-NEXT: 
ld.d $s1, $sp, 64 # 8-byte Folded Reload -; LA64-NEXT: ld.d $s0, $sp, 72 # 8-byte Folded Reload -; LA64-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload -; LA64-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload -; LA64-NEXT: addi.d $sp, $sp, 96 -; LA64-NEXT: pcaddu18i $t8, %call36(foo) -; LA64-NEXT: jr $t8 -entry: - call void @foo(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i32 1) - musttail call void @foo(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i32 2) - ret void -} - -declare void @sret_callee(ptr sret({ double, double }) align 8) - -; Functions which return by sret can be tail-called because the incoming sret -; pointer gets passed through to the callee. -define void @sret_caller_tail(ptr sret({ double, double }) align 8 %result) { -; LA32-LABEL: sret_caller_tail: -; LA32: # %bb.0: # %entry -; LA32-NEXT: b sret_callee -; -; LA64-LABEL: sret_caller_tail: -; LA64: # %bb.0: # %entry -; LA64-NEXT: pcaddu18i $t8, %call36(sret_callee) -; LA64-NEXT: jr $t8 -entry: - tail call void @sret_callee(ptr sret({ double, double }) align 8 %result) - ret void -} - -define void @sret_caller_musttail(ptr sret({ double, double }) align 8 %result) { -; LA32-LABEL: sret_caller_musttail: -; LA32: # %bb.0: # %entry -; LA32-NEXT: b sret_callee -; -; LA64-LABEL: sret_caller_musttail: -; LA64: # %bb.0: # %entry -; LA64-NEXT: pcaddu18i $t8, %call36(sret_callee) -; LA64-NEXT: jr $t8 -entry: - musttail call void @sret_callee(ptr sret({ double, double }) align 8 %result) - ret void -} - -%twenty_bytes = type { [5 x i32] } -declare void @large_callee(%twenty_bytes* byval(%twenty_bytes) align 4) - -; Functions with byval parameters can be tail-called, because the value is -; actually passed in registers in the same way for the caller and callee. 
-define void @large_caller(%twenty_bytes* byval(%twenty_bytes) align 4 %a) { -; LA32-LABEL: large_caller: -; LA32: # %bb.0: # %entry -; LA32-NEXT: b large_callee -; -; LA64-LABEL: large_caller: -; LA64: # %bb.0: # %entry -; LA64-NEXT: pcaddu18i $t8, %call36(large_callee) -; LA64-NEXT: jr $t8 -entry: - musttail call void @large_callee(%twenty_bytes* byval(%twenty_bytes) align 4 %a) - ret void -} - -; As above, but with some inline asm to test that the arguments in r4 is -; re-loaded before the call. -define void @large_caller_check_regs(%twenty_bytes* byval(%twenty_bytes) align 4 %a) nounwind { -; LA32-LABEL: large_caller_check_regs: -; LA32: # %bb.0: # %entry -; LA32-NEXT: move $a1, $a0 -; LA32-NEXT: #APP -; LA32-NEXT: #NO_APP -; LA32-NEXT: move $a0, $a1 -; LA32-NEXT: b large_callee -; -; LA64-LABEL: large_caller_check_regs: -; LA64: # %bb.0: # %entry -; LA64-NEXT: move $a1, $a0 -; LA64-NEXT: #APP -; LA64-NEXT: #NO_APP -; LA64-NEXT: move $a0, $a1 -; LA64-NEXT: pcaddu18i $t8, %call36(large_callee) -; LA64-NEXT: jr $t8 -entry: - tail call void asm sideeffect "", "~{r4}"() - musttail call void @large_callee(%twenty_bytes* byval(%twenty_bytes) align 4 %a) - ret void -} - -; The IR for this one looks dodgy, because it has an alloca passed to a -; musttail function, but it is passed as a byval argument, so will be copied -; into the stack space allocated by @large_caller_new_value's caller, so is -; valid. 
-define void @large_caller_new_value(%twenty_bytes* byval(%twenty_bytes) align 4 %a) nounwind { -; LA32-LABEL: large_caller_new_value: -; LA32: # %bb.0: # %entry -; LA32-NEXT: addi.w $sp, $sp, -32 -; LA32-NEXT: st.w $zero, $sp, 12 -; LA32-NEXT: ori $a1, $zero, 1 -; LA32-NEXT: st.w $a1, $sp, 16 -; LA32-NEXT: ori $a2, $zero, 2 -; LA32-NEXT: st.w $a2, $sp, 20 -; LA32-NEXT: ori $a3, $zero, 3 -; LA32-NEXT: st.w $a3, $sp, 24 -; LA32-NEXT: ori $a4, $zero, 4 -; LA32-NEXT: st.w $a4, $sp, 28 -; LA32-NEXT: st.w $a4, $a0, 16 -; LA32-NEXT: st.w $a3, $a0, 12 -; LA32-NEXT: st.w $a2, $a0, 8 -; LA32-NEXT: st.w $a1, $a0, 4 -; LA32-NEXT: st.w $zero, $a0, 0 -; LA32-NEXT: addi.w $sp, $sp, 32 -; LA32-NEXT: b large_callee -; -; LA64-LABEL: large_caller_new_value: -; LA64: # %bb.0: # %entry -; LA64-NEXT: addi.d $sp, $sp, -32 -; LA64-NEXT: ori $a1, $zero, 0 -; LA64-NEXT: lu32i.d $a1, 1 -; LA64-NEXT: st.d $a1, $sp, 12 -; LA64-NEXT: ori $a1, $zero, 2 -; LA64-NEXT: lu32i.d $a1, 3 -; LA64-NEXT: st.d $a1, $sp, 20 -; LA64-NEXT: ori $a1, $zero, 4 -; LA64-NEXT: st.w $a1, $sp, 28 -; LA64-NEXT: st.w $a1, $a0, 16 -; LA64-NEXT: vld $vr0, $sp, 12 -; LA64-NEXT: vst $vr0, $a0, 0 -; LA64-NEXT: addi.d $sp, $sp, 32 -; LA64-NEXT: pcaddu18i $t8, %call36(large_callee) -; LA64-NEXT: jr $t8 -entry: - %y = alloca %twenty_bytes, align 4 - store i32 0, ptr %y, align 4 - %0 = getelementptr inbounds i8, ptr %y, i32 4 - store i32 1, ptr %0, align 4 - %1 = getelementptr inbounds i8, ptr %y, i32 8 - store i32 2, ptr %1, align 4 - %2 = getelementptr inbounds i8, ptr %y, i32 12 - store i32 3, ptr %2, align 4 - %3 = getelementptr inbounds i8, ptr %y, i32 16 - store i32 4, ptr %3, align 4 - musttail call void @large_callee(%twenty_bytes* byval(%twenty_bytes) align 4 %y) - ret void -} - -declare void @two_byvals_callee(%twenty_bytes* byval(%twenty_bytes) align 4, %twenty_bytes* byval(%twenty_bytes) align 4) -define void @swap_byvals(%twenty_bytes* byval(%twenty_bytes) align 4 %a, %twenty_bytes* byval(%twenty_bytes) align 4 
%b) { -; LA32-LABEL: swap_byvals: -; LA32: # %bb.0: # %entry -; LA32-NEXT: move $a2, $a0 -; LA32-NEXT: move $a0, $a1 -; LA32-NEXT: move $a1, $a2 -; LA32-NEXT: b two_byvals_callee -; -; LA64-LABEL: swap_byvals: -; LA64: # %bb.0: # %entry -; LA64-NEXT: move $a2, $a0 -; LA64-NEXT: move $a0, $a1 -; LA64-NEXT: move $a1, $a2 -; LA64-NEXT: pcaddu18i $t8, %call36(two_byvals_callee) -; LA64-NEXT: jr $t8 -entry: - musttail call void @two_byvals_callee(%twenty_bytes* byval(%twenty_bytes) align 4 %b, %twenty_bytes* byval(%twenty_bytes) align 4 %a) - ret void -} - -; A forwarded byval arg, but in a different argument register, so it needs to -; be moved between registers first. This can't be musttail because of the -; different signatures, but is still tail-called as an optimisation. -declare void @shift_byval_callee(%twenty_bytes* byval(%twenty_bytes) align 4) -define void @shift_byval(i32 %a, %twenty_bytes* byval(%twenty_bytes) align 4 %b) { -; LA32-LABEL: shift_byval: -; LA32: # %bb.0: # %entry -; LA32-NEXT: move $a0, $a1 -; LA32-NEXT: b shift_byval_callee -; -; LA64-LABEL: shift_byval: -; LA64: # %bb.0: # %entry -; LA64-NEXT: move $a0, $a1 -; LA64-NEXT: pcaddu18i $t8, %call36(shift_byval_callee) -; LA64-NEXT: jr $t8 -entry: - tail call void @shift_byval_callee(%twenty_bytes* byval(%twenty_bytes) align 4 %b) - ret void -} - -; A global object passed to a byval argument, so it must be copied, but doesn't -; need a stack temporary. 
-@large_global = external global %twenty_bytes -define void @large_caller_from_global(%twenty_bytes* byval(%twenty_bytes) align 4 %a) { -; LA32-LABEL: large_caller_from_global: -; LA32: # %bb.0: # %entry -; LA32-NEXT: .Lpcadd_hi0: -; LA32-NEXT: pcaddu12i $a1, %got_pcadd_hi20(large_global) -; LA32-NEXT: ld.w $a1, $a1, %got_pcadd_lo12(.Lpcadd_hi0) -; LA32-NEXT: ld.w $a2, $a1, 16 -; LA32-NEXT: st.w $a2, $a0, 16 -; LA32-NEXT: ld.w $a2, $a1, 12 -; LA32-NEXT: st.w $a2, $a0, 12 -; LA32-NEXT: ld.w $a2, $a1, 8 -; LA32-NEXT: st.w $a2, $a0, 8 -; LA32-NEXT: ld.w $a2, $a1, 4 -; LA32-NEXT: st.w $a2, $a0, 4 -; LA32-NEXT: ld.w $a1, $a1, 0 -; LA32-NEXT: st.w $a1, $a0, 0 -; LA32-NEXT: b large_callee -; -; LA64-LABEL: large_caller_from_global: -; LA64: # %bb.0: # %entry -; LA64-NEXT: pcalau12i $a1, %got_pc_hi20(large_global) -; LA64-NEXT: ld.d $a1, $a1, %got_pc_lo12(large_global) -; LA64-NEXT: ld.w $a2, $a1, 16 -; LA64-NEXT: st.w $a2, $a0, 16 -; LA64-NEXT: vld $vr0, $a1, 0 -; LA64-NEXT: vst $vr0, $a0, 0 -; LA64-NEXT: pcaddu18i $t8, %call36(large_callee) -; LA64-NEXT: jr $t8 -entry: - musttail call void @large_callee(%twenty_bytes* byval(%twenty_bytes) align 4 @large_global) - ret void -} diff --git a/llvm/test/CodeGen/LoongArch/tail-calls.ll b/llvm/test/CodeGen/LoongArch/tail-calls.ll index e14fbc2302cce..533761c8a1c70 100644 --- a/llvm/test/CodeGen/LoongArch/tail-calls.ll +++ b/llvm/test/CodeGen/LoongArch/tail-calls.ll @@ -80,15 +80,20 @@ entry: ret void } -;; Perform tail call optimization if callee arg stack usage ≤ caller +;; Do not tail call optimize if stack is used to pass parameters. 
declare i32 @callee_args(i32 %a, i32 %b, i32 %c, i32 %dd, i32 %e, i32 %ff, i32 %g, i32 %h, i32 %i) define i32 @caller_args(i32 %a, i32 %b, i32 %c, i32 %dd, i32 %e, i32 %ff, i32 %g, i32 %h, i32 %i) nounwind { ; CHECK-LABEL: caller_args: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: ld.d $t0, $sp, 0 +; CHECK-NEXT: addi.d $sp, $sp, -16 +; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; CHECK-NEXT: ld.d $t0, $sp, 16 ; CHECK-NEXT: st.d $t0, $sp, 0 -; CHECK-NEXT: pcaddu18i $t8, %call36(callee_args) -; CHECK-NEXT: jr $t8 +; CHECK-NEXT: pcaddu18i $ra, %call36(callee_args) +; CHECK-NEXT: jirl $ra, $ra, 0 +; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: ret entry: %r = tail call i32 @callee_args(i32 %a, i32 %b, i32 %c, i32 %dd, i32 %e, i32 %ff, i32 %g, i32 %h, i32 %i) ret i32 %r diff --git a/llvm/test/CodeGen/RISCV/musttail-call.ll b/llvm/test/CodeGen/RISCV/musttail-call.ll index a3ac3560378db..f6ec5307b8bad 100644 --- a/llvm/test/CodeGen/RISCV/musttail-call.ll +++ b/llvm/test/CodeGen/RISCV/musttail-call.ll @@ -9,13 +9,12 @@ ; RUN: not --crash llc -mtriple riscv64-unknown-elf -o - %s \ ; RUN: 2>&1 | FileCheck %s -declare void @callee_musttail() +%struct.A = type { i32 } -define void @caller_musttail() #0 { +declare void @callee_musttail(ptr sret(%struct.A) %a) +define void @caller_musttail(ptr sret(%struct.A) %a) { ; CHECK: LLVM ERROR: failed to perform tail call elimination on a call site marked musttail entry: - musttail call void @callee_musttail() + musttail call void @callee_musttail(ptr sret(%struct.A) %a) ret void } - -attributes #0 = { "interrupt"="machine" } diff --git a/llvm/test/CodeGen/RISCV/musttail.ll b/llvm/test/CodeGen/RISCV/musttail.ll deleted file mode 100644 index b4b847b89d07e..0000000000000 --- a/llvm/test/CodeGen/RISCV/musttail.ll +++ /dev/null @@ -1,579 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc -mtriple=riscv32 %s -o 
- | FileCheck %s --check-prefix=RV32 -; RUN: llc -mtriple=riscv64 %s -o - | FileCheck %s --check-prefix=RV64 - -declare i32 @many_args_callee(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i32 %8, i32 %9) - -define i32 @many_args_tail(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i32 %8, i32 %9) { -; RV32-LABEL: many_args_tail: -; RV32: # %bb.0: -; RV32-NEXT: li a0, 8 -; RV32-NEXT: li t0, 9 -; RV32-NEXT: li a1, 1 -; RV32-NEXT: li a2, 2 -; RV32-NEXT: li a3, 3 -; RV32-NEXT: li a4, 4 -; RV32-NEXT: li a5, 5 -; RV32-NEXT: li a6, 6 -; RV32-NEXT: sw a0, 0(sp) -; RV32-NEXT: li a7, 7 -; RV32-NEXT: sw t0, 4(sp) -; RV32-NEXT: li a0, 0 -; RV32-NEXT: tail many_args_callee -; -; RV64-LABEL: many_args_tail: -; RV64: # %bb.0: -; RV64-NEXT: li a0, 8 -; RV64-NEXT: li t0, 9 -; RV64-NEXT: li a1, 1 -; RV64-NEXT: li a2, 2 -; RV64-NEXT: li a3, 3 -; RV64-NEXT: li a4, 4 -; RV64-NEXT: li a5, 5 -; RV64-NEXT: li a6, 6 -; RV64-NEXT: sd a0, 0(sp) -; RV64-NEXT: li a7, 7 -; RV64-NEXT: sd t0, 8(sp) -; RV64-NEXT: li a0, 0 -; RV64-NEXT: tail many_args_callee - %ret = tail call i32 @many_args_callee(i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9) - ret i32 %ret -} - -define i32 @many_args_musttail(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i32 %8, i32 %9) { -; RV32-LABEL: many_args_musttail: -; RV32: # %bb.0: -; RV32-NEXT: li a0, 8 -; RV32-NEXT: li t0, 9 -; RV32-NEXT: li a1, 1 -; RV32-NEXT: li a2, 2 -; RV32-NEXT: li a3, 3 -; RV32-NEXT: li a4, 4 -; RV32-NEXT: li a5, 5 -; RV32-NEXT: li a6, 6 -; RV32-NEXT: sw a0, 0(sp) -; RV32-NEXT: li a7, 7 -; RV32-NEXT: sw t0, 4(sp) -; RV32-NEXT: li a0, 0 -; RV32-NEXT: tail many_args_callee -; -; RV64-LABEL: many_args_musttail: -; RV64: # %bb.0: -; RV64-NEXT: li a0, 8 -; RV64-NEXT: li t0, 9 -; RV64-NEXT: li a1, 1 -; RV64-NEXT: li a2, 2 -; RV64-NEXT: li a3, 3 -; RV64-NEXT: li a4, 4 -; RV64-NEXT: li a5, 5 -; RV64-NEXT: li a6, 6 -; RV64-NEXT: sd a0, 0(sp) -; RV64-NEXT: li a7, 7 -; RV64-NEXT: sd 
t0, 8(sp) -; RV64-NEXT: li a0, 0 -; RV64-NEXT: tail many_args_callee - %ret = musttail call i32 @many_args_callee(i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9) - ret i32 %ret -} - -; This function has more arguments than it's tail-callee. This isn't valid for -; the musttail attribute, but can still be tail-called as a non-guaranteed -; optimisation, because the outgoing arguments to @many_args_callee fit in the -; stack space allocated by the caller of @more_args_tail. -define i32 @more_args_tail(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i32 %8, i64 %9, i32 %10) { -; RV32-LABEL: more_args_tail: -; RV32: # %bb.0: -; RV32-NEXT: li a0, 8 -; RV32-NEXT: li t0, 9 -; RV32-NEXT: li a1, 1 -; RV32-NEXT: li a2, 2 -; RV32-NEXT: li a3, 3 -; RV32-NEXT: li a4, 4 -; RV32-NEXT: li a5, 5 -; RV32-NEXT: li a6, 6 -; RV32-NEXT: sw a0, 0(sp) -; RV32-NEXT: li a7, 7 -; RV32-NEXT: sw t0, 4(sp) -; RV32-NEXT: li a0, 0 -; RV32-NEXT: tail many_args_callee -; -; RV64-LABEL: more_args_tail: -; RV64: # %bb.0: -; RV64-NEXT: li a0, 8 -; RV64-NEXT: li t0, 9 -; RV64-NEXT: li a1, 1 -; RV64-NEXT: li a2, 2 -; RV64-NEXT: li a3, 3 -; RV64-NEXT: li a4, 4 -; RV64-NEXT: li a5, 5 -; RV64-NEXT: li a6, 6 -; RV64-NEXT: sd a0, 0(sp) -; RV64-NEXT: li a7, 7 -; RV64-NEXT: sd t0, 8(sp) -; RV64-NEXT: li a0, 0 -; RV64-NEXT: tail many_args_callee - %ret = tail call i32 @many_args_callee(i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9) - ret i32 %ret -} - -; Again, this isn't valid for musttail, but can be tail-called in practice -; because the stack size is the same. 
-define i32 @different_args_tail_32bit(i64 %0, i64 %1, i64 %2, i64 %3, i64 %4) nounwind { -; RV32-LABEL: different_args_tail_32bit: -; RV32: # %bb.0: -; RV32-NEXT: li a0, 8 -; RV32-NEXT: li t0, 9 -; RV32-NEXT: li a1, 1 -; RV32-NEXT: li a2, 2 -; RV32-NEXT: li a3, 3 -; RV32-NEXT: li a4, 4 -; RV32-NEXT: li a5, 5 -; RV32-NEXT: li a6, 6 -; RV32-NEXT: sw a0, 0(sp) -; RV32-NEXT: li a7, 7 -; RV32-NEXT: sw t0, 4(sp) -; RV32-NEXT: li a0, 0 -; RV32-NEXT: tail many_args_callee -; -; RV64-LABEL: different_args_tail_32bit: -; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -32 -; RV64-NEXT: sd ra, 24(sp) # 8-byte Folded Spill -; RV64-NEXT: li a0, 9 -; RV64-NEXT: li t0, 8 -; RV64-NEXT: li a1, 1 -; RV64-NEXT: li a2, 2 -; RV64-NEXT: li a3, 3 -; RV64-NEXT: li a4, 4 -; RV64-NEXT: li a5, 5 -; RV64-NEXT: li a6, 6 -; RV64-NEXT: li a7, 7 -; RV64-NEXT: sd t0, 0(sp) -; RV64-NEXT: sd a0, 8(sp) -; RV64-NEXT: li a0, 0 -; RV64-NEXT: call many_args_callee -; RV64-NEXT: ld ra, 24(sp) # 8-byte Folded Reload -; RV64-NEXT: addi sp, sp, 32 -; RV64-NEXT: ret - %ret = tail call i32 @many_args_callee(i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9) - ret i32 %ret -} - -define i32 @different_args_tail_64bit(i128 %0, i128 %1, i128 %2, i128 %3, i128 %4) nounwind { -; RV32-LABEL: different_args_tail_64bit: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32-NEXT: li a0, 9 -; RV32-NEXT: li t0, 8 -; RV32-NEXT: li a1, 1 -; RV32-NEXT: li a2, 2 -; RV32-NEXT: li a3, 3 -; RV32-NEXT: li a4, 4 -; RV32-NEXT: li a5, 5 -; RV32-NEXT: li a6, 6 -; RV32-NEXT: li a7, 7 -; RV32-NEXT: sw t0, 0(sp) -; RV32-NEXT: sw a0, 4(sp) -; RV32-NEXT: li a0, 0 -; RV32-NEXT: call many_args_callee -; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32-NEXT: addi sp, sp, 16 -; RV32-NEXT: ret -; -; RV64-LABEL: different_args_tail_64bit: -; RV64: # %bb.0: -; RV64-NEXT: li a0, 8 -; RV64-NEXT: li t0, 9 -; RV64-NEXT: li a1, 1 -; RV64-NEXT: li a2, 2 -; RV64-NEXT: li a3, 3 -; 
RV64-NEXT: li a4, 4 -; RV64-NEXT: li a5, 5 -; RV64-NEXT: li a6, 6 -; RV64-NEXT: sd a0, 0(sp) -; RV64-NEXT: li a7, 7 -; RV64-NEXT: sd t0, 8(sp) -; RV64-NEXT: li a0, 0 -; RV64-NEXT: tail many_args_callee - %ret = tail call i32 @many_args_callee(i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9) - ret i32 %ret -} - -; Here, the caller requires less stack space for it's arguments than the -; callee, so it would not ba valid to do a tail-call. -define i32 @fewer_args_tail(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4) nounwind { -; RV32-LABEL: fewer_args_tail: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32-NEXT: li a0, 9 -; RV32-NEXT: li t0, 8 -; RV32-NEXT: li a1, 1 -; RV32-NEXT: li a2, 2 -; RV32-NEXT: li a3, 3 -; RV32-NEXT: li a4, 4 -; RV32-NEXT: li a5, 5 -; RV32-NEXT: li a6, 6 -; RV32-NEXT: li a7, 7 -; RV32-NEXT: sw t0, 0(sp) -; RV32-NEXT: sw a0, 4(sp) -; RV32-NEXT: li a0, 0 -; RV32-NEXT: call many_args_callee -; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32-NEXT: addi sp, sp, 16 -; RV32-NEXT: ret -; -; RV64-LABEL: fewer_args_tail: -; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -32 -; RV64-NEXT: sd ra, 24(sp) # 8-byte Folded Spill -; RV64-NEXT: li a0, 9 -; RV64-NEXT: li t0, 8 -; RV64-NEXT: li a1, 1 -; RV64-NEXT: li a2, 2 -; RV64-NEXT: li a3, 3 -; RV64-NEXT: li a4, 4 -; RV64-NEXT: li a5, 5 -; RV64-NEXT: li a6, 6 -; RV64-NEXT: li a7, 7 -; RV64-NEXT: sd t0, 0(sp) -; RV64-NEXT: sd a0, 8(sp) -; RV64-NEXT: li a0, 0 -; RV64-NEXT: call many_args_callee -; RV64-NEXT: ld ra, 24(sp) # 8-byte Folded Reload -; RV64-NEXT: addi sp, sp, 32 -; RV64-NEXT: ret - %ret = tail call i32 @many_args_callee(i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9) - ret i32 %ret -} - -declare void @foo(i32, i32, i32, i32, i32, i32, i32, i32, i32) - -define void @bar(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i32 %8) nounwind { -; RV32-LABEL: bar: -; RV32: # %bb.0: # %entry -; 
RV32-NEXT: addi sp, sp, -48 -; RV32-NEXT: sw ra, 44(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s0, 40(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s1, 36(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s2, 32(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s3, 28(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s4, 24(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s5, 20(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s6, 16(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s7, 12(sp) # 4-byte Folded Spill -; RV32-NEXT: mv s0, a7 -; RV32-NEXT: mv s1, a6 -; RV32-NEXT: mv s2, a5 -; RV32-NEXT: mv s3, a4 -; RV32-NEXT: mv s4, a3 -; RV32-NEXT: mv s5, a2 -; RV32-NEXT: mv s6, a1 -; RV32-NEXT: mv s7, a0 -; RV32-NEXT: li a0, 1 -; RV32-NEXT: sw a0, 0(sp) -; RV32-NEXT: mv a0, s7 -; RV32-NEXT: call foo -; RV32-NEXT: li a0, 2 -; RV32-NEXT: sw a0, 48(sp) -; RV32-NEXT: mv a0, s7 -; RV32-NEXT: mv a1, s6 -; RV32-NEXT: mv a2, s5 -; RV32-NEXT: mv a3, s4 -; RV32-NEXT: mv a4, s3 -; RV32-NEXT: mv a5, s2 -; RV32-NEXT: mv a6, s1 -; RV32-NEXT: mv a7, s0 -; RV32-NEXT: lw ra, 44(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s0, 40(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s1, 36(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s2, 32(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s3, 28(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s4, 24(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s5, 20(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s6, 16(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s7, 12(sp) # 4-byte Folded Reload -; RV32-NEXT: addi sp, sp, 48 -; RV32-NEXT: tail foo -; -; RV64-LABEL: bar: -; RV64: # %bb.0: # %entry -; RV64-NEXT: addi sp, sp, -80 -; RV64-NEXT: sd ra, 72(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s0, 64(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s1, 56(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s2, 48(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s3, 40(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s4, 32(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s5, 24(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s6, 16(sp) # 8-byte Folded 
Spill -; RV64-NEXT: sd s7, 8(sp) # 8-byte Folded Spill -; RV64-NEXT: mv s0, a7 -; RV64-NEXT: mv s1, a6 -; RV64-NEXT: mv s2, a5 -; RV64-NEXT: mv s3, a4 -; RV64-NEXT: mv s4, a3 -; RV64-NEXT: mv s5, a2 -; RV64-NEXT: mv s6, a1 -; RV64-NEXT: mv s7, a0 -; RV64-NEXT: li a0, 1 -; RV64-NEXT: sd a0, 0(sp) -; RV64-NEXT: mv a0, s7 -; RV64-NEXT: call foo -; RV64-NEXT: li a0, 2 -; RV64-NEXT: sd a0, 80(sp) -; RV64-NEXT: mv a0, s7 -; RV64-NEXT: mv a1, s6 -; RV64-NEXT: mv a2, s5 -; RV64-NEXT: mv a3, s4 -; RV64-NEXT: mv a4, s3 -; RV64-NEXT: mv a5, s2 -; RV64-NEXT: mv a6, s1 -; RV64-NEXT: mv a7, s0 -; RV64-NEXT: ld ra, 72(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s0, 64(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s1, 56(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s2, 48(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s3, 40(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s4, 32(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s5, 24(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s6, 16(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s7, 8(sp) # 8-byte Folded Reload -; RV64-NEXT: addi sp, sp, 80 -; RV64-NEXT: tail foo -entry: - call void @foo(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i32 1) - musttail call void @foo(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i32 2) - ret void -} - -declare void @sret_callee(ptr sret({ double, double }) align 8) - -; Functions which return by sret can be tail-called because the incoming sret -; pointer gets passed through to the callee. 
-define void @sret_caller_tail(ptr sret({ double, double }) align 8 %result) { -; RV32-LABEL: sret_caller_tail: -; RV32: # %bb.0: # %entry -; RV32-NEXT: tail sret_callee -; -; RV64-LABEL: sret_caller_tail: -; RV64: # %bb.0: # %entry -; RV64-NEXT: tail sret_callee -entry: - tail call void @sret_callee(ptr sret({ double, double }) align 8 %result) - ret void -} - -define void @sret_caller_musttail(ptr sret({ double, double }) align 8 %result) { -; RV32-LABEL: sret_caller_musttail: -; RV32: # %bb.0: # %entry -; RV32-NEXT: tail sret_callee -; -; RV64-LABEL: sret_caller_musttail: -; RV64: # %bb.0: # %entry -; RV64-NEXT: tail sret_callee -entry: - musttail call void @sret_callee(ptr sret({ double, double }) align 8 %result) - ret void -} - -%twenty_bytes = type { [5 x i32] } -declare void @large_callee(ptr byval(%twenty_bytes) align 4) - -; Functions with byval parameters can be tail-called, because the value is -; actually passed on the stack, with a pointer in the register. This is the same -; way for the caller and callee. -define void @large_caller(ptr byval(%twenty_bytes) align 4 %a) { -; RV32-LABEL: large_caller: -; RV32: # %bb.0: # %entry -; RV32-NEXT: tail large_callee -; -; RV64-LABEL: large_caller: -; RV64: # %bb.0: # %entry -; RV64-NEXT: tail large_callee -entry: - musttail call void @large_callee(ptr byval(%twenty_bytes) align 4 %a) - ret void -} - -; As above, but with some inline asm to test that the arguments in a0 is -; re-loaded before the call. 
-define void @large_caller_check_regs(ptr byval(%twenty_bytes) align 4 %a) nounwind { -; RV32-LABEL: large_caller_check_regs: -; RV32: # %bb.0: # %entry -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: #APP -; RV32-NEXT: #NO_APP -; RV32-NEXT: mv a0, a1 -; RV32-NEXT: tail large_callee -; -; RV64-LABEL: large_caller_check_regs: -; RV64: # %bb.0: # %entry -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: #APP -; RV64-NEXT: #NO_APP -; RV64-NEXT: mv a0, a1 -; RV64-NEXT: tail large_callee -entry: - tail call void asm sideeffect "", "~{a0}"() - musttail call void @large_callee(ptr byval(%twenty_bytes) align 4 %a) - ret void -} - -; The IR for this one looks dodgy, because it has an alloca passed to a -; musttail function, but it is passed as a byval argument, so will be copied -; into the stack space allocated by @large_caller_new_value's caller, so is -; valid. -define void @large_caller_new_value(ptr byval(%twenty_bytes) align 4 %a) nounwind { -; RV32-LABEL: large_caller_new_value: -; RV32: # %bb.0: # %entry -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: li a0, 1 -; RV32-NEXT: li a2, 2 -; RV32-NEXT: li a3, 3 -; RV32-NEXT: sw zero, 12(sp) -; RV32-NEXT: sw a0, 16(sp) -; RV32-NEXT: sw a2, 20(sp) -; RV32-NEXT: sw a3, 24(sp) -; RV32-NEXT: li a0, 4 -; RV32-NEXT: sw a0, 28(sp) -; RV32-NEXT: #APP -; RV32-NEXT: #NO_APP -; RV32-NEXT: lw a0, 28(sp) -; RV32-NEXT: sw a0, 16(a1) -; RV32-NEXT: lw a0, 24(sp) -; RV32-NEXT: sw a0, 12(a1) -; RV32-NEXT: lw a0, 20(sp) -; RV32-NEXT: sw a0, 8(a1) -; RV32-NEXT: lw a0, 16(sp) -; RV32-NEXT: sw a0, 4(a1) -; RV32-NEXT: lw a0, 12(sp) -; RV32-NEXT: sw a0, 0(a1) -; RV32-NEXT: mv a0, a1 -; RV32-NEXT: addi sp, sp, 32 -; RV32-NEXT: tail large_callee -; -; RV64-LABEL: large_caller_new_value: -; RV64: # %bb.0: # %entry -; RV64-NEXT: addi sp, sp, -32 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: li a0, 1 -; RV64-NEXT: li a2, 2 -; RV64-NEXT: li a3, 3 -; RV64-NEXT: sw zero, 12(sp) -; RV64-NEXT: sw a0, 16(sp) -; RV64-NEXT: sw a2, 20(sp) -; RV64-NEXT: sw a3, 24(sp) -; 
RV64-NEXT: li a0, 4 -; RV64-NEXT: sw a0, 28(sp) -; RV64-NEXT: #APP -; RV64-NEXT: #NO_APP -; RV64-NEXT: lw a0, 28(sp) -; RV64-NEXT: sw a0, 16(a1) -; RV64-NEXT: lw a0, 24(sp) -; RV64-NEXT: sw a0, 12(a1) -; RV64-NEXT: lw a0, 20(sp) -; RV64-NEXT: sw a0, 8(a1) -; RV64-NEXT: lw a0, 16(sp) -; RV64-NEXT: sw a0, 4(a1) -; RV64-NEXT: lw a0, 12(sp) -; RV64-NEXT: sw a0, 0(a1) -; RV64-NEXT: mv a0, a1 -; RV64-NEXT: addi sp, sp, 32 -; RV64-NEXT: tail large_callee -entry: - %y = alloca %twenty_bytes, align 4 - store i32 0, ptr %y, align 4 - %0 = getelementptr inbounds i8, ptr %y, i32 4 - store i32 1, ptr %0, align 4 - %1 = getelementptr inbounds i8, ptr %y, i32 8 - store i32 2, ptr %1, align 4 - %2 = getelementptr inbounds i8, ptr %y, i32 12 - store i32 3, ptr %2, align 4 - %3 = getelementptr inbounds i8, ptr %y, i32 16 - store i32 4, ptr %3, align 4 - tail call void asm sideeffect "", "~{a0}"() - musttail call void @large_callee(ptr byval(%twenty_bytes) align 4 %y) - ret void -} - -declare void @two_byvals_callee(ptr byval(%twenty_bytes) align 4, ptr byval(%twenty_bytes) align 4) -define void @swap_byvals(ptr byval(%twenty_bytes) align 4 %a, ptr byval(%twenty_bytes) align 4 %b) { -; RV32-LABEL: swap_byvals: -; RV32: # %bb.0: # %entry -; RV32-NEXT: mv a2, a0 -; RV32-NEXT: mv a0, a1 -; RV32-NEXT: mv a1, a2 -; RV32-NEXT: tail two_byvals_callee -; -; RV64-LABEL: swap_byvals: -; RV64: # %bb.0: # %entry -; RV64-NEXT: mv a2, a0 -; RV64-NEXT: mv a0, a1 -; RV64-NEXT: mv a1, a2 -; RV64-NEXT: tail two_byvals_callee -entry: - musttail call void @two_byvals_callee(ptr byval(%twenty_bytes) align 4 %b, ptr byval(%twenty_bytes) align 4 %a) - ret void -} - -; A forwarded byval arg, but in a different argument register, so it needs to -; be moved between registers first. This can't be musttail because of the -; different signatures, but is still tail-called as an optimisation. 
-declare void @shift_byval_callee(ptr byval(%twenty_bytes) align 4) -define void @shift_byval(i32 %a, ptr byval(%twenty_bytes) align 4 %b) { -; RV32-LABEL: shift_byval: -; RV32: # %bb.0: # %entry -; RV32-NEXT: mv a0, a1 -; RV32-NEXT: tail shift_byval_callee -; -; RV64-LABEL: shift_byval: -; RV64: # %bb.0: # %entry -; RV64-NEXT: mv a0, a1 -; RV64-NEXT: tail shift_byval_callee -entry: - tail call void @shift_byval_callee(ptr byval(%twenty_bytes) align 4 %b) - ret void -} - -; A global object passed to a byval argument, so it must be copied, but doesn't -; need a stack temporary. -@large_global = external global %twenty_bytes -define void @large_caller_from_global(ptr byval(%twenty_bytes) align 4 %a) { -; RV32-LABEL: large_caller_from_global: -; RV32: # %bb.0: # %entry -; RV32-NEXT: lui a1, %hi(large_global) -; RV32-NEXT: addi a1, a1, %lo(large_global) -; RV32-NEXT: lw a2, 16(a1) -; RV32-NEXT: sw a2, 16(a0) -; RV32-NEXT: lw a2, 12(a1) -; RV32-NEXT: sw a2, 12(a0) -; RV32-NEXT: lw a2, 8(a1) -; RV32-NEXT: sw a2, 8(a0) -; RV32-NEXT: lw a2, 4(a1) -; RV32-NEXT: sw a2, 4(a0) -; RV32-NEXT: lw a1, 0(a1) -; RV32-NEXT: sw a1, 0(a0) -; RV32-NEXT: tail large_callee -; -; RV64-LABEL: large_caller_from_global: -; RV64: # %bb.0: # %entry -; RV64-NEXT: lui a1, %hi(large_global) -; RV64-NEXT: addi a1, a1, %lo(large_global) -; RV64-NEXT: lw a2, 16(a1) -; RV64-NEXT: sw a2, 16(a0) -; RV64-NEXT: lw a2, 12(a1) -; RV64-NEXT: sw a2, 12(a0) -; RV64-NEXT: lw a2, 8(a1) -; RV64-NEXT: sw a2, 8(a0) -; RV64-NEXT: lw a2, 4(a1) -; RV64-NEXT: sw a2, 4(a0) -; RV64-NEXT: lw a1, 0(a1) -; RV64-NEXT: sw a1, 0(a0) -; RV64-NEXT: tail large_callee -entry: - musttail call void @large_callee(ptr byval(%twenty_bytes) align 4 @large_global) - ret void -} diff --git a/llvm/test/CodeGen/RISCV/pr187832.ll b/llvm/test/CodeGen/RISCV/pr187832.ll deleted file mode 100644 index dd4c3c6e3487e..0000000000000 --- a/llvm/test/CodeGen/RISCV/pr187832.ll +++ /dev/null @@ -1,48 +0,0 @@ -; NOTE: Assertions have been autogenerated 
by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc -mtriple=riscv32 %s -o - | FileCheck %s --check-prefix=RV32 -; RUN: llc -mtriple=riscv64 %s -o - | FileCheck %s --check-prefix=RV64 - -%Box = type { i32, i32, i32, i8, [3 x i8], i32, i8, [1 x i8], i16, i16, i8, [5 x i8], { i64, ptr }, { i64, ptr }, { i64, ptr } } - -define void @test(ptr byval(%Box) %0) nounwind { -; RV32-LABEL: test: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -112 -; RV32-NEXT: sw ra, 108(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s0, 104(sp) # 4-byte Folded Spill -; RV32-NEXT: addi a0, sp, 24 -; RV32-NEXT: li a2, 80 -; RV32-NEXT: li s0, 0 -; RV32-NEXT: li a1, 0 -; RV32-NEXT: call memcpy -; RV32-NEXT: addi a3, sp, 24 -; RV32-NEXT: sw zero, 0(sp) -; RV32-NEXT: sw zero, 4(sp) -; RV32-NEXT: sw zero, 8(sp) -; RV32-NEXT: li a0, 0 -; RV32-NEXT: li a1, 0 -; RV32-NEXT: li a2, 0 -; RV32-NEXT: li a4, 0 -; RV32-NEXT: li a5, 0 -; RV32-NEXT: li a6, 0 -; RV32-NEXT: li a7, 0 -; RV32-NEXT: jalr s0 -; RV32-NEXT: lw ra, 108(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s0, 104(sp) # 4-byte Folded Reload -; RV32-NEXT: addi sp, sp, 112 -; RV32-NEXT: ret -; -; RV64-LABEL: test: -; RV64: # %bb.0: -; RV64-NEXT: li a0, 0 -; RV64-NEXT: li a1, 0 -; RV64-NEXT: li a2, 0 -; RV64-NEXT: li a3, 0 -; RV64-NEXT: li a4, 0 -; RV64-NEXT: li a5, 0 -; RV64-NEXT: li a6, 0 -; RV64-NEXT: li a7, 0 -; RV64-NEXT: jr a0 - tail call void null(ptr null, double 0.000000e+00, ptr byval(%Box) null, { i64, ptr } zeroinitializer, i32 0, i64 0, i1 false) - ret void -} diff --git a/llvm/test/CodeGen/RISCV/tail-calls.ll b/llvm/test/CodeGen/RISCV/tail-calls.ll index 33feba3c6fba1..6756fea8a1f85 100644 --- a/llvm/test/CodeGen/RISCV/tail-calls.ll +++ b/llvm/test/CodeGen/RISCV/tail-calls.ll @@ -204,39 +204,49 @@ declare i32 @callee_args(i32 %a, i32 %b, i32 %c, i32 %dd, i32 %e, i32 %ff, i32 % define i32 @caller_args(i32 %a, i32 %b, i32 %c, i32 %dd, i32 %e, i32 %ff, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n) nounwind { 
; CHECK-LABEL: caller_args: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lw t0, 20(sp) -; CHECK-NEXT: lw t1, 16(sp) -; CHECK-NEXT: lw t2, 0(sp) -; CHECK-NEXT: lw t3, 4(sp) -; CHECK-NEXT: lw t4, 8(sp) -; CHECK-NEXT: lw t5, 12(sp) -; CHECK-NEXT: sw t2, 0(sp) -; CHECK-NEXT: sw t3, 4(sp) -; CHECK-NEXT: sw t4, 8(sp) -; CHECK-NEXT: sw t5, 12(sp) -; CHECK-NEXT: sw t1, 16(sp) -; CHECK-NEXT: sw t0, 20(sp) -; CHECK-NEXT: tail callee_args +; CHECK-NEXT: addi sp, sp, -32 +; CHECK-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; CHECK-NEXT: lw t0, 32(sp) +; CHECK-NEXT: lw t1, 36(sp) +; CHECK-NEXT: lw t2, 40(sp) +; CHECK-NEXT: lw t3, 44(sp) +; CHECK-NEXT: lw t4, 48(sp) +; CHECK-NEXT: lw t5, 52(sp) +; CHECK-NEXT: sw t4, 16(sp) +; CHECK-NEXT: sw t5, 20(sp) +; CHECK-NEXT: sw t0, 0(sp) +; CHECK-NEXT: sw t1, 4(sp) +; CHECK-NEXT: sw t2, 8(sp) +; CHECK-NEXT: sw t3, 12(sp) +; CHECK-NEXT: call callee_args +; CHECK-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 32 +; CHECK-NEXT: ret ; ; CHECK-LARGE-ZICFILP-LABEL: caller_args: ; CHECK-LARGE-ZICFILP: # %bb.0: # %entry ; CHECK-LARGE-ZICFILP-NEXT: lpad 0 -; CHECK-LARGE-ZICFILP-NEXT: lw t0, 20(sp) -; CHECK-LARGE-ZICFILP-NEXT: lw t1, 16(sp) -; CHECK-LARGE-ZICFILP-NEXT: lw t2, 0(sp) -; CHECK-LARGE-ZICFILP-NEXT: lw t3, 12(sp) -; CHECK-LARGE-ZICFILP-NEXT: lw t4, 8(sp) -; CHECK-LARGE-ZICFILP-NEXT: lw t5, 4(sp) -; CHECK-LARGE-ZICFILP-NEXT: sw t2, 0(sp) +; CHECK-LARGE-ZICFILP-NEXT: addi sp, sp, -32 +; CHECK-LARGE-ZICFILP-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; CHECK-LARGE-ZICFILP-NEXT: lw t0, 32(sp) +; CHECK-LARGE-ZICFILP-NEXT: lw t1, 36(sp) +; CHECK-LARGE-ZICFILP-NEXT: lw t3, 40(sp) +; CHECK-LARGE-ZICFILP-NEXT: lw t4, 44(sp) +; CHECK-LARGE-ZICFILP-NEXT: lw t2, 48(sp) +; CHECK-LARGE-ZICFILP-NEXT: lw t5, 52(sp) +; CHECK-LARGE-ZICFILP-NEXT: sw t2, 16(sp) +; CHECK-LARGE-ZICFILP-NEXT: sw t5, 20(sp) ; CHECK-LARGE-ZICFILP-NEXT: .Lpcrel_hi8: ; CHECK-LARGE-ZICFILP-NEXT: auipc t2, %pcrel_hi(.LCPI6_0) ; CHECK-LARGE-ZICFILP-NEXT: lw t2, 
%pcrel_lo(.Lpcrel_hi8)(t2) -; CHECK-LARGE-ZICFILP-NEXT: sw t5, 4(sp) -; CHECK-LARGE-ZICFILP-NEXT: sw t4, 8(sp) -; CHECK-LARGE-ZICFILP-NEXT: sw t3, 12(sp) -; CHECK-LARGE-ZICFILP-NEXT: sw t1, 16(sp) -; CHECK-LARGE-ZICFILP-NEXT: sw t0, 20(sp) -; CHECK-LARGE-ZICFILP-NEXT: jr t2 +; CHECK-LARGE-ZICFILP-NEXT: sw t0, 0(sp) +; CHECK-LARGE-ZICFILP-NEXT: sw t1, 4(sp) +; CHECK-LARGE-ZICFILP-NEXT: sw t3, 8(sp) +; CHECK-LARGE-ZICFILP-NEXT: sw t4, 12(sp) +; CHECK-LARGE-ZICFILP-NEXT: jalr t2 +; CHECK-LARGE-ZICFILP-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; CHECK-LARGE-ZICFILP-NEXT: addi sp, sp, 32 +; CHECK-LARGE-ZICFILP-NEXT: ret entry: %r = tail call i32 @callee_args(i32 %a, i32 %b, i32 %c, i32 %dd, i32 %e, i32 %ff, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n) ret i32 %r @@ -247,20 +257,24 @@ declare i32 @callee_indirect_args(fp128 %a) define void @caller_indirect_args() nounwind { ; CHECK-LABEL: caller_indirect_args: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: addi sp, sp, -32 +; CHECK-NEXT: sw ra, 28(sp) # 4-byte Folded Spill ; CHECK-NEXT: lui a1, 262128 ; CHECK-NEXT: mv a0, sp ; CHECK-NEXT: sw zero, 0(sp) ; CHECK-NEXT: sw zero, 4(sp) ; CHECK-NEXT: sw zero, 8(sp) ; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: tail callee_indirect_args +; CHECK-NEXT: call callee_indirect_args +; CHECK-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 32 +; CHECK-NEXT: ret ; ; CHECK-LARGE-ZICFILP-LABEL: caller_indirect_args: ; CHECK-LARGE-ZICFILP: # %bb.0: # %entry ; CHECK-LARGE-ZICFILP-NEXT: lpad 0 -; CHECK-LARGE-ZICFILP-NEXT: addi sp, sp, -16 +; CHECK-LARGE-ZICFILP-NEXT: addi sp, sp, -32 +; CHECK-LARGE-ZICFILP-NEXT: sw ra, 28(sp) # 4-byte Folded Spill ; CHECK-LARGE-ZICFILP-NEXT: lui a1, 262128 ; CHECK-LARGE-ZICFILP-NEXT: .Lpcrel_hi9: ; CHECK-LARGE-ZICFILP-NEXT: auipc a0, %pcrel_hi(.LCPI7_0) @@ -270,8 +284,10 @@ define void @caller_indirect_args() nounwind { ; CHECK-LARGE-ZICFILP-NEXT: sw zero, 
4(sp) ; CHECK-LARGE-ZICFILP-NEXT: sw zero, 8(sp) ; CHECK-LARGE-ZICFILP-NEXT: sw a1, 12(sp) -; CHECK-LARGE-ZICFILP-NEXT: addi sp, sp, 16 -; CHECK-LARGE-ZICFILP-NEXT: jr t2 +; CHECK-LARGE-ZICFILP-NEXT: jalr t2 +; CHECK-LARGE-ZICFILP-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; CHECK-LARGE-ZICFILP-NEXT: addi sp, sp, 32 +; CHECK-LARGE-ZICFILP-NEXT: ret entry: %call = tail call i32 @callee_indirect_args(fp128 0xL00000000000000003FFF000000000000) ret void