-
Notifications
You must be signed in to change notification settings - Fork 12.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[RISCV] Add stack clash protection #117612
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-clang @llvm/pr-subscribers-clang-driver Author: Raphael Moreira Zinsly (rzinsly) ChangesEnable Patch is 41.99 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/117612.diff 11 Files Affected:
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index d3eec9fea0d498..48fc43a8f968bc 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -3774,7 +3774,8 @@ static void RenderSCPOptions(const ToolChain &TC, const ArgList &Args,
return;
if (!EffectiveTriple.isX86() && !EffectiveTriple.isSystemZ() &&
- !EffectiveTriple.isPPC64() && !EffectiveTriple.isAArch64())
+ !EffectiveTriple.isPPC64() && !EffectiveTriple.isAArch64() &&
+ !EffectiveTriple.isRISCV())
return;
Args.addOptInFlag(CmdArgs, options::OPT_fstack_clash_protection,
diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
index 1ff435b76ad68a..0b6e89a0e29bf4 100644
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -610,22 +610,134 @@ static MCCFIInstruction createDefCFAOffset(const TargetRegisterInfo &TRI,
Comment.str());
}
+// Allocate stack space and probe it if necessary.
void RISCVFrameLowering::allocateStack(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
- StackOffset Offset, bool EmitCFI,
- unsigned CFIIndex) const {
+ MachineFunction &MF, uint64_t Offset,
+ uint64_t RealStackSize, bool EmitCFI,
+ bool NeedProbe,
+ uint64_t ProbeSize) const {
DebugLoc DL;
const RISCVRegisterInfo *RI = STI.getRegisterInfo();
const RISCVInstrInfo *TII = STI.getInstrInfo();
- RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg, Offset, MachineInstr::FrameSetup,
+ // Simply allocate the stack if it's not big enough to require a probe.
+ if (!NeedProbe || Offset <= ProbeSize) {
+ RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg, StackOffset::getFixed(-Offset),
+ MachineInstr::FrameSetup, getStackAlign());
+
+ if (EmitCFI) {
+ // Emit ".cfi_def_cfa_offset RealStackSize"
+ unsigned CFIIndex = MF.addFrameInst(
+ MCCFIInstruction::cfiDefCfaOffset(nullptr, RealStackSize));
+ BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+
+ return;
+ }
+
+ // Unroll the probe loop depending on the number of iterations.
+ if (Offset < ProbeSize * 5) {
+ uint64_t CurrentOffset = 0;
+ bool IsRV64 = STI.hasFeature(RISCV::Feature64Bit);
+ while (CurrentOffset + ProbeSize <= Offset) {
+ RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg,
+ StackOffset::getFixed(-ProbeSize), MachineInstr::FrameSetup,
+ getStackAlign());
+ // s[d|w] zero, 0(sp)
+ BuildMI(MBB, MBBI, DL, TII->get(IsRV64 ? RISCV::SD : RISCV::SW))
+ .addReg(RISCV::X0)
+ .addReg(SPReg)
+ .addImm(0)
+ .setMIFlags(MachineInstr::FrameSetup);
+
+ CurrentOffset += ProbeSize;
+ if (EmitCFI) {
+ // Emit ".cfi_def_cfa_offset CurrentOffset"
+ unsigned CFIIndex = MF.addFrameInst(
+ MCCFIInstruction::cfiDefCfaOffset(nullptr, CurrentOffset));
+ BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+ }
+
+ uint64_t Residual = Offset - CurrentOffset;
+ if (Residual) {
+ RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg,
+ StackOffset::getFixed(-Residual), MachineInstr::FrameSetup,
+ getStackAlign());
+ if (EmitCFI) {
+ // Emit ".cfi_def_cfa_offset Offset"
+ unsigned CFIIndex =
+ MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, Offset));
+ BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+ }
+
+ return;
+ }
+
+ // Emit a variable-length allocation probing loop.
+ uint64_t RoundedSize = (Offset / ProbeSize) * ProbeSize;
+ uint64_t Residual = Offset - RoundedSize;
+
+ Register TargetReg = RISCV::X6;
+ // SUB TargetReg, SP, RoundedSize
+ RI->adjustReg(MBB, MBBI, DL, TargetReg, SPReg,
+ StackOffset::getFixed(-RoundedSize), MachineInstr::FrameSetup,
getStackAlign());
if (EmitCFI) {
- // Emit ".cfi_def_cfa_offset StackSize"
+ // Set the CFA register to TargetReg.
+ unsigned Reg = STI.getRegisterInfo()->getDwarfRegNum(TargetReg, true);
+ unsigned CFIIndex =
+ MF.addFrameInst(MCCFIInstruction::cfiDefCfa(nullptr, Reg, RoundedSize));
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
- .setMIFlag(MachineInstr::FrameSetup);
+ .setMIFlags(MachineInstr::FrameSetup);
+ }
+
+ // It will be expanded to a probe loop in `inlineStackProbe`.
+ BuildMI(MBB, MBBI, DL, TII->get(RISCV::PROBED_STACKALLOC))
+ .addReg(SPReg)
+ .addReg(TargetReg);
+
+ if (EmitCFI) {
+ // Set the CFA register back to SP.
+ unsigned Reg = STI.getRegisterInfo()->getDwarfRegNum(SPReg, true);
+ unsigned CFIIndex =
+ MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
+ BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
+ }
+
+ if (Residual) {
+ RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg, StackOffset::getFixed(-Residual),
+ MachineInstr::FrameSetup, getStackAlign());
+ if (Residual > ProbeSize) {
+ // s[d|w] zero, 0(sp)
+ bool IsRV64 = STI.hasFeature(RISCV::Feature64Bit);
+ BuildMI(MBB, MBBI, DL, TII->get(IsRV64 ? RISCV::SD : RISCV::SW))
+ .addReg(RISCV::X0)
+ .addReg(SPReg)
+ .addImm(0)
+ .setMIFlags(MachineInstr::FrameSetup);
+ }
+ }
+
+ if (EmitCFI) {
+ // Emit ".cfi_def_cfa_offset Offset"
+ unsigned CFIIndex =
+ MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, Offset));
+ BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
}
}
@@ -743,13 +855,14 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
getPushOrLibCallsSavedInfo(MF, CSI));
}
- if (StackSize != 0) {
- // Allocate space on the stack if necessary.
- unsigned CFIIndex = MF.addFrameInst(
- MCCFIInstruction::cfiDefCfaOffset(nullptr, RealStackSize));
- allocateStack(MBB, MBBI, StackOffset::getFixed(-StackSize),
- /*EmitCFI=*/ true, CFIIndex);
- }
+ // Allocate space on the stack if necessary.
+ auto &Subtarget = MF.getSubtarget<RISCVSubtarget>();
+ const RISCVTargetLowering *TLI = Subtarget.getTargetLowering();
+ bool NeedProbe = TLI->hasInlineStackProbe(MF);
+ uint64_t ProbeSize = TLI->getStackProbeSize(MF, getStackAlign().value());
+ if (StackSize != 0)
+ allocateStack(MBB, MBBI, MF, StackSize, RealStackSize, /*EmitCFI=*/ true,
+ NeedProbe, ProbeSize);
// The frame pointer is callee-saved, and code has been generated for us to
// save it to the stack. We need to skip over the storing of callee-saved
@@ -790,12 +903,9 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
assert(SecondSPAdjustAmount > 0 &&
"SecondSPAdjustAmount should be greater than zero");
- // If we are using a frame-pointer, and thus emitted ".cfi_def_cfa fp, 0",
- // don't emit an sp-based .cfi_def_cfa_offset
- unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(
- nullptr, getStackSizeWithRVVPadding(MF)));
- allocateStack(MBB, MBBI, StackOffset::getFixed(-SecondSPAdjustAmount),
- !hasFP(MF), CFIIndex);
+ allocateStack(MBB, MBBI, MF, SecondSPAdjustAmount,
+ getStackSizeWithRVVPadding(MF), !hasFP(MF), NeedProbe,
+ ProbeSize);
}
if (RVVStackSize) {
@@ -1939,3 +2049,72 @@ bool RISCVFrameLowering::isSupportedStackID(TargetStackID::Value ID) const {
TargetStackID::Value RISCVFrameLowering::getStackIDForScalableVectors() const {
return TargetStackID::ScalableVector;
}
+
+// Synthesize the probe loop.
+static void emitStackProbeInline(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ DebugLoc DL) {
+
+ auto &Subtarget = MF.getSubtarget<RISCVSubtarget>();
+ const RISCVInstrInfo *TII = Subtarget.getInstrInfo();
+ bool IsRV64 = Subtarget.hasFeature(RISCV::Feature64Bit);
+ Align StackAlign = Subtarget.getFrameLowering()->getStackAlign();
+ const RISCVTargetLowering *TLI = Subtarget.getTargetLowering();
+ uint64_t ProbeSize = TLI->getStackProbeSize(MF, StackAlign.value());
+
+ MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator());
+ MachineBasicBlock *LoopTestMBB =
+ MF.CreateMachineBasicBlock(MBB.getBasicBlock());
+ MF.insert(MBBInsertPoint, LoopTestMBB);
+ MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(MBB.getBasicBlock());
+ MF.insert(MBBInsertPoint, ExitMBB);
+ MachineInstr::MIFlag Flags = MachineInstr::FrameSetup;
+ Register TargetReg = RISCV::X6;
+ Register ScratchReg = RISCV::X7;
+
+ // ScratchReg = ProbeSize
+ TII->movImm(MBB, MBBI, DL, ScratchReg, ProbeSize, Flags);
+
+ // LoopTest:
+ // SUB SP, SP, ProbeSize
+ BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(RISCV::SUB), SPReg)
+ .addReg(SPReg)
+ .addReg(ScratchReg)
+ .setMIFlags(Flags);
+
+ // s[d|w] zero, 0(sp)
+ BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL,
+ TII->get(IsRV64 ? RISCV::SD : RISCV::SW))
+ .addReg(RISCV::X0)
+ .addReg(SPReg)
+ .addImm(0)
+ .setMIFlags(Flags);
+
+ // BNE SP, TargetReg, LoopTest
+ BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(RISCV::BNE))
+ .addReg(SPReg)
+ .addReg(TargetReg)
+ .addMBB(LoopTestMBB)
+ .setMIFlags(Flags);
+
+ ExitMBB->splice(ExitMBB->end(), &MBB, std::next(MBBI), MBB.end());
+
+ LoopTestMBB->addSuccessor(ExitMBB);
+ LoopTestMBB->addSuccessor(LoopTestMBB);
+ MBB.addSuccessor(LoopTestMBB);
+}
+
+void RISCVFrameLowering::inlineStackProbe(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ // Get the instructions that need to be replaced. We emit at most two of
+ // these. Remember them in order to avoid complications coming from the need
+ // to traverse the block while potentially creating more blocks.
+ auto Where = llvm::find_if(MBB, [](MachineInstr &MI) {
+ return MI.getOpcode() == RISCV::PROBED_STACKALLOC;
+ });
+ if (Where != MBB.end()) {
+ DebugLoc DL = MBB.findDebugLoc(Where);
+ emitStackProbeInline(MF, MBB, Where, DL);
+ Where->eraseFromParent();
+ }
+}
diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.h b/llvm/lib/Target/RISCV/RISCVFrameLowering.h
index 9aff4dc9e4089a..f6ab5dc9ecfa37 100644
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.h
@@ -79,7 +79,9 @@ class RISCVFrameLowering : public TargetFrameLowering {
}
void allocateStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
- StackOffset Offset, bool EmitCFI, unsigned CFIIndex) const;
+ MachineFunction &MF, uint64_t Offset,
+ uint64_t RealStackSize, bool EmitCFI, bool NeedProbe,
+ uint64_t ProbeSize) const;
protected:
const RISCVSubtarget &STI;
@@ -105,6 +107,9 @@ class RISCVFrameLowering : public TargetFrameLowering {
std::pair<int64_t, Align>
assignRVVStackObjectOffsets(MachineFunction &MF) const;
+ // Replace a StackProbe stub (if any) with the actual probe code inline
+ void inlineStackProbe(MachineFunction &MF,
+ MachineBasicBlock &PrologueMBB) const override;
};
} // namespace llvm
#endif
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 8ca3e714c32c28..eba806907f3d70 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -22156,3 +22156,25 @@ namespace llvm::RISCVVIntrinsicsTable {
#include "RISCVGenSearchableTables.inc"
} // namespace llvm::RISCVVIntrinsicsTable
+
+bool RISCVTargetLowering::hasInlineStackProbe(const MachineFunction &MF) const {
+
+ // If the function specifically requests inline stack probes, emit them.
+ if (MF.getFunction().hasFnAttribute("probe-stack"))
+ return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
+ "inline-asm";
+
+ return false;
+}
+
+unsigned RISCVTargetLowering::getStackProbeSize(const MachineFunction &MF,
+ unsigned StackAlign) const {
+ // The default stack probe size is 4096 if the function has no
+ // stack-probe-size attribute.
+ const Function &Fn = MF.getFunction();
+ unsigned StackProbeSize =
+ Fn.getFnAttributeAsParsedInteger("stack-probe-size", 4096);
+ // Round down to the stack alignment.
+ StackProbeSize &= ~(StackAlign - 1);
+ return StackProbeSize ? StackProbeSize : StackAlign;
+}
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 7ada941563c1ff..b112b5d3f219e4 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -923,6 +923,12 @@ class RISCVTargetLowering : public TargetLowering {
MachineBasicBlock::instr_iterator &MBBI,
const TargetInstrInfo *TII) const override;
+ /// True if stack clash protection is enabled for this functions.
+ bool hasInlineStackProbe(const MachineFunction &MF) const override;
+
+ unsigned getStackProbeSize(const MachineFunction &MF,
+ unsigned StackAlign) const;
+
private:
void analyzeInputArgs(MachineFunction &MF, CCState &CCInfo,
const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index 5747f05ffafd47..a442a7c84ab340 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -1373,6 +1373,17 @@ def PseudoAddTPRel : Pseudo<(outs GPR:$rd),
def : Pat<(FrameAddrRegImm (iPTR GPR:$rs1), simm12:$imm12),
(ADDI GPR:$rs1, simm12:$imm12)>;
+/// Stack probing
+
+let hasSideEffects = 1, mayLoad = 1, mayStore = 1, isCodeGenOnly = 1 in {
+// Probed stack allocation of a constant size, used in function prologues when
+// stack-clash protection is enabled.
+def PROBED_STACKALLOC : Pseudo<(outs GPR:$sp),
+ (ins GPR:$scratch),
+ []>,
+ Sched<[]>;
+}
+
/// HI and ADD_LO address nodes.
// Pseudo for a rematerializable LUI+ADDI sequence for loading an address.
diff --git a/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.cpp b/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.cpp
index d0c363042f5118..beaa218c943060 100644
--- a/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.cpp
@@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "RISCVMachineFunctionInfo.h"
+#include "llvm/IR/Module.h"
using namespace llvm;
@@ -26,6 +27,35 @@ MachineFunctionInfo *RISCVMachineFunctionInfo::clone(
return DestMF.cloneInfo<RISCVMachineFunctionInfo>(*this);
}
+RISCVMachineFunctionInfo::RISCVMachineFunctionInfo(const Function &F,
+ const RISCVSubtarget *STI) {
+
+ // The default stack probe size is 4096 if the function has no
+ // stack-probe-size attribute. This is a safe default because it is the
+ // smallest possible guard page size.
+ uint64_t ProbeSize = 4096;
+ if (F.hasFnAttribute("stack-probe-size"))
+ ProbeSize = F.getFnAttributeAsParsedInteger("stack-probe-size");
+ else if (const auto *PS = mdconst::extract_or_null<ConstantInt>(
+ F.getParent()->getModuleFlag("stack-probe-size")))
+ ProbeSize = PS->getZExtValue();
+ assert(int64_t(ProbeSize) > 0 && "Invalid stack probe size");
+
+ // Round down to the stack alignment.
+ uint64_t StackAlign =
+ STI->getFrameLowering()->getTransientStackAlign().value();
+ ProbeSize = std::max(StackAlign, ProbeSize & ~(StackAlign - 1U));
+ StringRef ProbeKind;
+ if (F.hasFnAttribute("probe-stack"))
+ ProbeKind = F.getFnAttribute("probe-stack").getValueAsString();
+ else if (const auto *PS = dyn_cast_or_null<MDString>(
+ F.getParent()->getModuleFlag("probe-stack")))
+ ProbeKind = PS->getString();
+ if (ProbeKind.size()) {
+ StackProbeSize = ProbeSize;
+ }
+}
+
void yaml::RISCVMachineFunctionInfo::mappingImpl(yaml::IO &YamlIO) {
MappingTraits<RISCVMachineFunctionInfo>::mapping(YamlIO, *this);
}
diff --git a/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h b/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h
index 779c652b4d8fc4..cf20cf8a5c25c5 100644
--- a/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h
@@ -76,8 +76,10 @@ class RISCVMachineFunctionInfo : public MachineFunctionInfo {
unsigned RVPushRegs = 0;
int RVPushRlist = llvm::RISCVZC::RLISTENCODE::INVALID_RLIST;
+ int64_t StackProbeSize = 0;
+
public:
- RISCVMachineFunctionInfo(const Function &F, const TargetSubtargetInfo *STI) {}
+ RISCVMachineFunctionInfo(const Function &F, const RISCVSubtarget *STI);
MachineFunctionInfo *
clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF,
@@ -157,6 +159,9 @@ class RISCVMachineFunctionInfo : public MachineFunctionInfo {
bool isVectorCall() const { return IsVectorCall; }
void setIsVectorCall() { IsVectorCall = true; }
+
+ bool hasStackProbing() const { return StackProbeSize != 0; }
+ int64_t getStackProbeSize() const { return StackProbeSize; }
};
} // end namespace llvm
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index fa507653264ccd..53b178809ccac5 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -266,8 +266,8 @@ RISCVTargetMachine::getSubtargetImpl(const Function &F) const {
MachineFunctionInfo *RISCVTargetMachine::createMachineFunctionInfo(
BumpPtrAllocator &Allocator, const Function &F,
const TargetSubtargetInfo *STI) const {
- return RISCVMachineFunctionInfo::create<RISCVMachineFunctionInfo>(Allocator,
- F, STI);
+ return RISCVMachineFunctionInfo::create<RISCVMachineFunctionInfo>(
+ Allocator, F, static_cast<const RISCVSubtarget *>(STI));
}
TargetTransformInfo
diff --git a/llvm/test/CodeGen/RISCV/stack-clash-prologue-nounwind.ll b/llvm/test/CodeGen/RISCV/stack-clash-prologue-nounwind.ll
new file mode 100644
index 00000000000000..f3f6d7e4940f7d
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/stack-clash-prologue-nounwind.ll
@@ -0,0 +1,208 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v -O2 < %s \
+; RUN: | FileCheck %s -check-prefix=RV64I
+
+; Tests copied from PowerPC.
+
+; Free probe
+define i8 @f0() #0 nounwind {
+; RV64I-LABEL: f0:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -64
+; RV64I-NEXT: li a0, 3
+; RV64I-NEXT: sb a0, 0(sp)
+; RV64I-NEXT: lbu a0, 0(sp)
+; RV64I-NEXT: addi sp, sp, 64
+; RV64I-NEXT: ret
+entry:
+ %a = alloca i8, i64 64
+ %b = getelementptr inbounds i8, ptr %a, i64 63
+ store volatile i8 3, ptr %a
+ %c = load volatile i8, ptr %a
+ ret i8 %c
+}
+
+define i8 @f1() #0 nounwind {
+; RV64I-LABEL: f1:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lui a0, 1
+; RV64I-NEXT: sub sp, sp, a0
+; RV64I-NEXT: sd zero, 0(sp)
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: li a0, 3
+; RV64I-NEXT: sb a0, 16(sp)
+; RV64I-NEXT: lbu a0, 16(sp)
+...
[truncated]
|
@llvm/pr-subscribers-backend-risc-v Author: Raphael Moreira Zinsly (rzinsly) ChangesEnable Patch is 41.99 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/117612.diff 11 Files Affected:
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index d3eec9fea0d498..48fc43a8f968bc 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -3774,7 +3774,8 @@ static void RenderSCPOptions(const ToolChain &TC, const ArgList &Args,
return;
if (!EffectiveTriple.isX86() && !EffectiveTriple.isSystemZ() &&
- !EffectiveTriple.isPPC64() && !EffectiveTriple.isAArch64())
+ !EffectiveTriple.isPPC64() && !EffectiveTriple.isAArch64() &&
+ !EffectiveTriple.isRISCV())
return;
Args.addOptInFlag(CmdArgs, options::OPT_fstack_clash_protection,
diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
index 1ff435b76ad68a..0b6e89a0e29bf4 100644
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -610,22 +610,134 @@ static MCCFIInstruction createDefCFAOffset(const TargetRegisterInfo &TRI,
Comment.str());
}
+// Allocate stack space and probe it if necessary.
void RISCVFrameLowering::allocateStack(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
- StackOffset Offset, bool EmitCFI,
- unsigned CFIIndex) const {
+ MachineFunction &MF, uint64_t Offset,
+ uint64_t RealStackSize, bool EmitCFI,
+ bool NeedProbe,
+ uint64_t ProbeSize) const {
DebugLoc DL;
const RISCVRegisterInfo *RI = STI.getRegisterInfo();
const RISCVInstrInfo *TII = STI.getInstrInfo();
- RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg, Offset, MachineInstr::FrameSetup,
+ // Simply allocate the stack if it's not big enough to require a probe.
+ if (!NeedProbe || Offset <= ProbeSize) {
+ RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg, StackOffset::getFixed(-Offset),
+ MachineInstr::FrameSetup, getStackAlign());
+
+ if (EmitCFI) {
+ // Emit ".cfi_def_cfa_offset RealStackSize"
+ unsigned CFIIndex = MF.addFrameInst(
+ MCCFIInstruction::cfiDefCfaOffset(nullptr, RealStackSize));
+ BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+
+ return;
+ }
+
+ // Unroll the probe loop depending on the number of iterations.
+ if (Offset < ProbeSize * 5) {
+ uint64_t CurrentOffset = 0;
+ bool IsRV64 = STI.hasFeature(RISCV::Feature64Bit);
+ while (CurrentOffset + ProbeSize <= Offset) {
+ RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg,
+ StackOffset::getFixed(-ProbeSize), MachineInstr::FrameSetup,
+ getStackAlign());
+ // s[d|w] zero, 0(sp)
+ BuildMI(MBB, MBBI, DL, TII->get(IsRV64 ? RISCV::SD : RISCV::SW))
+ .addReg(RISCV::X0)
+ .addReg(SPReg)
+ .addImm(0)
+ .setMIFlags(MachineInstr::FrameSetup);
+
+ CurrentOffset += ProbeSize;
+ if (EmitCFI) {
+ // Emit ".cfi_def_cfa_offset CurrentOffset"
+ unsigned CFIIndex = MF.addFrameInst(
+ MCCFIInstruction::cfiDefCfaOffset(nullptr, CurrentOffset));
+ BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+ }
+
+ uint64_t Residual = Offset - CurrentOffset;
+ if (Residual) {
+ RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg,
+ StackOffset::getFixed(-Residual), MachineInstr::FrameSetup,
+ getStackAlign());
+ if (EmitCFI) {
+ // Emit ".cfi_def_cfa_offset Offset"
+ unsigned CFIIndex =
+ MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, Offset));
+ BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+ }
+
+ return;
+ }
+
+ // Emit a variable-length allocation probing loop.
+ uint64_t RoundedSize = (Offset / ProbeSize) * ProbeSize;
+ uint64_t Residual = Offset - RoundedSize;
+
+ Register TargetReg = RISCV::X6;
+ // SUB TargetReg, SP, RoundedSize
+ RI->adjustReg(MBB, MBBI, DL, TargetReg, SPReg,
+ StackOffset::getFixed(-RoundedSize), MachineInstr::FrameSetup,
getStackAlign());
if (EmitCFI) {
- // Emit ".cfi_def_cfa_offset StackSize"
+ // Set the CFA register to TargetReg.
+ unsigned Reg = STI.getRegisterInfo()->getDwarfRegNum(TargetReg, true);
+ unsigned CFIIndex =
+ MF.addFrameInst(MCCFIInstruction::cfiDefCfa(nullptr, Reg, RoundedSize));
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
- .setMIFlag(MachineInstr::FrameSetup);
+ .setMIFlags(MachineInstr::FrameSetup);
+ }
+
+ // It will be expanded to a probe loop in `inlineStackProbe`.
+ BuildMI(MBB, MBBI, DL, TII->get(RISCV::PROBED_STACKALLOC))
+ .addReg(SPReg)
+ .addReg(TargetReg);
+
+ if (EmitCFI) {
+ // Set the CFA register back to SP.
+ unsigned Reg = STI.getRegisterInfo()->getDwarfRegNum(SPReg, true);
+ unsigned CFIIndex =
+ MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
+ BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
+ }
+
+ if (Residual) {
+ RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg, StackOffset::getFixed(-Residual),
+ MachineInstr::FrameSetup, getStackAlign());
+ if (Residual > ProbeSize) {
+ // s[d|w] zero, 0(sp)
+ bool IsRV64 = STI.hasFeature(RISCV::Feature64Bit);
+ BuildMI(MBB, MBBI, DL, TII->get(IsRV64 ? RISCV::SD : RISCV::SW))
+ .addReg(RISCV::X0)
+ .addReg(SPReg)
+ .addImm(0)
+ .setMIFlags(MachineInstr::FrameSetup);
+ }
+ }
+
+ if (EmitCFI) {
+ // Emit ".cfi_def_cfa_offset Offset"
+ unsigned CFIIndex =
+ MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, Offset));
+ BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
}
}
@@ -743,13 +855,14 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
getPushOrLibCallsSavedInfo(MF, CSI));
}
- if (StackSize != 0) {
- // Allocate space on the stack if necessary.
- unsigned CFIIndex = MF.addFrameInst(
- MCCFIInstruction::cfiDefCfaOffset(nullptr, RealStackSize));
- allocateStack(MBB, MBBI, StackOffset::getFixed(-StackSize),
- /*EmitCFI=*/ true, CFIIndex);
- }
+ // Allocate space on the stack if necessary.
+ auto &Subtarget = MF.getSubtarget<RISCVSubtarget>();
+ const RISCVTargetLowering *TLI = Subtarget.getTargetLowering();
+ bool NeedProbe = TLI->hasInlineStackProbe(MF);
+ uint64_t ProbeSize = TLI->getStackProbeSize(MF, getStackAlign().value());
+ if (StackSize != 0)
+ allocateStack(MBB, MBBI, MF, StackSize, RealStackSize, /*EmitCFI=*/ true,
+ NeedProbe, ProbeSize);
// The frame pointer is callee-saved, and code has been generated for us to
// save it to the stack. We need to skip over the storing of callee-saved
@@ -790,12 +903,9 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
assert(SecondSPAdjustAmount > 0 &&
"SecondSPAdjustAmount should be greater than zero");
- // If we are using a frame-pointer, and thus emitted ".cfi_def_cfa fp, 0",
- // don't emit an sp-based .cfi_def_cfa_offset
- unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(
- nullptr, getStackSizeWithRVVPadding(MF)));
- allocateStack(MBB, MBBI, StackOffset::getFixed(-SecondSPAdjustAmount),
- !hasFP(MF), CFIIndex);
+ allocateStack(MBB, MBBI, MF, SecondSPAdjustAmount,
+ getStackSizeWithRVVPadding(MF), !hasFP(MF), NeedProbe,
+ ProbeSize);
}
if (RVVStackSize) {
@@ -1939,3 +2049,72 @@ bool RISCVFrameLowering::isSupportedStackID(TargetStackID::Value ID) const {
TargetStackID::Value RISCVFrameLowering::getStackIDForScalableVectors() const {
return TargetStackID::ScalableVector;
}
+
+// Synthesize the probe loop.
+static void emitStackProbeInline(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ DebugLoc DL) {
+
+ auto &Subtarget = MF.getSubtarget<RISCVSubtarget>();
+ const RISCVInstrInfo *TII = Subtarget.getInstrInfo();
+ bool IsRV64 = Subtarget.hasFeature(RISCV::Feature64Bit);
+ Align StackAlign = Subtarget.getFrameLowering()->getStackAlign();
+ const RISCVTargetLowering *TLI = Subtarget.getTargetLowering();
+ uint64_t ProbeSize = TLI->getStackProbeSize(MF, StackAlign.value());
+
+ MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator());
+ MachineBasicBlock *LoopTestMBB =
+ MF.CreateMachineBasicBlock(MBB.getBasicBlock());
+ MF.insert(MBBInsertPoint, LoopTestMBB);
+ MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(MBB.getBasicBlock());
+ MF.insert(MBBInsertPoint, ExitMBB);
+ MachineInstr::MIFlag Flags = MachineInstr::FrameSetup;
+ Register TargetReg = RISCV::X6;
+ Register ScratchReg = RISCV::X7;
+
+ // ScratchReg = ProbeSize
+ TII->movImm(MBB, MBBI, DL, ScratchReg, ProbeSize, Flags);
+
+ // LoopTest:
+ // SUB SP, SP, ProbeSize
+ BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(RISCV::SUB), SPReg)
+ .addReg(SPReg)
+ .addReg(ScratchReg)
+ .setMIFlags(Flags);
+
+ // s[d|w] zero, 0(sp)
+ BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL,
+ TII->get(IsRV64 ? RISCV::SD : RISCV::SW))
+ .addReg(RISCV::X0)
+ .addReg(SPReg)
+ .addImm(0)
+ .setMIFlags(Flags);
+
+ // BNE SP, TargetReg, LoopTest
+ BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(RISCV::BNE))
+ .addReg(SPReg)
+ .addReg(TargetReg)
+ .addMBB(LoopTestMBB)
+ .setMIFlags(Flags);
+
+ ExitMBB->splice(ExitMBB->end(), &MBB, std::next(MBBI), MBB.end());
+
+ LoopTestMBB->addSuccessor(ExitMBB);
+ LoopTestMBB->addSuccessor(LoopTestMBB);
+ MBB.addSuccessor(LoopTestMBB);
+}
+
+void RISCVFrameLowering::inlineStackProbe(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ // Get the instructions that need to be replaced. We emit at most two of
+ // these. Remember them in order to avoid complications coming from the need
+ // to traverse the block while potentially creating more blocks.
+ auto Where = llvm::find_if(MBB, [](MachineInstr &MI) {
+ return MI.getOpcode() == RISCV::PROBED_STACKALLOC;
+ });
+ if (Where != MBB.end()) {
+ DebugLoc DL = MBB.findDebugLoc(Where);
+ emitStackProbeInline(MF, MBB, Where, DL);
+ Where->eraseFromParent();
+ }
+}
diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.h b/llvm/lib/Target/RISCV/RISCVFrameLowering.h
index 9aff4dc9e4089a..f6ab5dc9ecfa37 100644
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.h
@@ -79,7 +79,9 @@ class RISCVFrameLowering : public TargetFrameLowering {
}
void allocateStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
- StackOffset Offset, bool EmitCFI, unsigned CFIIndex) const;
+ MachineFunction &MF, uint64_t Offset,
+ uint64_t RealStackSize, bool EmitCFI, bool NeedProbe,
+ uint64_t ProbeSize) const;
protected:
const RISCVSubtarget &STI;
@@ -105,6 +107,9 @@ class RISCVFrameLowering : public TargetFrameLowering {
std::pair<int64_t, Align>
assignRVVStackObjectOffsets(MachineFunction &MF) const;
+ // Replace a StackProbe stub (if any) with the actual probe code inline
+ void inlineStackProbe(MachineFunction &MF,
+ MachineBasicBlock &PrologueMBB) const override;
};
} // namespace llvm
#endif
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 8ca3e714c32c28..eba806907f3d70 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -22156,3 +22156,25 @@ namespace llvm::RISCVVIntrinsicsTable {
#include "RISCVGenSearchableTables.inc"
} // namespace llvm::RISCVVIntrinsicsTable
+
+bool RISCVTargetLowering::hasInlineStackProbe(const MachineFunction &MF) const {
+
+ // If the function specifically requests inline stack probes, emit them.
+ if (MF.getFunction().hasFnAttribute("probe-stack"))
+ return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
+ "inline-asm";
+
+ return false;
+}
+
+unsigned RISCVTargetLowering::getStackProbeSize(const MachineFunction &MF,
+ unsigned StackAlign) const {
+ // The default stack probe size is 4096 if the function has no
+ // stack-probe-size attribute.
+ const Function &Fn = MF.getFunction();
+ unsigned StackProbeSize =
+ Fn.getFnAttributeAsParsedInteger("stack-probe-size", 4096);
+ // Round down to the stack alignment.
+ StackProbeSize &= ~(StackAlign - 1);
+ return StackProbeSize ? StackProbeSize : StackAlign;
+}
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 7ada941563c1ff..b112b5d3f219e4 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -923,6 +923,12 @@ class RISCVTargetLowering : public TargetLowering {
MachineBasicBlock::instr_iterator &MBBI,
const TargetInstrInfo *TII) const override;
+ /// True if stack clash protection is enabled for this functions.
+ bool hasInlineStackProbe(const MachineFunction &MF) const override;
+
+ unsigned getStackProbeSize(const MachineFunction &MF,
+ unsigned StackAlign) const;
+
private:
void analyzeInputArgs(MachineFunction &MF, CCState &CCInfo,
const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index 5747f05ffafd47..a442a7c84ab340 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -1373,6 +1373,17 @@ def PseudoAddTPRel : Pseudo<(outs GPR:$rd),
def : Pat<(FrameAddrRegImm (iPTR GPR:$rs1), simm12:$imm12),
(ADDI GPR:$rs1, simm12:$imm12)>;
+/// Stack probing
+
+let hasSideEffects = 1, mayLoad = 1, mayStore = 1, isCodeGenOnly = 1 in {
+// Probed stack allocation of a constant size, used in function prologues when
+// stack-clash protection is enabled.
+def PROBED_STACKALLOC : Pseudo<(outs GPR:$sp),
+ (ins GPR:$scratch),
+ []>,
+ Sched<[]>;
+}
+
/// HI and ADD_LO address nodes.
// Pseudo for a rematerializable LUI+ADDI sequence for loading an address.
diff --git a/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.cpp b/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.cpp
index d0c363042f5118..beaa218c943060 100644
--- a/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.cpp
@@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "RISCVMachineFunctionInfo.h"
+#include "llvm/IR/Module.h"
using namespace llvm;
@@ -26,6 +27,35 @@ MachineFunctionInfo *RISCVMachineFunctionInfo::clone(
return DestMF.cloneInfo<RISCVMachineFunctionInfo>(*this);
}
+RISCVMachineFunctionInfo::RISCVMachineFunctionInfo(const Function &F,
+ const RISCVSubtarget *STI) {
+
+ // The default stack probe size is 4096 if the function has no
+ // stack-probe-size attribute. This is a safe default because it is the
+ // smallest possible guard page size.
+ uint64_t ProbeSize = 4096;
+ if (F.hasFnAttribute("stack-probe-size"))
+ ProbeSize = F.getFnAttributeAsParsedInteger("stack-probe-size");
+ else if (const auto *PS = mdconst::extract_or_null<ConstantInt>(
+ F.getParent()->getModuleFlag("stack-probe-size")))
+ ProbeSize = PS->getZExtValue();
+ assert(int64_t(ProbeSize) > 0 && "Invalid stack probe size");
+
+ // Round down to the stack alignment.
+ uint64_t StackAlign =
+ STI->getFrameLowering()->getTransientStackAlign().value();
+ ProbeSize = std::max(StackAlign, ProbeSize & ~(StackAlign - 1U));
+ StringRef ProbeKind;
+ if (F.hasFnAttribute("probe-stack"))
+ ProbeKind = F.getFnAttribute("probe-stack").getValueAsString();
+ else if (const auto *PS = dyn_cast_or_null<MDString>(
+ F.getParent()->getModuleFlag("probe-stack")))
+ ProbeKind = PS->getString();
+ if (ProbeKind.size()) {
+ StackProbeSize = ProbeSize;
+ }
+}
+
void yaml::RISCVMachineFunctionInfo::mappingImpl(yaml::IO &YamlIO) {
MappingTraits<RISCVMachineFunctionInfo>::mapping(YamlIO, *this);
}
diff --git a/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h b/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h
index 779c652b4d8fc4..cf20cf8a5c25c5 100644
--- a/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h
@@ -76,8 +76,10 @@ class RISCVMachineFunctionInfo : public MachineFunctionInfo {
unsigned RVPushRegs = 0;
int RVPushRlist = llvm::RISCVZC::RLISTENCODE::INVALID_RLIST;
+ int64_t StackProbeSize = 0;
+
public:
- RISCVMachineFunctionInfo(const Function &F, const TargetSubtargetInfo *STI) {}
+ RISCVMachineFunctionInfo(const Function &F, const RISCVSubtarget *STI);
MachineFunctionInfo *
clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF,
@@ -157,6 +159,9 @@ class RISCVMachineFunctionInfo : public MachineFunctionInfo {
bool isVectorCall() const { return IsVectorCall; }
void setIsVectorCall() { IsVectorCall = true; }
+
+ bool hasStackProbing() const { return StackProbeSize != 0; }
+ int64_t getStackProbeSize() const { return StackProbeSize; }
};
} // end namespace llvm
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index fa507653264ccd..53b178809ccac5 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -266,8 +266,8 @@ RISCVTargetMachine::getSubtargetImpl(const Function &F) const {
MachineFunctionInfo *RISCVTargetMachine::createMachineFunctionInfo(
BumpPtrAllocator &Allocator, const Function &F,
const TargetSubtargetInfo *STI) const {
- return RISCVMachineFunctionInfo::create<RISCVMachineFunctionInfo>(Allocator,
- F, STI);
+ return RISCVMachineFunctionInfo::create<RISCVMachineFunctionInfo>(
+ Allocator, F, static_cast<const RISCVSubtarget *>(STI));
}
TargetTransformInfo
diff --git a/llvm/test/CodeGen/RISCV/stack-clash-prologue-nounwind.ll b/llvm/test/CodeGen/RISCV/stack-clash-prologue-nounwind.ll
new file mode 100644
index 00000000000000..f3f6d7e4940f7d
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/stack-clash-prologue-nounwind.ll
@@ -0,0 +1,208 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v -O2 < %s \
+; RUN: | FileCheck %s -check-prefix=RV64I
+
+; Tests copied from PowerPC.
+
+; Free probe
+define i8 @f0() #0 nounwind {
+; RV64I-LABEL: f0:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -64
+; RV64I-NEXT: li a0, 3
+; RV64I-NEXT: sb a0, 0(sp)
+; RV64I-NEXT: lbu a0, 0(sp)
+; RV64I-NEXT: addi sp, sp, 64
+; RV64I-NEXT: ret
+entry:
+ %a = alloca i8, i64 64
+ %b = getelementptr inbounds i8, ptr %a, i64 63
+ store volatile i8 3, ptr %a
+ %c = load volatile i8, ptr %a
+ ret i8 %c
+}
+
+define i8 @f1() #0 nounwind {
+; RV64I-LABEL: f1:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lui a0, 1
+; RV64I-NEXT: sub sp, sp, a0
+; RV64I-NEXT: sd zero, 0(sp)
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: li a0, 3
+; RV64I-NEXT: sb a0, 16(sp)
+; RV64I-NEXT: lbu a0, 16(sp)
+...
[truncated]
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
118415f
to
ba40cf6
Compare
; RUN: llc -mtriple=riscv64 -mattr=+m,+v -O2 < %s \ | ||
; RUN: | FileCheck %s -check-prefix=RV64I |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why is this only testing rv64?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It was an oversight on my part, fixed now.
void RISCVFrameLowering::allocateStack(MachineBasicBlock &MBB, | ||
MachineBasicBlock::iterator MBBI, | ||
StackOffset Offset, bool EmitCFI, | ||
unsigned CFIIndex) const { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I see the CFIIndex argument was removed. Can that be done as a separate patch?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I separated in another commit, should it be in another PR?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ok, I opened a new one: #117871
|
||
auto &Subtarget = MF.getSubtarget<RISCVSubtarget>(); | ||
const RISCVInstrInfo *TII = Subtarget.getInstrInfo(); | ||
bool IsRV64 = Subtarget.hasFeature(RISCV::Feature64Bit); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Subtarget.is64Bit()?
// Unroll the probe loop depending on the number of iterations. | ||
if (Offset < ProbeSize * 5) { | ||
uint64_t CurrentOffset = 0; | ||
bool IsRV64 = STI.hasFeature(RISCV::Feature64Bit); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
STI.is64Bit()?
} | ||
|
||
unsigned RISCVTargetLowering::getStackProbeSize(const MachineFunction &MF, | ||
unsigned StackAlign) const { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Use Align StackAlign
.
// Round down to the stack alignment. | ||
uint64_t StackAlign = | ||
STI->getFrameLowering()->getTransientStackAlign().value(); | ||
ProbeSize = std::max(StackAlign, ProbeSize & ~(StackAlign - 1U)); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we use std::max(StackAlign, alignDown(ProbeSize, StackAlign))
?
unsigned StackProbeSize = | ||
Fn.getFnAttributeAsParsedInteger("stack-probe-size", 4096); | ||
// Round down to the stack alignment. | ||
StackProbeSize &= ~(StackAlign - 1); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Use alignDown
from MathExtras.h?
…teStack Calculates CFIIndex inside RISCVFrameLowering::allocateStack instead of sending it by argument.
ba40cf6
to
8831935
Compare
@topperc thanks for the review, I followed your suggestions. |
Yes. Our github configuration will only allow PRs to "squash and merge" so the separate commit won't survive. |
Enable `-fstack-clash-protection` for RISCV and stack probe for function prologues. We probe the stack by creating an unrolled loop that allocates and probe the stack in ProbeSize chunks, this is not ideal if the loop has many iterations.
Limit the unrolled probe loop and emit a variable length probe loop for bigger allocations. We add a new pseudo instruction RISCV::PROBED_STACKALLOC that will later be synthesized in a loop by `inlineStackProbe`.
8831935
to
34a0b71
Compare
Enable
-fstack-clash-protection
for RISCV and stack probe for function prologues.We probe the stack by creating a loop that allocates and probe the stack in ProbeSize chunks.
We emit an unrolled probe loop for small allocations and emit a variable length probe loop for bigger ones.