This defines the abstract syntax of MiniRust programs.
MiniRust has two kinds of expressions: value expressions evaluate to a value and are found, in particular, on the right-hand side of assignments; place expressions evaluate to a place and are found, in particular, in the left-hand side of assignments.
Obviously, these are all quite incomplete still.
/// A "value expression" evaluates to a `Value`.
pub enum ValueExpr {
/// Just return a constant value.
Constant(Constant, Type),
/// An n-tuple, used for arrays, structs, tuples (including unit).
Tuple(List<ValueExpr>, Type),
/// A `Union` value.
Union {
/// The union's field which will be initialized.
field: Int,
/// The value it will be initialized with.
#[specr::indirection]
expr: ValueExpr,
/// The union type, needs to be `Type::Union`
union_ty: Type,
},
/// A variant of an enum type.
Variant {
/// The discriminant of the variant.
discriminant: Int,
/// The `ValueExpr` for the variant.
#[specr::indirection]
data: ValueExpr,
/// The enum type, needs to be `Type::Enum`.
enum_ty: Type,
},
/// Read the discriminant of an enum type.
/// As we don't need to know the validity of the inner data
/// we don't fully load the variant value.
GetDiscriminant {
/// The place where the enum is located.
#[specr::indirection]
place: PlaceExpr,
},
/// Load a value from memory.
Load {
/// The place to load from.
#[specr::indirection]
source: PlaceExpr,
},
/// Create a pointer (raw pointer or reference) to a place.
AddrOf {
/// The place to create a pointer to.
#[specr::indirection]
target: PlaceExpr,
/// The type of the created pointer.
ptr_ty: PtrType,
},
/// Unary operators.
UnOp {
operator: UnOp,
#[specr::indirection]
operand: ValueExpr,
},
/// Binary operators.
BinOp {
operator: BinOp,
#[specr::indirection]
left: ValueExpr,
#[specr::indirection]
right: ValueExpr,
},
}
/// Constants are basically values, but cannot have provenance.
/// Currently we do not support Ptr and Union constants.
pub enum Constant {
/// A mathematical integer, used for `i*`/`u*` types.
Int(Int),
/// A Boolean value, used for `bool`.
Bool(bool),
/// A pointer pointing into a global allocation with a given offset.
GlobalPointer(Relocation),
/// A pointer pointing to a function.
FnPointer(FnName),
/// A pointer with constant address, not pointing into any allocation.
PointerWithoutProvenance(Address),
}
pub enum IntUnOp {
/// Negate an integer value arithmetically (`x` becomes `-x`).
Neg,
/// Bitwise-invert an integer value
BitNot,
/// Used for the intrinsic ˋctpopˋ.
CountOnes,
}
pub enum CastOp {
/// Argument can be any integer type; returns the given integer type.
IntToInt(IntType),
/// Transmute the value to a different type.
/// The program is well-formed even if the output type has a different size than the
/// input type, but the operation is UB in that case.
Transmute(Type),
}
pub enum UnOp {
/// An operation on an integer; returns an integer of the same type.
Int(IntUnOp),
/// A form of cast; the return type is given by the specific cast operation.
Cast(CastOp),
// The following 2 operations correspond to the two parts of `<*const T>::to_raw_parts()`.
/// Returns a raw pointer with same thin pointer of the operand, but without the metadata.
GetThinPointer,
/// Returns the metadata of a pointer as a value. For a thin pointer this is `()`.
GetMetadata,
/// Returns the dynamic size of the pointee behind the operand.
/// The operand must be a reference.
SizeOfVal,
}
pub enum IntBinOp {
/// Add two integer values.
Add,
/// Add two integer values.
/// Throws UB on overflow.
AddUnchecked,
/// Subtract two integer values.
Sub,
/// Subtract two integer values.
/// Throws UB on overflow.
SubUnchecked,
/// Multiply two integer values.
Mul,
/// Multiply two integer values.
/// Throws UB on overflow.
MulUnchecked,
/// Divide two integer values.
/// UB on division by zero and on `int::MIN / -1`.
Div,
/// Divide two integer values.
/// UB on division by zero, on `int::MIN / -1`, and on a non-zero remainder.
DivExact,
/// Remainder of a division, the `%` operator.
/// UB if the modulos (right operand) is zero and on `int::MIN % -1`.
Rem,
/// Shift left `<<`
Shl,
/// Shift left `<<`
/// Throws UB if right operand not in range 0..left::BITS.
ShlUnchecked,
/// Shift right `>>` (arithmetic shift for unsigned integers, logical shift for signed integers)
Shr,
/// Shift right `>>` (arithmetic shift for unsigned integers, logical shift for signed integers)
/// Throws UB if right operand not in range 0..left::BITS.
ShrUnchecked,
/// Bitwise-and two integer values.
BitAnd,
/// Bitwise-or two integer values.
BitOr,
/// Bitwise-xor two integer values.
BitXor,
}
pub enum IntBinOpWithOverflow {
/// Add two integer values, returns a tuple of the result integer
/// and a bool indicating whether the calculation overflowed.
Add,
/// Subtract two integer values, returns a tuple of the result integer
/// and a bool indicating whether the calculation overflowed.
Sub,
/// Multiply two integers, returns a tuple of the result integer
/// and a bool indicating whether the calculation overflowed.
Mul,
}
/// A relational operator indicates how two values are to be compared.
/// Unless noted otherwise, these all return a Boolean.
pub enum RelOp {
/// less than
Lt,
/// greater than
Gt,
/// less than or equal
Le,
/// greater than or equal
Ge,
/// equal
Eq,
/// inequal
Ne,
/// The three-way comparison; returns an i8:
/// * -1 if left < right
/// * 0 if left == right
/// * +1 if left > right
Cmp,
}
pub enum BinOp {
/// An operation on integers (both must have the same type); returns an integer of the same type.
Int(IntBinOp),
/// An operation on integers (both must have the same type); returns a tuple of integer of the same type
/// and a boolean that is true if the result is not equal to the infinite-precision result.
IntWithOverflow(IntBinOpWithOverflow),
/// Compares two values according to the given relational operator. Both must have the same type,
/// and they must both be integers, Booleans, or pointers.
Rel(RelOp),
/// Add a byte-offset to a pointer (with or without inbounds requirement).
/// Takes a pointer as left operand and an integer as right operand;
/// returns a pointer.
/// FIXME: should we make this in units of the pointee size? The thing is, for
/// raw pointers we do not have the pointee type...
PtrOffset { inbounds: bool },
/// Compute the distance between two pointers in bytes (with or without inbounds requirement).
/// Takes two pointers; returns a signed pointer-sized integer.
/// If `nonneg` is true, it is UB for the result to be negative.
PtrOffsetFrom { inbounds: bool, nonneg: bool },
/// This corresponds to `core::ptr::from_raw_parts`
/// and takes a thin pointer and matching metadata to construct a pointer of the given type.
/// When the target type is a thin pointer and the metadata is `()`, this is just a pointer cast.
ConstructWidePointer(PtrType),
}
/// A "place expression" evaluates to a `Place`.
pub enum PlaceExpr {
/// Denotes a local variable.
Local(LocalName),
/// Dereference a value (of pointer/reference type).
Deref {
#[specr::indirection]
operand: ValueExpr,
// The type of the newly created place.
ty: Type,
},
/// Project to a field.
Field {
/// The place to base the projection on.
#[specr::indirection]
root: PlaceExpr,
/// The field to project to.
field: Int,
},
/// Index to an array element.
Index {
/// The array to index into.
#[specr::indirection]
root: PlaceExpr,
/// The index to project to.
#[specr::indirection]
index: ValueExpr,
},
/// Enum variant downcast.
Downcast {
/// The base enum to project to the specific variant.
#[specr::indirection]
root: PlaceExpr,
/// The discriminant of the variant to project to.
discriminant: Int,
},
}
Next, the statements and terminators that MiniRust programs consist of:
pub enum Statement {
/// Copy value from `source` to `destination`.
Assign {
destination: PlaceExpr,
source: ValueExpr,
},
/// Evaluate a place without accessing it.
/// This is the result of translating e.g. `let _ = place;`.
PlaceMention(PlaceExpr),
/// Set the discriminant of the variant at `destination` to `value`.
SetDiscriminant {
destination: PlaceExpr,
value: Int,
},
/// Ensure that `place` contains a valid value of its type (else UB).
/// Also perform retagging and ensure safe pointers are dereferenceable.
///
/// The frontend is generally expected to generate this for all function argument,
/// and possibly in more places.
Validate {
place: PlaceExpr,
/// Indicates whether this operation occurs as part of the prelude
/// that we have at the top of each function (which affects retagging).
fn_entry: bool,
},
/// De-initialize a place.
Deinit {
place: PlaceExpr,
},
/// Allocate the backing store for this local.
StorageLive(LocalName),
/// Deallocate the backing store for this local.
StorageDead(LocalName),
}
pub enum Terminator {
/// Just jump to the next block.
Goto(BbName),
/// `value` needs to evaluate to a `Value::Int`.
/// `cases` map those values to blocks to jump to and therefore have to have the equivalent type.
/// If no value matches we fall back to the block given in `fallback`.
Switch {
value: ValueExpr,
cases: Map<Int, BbName>,
fallback: BbName,
},
/// If this is ever executed, we have UB.
Unreachable,
/// Invoke the given intrinsic operation with the given arguments.
///
/// Intrinsics are langauge primitives that can have arbitrary side-effects, including divergence.
Intrinsic {
intrinsic: IntrinsicOp,
/// The arguments to pass.
arguments: List<ValueExpr>,
/// The place to put the return value into.
ret: PlaceExpr,
/// The block to jump to when this call returns.
/// If `None`, UB will be raised when the intrinsic returns.
next_block: Option<BbName>,
},
/// Call the given function with the given arguments.
Call {
callee: ValueExpr,
/// The calling convention to use for this call.
calling_convention: CallingConvention,
/// The arguments to pass.
arguments: List<ArgumentExpr>,
/// The place to put the return value into.
ret: PlaceExpr,
/// The block to jump to when this call returns.
/// If `None`, UB will be raised when the function returns.
next_block: Option<BbName>,
},
/// Return from the current function.
Return,
}
/// Function arguments can be passed by-value or in-place.
pub enum ArgumentExpr {
/// Pass a copy of this value to the function.
///
/// Technically this could be encoded by generating a fresh temporary, copying the value there, and doing in-place passing.
/// FIXME: is it worth providing this mode anyway?
ByValue(ValueExpr),
/// Pass the argument value in-place; the contents of this place may be altered arbitrarily by the callee.
InPlace(PlaceExpr),
}
/// The `CallingConvention` defines how function arguments and return values are passed.
///
/// The assumption is that if caller and callee agree on the calling convention, and all arguments and the return types
/// pass `check_abi_compatibility`, then this implies they are ABI-compatible on real implementations.
pub enum CallingConvention {
Rust, C,
}
pub enum IntrinsicLockOp {
Acquire,
Release,
Create,
}
/// The intrinsic operations supported by MiniRust.
/// Generally we only make things intrinsics if they cannot be operands, i.e.
/// they are non-deterministic or mutate the global state.
/// We also make them intrinsic if they return `()`, because an operand that
/// does not return anything is kind of odd.
pub enum IntrinsicOp {
Assume,
Exit,
Panic,
PrintStdout,
PrintStderr,
Allocate,
Deallocate,
Spawn,
Join,
/// Determines whether the raw bytes pointed to by two pointers are equal.
/// (Can't be an operand because it reads from memory.)
RawEq,
AtomicStore,
AtomicLoad,
AtomicCompareExchange,
AtomicFetchAndOp(IntBinOp),
Lock(IntrinsicLockOp),
/// 'Expose' the provenance a pointer so that it can later be cast to an integer.
/// The address part of the pointer is stored in `destination`.
PointerExposeProvenance,
/// Create a new pointer from the given address with some previously exposed provenance.
PointerWithExposedProvenance,
}
Finally, the general structure of programs and functions:
/// Opaque types of names for functions and globals.
/// The internal representations of these types do not matter.
pub struct FnName(pub libspecr::Name);
pub struct GlobalName(pub libspecr::Name);
/// A closed MiniRust program.
pub struct Program {
/// Associate a function with each declared function name.
pub functions: Map<FnName, Function>,
/// The function where execution starts.
pub start: FnName,
/// Associate each global name with the associated global.
pub globals: Map<GlobalName, Global>,
}
/// Opaque types of names for local variables and basic blocks.
pub struct LocalName(pub libspecr::Name);
pub struct BbName(pub libspecr::Name);
/// A MiniRust function.
pub struct Function {
/// The locals of this function, and their type.
pub locals: Map<LocalName, Type>,
/// A list of locals that are initially filled with the function arguments.
pub args: List<LocalName>,
/// The name of a local that holds the return value when the function returns.
pub ret: LocalName,
/// The call calling convention of this function.
pub calling_convention: CallingConvention,
/// Associate each basic block name with the associated block.
pub blocks: Map<BbName, BasicBlock>,
/// The basic block where execution starts.
pub start: BbName,
}
/// A basic block is a sequence of statements followed by a terminator.
pub struct BasicBlock {
pub statements: List<Statement>,
pub terminator: Terminator,
}
/// A global allocation.
pub struct Global {
/// The raw bytes of the allocation. `None` represents uninitialized bytes.
pub bytes: List<Option<u8>>,
/// Cross-references pointing to other global allocations,
/// together with an offset, expressing where this allocation should put the pointer.
/// Note that the pointers created due to relocations overwrite the data given by `bytes`.
pub relocations: List<(Offset, Relocation)>,
/// The alignment with which this global shall be allocated.
pub align: Align,
}
/// A pointer into a global allocation.
pub struct Relocation {
/// The name of the global allocation we are pointing into.
pub name: GlobalName,
/// The offset within that allocation.
pub offset: Offset,
}