Skip to content

Commit

Permalink
Implement contract deduplication optimization (#1631)
Browse files Browse the repository at this point in the history
* Duplicate contrat elision: first tentative

* Fix contract deduplication

The previous attempt wasn't fruitful, because we don't pass the initial
environment to the contract equality checker. This commit thread the
initial environment through the virtual machine and various functions to
make it available to the contract deduplicator

* Use contract deduplication for annotations as well

Use a special contract deduplication to avoid pretty printing many many
identical type annotations when evaluating. The actual contract
applications were already elided as far as evaluation is concerned, but
contracts annotations were still accumulated without deduplication.

* Rename elision->dedup which is better descriptive

* Fix clippy warning

* Fix cargo doc warnings

* Cosmetic improvements

* Mention idempotency assumption in the manual

* Update core/src/typecheck/eq.rs

Co-authored-by: Taeer Bar-Yam <[email protected]>

* Fix REPL query not working anymore

* Update core/src/typecheck/eq.rs

Co-authored-by: jneem <[email protected]>

* Formatting

* Fix clippy error

* Update core/src/program.rs

Co-authored-by: Viktor Kleen <[email protected]>

---------

Co-authored-by: Taeer Bar-Yam <[email protected]>
Co-authored-by: jneem <[email protected]>
Co-authored-by: Viktor Kleen <[email protected]>
  • Loading branch information
4 people authored Sep 28, 2023
1 parent f4a4792 commit f06bda9
Show file tree
Hide file tree
Showing 13 changed files with 487 additions and 208 deletions.
18 changes: 15 additions & 3 deletions core/src/eval/merge.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ use crate::label::{Label, MergeLabel};
use crate::position::TermPos;
use crate::term::{
record::{self, Field, FieldDeps, FieldMetadata, RecordAttrs, RecordData},
BinaryOp, IndexMap, RichTerm, Term,
BinaryOp, IndexMap, RichTerm, Term, TypeAnnotation,
};
use crate::transform::Closurizable;

Expand Down Expand Up @@ -67,6 +67,7 @@ impl From<MergeMode> for MergeLabel {
#[allow(clippy::too_many_arguments)] // TODO: Is it worth to pack the inputs in an ad-hoc struct?
pub fn merge<C: Cache>(
cache: &mut C,
initial_env: &Environment,
t1: RichTerm,
env1: Environment,
t2: RichTerm,
Expand Down Expand Up @@ -307,6 +308,7 @@ pub fn merge<C: Cache>(
id,
merge_fields(
cache,
initial_env,
merge_label,
field1,
env1.clone(),
Expand Down Expand Up @@ -357,6 +359,7 @@ pub fn merge<C: Cache>(
#[allow(clippy::too_many_arguments)]
fn merge_fields<'a, C: Cache, I: DoubleEndedIterator<Item = &'a LocIdent> + Clone>(
cache: &mut C,
initial_env: &Environment,
merge_label: MergeLabel,
field1: Field,
env1: Environment,
Expand Down Expand Up @@ -410,12 +413,21 @@ fn merge_fields<'a, C: Cache, I: DoubleEndedIterator<Item = &'a LocIdent> + Clon
};

let mut pending_contracts = pending_contracts1.revert_closurize(cache, env_final, env1.clone());
pending_contracts.extend(pending_contracts2.revert_closurize(cache, env_final, env2.clone()));

for ctr2 in pending_contracts2.revert_closurize(cache, env_final, env2.clone()) {
RuntimeContract::push_dedup(
initial_env,
&mut pending_contracts,
env_final,
ctr2,
env_final,
);
}

Ok(Field {
metadata: FieldMetadata {
doc: merge_doc(metadata1.doc, metadata2.doc),
annotation: Combine::combine(metadata1.annotation, metadata2.annotation),
annotation: TypeAnnotation::combine_dedup(metadata1.annotation, metadata2.annotation),
// If one of the record requires this field, then it musn't be optional. The
// resulting field is optional iff both are.
opt: metadata1.opt && metadata2.opt,
Expand Down
112 changes: 64 additions & 48 deletions core/src/eval/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,8 @@ pub struct VirtualMachine<R: ImportResolver, C: Cache> {
import_resolver: R,
// The evaluation cache.
pub cache: C,
// The initial environment containing stdlib and builtin functions accessible from anywhere
initial_env: Environment,
// The stream for writing trace output.
trace: Box<dyn Write>,
}
Expand All @@ -133,6 +135,7 @@ impl<R: ImportResolver, C: Cache> VirtualMachine<R, C> {
call_stack: Default::default(),
stack: Stack::new(),
cache: Cache::new(),
initial_env: Environment::new(),
trace: Box::new(trace),
}
}
Expand All @@ -144,6 +147,7 @@ impl<R: ImportResolver, C: Cache> VirtualMachine<R, C> {
stack: Stack::new(),
cache,
trace: Box::new(trace),
initial_env: Environment::new(),
}
}

Expand All @@ -164,55 +168,58 @@ impl<R: ImportResolver, C: Cache> VirtualMachine<R, C> {

/// Evaluate a Nickel term. Wrapper around [VirtualMachine::eval_closure] that starts from an
/// empty local environment and drops the final environment.
pub fn eval(&mut self, t0: RichTerm, initial_env: &Environment) -> Result<RichTerm, EvalError> {
self.eval_closure(Closure::atomic_closure(t0), initial_env)
pub fn eval(&mut self, t: RichTerm) -> Result<RichTerm, EvalError> {
self.eval_closure(Closure::atomic_closure(t))
.map(|(term, _)| term)
}

/// Fully evaluate a Nickel term: the result is not a WHNF but to a value with all variables
/// substituted.
pub fn eval_full(
&mut self,
t0: RichTerm,
initial_env: &Environment,
) -> Result<RichTerm, EvalError> {
self.eval_deep_closure(t0, initial_env, false)
.map(|(term, env)| subst(&self.cache, term, initial_env, &env))
pub fn eval_full(&mut self, t0: RichTerm) -> Result<RichTerm, EvalError> {
self.eval_full_closure(Closure::atomic_closure(t0))
.map(|(term, _)| term)
}

pub fn eval_full_closure(&mut self, t0: Closure) -> Result<(RichTerm, Environment), EvalError> {
self.eval_deep_closure(t0, false)
.map(|(term, env)| (subst(&self.cache, term, &self.initial_env, &env), env))
}

/// Like `eval_full`, but skips evaluating record fields marked `not_exported`.
pub fn eval_full_for_export(
&mut self,
t0: RichTerm,
initial_env: &Environment,
) -> Result<RichTerm, EvalError> {
self.eval_deep_closure(t0, initial_env, true)
.map(|(term, env)| subst(&self.cache, term, initial_env, &env))
pub fn eval_full_for_export(&mut self, t0: RichTerm) -> Result<RichTerm, EvalError> {
self.eval_deep_closure(Closure::atomic_closure(t0), true)
.map(|(term, env)| subst(&self.cache, term, &self.initial_env, &env))
}

/// Fully evaluates a Nickel term like `eval_full`, but does not substitute all variables.
pub fn eval_deep(
&mut self,
t0: RichTerm,
initial_env: &Environment,
) -> Result<RichTerm, EvalError> {
self.eval_deep_closure(t0, initial_env, false)
pub fn eval_deep(&mut self, t0: RichTerm) -> Result<RichTerm, EvalError> {
self.eval_deep_closure(Closure::atomic_closure(t0), false)
.map(|(term, _)| term)
}

/// Use a specific initial environment for evaluation. Usually, [VirtualMachine::prepare_eval]
/// is populating the initial environment. But in some cases, such as testing or benchmarks, we
/// might want to use a different one.
///
/// Return the new virtual machine with the updated initial environment.
pub fn with_initial_env(mut self, env: Environment) -> Self {
self.initial_env = env;
self
}

fn eval_deep_closure(
&mut self,
rt: RichTerm,
initial_env: &Environment,
mut closure: Closure,
for_export: bool,
) -> Result<(RichTerm, Environment), EvalError> {
let wrapper = mk_term::op1(
closure.body = mk_term::op1(
UnaryOp::Force {
ignore_not_exported: for_export,
},
rt,
closure.body,
);
self.eval_closure(Closure::atomic_closure(wrapper), initial_env)

self.eval_closure(closure)
}

/// Query the value and the metadata of a record field in an expression.
Expand All @@ -221,14 +228,15 @@ impl<R: ImportResolver, C: Cache> VirtualMachine<R, C> {
/// `baz`. The content of `baz` is evaluated as well, and variables are substituted, in order
/// to obtain a value that can be printed. The metadata and the evaluated value are returned as
/// a new field.
pub fn query(
&mut self,
t: RichTerm,
path: QueryPath,
initial_env: &Environment,
) -> Result<Field, EvalError> {
let mut prev_pos = t.pos;
let (rt, mut env) = self.eval_closure(Closure::atomic_closure(t), initial_env)?;
pub fn query(&mut self, t: RichTerm, path: QueryPath) -> Result<Field, EvalError> {
self.query_closure(Closure::atomic_closure(t), path)
}

/// Same as [VirtualMachine::query], but starts from a closure instead of a term in an empty
/// environment.
pub fn query_closure(&mut self, closure: Closure, path: QueryPath) -> Result<Field, EvalError> {
let mut prev_pos = closure.body.pos;
let (rt, mut env) = self.eval_closure(closure)?;

let mut field: Field = rt.into();

Expand Down Expand Up @@ -268,13 +276,10 @@ impl<R: ImportResolver, C: Cache> VirtualMachine<R, C> {
pending_contracts.into_iter(),
prev_pos,
);
let (new_value, new_env) = self.eval_closure(
Closure {
body: value_with_ctr,
env: env.clone(),
},
initial_env,
)?;
let (new_value, new_env) = self.eval_closure(Closure {
body: value_with_ctr,
env: env.clone(),
})?;
env = new_env;

Ok(new_value)
Expand Down Expand Up @@ -329,7 +334,6 @@ impl<R: ImportResolver, C: Cache> VirtualMachine<R, C> {
pub fn eval_closure(
&mut self,
mut clos: Closure,
initial_env: &Environment,
) -> Result<(RichTerm, Environment), EvalError> {
loop {
let Closure {
Expand Down Expand Up @@ -383,7 +387,7 @@ impl<R: ImportResolver, C: Cache> VirtualMachine<R, C> {
Term::Var(x) => {
let mut idx = env
.get(&x.ident())
.or_else(|| initial_env.get(&x.ident()))
.or_else(|| self.initial_env.get(&x.ident()))
.cloned()
.ok_or(EvalError::UnboundIdentifier(*x, pos))?;
std::mem::drop(env); // idx may be a 1RC pointer
Expand Down Expand Up @@ -820,17 +824,29 @@ impl<R: ImportResolver, C: Cache> VirtualMachine<R, C> {
}

impl<C: Cache> VirtualMachine<ImportCache, C> {
pub fn prepare_eval(&mut self, main_id: FileId) -> Result<(RichTerm, Environment), Error> {
/// Prepare the underlying program for evaluation (load the stdlib, typecheck, transform,
/// etc.). Sets the initial environment of the virtual machine.
pub fn prepare_eval(&mut self, main_id: FileId) -> Result<RichTerm, Error> {
let Envs {
eval_env,
type_ctxt,
}: Envs = self.import_resolver.prepare_stdlib(&mut self.cache)?;
} = self.import_resolver.prepare_stdlib(&mut self.cache)?;
self.import_resolver.prepare(main_id, &type_ctxt)?;
Ok((self.import_resolver().get(main_id).unwrap(), eval_env))
self.initial_env = eval_env;
Ok(self.import_resolver().get(main_id).unwrap())
}

/// Prepare the stdlib for evaluation. Sets the initial environment of the virtual machine. As
/// opposed to [VirtualMachine::prepare_eval], [VirtualMachine::prepare_stdlib] doesn't prepare
/// the main program yet (typechecking, transformations, etc.).
///
/// # Returns
///
/// The initial evaluation and typing environments, containing the stdlib items.
pub fn prepare_stdlib(&mut self) -> Result<Envs, Error> {
self.import_resolver.prepare_stdlib(&mut self.cache)
let envs = self.import_resolver.prepare_stdlib(&mut self.cache)?;
self.initial_env = envs.eval_env.clone();
Ok(envs)
}
}

Expand Down
19 changes: 15 additions & 4 deletions core/src/eval/operation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1881,6 +1881,7 @@ impl<R: ImportResolver, C: Cache> VirtualMachine<R, C> {
},
BinaryOp::Merge(merge_label) => merge::merge(
&mut self.cache,
&self.initial_env,
RichTerm {
term: t1,
pos: pos1,
Expand Down Expand Up @@ -2113,6 +2114,7 @@ impl<R: ImportResolver, C: Cache> VirtualMachine<R, C> {
// due to a limitation of `match_sharedterm`: see the macro's
// documentation
let mut record_data = record_data;
let term_original_env = env2.clone();

let mut contract_at_field = |id: LocIdent| {
let pos = contract_term.pos;
Expand All @@ -2125,10 +2127,18 @@ impl<R: ImportResolver, C: Cache> VirtualMachine<R, C> {
};

for (id, field) in record_data.fields.iter_mut() {
field.pending_contracts.push(RuntimeContract {
contract: contract_at_field(*id),
label: label.clone(),
});
let runtime_ctr = RuntimeContract {
contract: contract_at_field(*id),
label: label.clone(),
};

crate::term::RuntimeContract::push_dedup(
&self.initial_env,
&mut field.pending_contracts,
&term_original_env,
runtime_ctr,
&contract_env,
);
}

// IMPORTANT: here, we revert the record back to a `RecRecord`. The
Expand Down Expand Up @@ -2463,6 +2473,7 @@ impl<R: ImportResolver, C: Cache> VirtualMachine<R, C> {
Term::Lbl(lbl) => {
merge::merge(
&mut self.cache,
&self.initial_env,
RichTerm {
term: t2,
pos: pos2,
Expand Down
19 changes: 9 additions & 10 deletions core/src/eval/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ use codespan::Files;
/// Evaluate a term without import support.
fn eval_no_import(t: RichTerm) -> Result<Term, EvalError> {
VirtualMachine::<_, CacheImpl>::new(DummyResolver {}, std::io::sink())
.eval(t, &Environment::new())
.eval(t)
.map(Term::from)
}

Expand Down Expand Up @@ -169,9 +169,7 @@ fn imports() {
let mk_import_two = mk_import("x", "two", mk_term::var("x"), &mut vm).unwrap();
vm.reset();
assert_eq!(
vm.eval(mk_import_two, &Environment::new(),)
.map(RichTerm::without_pos)
.unwrap(),
vm.eval(mk_import_two).map(RichTerm::without_pos).unwrap(),
mk_term::integer(2)
);

Expand All @@ -187,9 +185,7 @@ fn imports() {
);
vm.reset();
assert_eq!(
vm.eval(mk_import_lib.unwrap(), &Environment::new(),)
.map(Term::from)
.unwrap(),
vm.eval(mk_import_lib.unwrap()).map(Term::from).unwrap(),
Term::Bool(true)
);
}
Expand Down Expand Up @@ -269,15 +265,17 @@ fn initial_env() {
let t = mk_term::let_in("x", mk_term::integer(2), mk_term::var("x"));
assert_eq!(
VirtualMachine::new_with_cache(DummyResolver {}, eval_cache.clone(), std::io::sink())
.eval(t, &initial_env)
.with_initial_env(initial_env.clone())
.eval(t)
.map(RichTerm::without_pos),
Ok(mk_term::integer(2))
);

let t = mk_term::let_in("x", mk_term::integer(2), mk_term::var("g"));
assert_eq!(
VirtualMachine::new_with_cache(DummyResolver {}, eval_cache.clone(), std::io::sink())
.eval(t, &initial_env)
.with_initial_env(initial_env.clone())
.eval(t)
.map(RichTerm::without_pos),
Ok(mk_term::integer(1))
);
Expand All @@ -286,7 +284,8 @@ fn initial_env() {
let t = mk_term::let_in("g", mk_term::integer(2), mk_term::var("g"));
assert_eq!(
VirtualMachine::new_with_cache(DummyResolver {}, eval_cache.clone(), std::io::sink())
.eval(t, &initial_env)
.with_initial_env(initial_env.clone())
.eval(t)
.map(RichTerm::without_pos),
Ok(mk_term::integer(2))
);
Expand Down
Loading

0 comments on commit f06bda9

Please sign in to comment.