Skip to content

Commit

Permalink
Merge pull request #106 from mila-iqia/dev_atom_types_diffusion
Browse files Browse the repository at this point in the history
Dev atom types diffusion
  • Loading branch information
rousseab authored Dec 1, 2024
2 parents 0426bf0 + 6a74116 commit c382f6e
Show file tree
Hide file tree
Showing 173 changed files with 7,456 additions and 3,721 deletions.
6 changes: 6 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,12 @@ examples/data/
examples/*/output/
examples/*/lightning_logs/

**/train_run*/
**/valid_run*/
**/processed/
**/cache/
**/output/

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
Expand Down
6 changes: 6 additions & 0 deletions data/SiGe_diffusion_1x1x1/config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Configuration for the dataloader
batch_size: 1024
num_workers: 0
max_atom: 8
spatial_dimension: 3
elements: [Si, Ge]
16 changes: 16 additions & 0 deletions data/SiGe_diffusion_1x1x1/create_data.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#!/bin/bash
#
# Generate the LAMMPS training and validation data for this dataset.
# Every path below is relative to the folder that contains this script.

# Work from the script's own folder so that the relative paths resolve no matter
# where the script is launched from.
cd -- "$(dirname -- "${BASH_SOURCE[0]}")" || exit 1

# Provides create_data_function; nothing below can work without it.
source ../data_generation_functions.sh || exit 1

# Simulation parameters forwarded to create_data_function.
TEMPERATURE=300
BOX_SIZE=1
STEP=10000
CROP=10000
NTRAIN_RUN=10  # number of training runs
NVALID_RUN=5   # number of validation runs

# Input files.
SW_PATH="../stillinger_weber_coefficients/SiGe.sw"
IN_PATH="in.SiGe.lammps"
CONFIG_PATH="config.yaml"

create_data_function "${TEMPERATURE}" "${BOX_SIZE}" "${STEP}" "${CROP}" \
    "${NTRAIN_RUN}" "${NVALID_RUN}" "${SW_PATH}" "${IN_PATH}" "${CONFIG_PATH}"
34 changes: 34 additions & 0 deletions data/SiGe_diffusion_1x1x1/in.SiGe.lammps
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Template LAMMPS input for a two-species (Si, Ge) molecular dynamics run.
# The variables S, T, SEED, STEP and SW_PATH are referenced below but never
# defined in this file: they must be supplied from outside, e.g. with -v on the
# LAMMPS command line.

# Write the LAMMPS log to log.lammps in the current folder.
log log.lammps

units metal
atom_style atomic
atom_modify map array

# Diamond lattice; the simulation region is a block going from 0 to S along each axis.
lattice diamond 5.5421217827
region box block 0 ${S} 0 ${S} 0 ${S}

# Box with two atom types. Basis atoms 1 to 4 are created as type 1 and basis
# atoms 5 to 8 as type 2.
create_box 2 box
create_atoms 1 box basis 1 1 basis 2 1 basis 3 1 basis 4 1 basis 5 2 basis 6 2 basis 7 2 basis 8 2


# Masses of type 1 (Si) and type 2 (Ge).
mass 1 28.0855
mass 2 72.64

group Si type 1
group Ge type 2

# Stillinger-Weber potential; the coefficient file maps type 1 to Si and type 2 to Ge.
pair_style sw
pair_coeff * * ${SW_PATH} Si Ge

# Initial velocities created for temperature T with random seed SEED.
velocity all create ${T} ${SEED}

# Dump every step in yaml format: atom id, element name, position and force.
# dump_modify provides the type-to-element mapping used by the element column.
dump dump_id all yaml 1 dump.${T}-${S}.yaml id element x y z fx fy fz
dump_modify dump_id element Si Ge

# Thermodynamic output in yaml format at every step (written to the log).
thermo_style yaml
thermo 1
#==========================Output files========================

# NVT integration with start and stop temperature T and damping parameter 0.01,
# run for STEP steps, then remove the fix.
fix 1 all nvt temp ${T} ${T} 0.01
run ${STEP}
unfix 1
6 changes: 6 additions & 0 deletions data/SiGe_diffusion_2x2x2/config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Configuration for the dataloader
batch_size: 1024
num_workers: 0
max_atom: 64
spatial_dimension: 3
elements: [Si, Ge]
16 changes: 16 additions & 0 deletions data/SiGe_diffusion_2x2x2/create_data.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#!/bin/bash
#
# Generate the LAMMPS training and validation data for this dataset.
# Every path below is relative to the folder that contains this script.

# Work from the script's own folder so that the relative paths resolve no matter
# where the script is launched from.
cd -- "$(dirname -- "${BASH_SOURCE[0]}")" || exit 1

# Provides create_data_function; nothing below can work without it.
source ../data_generation_functions.sh || exit 1

# Simulation parameters forwarded to create_data_function.
TEMPERATURE=300
BOX_SIZE=2
STEP=10000
CROP=10000
NTRAIN_RUN=10  # number of training runs
NVALID_RUN=5   # number of validation runs

# Input files.
SW_PATH="../stillinger_weber_coefficients/SiGe.sw"
IN_PATH="in.SiGe.lammps"
CONFIG_PATH="config.yaml"

create_data_function "${TEMPERATURE}" "${BOX_SIZE}" "${STEP}" "${CROP}" \
    "${NTRAIN_RUN}" "${NVALID_RUN}" "${SW_PATH}" "${IN_PATH}" "${CONFIG_PATH}"
34 changes: 34 additions & 0 deletions data/SiGe_diffusion_2x2x2/in.SiGe.lammps
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Template LAMMPS input for a two-species (Si, Ge) molecular dynamics run.
# The variables S, T, SEED, STEP and SW_PATH are referenced below but never
# defined in this file: they must be supplied from outside, e.g. with -v on the
# LAMMPS command line.

# Write the LAMMPS log to log.lammps in the current folder.
log log.lammps

units metal
atom_style atomic
atom_modify map array

# Diamond lattice; the simulation region is a block going from 0 to S along each axis.
lattice diamond 5.5421217827
region box block 0 ${S} 0 ${S} 0 ${S}

# Box with two atom types. Basis atoms 1 to 4 are created as type 1 and basis
# atoms 5 to 8 as type 2.
create_box 2 box
create_atoms 1 box basis 1 1 basis 2 1 basis 3 1 basis 4 1 basis 5 2 basis 6 2 basis 7 2 basis 8 2


# Masses of type 1 (Si) and type 2 (Ge).
mass 1 28.0855
mass 2 72.64

group Si type 1
group Ge type 2

# Stillinger-Weber potential; the coefficient file maps type 1 to Si and type 2 to Ge.
pair_style sw
pair_coeff * * ${SW_PATH} Si Ge

# Initial velocities created for temperature T with random seed SEED.
velocity all create ${T} ${SEED}

# Dump every step in yaml format: atom id, element name, position and force.
# dump_modify provides the type-to-element mapping used by the element column.
dump dump_id all yaml 1 dump.${T}-${S}.yaml id element x y z fx fy fz
dump_modify dump_id element Si Ge

# Thermodynamic output in yaml format at every step (written to the log).
thermo_style yaml
thermo 1
#==========================Output files========================

# NVT integration with start and stop temperature T and damping parameter 0.01,
# run for STEP steps, then remove the fix.
fix 1 all nvt temp ${T} ${T} 0.01
run ${STEP}
unfix 1
6 changes: 6 additions & 0 deletions data/SiGe_diffusion_3x3x3/config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Configuration for the dataloader
batch_size: 1024
num_workers: 0
max_atom: 216
spatial_dimension: 3
elements: [Si, Ge]
16 changes: 16 additions & 0 deletions data/SiGe_diffusion_3x3x3/create_data.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#!/bin/bash
#
# Generate the LAMMPS training and validation data for this dataset.
# Every path below is relative to the folder that contains this script.

# Work from the script's own folder so that the relative paths resolve no matter
# where the script is launched from.
cd -- "$(dirname -- "${BASH_SOURCE[0]}")" || exit 1

# Provides create_data_function; nothing below can work without it.
source ../data_generation_functions.sh || exit 1

# Simulation parameters forwarded to create_data_function.
TEMPERATURE=300
BOX_SIZE=3
STEP=10000
CROP=10000
NTRAIN_RUN=10  # number of training runs
NVALID_RUN=5   # number of validation runs

# Input files.
SW_PATH="../stillinger_weber_coefficients/SiGe.sw"
IN_PATH="in.SiGe.lammps"
CONFIG_PATH="config.yaml"

create_data_function "${TEMPERATURE}" "${BOX_SIZE}" "${STEP}" "${CROP}" \
    "${NTRAIN_RUN}" "${NVALID_RUN}" "${SW_PATH}" "${IN_PATH}" "${CONFIG_PATH}"
34 changes: 34 additions & 0 deletions data/SiGe_diffusion_3x3x3/in.SiGe.lammps
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Template LAMMPS input for a two-species (Si, Ge) molecular dynamics run.
# The variables S, T, SEED, STEP and SW_PATH are referenced below but never
# defined in this file: they must be supplied from outside, e.g. with -v on the
# LAMMPS command line.

# Write the LAMMPS log to log.lammps in the current folder.
log log.lammps

units metal
atom_style atomic
atom_modify map array

# Diamond lattice; the simulation region is a block going from 0 to S along each axis.
lattice diamond 5.5421217827
region box block 0 ${S} 0 ${S} 0 ${S}

# Box with two atom types. Basis atoms 1 to 4 are created as type 1 and basis
# atoms 5 to 8 as type 2.
create_box 2 box
create_atoms 1 box basis 1 1 basis 2 1 basis 3 1 basis 4 1 basis 5 2 basis 6 2 basis 7 2 basis 8 2


# Masses of type 1 (Si) and type 2 (Ge).
mass 1 28.0855
mass 2 72.64

group Si type 1
group Ge type 2

# Stillinger-Weber potential; the coefficient file maps type 1 to Si and type 2 to Ge.
pair_style sw
pair_coeff * * ${SW_PATH} Si Ge

# Initial velocities created for temperature T with random seed SEED.
velocity all create ${T} ${SEED}

# Dump every step in yaml format: atom id, element name, position and force.
# dump_modify provides the type-to-element mapping used by the element column.
dump dump_id all yaml 1 dump.${T}-${S}.yaml id element x y z fx fy fz
dump_modify dump_id element Si Ge

# Thermodynamic output in yaml format at every step (written to the log).
thermo_style yaml
thermo 1
#==========================Output files========================

# NVT integration with start and stop temperature T and damping parameter 0.01,
# run for STEP steps, then remove the fix.
fix 1 all nvt temp ${T} ${T} 0.01
run ${STEP}
unfix 1
6 changes: 6 additions & 0 deletions data/Si_diffusion_1x1x1/config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Configuration for the dataloader
batch_size: 1024
num_workers: 0
max_atom: 8
spatial_dimension: 3
elements: [Si]
16 changes: 16 additions & 0 deletions data/Si_diffusion_1x1x1/create_data.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#!/bin/bash
#
# Generate the LAMMPS training and validation data for this dataset.
# Every path below is relative to the folder that contains this script.

# Work from the script's own folder so that the relative paths resolve no matter
# where the script is launched from.
cd -- "$(dirname -- "${BASH_SOURCE[0]}")" || exit 1

# Provides create_data_function; nothing below can work without it.
source ../data_generation_functions.sh || exit 1

# Simulation parameters forwarded to create_data_function.
TEMPERATURE=300
BOX_SIZE=1
STEP=10000
CROP=10000
NTRAIN_RUN=10  # number of training runs
NVALID_RUN=5   # number of validation runs

# Input files.
SW_PATH="../stillinger_weber_coefficients/Si.sw"
IN_PATH="in.Si.lammps"
CONFIG_PATH="config.yaml"

create_data_function "${TEMPERATURE}" "${BOX_SIZE}" "${STEP}" "${CROP}" \
    "${NTRAIN_RUN}" "${NVALID_RUN}" "${SW_PATH}" "${IN_PATH}" "${CONFIG_PATH}"
6 changes: 4 additions & 2 deletions data/si_diffusion_2x2x2/in.si.lammps → data/Si_diffusion_1x1x1/in.Si.lammps
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,13 @@ mass 1 28.0855
group Si type 1

pair_style sw
pair_coeff * * ../../si.sw Si
pair_coeff * * ${SW_PATH} Si


velocity all create ${T} ${SEED}

dump 1 all yaml 1 dump.si-${T}-${S}.yaml id type x y z fx fy fz
dump dump_id all yaml 1 dump.${T}-${S}.yaml id element x y z fx fy fz
dump_modify dump_id element Si

thermo_style yaml
thermo 1
Expand Down
6 changes: 6 additions & 0 deletions data/Si_diffusion_2x2x2/config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Configuration for the dataloader
batch_size: 1024
num_workers: 0
max_atom: 64
spatial_dimension: 3
elements: [Si]
16 changes: 16 additions & 0 deletions data/Si_diffusion_2x2x2/create_data.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#!/bin/bash
#
# Generate the LAMMPS training and validation data for this dataset.
# Every path below is relative to the folder that contains this script.

# Work from the script's own folder so that the relative paths resolve no matter
# where the script is launched from.
cd -- "$(dirname -- "${BASH_SOURCE[0]}")" || exit 1

# Provides create_data_function; nothing below can work without it.
source ../data_generation_functions.sh || exit 1

# Simulation parameters forwarded to create_data_function.
TEMPERATURE=300
BOX_SIZE=2
STEP=10000
CROP=10000
NTRAIN_RUN=10  # number of training runs
NVALID_RUN=5   # number of validation runs

# Input files.
SW_PATH="../stillinger_weber_coefficients/Si.sw"
IN_PATH="in.Si.lammps"
CONFIG_PATH="config.yaml"

create_data_function "${TEMPERATURE}" "${BOX_SIZE}" "${STEP}" "${CROP}" \
    "${NTRAIN_RUN}" "${NVALID_RUN}" "${SW_PATH}" "${IN_PATH}" "${CONFIG_PATH}"
6 changes: 4 additions & 2 deletions data/si_diffusion_1x1x1_large/in.si.lammps → data/Si_diffusion_2x2x2/in.Si.lammps
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,13 @@ mass 1 28.0855
group Si type 1

pair_style sw
pair_coeff * * ../../si.sw Si
pair_coeff * * ${SW_PATH} Si


velocity all create ${T} ${SEED}

dump 1 all yaml 1 dump.si-${T}-${S}.yaml id type x y z fx fy fz
dump dump_id all yaml 1 dump.${T}-${S}.yaml id element x y z fx fy fz
dump_modify dump_id element Si

thermo_style yaml
thermo 1
Expand Down
6 changes: 6 additions & 0 deletions data/Si_diffusion_3x3x3/config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Configuration for the dataloader
batch_size: 1024
num_workers: 0
max_atom: 216
spatial_dimension: 3
elements: [Si]
16 changes: 16 additions & 0 deletions data/Si_diffusion_3x3x3/create_data.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#!/bin/bash
#
# Generate the LAMMPS training and validation data for this dataset.
# Every path below is relative to the folder that contains this script.

# Work from the script's own folder so that the relative paths resolve no matter
# where the script is launched from.
cd -- "$(dirname -- "${BASH_SOURCE[0]}")" || exit 1

# Provides create_data_function; nothing below can work without it.
source ../data_generation_functions.sh || exit 1

# Simulation parameters forwarded to create_data_function.
TEMPERATURE=300
BOX_SIZE=3
STEP=10000
CROP=10000
NTRAIN_RUN=10  # number of training runs
NVALID_RUN=5   # number of validation runs

# Input files.
SW_PATH="../stillinger_weber_coefficients/Si.sw"
IN_PATH="in.Si.lammps"
CONFIG_PATH="config.yaml"

create_data_function "${TEMPERATURE}" "${BOX_SIZE}" "${STEP}" "${CROP}" \
    "${NTRAIN_RUN}" "${NVALID_RUN}" "${SW_PATH}" "${IN_PATH}" "${CONFIG_PATH}"
6 changes: 4 additions & 2 deletions data/si_diffusion_1x1x1/in.si.lammps → data/Si_diffusion_3x3x3/in.Si.lammps
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,13 @@ mass 1 28.0855
group Si type 1

pair_style sw
pair_coeff * * ../../si.sw Si
pair_coeff * * ${SW_PATH} Si


velocity all create ${T} ${SEED}

dump 1 all yaml 1 dump.si-${T}-${S}.yaml id type x y z fx fy fz
dump dump_id all yaml 1 dump.${T}-${S}.yaml id element x y z fx fy fz
dump_modify dump_id element Si

thermo_style yaml
thermo 1
Expand Down
56 changes: 56 additions & 0 deletions data/data_generation_functions.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
#!/bin/bash

create_data_function() {
    # Drive the creation of training and validation data with LAMMPS.
    #
    # Assumptions:
    #   - this file is sourced by a bash script (the "calling script") that runs from the folder where the
    #     data is to be created.
    #   - the calling script is invoked in a shell with the correct python environment and with `lmp` available.
    #   - the LAMMPS input file follows a template that uses the variables STEP, T, S, SEED and SW_PATH.
    #   - all paths are relative to the folder where the calling script is invoked.
    #
    # Arguments:
    #   $1  TEMPERATURE  passed to LAMMPS as variable T
    #   $2  BOX_SIZE     passed to LAMMPS as variable S
    #   $3  STEP         LAMMPS is run for STEP + CROP steps
    #   $4  CROP         forwarded to crop_lammps_outputs.py as --crop
    #   $5  NTRAIN_RUN   number of train_run_<seed> folders (seeds 1 to NTRAIN_RUN)
    #   $6  NVALID_RUN   number of valid_run_<seed> folders (the NVALID_RUN seeds that follow)
    #   $7  SW_PATH      Stillinger-Weber coefficient file, passed to LAMMPS as variable SW_PATH
    #   $8  IN_PATH      LAMMPS input file
    #   $9  CONFIG_PATH  configuration file forwarded to process_lammps_data.py as --config
    #
    # Returns:
    #   2 when fewer than nine arguments are given, 1 when a run folder cannot be entered or LAMMPS fails,
    #   otherwise the exit status of process_lammps_data.py.

    if [ "$#" -lt 9 ]; then
        echo "usage: create_data_function TEMPERATURE BOX_SIZE STEP CROP NTRAIN_RUN NVALID_RUN SW_PATH IN_PATH CONFIG_PATH" >&2
        return 2
    fi

    # Everything is local so that sourcing scripts do not get their variables overwritten.
    local TEMPERATURE="$1"
    local BOX_SIZE="$2"
    local STEP="$3"
    local CROP="$4"
    local NTRAIN_RUN="$5"
    local NVALID_RUN="$6"
    local SW_PATH="$7"
    local IN_PATH="$8"
    local CONFIG_PATH="$9"

    local NRUN=$((NTRAIN_RUN + NVALID_RUN))
    local START_DIR="${PWD}"
    local DUMP_FILE="dump.${TEMPERATURE}-${BOX_SIZE}.yaml"
    local SEED MODE RUN_DIR

    # Generate the data: the first NTRAIN_RUN seeds are training runs, the remaining ones validation runs.
    for ((SEED = 1; SEED <= NRUN; SEED++)); do
        if ((SEED <= NTRAIN_RUN)); then
            MODE="train"
        else
            MODE="valid"
        fi
        RUN_DIR="${MODE}_run_${SEED}"

        echo "Creating LAMMPS data for ${RUN_DIR}..."
        mkdir -p "${RUN_DIR}"
        # Never carry on in the wrong folder: outputs would land next to the inputs and the
        # return trip below would leave the data folder altogether.
        if ! cd "${RUN_DIR}"; then
            echo "create_data_function: cannot enter folder ${RUN_DIR}" >&2
            return 1
        fi

        # Calling LAMMPS with various arguments to keep it quiet. The current location is "${RUN_DIR}",
        # which is one folder away from the location of the calling script, hence the "../" prefixes.
        # Since the screen output is silenced, a failure is reported here explicitly.
        if ! lmp -echo none -screen none \
            -v STEP "$((STEP + CROP))" \
            -v T "${TEMPERATURE}" \
            -v S "${BOX_SIZE}" \
            -v SEED "${SEED}" \
            -v SW_PATH "../${SW_PATH}" \
            < "../${IN_PATH}"; then
            echo "create_data_function: LAMMPS failed in ${RUN_DIR}" >&2
            cd "${START_DIR}" || return 1
            return 1
        fi

        # Extract the thermodynamic outputs in a yaml file. `grep -E` replaces the deprecated `egrep`.
        # The data rows are matched with any number of leading spaces before "- [": the LAMMPS
        # documentation shows them indented by two spaces.
        grep -E '^(keywords:|data:$|---$|\.\.\.$| +- \[)' log.lammps > thermo_log.yaml

        mkdir -p "uncropped_outputs"
        mv "${DUMP_FILE}" uncropped_outputs/
        mv thermo_log.yaml uncropped_outputs/

        # The cropping script sits two folders up from the run folder.
        python ../../crop_lammps_outputs.py \
            --lammps_yaml "uncropped_outputs/${DUMP_FILE}" \
            --lammps_thermo "uncropped_outputs/thermo_log.yaml" \
            --crop "${CROP}" \
            --output_dir ./

        if ! cd "${START_DIR}"; then
            echo "create_data_function: cannot return to folder ${START_DIR}" >&2
            return 1
        fi
    done

    # process the data
    python ../process_lammps_data.py --data "./" --processed_datadir "./processed/" --config "${CONFIG_PATH}"
}
31 changes: 0 additions & 31 deletions data/lammps_input_example.lammps

This file was deleted.

Loading

0 comments on commit c382f6e

Please sign in to comment.