raminmh · dolphonie · May 12, 2022 · May 12, 2022 · May 12, 2022 · May 16, 2022
diff --git a/configs/experiment/s4-lra-pathfinder-new.yaml b/configs/experiment/s4-lra-pathfinder-new.yaml
@@ -11,7 +11,7 @@ scheduler:
 model:
   dropout: 0.0
   n_layers: 6
-  prenorm: true
+  prenorm: false  # p edit: changed from true
   d_model: 256
   norm: batch
   layer:
@@ -20,15 +20,15 @@ model:
     dt_min: 0.001
     dt_max: 0.1
     measure: legs
-    bidirectional: true
+    bidirectional: false  # p edit: changed from true
     postact: glu
     n_ssm: 256
 
 decoder:
   mode: last
 
 loader:
-  batch_size: 64
+  batch_size: 256  # raised from 64
 
 optimizer:
   lr: 0.004

diff --git a/configs/experiment/s4-lra-pathx-new.yaml b/configs/experiment/s4-lra-pathx-new.yaml
@@ -11,7 +11,7 @@ scheduler:
 model:
   dropout: 0.
   n_layers: 6
-  prenorm: true
+  prenorm: false  # p edit: changed from true
   d_model: 256
   norm: batch
   layer:
@@ -20,12 +20,12 @@ model:
     dt_min: 0.0001
     dt_max: 0.01
     measure: legs
-    bidirectional: true
+    bidirectional: false  # p edit: changed from true
     postact: glu
     n_ssm: 256
 
 loader:
-  batch_size: 16
+  batch_size: 4  # p edit: reduced from 16
 
 optimizer:
   lr: 0.0005

diff --git a/slurm/environment.yml b/slurm/environment.yml
@@ -0,0 +1,186 @@
+name: new_torch  # env name activated by slurm/poly_training*.bash
+channels:
+  - https://opence.mit.edu
+  - pytorch
+  - defaults
+  - conda-forge
+dependencies:  # exact version=build pins; platform-specific (note ld_impl_linux-ppc64le)
+  - _libgcc_mutex=0.1=main
+  - _pytorch_select=2.0=cuda_2
+  - abseil-cpp=20200923.3=h29c3540_0
+  - arrow-cpp=3.0.0=py38h3a2661a_15_cpu
+  - av=8.0.3=py38h4eda063_3
+  - blas=1.0=openblas
+  - brotli=1.0.9=he6710b0_2
+  - brotlipy=0.7.0=py38h140841e_1003
+  - bzip2=1.0.8=h7b6447c_0
+  - c-ares=1.18.1=h140841e_0
+  - ca-certificates=2022.4.26=h6ffa863_0
+  - certifi=2021.10.8=py38hf8b3453_1
+  - cffi=1.14.6=py38hf9d8e4b_0
+  - charset-normalizer=2.0.12=pyhd8ed1ab_0
+  - click=7.1.2=pyhd3eb1b0_0
+  - cryptography=37.0.1=py38h179485c_0
+  - cudatoolkit=10.2.89=hfd86e86_1
+  - cudnn=8.0.5_10.2=hf3ed852_1
+  - ffmpeg=4.2.2=h20bf706_0
+  - freetype=2.11.0=h9215f1b_0
+  - future=0.18.2=py38_1
+  - gflags=2.2.2=hb209c28_1002
+  - giflib=5.2.1=h7b6447c_0
+  - glog=0.5.0=h29c3540_0
+  - gmp=6.2.1=h29c3540_0
+  - gnutls=3.6.15=hd39c10c_0
+  - grpc-cpp=1.36.4=h0cec4b6_pb3.14_3
+  - idna=3.3=pyhd3eb1b0_0
+  - jpeg=9e=h140841e_0
+  - lame=3.100=h7b6447c_0
+  - lcms2=2.12=h2045e0b_0
+  - ld_impl_linux-ppc64le=2.33.1=h0f24833_7
+  - leveldb=1.20=hf484d3e_1
+  - libevent=2.1.12=hb567c45_0
+  - libffi=3.3=he6710b0_2
+  - libgcc-ng=8.2.0=h822a55f_1
+  - libgfortran-ng=7.5.0=h69aa010_20
+  - libgfortran4=7.5.0=h69aa010_20
+  - libidn2=2.3.2=h140841e_0
+  - libopenblas=0.3.18=h04d2106_0
+  - libopus=1.3.1=h7b6447c_0
+  - libpng=1.6.37=hbc83047_0
+  - libprotobuf=3.14.0=h5f94dde_0
+  - libstdcxx-ng=8.2.0=h822a55f_1
+  - libtasn1=4.16.0=h140841e_0
+  - libthrift=0.15.0=heb2aae8_0
+  - libtiff=4.2.0=h781710b_0
+  - libunistring=0.9.10=h140841e_0
+  - libutf8proc=2.6.1=h140841e_0
+  - libvpx=1.7.0=hf484d3e_0
+  - libwebp=1.2.2=he32dc1f_0
+  - libwebp-base=1.2.2=h140841e_0
+  - lmdb=0.9.29=h29c3540_0
+  - lz4-c=1.9.3=h29c3540_1
+  - nccl=2.8.3=cuda10.2_3
+  - ncurses=6.3=h140841e_2
+  - nettle=3.7.3=hdc176a3_1
+  - networkx=2.7.1=pyhd3eb1b0_0
+  - numactl=2.0.12=h459fe5f_5
+  - numpy-base=1.19.2=py38h75fe3a5_0
+  - olefile=0.46=pyhd3eb1b0_0
+  - openh264=2.1.1=h5f94dde_0
+  - openssl=1.1.1o=h140841e_0
+  - orc=1.6.5=hb77ef19_2
+  - pillow=8.4.0=py38h3f95422_0
+  - protobuf=3.14.0=py38h29c3540_1
+  - pyarrow=3.0.0=py38he93583c_15_cpu
+  - pycparser=2.21=pyhd3eb1b0_0
+  - pyopenssl=22.0.0=pyhd3eb1b0_0
+  - pysocks=1.7.1=py38h6ffa863_0
+  - python=3.8.12=h836d2c2_0
+  - python_abi=3.8=2_cp38
+  - pytorch=1.9.0=cuda10.2_py38_1
+  - pytorch-base=1.9.0=h1234567_cuda10.2_py38_pb3.14_1
+  - pyyaml=5.4.1=py38h140841e_1
+  - re2=2020.11.01=h29c3540_1
+  - readline=8.1.2=h140841e_1
+  - requests=2.27.1=pyhd3eb1b0_0
+  - scipy=1.7.3=py38he743248_0
+  - sentencepiece=0.1.91=hd4d1946_py38_pb3.14_7
+  - six=1.15.0=py38h6ffa863_0
+  - snappy=1.1.9=h29c3540_0
+  - sqlite=3.38.3=hd7247d8_0
+  - tabulate=0.8.9=py38h6ffa863_0
+  - tk=8.6.11=h7e00dab_1
+  - torchtext=0.10.0=py38_1
+  - torchvision-base=0.10.0=cuda10.2_py38_1
+  - tqdm=4.64.0=py38h6ffa863_0
+  - typing_extensions=3.7.4.3=py_0
+  - urllib3=1.26.9=py38h6ffa863_0
+  - wheel=0.37.1=pyhd3eb1b0_0
+  - x264=1!157.20191217=h7b6447c_0
+  - xz=5.2.5=h140841e_0
+  - yaml=0.2.5=h7b6447c_0
+  - zlib=1.2.12=h140841e_2
+  - zstd=1.4.9=hc52992f_0
+  - pip:
+    - absl-py==1.0.0
+    - aiohttp==3.8.1
+    - aiosignal==1.2.0
+    - antlr4-python3-runtime==4.8
+    - async-timeout==4.0.2
+    - attrs==21.4.0
+    - cachetools==5.0.0
+    - cmake==3.22.4
+    - commonmark==0.9.1
+    - cycler==0.11.0
+    - datasets==1.18.3
+    - dill==0.3.4
+    - docker-pycreds==0.4.0
+    - einops==0.4.1
+    - filelock==3.6.0
+    - fonttools==4.33.3
+    - frozenlist==1.3.0
+    - fsspec==2022.3.0
+    - gitdb==4.0.9
+    - gitpython==3.1.27
+    - google-auth==2.6.6
+    - google-auth-oauthlib==0.4.6
+    - grpcio==1.46.1
+    - huggingface-hub==0.6.0
+    - hydra-core==1.1.2
+    - importlib-metadata==4.11.3
+    - importlib-resources==5.2.3
+    - joblib==1.1.0
+    - kiwisolver==1.4.2
+    - markdown==3.3.7
+    - matplotlib==3.5.2
+    - multidict==6.0.2
+    - multiprocess==0.70.12.2
+    - munch==2.5.0
+    - numpy==1.21.6
+    - oauthlib==3.2.0
+    - omegaconf==2.1.2
+    - opt-einsum==3.3.0
+    - packaging==21.3
+    - pandas==1.4.2
+    - pathtools==0.1.2
+    - patsy==0.5.2
+    - pip==22.1
+    - promise==2.3
+    - psutil==5.9.0
+    - pyasn1==0.4.8
+    - pyasn1-modules==0.2.8
+    - pydeprecate==0.3.2
+    - pygments==2.12.0
+    - pyparsing==3.0.9
+    - python-dateutil==2.8.2
+    - pytorch-fast-transformers==0.4.0
+    - pytorch-lightning==1.6.3
+    - pytz==2022.1
+    - regex==2022.4.24
+    - requests-oauthlib==1.3.1
+    - rich==12.0.1
+    - rsa==4.8
+    - sacremoses==0.0.53
+    - scikit-learn==1.0.2
+    - sentry-sdk==1.5.12
+    - setproctitle==1.2.3
+    - setuptools==59.5.0
+    - shortuuid==1.0.9
+    # sklearn==0.0 dropped: deprecated PyPI placeholder; scikit-learn==1.0.2 above is the real package
+    - smmap==5.0.0
+    - tensorboard==2.9.0
+    - tensorboard-data-server==0.6.1
+    - tensorboard-plugin-wit==1.8.1
+    - threadpoolctl==3.1.0
+    - tokenizers==0.12.1
+    - torchmetrics==0.8.2
+    - transformers==4.18.0
+    - typing-extensions==4.2.0
+    - wandb==0.12.16
+    - werkzeug==2.1.2
+    - wrapt==1.14.1
+    - xxhash==3.0.0
+    - yarl==1.7.2
+    - zipp==3.8.0
+# prefix: omitted on purpose - a per-user absolute path is not portable; conda locates the env by `name`
+
diff --git a/slurm/poly_training.bash b/slurm/poly_training.bash
@@ -0,0 +1,36 @@
+#!/bin/bash
+#SBATCH -o train_%x-%j.out
+#SBATCH -e train_%x-%j.err
+## --mail-user is not set here: Slurm mails the submitting user by default.
+## Override with `sbatch --mail-user=<address>` if needed.
+#SBATCH --mail-type=FAIL
+#SBATCH --gres=gpu:4
+#SBATCH --gpus-per-node=4
+#SBATCH --nodes=1
+#SBATCH --ntasks-per-node=1
+#SBATCH --mem=0
+#SBATCH --time=24:00:00
+#SBATCH --exclusive
+#SBATCH --qos=sched_level_2
+
+## Train the S4 LRA experiment selected by the Slurm job name.
+## Usage: sbatch -J <task> slurm/poly_training.bash
+##   -> runs experiment=s4-lra-<task>-new (e.g. <task> = pathfinder or pathx)
+
+## Fail early when the job name is missing (e.g. script run outside Slurm).
+: "${SLURM_JOB_NAME:?submit with: sbatch -J <task> slurm/poly_training.bash}"
+
+## User python environment
+HOME2=/nobackup/users/$(whoami)
+PYTHON_VIRTUAL_ENVIRONMENT=new_torch
+CONDA_ROOT=$HOME2/anaconda3
+
+## Activate WMLCE virtual environment
+source "${CONDA_ROOT}"/etc/profile.d/conda.sh
+conda activate "$PYTHON_VIRTUAL_ENVIRONMENT"
+ulimit -s unlimited
+
+## Run from the repository checkout; abort if it is missing so that
+## `python -m train` never starts in the wrong directory.
+cd ~/liquid-s4 || exit 1
+python -m train wandb=null experiment=s4-lra-"${SLURM_JOB_NAME}"-new
diff --git a/slurm/poly_training_16_batch.bash b/slurm/poly_training_16_batch.bash
@@ -0,0 +1,36 @@
+#!/bin/bash
+#SBATCH -o train_%x-%j.out
+#SBATCH -e train_%x-%j.err
+## --mail-user is not set here: Slurm mails the submitting user by default.
+## Override with `sbatch --mail-user=<address>` if needed.
+#SBATCH --mail-type=FAIL
+#SBATCH --gres=gpu:4
+#SBATCH --gpus-per-node=4
+#SBATCH --nodes=1
+#SBATCH --ntasks-per-node=1
+#SBATCH --mem=0
+#SBATCH --time=24:00:00
+#SBATCH --exclusive
+#SBATCH --qos=sched_level_2
+
+## Train the S4 LRA experiment selected by the Slurm job name, with the
+## loader batch size overridden to 16 on the command line.
+## Usage: sbatch -J <task> slurm/poly_training_16_batch.bash
+
+## Fail early when the job name is missing (e.g. script run outside Slurm).
+: "${SLURM_JOB_NAME:?submit with: sbatch -J <task> slurm/poly_training_16_batch.bash}"
+
+## User python environment
+HOME2=/nobackup/users/$(whoami)
+PYTHON_VIRTUAL_ENVIRONMENT=new_torch
+CONDA_ROOT=$HOME2/anaconda3
+
+## Activate WMLCE virtual environment
+source "${CONDA_ROOT}"/etc/profile.d/conda.sh
+conda activate "$PYTHON_VIRTUAL_ENVIRONMENT"
+ulimit -s unlimited
+
+## Run from the repository checkout; abort if it is missing so that
+## `python -m train` never starts in the wrong directory.
+cd ~/liquid-s4 || exit 1
+python -m train wandb=null loader.batch_size=16 experiment=s4-lra-"${SLURM_JOB_NAME}"-new