diff --git a/README.md b/README.md index 203e8fb..bf9dabf 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,6 @@ -# Structured State Spaces for Sequence Modeling +# Liquid State Space Models -This repository provides implementations and experiments for the following papers. - -## Liquid S4 +This repository provides implementations and experiments for Liquid S4 ```bash python3 -m train wandb=null experiment=s4-lra-cifar-new # plain S4 @@ -11,34 +9,6 @@ python3 -m train wandb=null experiment=s4-lra-cifar-new model.layer.liquid=2 # l python3 -m train wandb=null experiment=s4-lra-cifar-new model.layer.liquid=N # liquid S4 with N terms u(i)*u(j)... ``` -## SaShiMi (arXiv) - -![SaShiMi](assets/sashimi.png "SaShiMi Architecture") -> **It's Raw! Audio Generation with State-Space Models**\ -> Karan Goel, Albert Gu, Chris Donahue, Christopher Ré\ -> Paper: https://arxiv.org/abs/2202.09729 - -## S4 (ICLR 2022 - Outstanding Paper HM) - -![Structured State Spaces](assets/properties.png "Properties of Structured State Spaces") -> **Efficiently Modeling Long Sequences with Structured State Spaces**\ -> Albert Gu, Karan Goel, Christopher Ré\ -> Paper: https://arxiv.org/abs/2111.00396 - -## LSSL (NeurIPS 2021) - -![Linear State Space Layer](assets/splash.png "Properties of Sequential State Spaces") -> **Combining Recurrent, Convolutional, and Continuous-time Models with the Linear State Space Layer**\ -> Albert Gu, Isys Johnson, Karan Goel, Khaled Saab, Tri Dao, Atri Rudra, Christopher Ré\ -> Paper: https://arxiv.org/abs/2110.13985 - -## HiPPO (NeurIPS 2020 - Spotlight) -![HiPPO Framework](assets/hippo.png "HiPPO Framework") -> **HiPPO: Recurrent Memory with Optimal Polynomial Projections**\ -> Albert Gu*, Tri Dao*, Stefano Ermon, Atri Rudra, Christopher Ré\ -> Paper: https://arxiv.org/abs/2008.07669 - - ## Table of Contents - [Repository Setup](#setup) - S4 @@ -49,26 +19,6 @@ python3 -m train wandb=null experiment=s4-lra-cifar-new model.layer.liquid=N # l - [Repository Structure](#overall-repository-structure) - [Citation](#citation) -## Changelog - -### 2022-05-01 - [V2.1] -- Minor updates to S4 modules -- New S4D (S4-diagonal) standalone model found at `src/models/sequence/ss/standalone/s4d.py`. Simple variant using diagonal SSMs that recovers S4's performance on most tasks. Can be run with any existing experiment config with the additional flag `model/layer=s4d` on the command line. -- New [LRA configs](#long-range-arena-lra) for updated S4 code, with an average score of ~86 - -### 2022-04-03 -By default, S4 no longer requires installing Pykeops or a custom CUDA kernel. - -### 2022-02-27 - [V2] -Code release for SaShiMi audio model. - -### 2022-01-29 -Added configs for time series datasets from the Informer paper. - -### 2021-11-18 - [V1] -First release of this repository containing the S4 module and configs to reproduce sCIFAR, Speech Commands, Long Range Arena, and WikiText-103 results. - - ## Setup ### Requirements @@ -372,4 +322,4 @@ If you use this codebase, or otherwise found our work valuable, please cite: volume={33}, year={2020} } -``` \ No newline at end of file +``` diff --git a/deploy.py b/deploy.py deleted file mode 100644 index 1dfbb71..0000000 --- a/deploy.py +++ /dev/null @@ -1,73 +0,0 @@ -import os -import sys - -base_source_dirs = { - "mathias-thinkpad": "/home/mathias/dev", - "mathias-desktop": "/home/mathias/dev/ist", -} -base_move_dirs = { - "mathias-thinkpad": "/home/mathias/dev/azure", - "deephawk": "/home/mathias/", - "mathias-desktop": "/home/mathias/dev/azure/", -} - -base_target_dirs = { - "deephawk": "deephawk/dev/", - "pixie": "deephawk/dev/azure/pixie", - "hpc": "hpc", - "z1": "deephawk/dev/azure/z1", - "z2": "deephawk/dev/azure/z2", - "rpc": "rpc", - "1080": "deephawk/dev/azure/1080", - "deepart": "deephawk/dev/azure/deepart", -} - -sync_dirs = [ - "s4", - "s4/configs/experiment", - "s4/configs/model/layer", - "s4/src/models/hippo", - "s4/src/models/sequence/ss", -] -sync_ext = [".py", ".sh", ".sl", ".yaml"] - - -host = os.uname()[1].lower() -if not host in base_move_dirs or not host in base_move_dirs: - print("Unknown host: " + host) - sys.exit(-1) - -target = "deephawk" -if len(sys.argv) > 1: - target = sys.argv[1].lower() -if not target in base_target_dirs: - print("Unknown target: " + target) - sys.exit(-1) - -flags = "ruv" -if len(sys.argv) > 2: - if sys.argv[2].lower() == "--force": - flags = "rv" - print("Force rewrite") - -print('Syncing from "' + host + '" to "' + target + '"') -for sync_dir in sync_dirs: - print(' moving dir "' + sync_dir + '"') - source_dir = os.path.join(base_source_dirs[host], sync_dir) - - target_dir = os.path.join(base_move_dirs[host], base_target_dirs[target], sync_dir) - - print("Source dir: " + str(source_dir)) - print("Target dir: " + str(target_dir)) - cmd_str = ( - "rsync -" - + flags - + " '" - + source_dir - + "/' --exclude '.git' --include '*.py' --include '*.yaml' --include '*.sl' --include '*.sh' --exclude '*' '" - + target_dir - + "/'" - ) - - os.system(cmd_str) - # print(cmd_str) \ No newline at end of file diff --git a/run_local.sh b/run_local.sh deleted file mode 100644 index a06d8f1..0000000 --- a/run_local.sh +++ /dev/null @@ -1,9 +0,0 @@ -# papeR: S4_v1: 87.26, v2: 88.5, liquid: 90.95 -python3 -m train wandb=null experiment=s4-lra-cifar-new # 4.2/it/s top score: 0.8600000143051147 -python3 -m train wandb=null experiment=s4-lra-cifar-new model.layer.liquid=1 # 3.04 it/s -python3 -m train wandb=null experiment=s4-lra-cifar-new model.layer.liquid=2 # 2.91 it/s top score: 0.8611999750137329 -python3 -m train wandb=null experiment=s4-lra-cifar-new model.layer.liquid=3 # it/s -python3 -m train wandb=null experiment=s4-lra-cifar-new optimizer.weight_decay=0.01 trainer.max_epochs=200 -python3 -m train wandb=null experiment=s4-lra-cifar-new optimizer.weight_decay=0.01 trainer.max_epochs=300 -python3 -m train wandb=null experiment=s4-lra-cifar-new model.d_model=192 model.layer.postact=glu model.layer.bidirectional=true optimizer.weight_decay=0.01 trainer.max_epochs=200 -python3 -m train wandb=null experiment=s4-lra-cifar-new optimizer.lr=0.01 optimizer.weight_decay=0.01 trainer.max_epochs=200 \ No newline at end of file diff --git a/run_sl1.sl b/run_sl1.sl deleted file mode 100644 index 1533eb7..0000000 --- a/run_sl1.sl +++ /dev/null @@ -1,100 +0,0 @@ -#!/bin/bash -# -#SBATCH --job-name=s4_3 -#SBATCH --output=s4_3.txt -# -#number of CPUs to be used -#SBATCH --ntasks=1 -#SBATCH -c 8 -# -#Define the number of hours the job should run. -#Maximum runtime is limited to 10 days, ie. 240 hours -#SBATCH --time=128:00:00 -# -#Define the amount of system RAM used by your job in GigaBytes -#SBATCH --mem=128G - -#SBATCH --partition=gpu -#SBATCH --gres=gpu:2 -#SBATCH --constraint=A10 - -#Send emails when a job starts, it is finished or it exits -#SBATCH --mail-user=mlechner@ist.ac.at -#SBATCH --mail-type=END,FAIL -# -#SBATCH --no-requeue -#SBATCH --export=NONE -unset SLURM_EXPORT_ENV - - -module load python/3.8.12 -module load cuda/11.2.2 -module load cudnn/8.2.4.15 -#module load cudnn/8.1.0.77 -#module load cudnn/8.1.1.33 - -cd $HOME/s4 -source venv/bin/activate - - -# mkdir -p $HOME/s4/tmp/tmp_$SLURM_ARRAY_TASK_ID -# export TMPDIR=$HOME/s4/tmp/tmp_$SLURM_ARRAY_TASK_ID -# python3 -m train model.layer.poly=true pipeline=mnist model=s4 -### S4 Paper: -# Listops 58.35 -# IMDB: 76.02 -# AAN: 87.09 -# CIFAR: 87.26 -# Pathfinder: 86.05 -# Path-X: 88.10 - -# listops: test_acc: 0.567 -# imdb: 0.76816 -# pathfindeR: 0.495 - -# 4 gpus: -# listops best 0.54650 -# imdb 0.74128 -# cifar: 0.84100 -# pathfindeR 0.75270 - -python3 -m train wandb=null experiment=s4-lra-cifar-new trainer.gpus=2 model.layer.liquid=3 # it/s - - -# trainer.gpus=2 -# lr: 0.004 -# weight_decay: 0.03 -#python -m train wandb=null experiment=s4-lra-pathfinder-new optimizer.lr=0.008 optimizer.weight_decay=0.01 trainer.gpus=4 -#python -m train wandb=null experiment=s4-lra-pathfinder-new optimizer.lr=0.01 optimizer.weight_decay=0.03 trainer.gpus=4 -#python -m train wandb=null experiment=s4-lra-pathfinder-new optimizer.lr=0.008 model.layer.d_state=128 trainer.gpus=4 - - -#if [ $SLURM_ARRAY_TASK_ID = 6 ] -#then -# python -m train model.layer.poly=true experiment=s4-lra-cifar model.layer.postact=glu model.layer.bidirectional=true optimizer.weight_decay=0.01 trainer.max_epochs=160 -#elif [ $SLURM_ARRAY_TASK_ID = 7 ] -#then -# python -m train model.layer.poly=true experiment=s4-lra-cifar model.layer.postact=glu model.layer.bidirectional=true optimizer.weight_decay=0.005 trainer.max_epochs=160 -# python -m train experiment=s4-lra-cifar model.layer.postact=glu model.layer.bidirectional=true optimizer.weight_decay=0.005 trainer.max_epochs=160 -#elif [ $SLURM_ARRAY_TASK_ID = 8 ] -#then -# python -m train model.layer.poly=true experiment=s4-lra-cifar model.layer.postact=glu model.layer.bidirectional=true optimizer.weight_decay=0.01 trainer.max_epochs=160 scheduler.patience=20 -#elif [ $SLURM_ARRAY_TASK_ID = 9 ] -#then -# python -m train model.layer.poly=true experiment=s4-lra-pathfinder -# python -m train experiment=s4-lra-pathfinder -#elif [ $SLURM_ARRAY_TASK_ID = 10 ] -#then -# python -m train model.layer.poly=true experiment=s4-lra-pathfinder optimizer.lr=0.01 -#elif [ $SLURM_ARRAY_TASK_ID = 11 ] -#then -# python -m train model.layer.poly=true experiment=s4-lra-pathfinder optimizer.lr=0.006 -#elif [ $SLURM_ARRAY_TASK_ID = 12 ] -#then -# python -m train model.layer.poly=true experiment=s4-lra-pathfinder optimizer.lr=0.004 scheduler.patience=20 -#elif [ $SLURM_ARRAY_TASK_ID = 13 ] -#then -# python -m train model.layer.poly=true experiment=s4-lra-aan &> results/aan.txt -#fi - -# rm -r $HOME/s4/tmp/tmp_$SLURM_ARRAY_TASK_ID \ No newline at end of file diff --git a/run_sl2.sl b/run_sl2.sl deleted file mode 100644 index 2f3ed1d..0000000 --- a/run_sl2.sl +++ /dev/null @@ -1,46 +0,0 @@ -#!/bin/bash -# -#SBATCH --job-name=s4_2 -#SBATCH --output=s4_2.txt -# -#number of CPUs to be used -#SBATCH --ntasks=1 -#SBATCH -c 24 -# -#Define the number of hours the job should run. -#Maximum runtime is limited to 10 days, ie. 240 hours -#SBATCH --time=120:00:00 -# -#Define the amount of system RAM used by your job in GigaBytes -#SBATCH --mem=500G - -#SBATCH --partition=gpu -#SBATCH --gres=gpu:4 -#SBATCH --constraint=A10 - -#Send emails when a job starts, it is finished or it exits -#SBATCH --mail-user=mlechner@ist.ac.at -#SBATCH --mail-type=END,FAIL -# -#SBATCH --no-requeue -#SBATCH --export=NONE -unset SLURM_EXPORT_ENV - - -module load python/3.8.12 -module load cuda/11.2.2 -module load cudnn/8.2.4.15 -#module load cudnn/8.1.0.77 -#module load cudnn/8.1.1.33 - - - -cd $HOME/s4 -source venv/bin/activate - -# lr: 0.0005 -# weight_decay: 0.05 - -# trainer.gpus=2 -python -m train wandb=null experiment=s4-lra-pathx-new trainer.gpus=4 optimizer.lr=0.0008 -python -m train wandb=null experiment=s4-lra-pathx-new trainer.gpus=4 optimizer.weight_decay=0.02 \ No newline at end of file