diff --git a/Makefile b/Makefile
index cab8cae..21c621c 100644
--- a/Makefile
+++ b/Makefile
@@ -8,12 +8,12 @@ SCALE_FACTOR ?= 1
 
 .PHONY: \
 	run-10-times \
 	install-deps \
 	bump-deps \
-	install-gpu-env \
+	bump-deps-with-gpu \
 	fmt \
 	pre-commit \
 	tables \
 	run-polars \
 	run-fireducks \
 	run-cudf \
@@ -56,17 +56,29 @@ run-10-times:
 	export PATH=$$HOME/.cargo/bin:$$PATH
 	uv venv --python 3.11 --seed
 
-install-deps: .venv ## Install Python project dependencies
+install-deps: .venv/.installed-deps ## Install Python project dependencies if not already installed
+
+# Stamp file marking a successful install. It is remade when requirements.txt
+# is newer than the stamp; .venv is order-only because a directory's mtime
+# changes with its contents and would otherwise force needless reinstalls.
+.venv/.installed-deps: requirements.txt | .venv
 	@unset CONDA_PREFIX \
 	&& $(VENV_BIN)/python -m pip install --upgrade uv \
-	&& $(VENV_BIN)/uv pip install --compile -r requirements.txt \
-	&& $(VENV_BIN)/uv pip install --compile -r requirements-dev.txt \
-	&& $(VENV_BIN)/uv pip install cudf-cu12 cudf-polars-cu12 --extra-index-url=https://pypi.nvidia.com
+	&& $(VENV_BIN)/uv pip install --compile -r requirements.txt --extra-index-url=https://pypi.nvidia.com
+	touch $@
+
+# The bump targets pass --upgrade together with -o: without --upgrade uv keeps
+# the pins already in the output file, and a shell redirect would truncate the
+# lock file before uv runs. rm -f lets them run before anything was installed.
+bump-deps-with-gpu: .venv ## Bump Python project dependencies, including the GPU packages
+	$(VENV_BIN)/python -m pip install --upgrade uv
+	$(VENV_BIN)/uv pip compile --upgrade --extra-index-url=https://pypi.nvidia.com requirements.in requirements-gpu.in -o requirements.txt
+	rm -f .venv/.installed-deps
 
 bump-deps: .venv ## Bump Python project dependencies
 	$(VENV_BIN)/python -m pip install --upgrade uv
-	$(VENV_BIN)/uv pip compile requirements.in > requirements.txt
-	$(VENV_BIN)/uv pip compile requirements-dev.in > requirements-dev.txt
+	$(VENV_BIN)/uv pip compile --upgrade requirements.in -o requirements.txt
+	rm -f .venv/.installed-deps
 
 fmt: ## Run autoformatting and linting
 	$(VENV_BIN)/ruff check
@@ -77,7 +89,7 @@ pre-commit: fmt ## Run all code quality checks
 
 tables: data/tables/scale-$(SCALE_FACTOR)/.done ## Alias for the dataset generation
 
-data/tables/scale-$(SCALE_FACTOR)/.done: install-deps ## Generate data tables if not already generated
+data/tables/scale-$(SCALE_FACTOR)/.done: | install-deps ## Generate data tables if not already generated
 	$(MAKE) -C tpch-dbgen dbgen
 	cd tpch-dbgen && ./dbgen -vf -s $(SCALE_FACTOR) && cd ..
 	mkdir -p "data/tables/scale-$(SCALE_FACTOR)"
@@ -92,13 +104,13 @@ run-polars: install-deps tables ## Run Polars benchmarks
 run-fireducks: install-deps tables ## Run Fireducks benchmarks
 	$(VENV_BIN)/python -m queries.fireducks
 
-run-cudf: install-gpu-env tables ## Run cuDF benchmarks
+run-cudf: install-deps tables ## Run cuDF benchmarks
 	$(VENV_BIN)/python -m queries.cudf
 
 run-polars-eager: install-deps tables ## Run Polars benchmarks in eager mode
 	POLARS_EAGER=1 $(VENV_BIN)/python -m queries.polars
 
-run-polars-gpu: install-gpu-env tables ## Run Polars GPU benchmarks
+run-polars-gpu: install-deps tables ## Run Polars GPU benchmarks
 	POLARS_GPU=1 $(VENV_BIN)/python -m queries.polars
 
 run-polars-streaming: install-deps tables ## Run Polars streaming benchmarks
@@ -135,7 +147,7 @@ run-all: run-all-polars run-duckdb run-pandas run-pyspark run-dask run-modin ##
 
 run-all-polars: run-polars run-polars-eager run-polars-gpu run-polars-streaming ## Run all Polars benchmarks
 
-run-all-gpu: run-polars run-polars-gpu run-pandas #run-cudf ## Run all GPU-accelerated library benchmarks
+run-all-gpu: run-polars run-polars-gpu run-pandas run-cudf ## Run all GPU-accelerated library benchmarks
 
 plot: install-deps ## Plot results
 	$(VENV_BIN)/python -m scripts.plot_bars
diff --git a/requirements-gpu.in b/requirements-gpu.in
new file mode 100644
index 0000000..fc39b71
--- /dev/null
+++ b/requirements-gpu.in
@@ -0,0 +1,2 @@
+cudf-cu12
+cudf-polars-cu12
diff --git a/requirements.txt b/requirements.txt
index 475c172..a54f78e 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,5 @@
 # This file was autogenerated by uv via the following command:
-#    uv pip compile requirements.in
+#    uv pip compile requirements.in requirements-gpu.in
 aiosignal==1.3.1
     # via ray
 annotated-types==0.7.0
@@ -8,9 +8,11 @@ attrs==24.2.0
     # via
     #   jsonschema
     #   referencing
+cachetools==5.5.0
+    # via cudf-cu12
 certifi==2024.8.30
     # via requests
-charset-normalizer==3.3.2
+charset-normalizer==3.4.0
     # via requests
 click==8.1.7
     # via
@@ -22,6 +24,18 @@ contourpy==1.3.0
     # via matplotlib
 cramjam==2.8.4
     # via fastparquet
+cuda-python==12.6.0
+    # via
+    #   cudf-cu12
+    #   rmm-cu12
+cudf-cu12==24.8.3
+    # via
+    #   -r requirements-gpu.in
+    #   cudf-polars-cu12
+cudf-polars-cu12==24.8.3
+    # via -r requirements-gpu.in
+cupy-cuda12x==13.3.0
+    # via cudf-cu12
 cycler==0.12.1
     # via matplotlib
 dask==2024.9.1
@@ -36,6 +50,8 @@ duckdb==1.1.1
     # via -r requirements.in
 fastparquet==2024.5.0
     # via -r requirements.in
+fastrlock==0.8.2
+    # via cupy-cuda12x
 filelock==3.16.1
     # via ray
 fonttools==4.54.1
@@ -46,11 +62,14 @@ frozenlist==1.4.1
     #   ray
 fsspec==2024.9.0
     # via
+    #   cudf-cu12
     #   dask
     #   fastparquet
     #   modin
 idna==3.10
     # via requests
+importlib-metadata==8.5.0
+    # via dask
 jsonschema==4.23.0
     # via ray
 jsonschema-specifications==2024.10.1
@@ -59,32 +78,49 @@ kiwisolver==1.4.7
     # via matplotlib
 linetimer==0.1.5
     # via -r requirements.in
+llvmlite==0.43.0
+    # via numba
 locket==1.0.0
     # via partd
+markdown-it-py==3.0.0
+    # via rich
 matplotlib==3.9.2
     # via plotnine
+mdurl==0.1.2
+    # via markdown-it-py
 mizani==0.11.4
     # via plotnine
 modin==0.32.0
     # via -r requirements.in
 msgpack==1.1.0
     # via ray
-numpy==2.1.2
+numba==0.60.0
+    # via
+    #   cudf-cu12
+    #   rmm-cu12
+numpy==1.26.4
     # via
     #   contourpy
+    #   cudf-cu12
+    #   cupy-cuda12x
     #   dask
     #   fastparquet
     #   matplotlib
     #   mizani
     #   modin
+    #   numba
     #   pandas
     #   patsy
     #   plotnine
     #   pyarrow
+    #   rmm-cu12
     #   scipy
     #   statsmodels
+nvtx==0.2.10
+    # via cudf-cu12
 packaging==24.1
     # via
+    #   cudf-cu12
     #   dask
     #   fastparquet
     #   matplotlib
@@ -92,9 +128,10 @@ packaging==24.1
     #   plotly
     #   ray
     #   statsmodels
-pandas==2.2.3
+pandas==2.2.2
     # via
     #   -r requirements.in
+    #   cudf-cu12
     #   dask
     #   dask-expr
     #   fastparquet
@@ -113,16 +150,19 @@ plotly==5.24.1
 plotnine==0.13.6
     # via -r requirements.in
 polars==1.9.0
-    # via -r requirements.in
+    # via
+    #   -r requirements.in
+    #   cudf-polars-cu12
 protobuf==5.28.2
     # via ray
 psutil==6.0.0
     # via modin
 py4j==0.10.9.7
     # via pyspark
-pyarrow==17.0.0
+pyarrow==16.1.0
     # via
     #   -r requirements.in
+    #   cudf-cu12
     #   dask-expr
     #   modin
 pydantic==2.9.2
@@ -133,6 +173,10 @@ pydantic-core==2.23.4
     # via pydantic
 pydantic-settings==2.5.2
     # via -r requirements.in
+pygments==2.18.0
+    # via rich
+pynvjitlink-cu12==0.2.3
+    # via cudf-cu12
 pyparsing==3.1.4
     # via matplotlib
 pyspark==3.5.3
@@ -157,6 +201,10 @@ referencing==0.35.1
     #   jsonschema-specifications
 requests==2.32.3
     # via ray
+rich==13.9.2
+    # via cudf-cu12
+rmm-cu12==24.8.2
+    # via cudf-cu12
 rpds-py==0.20.0
     # via
     #   jsonschema
@@ -182,9 +230,12 @@ toolz==1.0.0
     #   partd
 typing-extensions==4.12.2
     # via
+    #   cudf-cu12
     #   pydantic
     #   pydantic-core
 tzdata==2024.2
     # via pandas
 urllib3==2.2.3
     # via requests
+zipp==3.20.2
+    # via importlib-metadata