From ab178246423324aa467a20d0ae894ae54a255498 Mon Sep 17 00:00:00 2001
From: Brian
Date: Fri, 1 Nov 2024 19:55:10 -0400
Subject: [PATCH] Reduce the size of the NVIDIA CUDA Containerfile

Split the Containerfile into a multi-stage build: compile llama.cpp and
whisper.cpp in the CUDA -devel image, then copy only the resulting
binaries and shared libraries into a CUDA -runtime image, so the
compiler toolchain, build dependencies, and source trees are no longer
shipped in the final image.

Signed-off-by: Brian
---
 container-images/cuda/Containerfile | 94 +++++++++++++----------------
 1 file changed, 42 insertions(+), 52 deletions(-)

diff --git a/container-images/cuda/Containerfile b/container-images/cuda/Containerfile
index fecc744b..244093f2 100644
--- a/container-images/cuda/Containerfile
+++ b/container-images/cuda/Containerfile
@@ -1,4 +1,5 @@
-FROM docker.io/nvidia/cuda:12.6.2-devel-ubi9
+# Base image with CUDA for compilation
+FROM docker.io/nvidia/cuda:12.6.2-devel-ubi9 AS builder
 
 # renovate: datasource=github-releases depName=huggingface/huggingface_hub extractVersion=^v(?<version>.*)
 ARG HUGGINGFACE_HUB_VERSION=0.26.2
@@ -8,64 +9,53 @@ ARG LLAMA_CPP_SHA=3f1ae2e32cde00c39b96be6d01c2997c29bae555
 # renovate: datasource=git-refs depName=ggerganov/whisper.cpp packageName=https://github.com/ggerganov/whisper.cpp gitRef=master versioning=loose type=digest
 ARG WHISPER_CPP_SHA=4e10afb5a94469c605aae4eceb4021fb0e68c8f5
 
-# vulkan-headers vulkan-loader-devel vulkan-tools glslc glslang python3-pip mesa-libOpenCL-$MESA_VER.aarch64
-RUN dnf install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm && \
-    crb enable && \
-    dnf install -y epel-release && \
-    dnf --enablerepo=ubi-9-appstream-rpms install -y git procps-ng vim \
-        dnf-plugins-core python3-dnf-plugin-versionlock cmake gcc-c++ \
-        python3-pip && \
-    dnf clean all && \
-    rm -rf /var/cache/*dnf*
-
-RUN /usr/bin/python3 --version
-RUN pip install "huggingface_hub==${HUGGINGFACE_HUB_VERSION}"
-RUN pip install "omlmd==${OMLMD_VERSION}"
-
-# CUDA_DOCKER_ARCH =
-# Hopper GPUs (e.g., H100): Use 90
-# Ampere GPUs (e.g., RTX 30 Series, A100): Use 80
-# Turing GPUs (e.g., RTX 20 Series, GTX 16 Series): Use 75
-# Volta GPUs (e.g., V100): Use 70
-# Pascal GPUs (e.g., GTX 10 Series): Use 61
-# Maxwell GPUs (e.g., GTX 900 Series): Use 52
-# Kepler GPUs (e.g., GTX 600 and 700 Series): Use 35
-
-# Change to your gpu architecture (Optional)
 ARG CUDA_DOCKER_ARCH=default
 
-# Followed https://github.com/ggerganov/llama.cpp/blob/master/.devops/full-cuda.Dockerfile
-# for reference to build llama.cpp with cuda using cmake
+# Install dependencies only needed for building
+RUN dnf install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm && \
+    dnf install -y git cmake gcc-c++ python3-pip && \
+    dnf clean all && rm -rf /var/cache/*dnf*
 
-RUN git clone https://github.com/ggerganov/llama.cpp && \
-    cd llama.cpp && \
+# Install Python packages
+RUN pip install "huggingface_hub==${HUGGINGFACE_HUB_VERSION}" "omlmd==${OMLMD_VERSION}"
+
+# Build llama.cpp
+RUN git clone https://github.com/ggerganov/llama.cpp && cd llama.cpp && \
     git reset --hard ${LLAMA_CPP_SHA} && \
     cmake -B build -DGGML_CUDA=ON -DCUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
     cmake --build build --config Release -j$(nproc) && \
-    # Move llama-cli and llama-server to /usr/bin
-    mv build/bin/llama-cli /usr/bin/llama-cli && \
-    mv build/bin/llama-server /usr/bin/llama-server && \
-    # Move shared libraries to a standard library directory
-    mv build/ggml/src/libggml.so /usr/lib/libggml.so && \
-    mv build/src/libllama.so /usr/lib/libllama.so && \
-    # Update the dynamic linker cache
-    ldconfig && \
-    # Clean up
-    cd / && \
-    rm -rf llama.cpp
+    mv build/bin/llama-cli /usr/bin/ && mv build/bin/llama-server /usr/bin/ && \
+    mv build/ggml/src/libggml.so /usr/lib/ && mv build/src/libllama.so /usr/lib/ && \
+    cd / && rm -rf llama.cpp
 
-RUN git clone https://github.com/ggerganov/whisper.cpp.git && \
-    cd whisper.cpp && \
+# Build whisper.cpp
+RUN git clone https://github.com/ggerganov/whisper.cpp && cd whisper.cpp && \
     git reset --hard ${WHISPER_CPP_SHA} && \
     cmake -B build -DGGML_CUDA=ON -DCUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
     cmake --build build --config Release -j$(nproc) && \
-    # Move whisper binaries to /usr/bin
-    mv build/bin/main /usr/bin/whisper-main && \
-    mv build/bin/server /usr/bin/whisper-server && \
-    # Move any shared libraries to a standard library directory if needed
-    if [ -f build/lib/libwhisper.so ]; then mv build/lib/libwhisper.so /usr/lib/libwhisper.so; fi && \
-    # Update the dynamic linker cache if any shared libraries were moved
-    ldconfig || true && \
-    # Clean up
-    cd / && \
-    rm -rf whisper.cpp
\ No newline at end of file
+    mv build/bin/main /usr/bin/whisper-main && mv build/bin/server /usr/bin/whisper-server && \
+    if [ -f build/lib/libwhisper.so ]; then mv build/lib/libwhisper.so /usr/lib/; fi && \
+    cd / && rm -rf whisper.cpp
+
+# Final runtime image
+FROM docker.io/nvidia/cuda:12.6.2-runtime-ubi9
+
+# renovate: datasource=github-releases depName=huggingface/huggingface_hub extractVersion=^v(?<version>.*)
+ARG HUGGINGFACE_HUB_VERSION=0.26.2
+# renovate: datasource=github-releases depName=containers/omlmd extractVersion=^v(?<version>.*)
+ARG OMLMD_VERSION=0.1.6
+
+# Install minimal runtime dependencies
+RUN dnf install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm && \
+    dnf install -y python3 python3-pip && dnf clean all && rm -rf /var/cache/*dnf*
+
+# Install Python packages in the runtime image
+RUN pip install "huggingface_hub==${HUGGINGFACE_HUB_VERSION}" "omlmd==${OMLMD_VERSION}"
+
+# Copy only necessary files from the build stage
+COPY --from=builder /usr/bin/llama-cli /usr/bin/llama-server /usr/bin/
+COPY --from=builder /usr/bin/whisper-main /usr/bin/whisper-server /usr/bin/
+COPY --from=builder /usr/lib/libggml.so /usr/lib/libllama.so /usr/lib/
+
+# Update dynamic linker cache
+RUN ldconfig || true
\ No newline at end of file
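
Not part of the patch: a minimal sketch of how to sanity-check the change,
assuming podman is available and the commands run from the repository root;
the ramalama-cuda image tag is illustrative, not something the patch defines:

    # Build with the new multi-stage Containerfile, then compare the
    # reported SIZE against an image built from the pre-patch file
    podman build -t ramalama-cuda -f container-images/cuda/Containerfile .
    podman images ramalama-cuda

    # Verify the binaries copied out of the builder stage still resolve
    # their shared libraries (libggml.so and libllama.so now sit in
    # /usr/lib of the runtime image, and ldconfig was re-run there)
    podman run --rm ramalama-cuda ldd /usr/bin/llama-server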