-
-
Notifications
You must be signed in to change notification settings - Fork 26
/
Dockerfile
46 lines (40 loc) · 1.73 KB
/
Dockerfile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
# Base image: Browsertrix Crawler, pinned to an explicit release tag.
FROM webrecorder/browsertrix-crawler:1.4.0-beta.0
# OCI label linking the image to its source repository (key="value" form; the
# space-separated "LABEL key value" syntax is legacy).
LABEL org.opencontainers.image.source="https://github.com/openzim/zimit"
# System packages, Python venv, Chrome settings, ad-domain blocklist and the
# entrypoint wrapper, all created in a single layer.
RUN apt-get update \
 && apt-get install -qqy --no-install-recommends \
      libmagic1 \
      python3.12-venv \
 && rm -rf /var/lib/apt/lists/* \
 # python setup (in a venv so as not to conflict with browsertrix)
 && python3.12 -m venv /app/zimit \
 # placeholder (default output location)
 && mkdir -p /output \
 # disable chrome upgrade
 && printf "repo_add_once=\"false\"\nrepo_reenable_on_distupgrade=\"false\"\n" > /etc/default/google-chrome \
 # download lists of bad domains to filter out. intentionally run after the
 # package install (the URLs stay the same but their content is updated).
 # NOTE(review): this whole RUN is cached as one layer, so a cache hit reuses
 # the lists as they were when the layer was built.
 # -f makes curl fail on an HTTP error instead of saving the error page, which
 # would otherwise end up in /etc/blocklist.txt and later in /etc/hosts.
 && mkdir -p /tmp/ads \
 && cd /tmp/ads \
 && curl -fL -O https://hosts.anudeep.me/mirror/adservers.txt \
 && curl -fL -O https://hosts.anudeep.me/mirror/CoinMiner.txt \
 && curl -fL -O https://hosts.anudeep.me/mirror/facebook.txt \
 && cat ./*.txt > /etc/blocklist.txt \
 # remove the download directory itself, not only the files inside it
 && cd / \
 && rm -rf /tmp/ads \
 # entrypoint wrapper: appends the blocklist to /etc/hosts when the container
 # starts, then exec's the requested command
 && printf '#!/bin/sh\ncat /etc/blocklist.txt >> /etc/hosts\nexec "$@"' > /usr/local/bin/entrypoint.sh \
 && chmod +x /usr/local/bin/entrypoint.sh
# Copy only the packaging inputs (pyproject.toml, README.md and __about__.py)
# ahead of the full source tree, so the dependency-install layer below is only
# rebuilt when one of these files changes.
# NOTE(review): presumably pyproject.toml reads the README and takes the
# version from __about__.py — confirm against pyproject.toml.
COPY pyproject.toml README.md /src/
COPY src/zimit/__about__.py /src/src/zimit/__about__.py
# Install the project from /src, along with its Python dependencies, using the
# /app/zimit venv (activated first so `python` resolves inside the venv).
# --no-cache-dir keeps pip's cache out of the layer.
RUN . /app/zimit/bin/activate && python -m pip install --no-cache-dir /src
# Copy the full source tree and every top-level Markdown file into /src.
COPY src /src/src
COPY *.md /src/
# Install the project from /src into the /app/zimit venv, then:
#  - symlink the venv's zimit and warc2zim executables into /usr/bin,
#  - make /usr/bin/zimit executable,
#  - delete the /src tree.
# NOTE(review): /src was added by earlier COPY layers, so removing it here
# hides it from the final filesystem but does not shrink those layers.
RUN . /app/zimit/bin/activate && python -m pip install --no-cache-dir /src \
&& ln -s /app/zimit/bin/zimit /usr/bin/zimit \
&& ln -s /app/zimit/bin/warc2zim /usr/bin/warc2zim \
&& chmod +x /usr/bin/zimit \
&& rm -rf /src
# Start through the wrapper script by absolute path, so that startup does not
# depend on /usr/local/bin being present in the runtime PATH.
ENTRYPOINT ["/usr/local/bin/entrypoint.sh"]
# Default command: print zimit's usage; arguments given to `docker run` replace it.
CMD ["zimit", "--help"]