-
Notifications
You must be signed in to change notification settings - Fork 310
Expand file tree
/
Copy pathDockerfile
More file actions
186 lines (147 loc) · 5.17 KB
/
Dockerfile
File metadata and controls
186 lines (147 loc) · 5.17 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES.
# All rights reserved.
# SPDX-License-Identifier: Apache-2.0
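# syntax=docker/dockerfile:1.3
# NOTE(review): BuildKit only honors parser directives that appear before any other comment,
# blank line or instruction. Because this line follows the license header it is read as an
# ordinary comment and does not select a frontend version; move it to the very first line of
# the file if pinning the frontend is intended.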
# Coordinates of the parent image. Declared ahead of FROM so they can be substituted into it;
# override either one with --build-arg.
ARG BASE_IMG=nvcr.io/nvidia/base/ubuntu
ARG BASE_IMG_TAG=jammy-20250619

# ---- Stage "base": OS packages, LibreOffice, uv and the Python virtual environment ----
FROM ${BASE_IMG}:${BASE_IMG_TAG} AS base

# Build-time settings for the image.
#   RELEASE_TYPE / VERSION / VERSION_REV - read by the version check and the wheel build in
#                                          the nv_ingest_install stage.
#   DOWNLOAD_LLAMA_TOKENIZER, HF_ACCESS_TOKEN, MODEL_PREDOWNLOAD_PATH - not referenced by any
#                                          instruction visible in this file.
# NOTE(review): presumably the last three are consumed by a script run during the build
# (e.g. post_build_triggers.py) - confirm. Build args are recorded in the image history, so
# HF_ACCESS_TOKEN is a poor vehicle for a real token; the runtime stage already offers an
# `hf_token` secret mount.
ARG RELEASE_TYPE="dev"
ARG VERSION=""
ARG VERSION_REV="0"
ARG DOWNLOAD_LLAMA_TOKENIZER="False"
ARG HF_ACCESS_TOKEN=""
ARG MODEL_PREDOWNLOAD_PATH="/workspace/models/"

# Record the source revision (output of `git rev-parse HEAD`) as image metadata.
# No default is given, so the label is empty unless --build-arg GIT_COMMIT=<sha> is passed.
ARG GIT_COMMIT
LABEL git_commit="${GIT_COMMIT}"
# Baseline OS packages. curl, make and tini are used further down in this file (uv installer
# download, docs build, container entrypoint).
RUN apt-get update \
    && apt-get install --yes \
        bzip2 \
        ca-certificates \
        curl \
        libgl1-mesa-glx \
        libglib2.0-0 \
        make \
        tini \
        wget \
    && apt-get clean

# Bring in the repository's ffmpeg install script, execute it with bash, then delete the
# copied script from the filesystem.
COPY ./docker/scripts/install_ffmpeg.sh scripts/install_ffmpeg.sh
RUN chmod +x scripts/install_ffmpeg.sh \
    && bash scripts/install_ffmpeg.sh \
    && rm scripts/install_ffmpeg.sh
# LibreOffice installation.
# GPL compliance: for the GPL-licensed libraries named in GPL_LIBS, the matching source
# packages are fetched with `apt-get source` in the RUN below so that the sources ship
# inside the container.
ARG GPL_LIBS="\
    libfreetype6 \
    libltdl7 \
    libhunspell-1.7-0 \
    libhyphen0 \
    libdbus-1-3 \
    "

# Packages that are force-removed (dependencies ignored) once LibreOffice is installed.
# NOTE(review): the reason for stripping these particular packages is not stated in this
# file - confirm before changing the list.
ARG FORCE_REMOVE_PKGS="\
    ucf \
    liblangtag-common \
    libjbig0 \
    pinentry-curses \
    gpg-agent \
    gnupg-utils \
    gpgsm \
    gpg-wks-server \
    gpg-wks-client \
    gpgconf \
    gnupg \
    readline-common \
    libreadline8 \
    dirmngr \
    libjpeg8 \
    "

# Steps, all in one layer:
#   1. uncomment the deb-src entries so `apt-get source` can resolve source packages;
#   2. install dpkg-dev, LibreOffice and the GPL libraries without recommended extras;
#   3. download the source packages for the GPL libraries into the current directory;
#   4. remove each unwanted package, tolerating failures (`|| true`) so one package that
#      cannot be removed does not abort the build;
#   5. clear the apt package cache.
# GPL_LIBS and FORCE_REMOVE_PKGS are expanded unquoted on purpose: the shell must split
# them into one word per package.
RUN sed -i 's/# deb-src/deb-src/' /etc/apt/sources.list \
    && apt-get update \
    && apt-get install --yes --no-install-recommends \
        dpkg-dev \
        libreoffice \
        ${GPL_LIBS} \
    && apt-get source ${GPL_LIBS} \
    && for unwanted in ${FORCE_REMOVE_PKGS}; do \
           dpkg --remove --force-depends "${unwanted}" || true; \
       done \
    && apt-get clean
# Install uv.
# The installer is downloaded to a file and then executed, instead of `curl ... | sh`.
# Under the default /bin/sh a pipeline reports only the status of its last command, so a
# failed download would leave `sh` reading empty input and the step would still succeed.
# With separate commands chained by `&&`, a curl failure stops the build at this step.
RUN curl -LsSf https://astral.sh/uv/install.sh -o /tmp/uv-install.sh \
    && sh /tmp/uv-install.sh \
    && rm -f /tmp/uv-install.sh

# Directory in which the build expects the installer to have placed the uv binary.
ENV PATH=/root/.local/bin:$PATH

# Have uv copy files from its cache into the environment rather than link them; the cache
# lives on a build-time cache mount that is not part of the image.
ENV UV_LINK_MODE=copy

# Provision Python 3.12 through uv and create the runtime virtual environment.
RUN --mount=type=cache,target=/root/.cache/uv \
    uv python install 3.12 \
    && uv venv --python 3.12 /opt/nv_ingest_runtime

# Make the virtual environment the default Python for every later step and at run time.
# /root/.local/bin is already on PATH from the ENV above, so only the venv's bin directory
# is prepended here (the effective lookup order is unchanged).
ENV VIRTUAL_ENV=/opt/nv_ingest_runtime
ENV PATH=/opt/nv_ingest_runtime/bin:$PATH

# Prepend the venv's lib directory to the loader path. The `${VAR:+...}` form appends the
# previous value only when one exists; a plain `...:$LD_LIBRARY_PATH` would leave a trailing
# ':' when the variable is unset, and an empty entry makes the dynamic loader search the
# current working directory.
ENV LD_LIBRARY_PATH=/opt/nv_ingest_runtime/lib${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}

WORKDIR /workspace
# ---- Stage "nv_ingest_install": build the project wheels and install them into the venv ----
FROM base AS nv_ingest_install

# CI helpers; ci/scripts/build_pip_packages.sh is invoked later in this stage.
COPY ci ci

ENV HAYSTACK_TELEMETRY_ENABLED=False

# Compose a PEP 440 compatible version string and reject unsupported release types:
#   - VERSION defaults to today's date (YYYY.MM.DD) when empty;
#   - "dev" builds get the suffix .dev<VERSION_REV>, "release" builds .post<VERSION_REV>;
#   - any other RELEASE_TYPE fails the build.
# NOTE(review): a variable exported inside a RUN exists only in that RUN's shell, and no ARG
# or ENV named NV_INGEST_VERSION_OVERRIDE is declared in the visible part of this file, so
# the ENV instruction below appears to resolve to an empty string. The lasting effect of
# this step is the RELEASE_TYPE validation - confirm how the version is meant to reach the
# package build.
RUN build_version="${VERSION:-$(date +'%Y.%m.%d')}"; \
    case "${RELEASE_TYPE}" in \
        dev) \
            export NV_INGEST_VERSION_OVERRIDE="${build_version}.dev${VERSION_REV}" ;; \
        release) \
            export NV_INGEST_VERSION_OVERRIDE="${build_version}.post${VERSION_REV}" ;; \
        *) \
            echo "Invalid RELEASE_TYPE: ${RELEASE_TYPE}"; \
            exit 1 ;; \
    esac

# Expose the release type (and the version override, see note above) to later steps and to
# the running container.
ENV NV_INGEST_RELEASE_TYPE=${RELEASE_TYPE}
ENV NV_INGEST_VERSION_OVERRIDE=${NV_INGEST_VERSION_OVERRIDE}
# Shell-form RUN instructions from this point on are executed by bash.
SHELL ["/bin/bash", "-c"]

# Project directories needed inside the image for the build.
COPY scripts scripts
COPY tests tests
COPY data data
COPY api api
COPY client client
COPY src src

# Delete dist/ directories that may have come in with the build context - presumably so
# that the *.whl globs used after the build only match freshly built wheels.
RUN rm -rf ./src/nv_ingest/dist ./src/dist ./client/dist ./api/dist

# Python packaging frontend, installed into the virtual environment.
RUN --mount=type=cache,target=/root/.cache/uv \
    uv pip install "build>=1.2.2"

# Build the api, client and service packages in that order with the CI helper script.
# The first failing build aborts the step with the script's own exit status.
RUN --mount=type=cache,target=/root/.cache/uv \
    chmod +x ./ci/scripts/build_pip_packages.sh \
    && for lib in api client service; do \
           ./ci/scripts/build_pip_packages.sh --type "${RELEASE_TYPE}" --lib "${lib}" || exit $?; \
       done
# Install the freshly built wheels (service, api, client) into the virtual environment and,
# in the SAME layer, delete Ray's Java JAR (ray_dist.jar).
# The JAR bundles shaded Jackson (e.g. jackson-core) and is only needed for Ray's Java API /
# cross-language support; this image runs Python only. The deletion has to share a layer
# with the install: a file removed by a later RUN is still stored in the layer that created
# it, so the bundled Java dependencies would remain in the image and its size would not
# shrink.
RUN --mount=type=cache,target=/root/.cache/uv \
    uv pip install ./src/dist/*.whl \
    && uv pip install ./api/dist/*.whl \
    && uv pip install ./client/dist/*.whl \
    && rm -f /opt/nv_ingest_runtime/lib/python3.12/site-packages/ray/jars/ray_dist.jar

# Remove the source tree from the stage's filesystem now that the wheels are installed.
# This hides the directory from derived stages; the files still exist in the earlier COPY
# layer, so it does not reduce image size.
RUN rm -rf src
# ---- Stage "runtime": the service image ----
FROM nv_ingest_install AS runtime

# Service entrypoint module and default pipeline configuration, copied relative to the
# working directory inherited from the parent stage.
COPY src/microservice_entrypoint.py ./
COPY config/default_pipeline.yaml ./config/

# Container start-up scripts. entrypoint.sh is made executable as part of the COPY itself
# (--chmod) instead of a follow-up `RUN chmod +x`, which would add a layer holding a second
# copy of the file just to change its mode.
COPY --chmod=755 ./docker/scripts/entrypoint.sh /workspace/docker/entrypoint.sh
COPY ./docker/scripts/entrypoint_source_ext.sh /workspace/docker/entrypoint_source_ext.sh
COPY ./docker/scripts/post_build_triggers.py /workspace/docker/post_build_triggers.py

# Run the post-build hook script. The `hf_token` BuildKit secret is optional
# (required=false): when supplied with `--secret id=hf_token,...` it is mounted for this
# step only and is not written to any layer; when absent the step still runs.
RUN --mount=type=cache,target=/root/.cache/uv \
    --mount=type=secret,id=hf_token,required=false \
    python3 /workspace/docker/post_build_triggers.py

# tini is the container's first process and launches the entrypoint script.
ENTRYPOINT ["/usr/bin/tini", "--", "/workspace/docker/entrypoint.sh"]
# ---- Stage "test": runtime image plus the api package's "test" extra ----
FROM runtime AS test

# Resolve the api wheel built in the parent stage and reinstall it with the [test] extra.
# If no wheel matches, `ls` fails and the `&&` chain stops the build.
RUN --mount=type=cache,target=/root/.cache/uv \
    api_wheel="$(ls ./api/dist/*.whl)" \
    && uv pip install "${api_wheel}[test]"
# ---- Stage "development": interactive image with an editable client install ----
FROM nv_ingest_install AS development

# Install the client package in editable mode from the ./client directory in the image.
RUN --mount=type=cache,target=/root/.cache/uv \
    uv pip install --editable ./client

# Start an interactive shell by default.
CMD ["/bin/bash"]
# ---- Stage "docs": image that builds the documentation ----
FROM nv_ingest_install AS docs

# The docs build needs all of the source code present. src was deleted at the end of the
# parent stage, so it is copied in again together with api and client.
COPY src src
COPY api api
COPY client client

# Documentation sources and their Python requirements.
COPY docs docs
RUN --mount=type=cache,target=/root/.cache/uv \
    uv pip install -r ./docs/requirements.txt

# Default command: run the `docs` make target from the docs directory.
CMD ["bash", "-c", "cd /workspace/docs && make docs"]