Spaces:
Running
Running
File size: 6,366 Bytes
60ec08f 12926e4 6977997 12926e4 6977997 12926e4 7a3c8e5 60ec08f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 |
# syntax=docker/dockerfile:1
# Initialize device type args
# use build args in the docker build commmand with --build-arg="BUILDARG=true"
ARG USE_CUDA=false
ARG USE_OLLAMA=false
# Tested with cu117 for CUDA 11 and cu121 for CUDA 12 (default)
ARG USE_CUDA_VER=cu121
# any sentence transformer model; models to use can be found at https://huggingface.co/models?library=sentence-transformers
# Leaderboard: https://huggingface.co/spaces/mteb/leaderboard
# for better performance and multilangauge support use "intfloat/multilingual-e5-large" (~2.5GB) or "intfloat/multilingual-e5-base" (~1.5GB)
# IMPORTANT: If you change the embedding model (sentence-transformers/all-MiniLM-L6-v2) and vice versa, you aren't able to use RAG Chat with your previous documents loaded in the WebUI! You need to re-embed them.
ARG USE_EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2
ARG USE_RERANKING_MODEL=""
# Override at your own risk - non-root configurations are untested
ARG UID=0
ARG GID=0
######## WebUI frontend ########
FROM --platform=$BUILDPLATFORM node:21-alpine3.19 as build
WORKDIR /app
RUN mkdir -p /app
RUN mkdir -p /app/backend
RUN mkdir -p /app/backend/data
RUN mkdir -p /app/backend/data/uploads
RUN mkdir -p /app/backend/data/vector_db
RUN chmod -R 777 /app
RUN chmod -R 777 /app/backend
RUN chmod -R 777 /app/backend/data
RUN chmod -R 777 /app/backend/data/uploads
RUN chmod -R 777 /app/backend/data/vector_db
COPY chroma.sqlite3 /app/backend/data/vector_db/chroma.sqlite3
COPY package.json package-lock.json ./
RUN npm ci
COPY . .
RUN npm run build
######## WebUI backend ########
FROM python:3.11-slim-bookworm as base
# Use args
ARG USE_CUDA
ARG USE_OLLAMA
ARG USE_CUDA_VER
ARG USE_EMBEDDING_MODEL
ARG USE_RERANKING_MODEL
ARG UID
ARG GID
## Basis ##
ENV ENV=prod \
PORT=8080 \
# pass build args to the build
USE_OLLAMA_DOCKER=${USE_OLLAMA} \
USE_CUDA_DOCKER=${USE_CUDA} \
USE_CUDA_DOCKER_VER=${USE_CUDA_VER} \
USE_EMBEDDING_MODEL_DOCKER=${USE_EMBEDDING_MODEL} \
USE_RERANKING_MODEL_DOCKER=${USE_RERANKING_MODEL}
## Basis URL Config ##
ENV OLLAMA_BASE_URL="/ollama" \
OPENAI_API_BASE_URL=""
## API Key and Security Config ##
ENV OPENAI_API_KEY="" \
WEBUI_SECRET_KEY="" \
SCARF_NO_ANALYTICS=true \
DO_NOT_TRACK=true \
ANONYMIZED_TELEMETRY=false
# Use locally bundled version of the LiteLLM cost map json
# to avoid repetitive startup connections
ENV LITELLM_LOCAL_MODEL_COST_MAP="True"
#### Other models #########################################################
## whisper TTS model settings ##
ENV WHISPER_MODEL="base" \
WHISPER_MODEL_DIR="/app/backend/data/cache/whisper/models"
## RAG Embedding model settings ##
ENV RAG_EMBEDDING_MODEL="$USE_EMBEDDING_MODEL_DOCKER" \
RAG_RERANKING_MODEL="$USE_RERANKING_MODEL_DOCKER" \
SENTENCE_TRANSFORMERS_HOME="/app/backend/data/cache/embedding/models"
## Hugging Face download cache ##
ENV HF_HOME="/app/backend/data/cache/embedding/models"
#### Other models ##########################################################
WORKDIR /app/backend
ENV HOME /root
# Create user and group if not root
RUN if [ $UID -ne 0 ]; then \
if [ $GID -ne 0 ]; then \
addgroup --gid $GID app; \
fi; \
adduser --uid $UID --gid $GID --home $HOME --disabled-password --no-create-home app; \
fi
RUN mkdir -p $HOME/.cache/chroma
RUN echo -n 00000000-0000-0000-0000-000000000000 > $HOME/.cache/chroma/telemetry_user_id
# Make sure the user has access to the app and root directory
RUN chown -R $UID:$GID /app $HOME
RUN if [ "$USE_OLLAMA" = "true" ]; then \
apt-get update && \
# Install pandoc and netcat
apt-get install -y --no-install-recommends pandoc netcat-openbsd curl && \
# for RAG OCR
apt-get install -y --no-install-recommends ffmpeg libsm6 libxext6 && \
# install helper tools
apt-get install -y --no-install-recommends curl jq && \
# install ollama
curl -fsSL https://ollama.com/install.sh | sh && \
# cleanup
rm -rf /var/lib/apt/lists/*; \
else \
apt-get update && \
# Install pandoc and netcat
apt-get install -y --no-install-recommends pandoc netcat-openbsd curl jq && \
# for RAG OCR
apt-get install -y --no-install-recommends ffmpeg libsm6 libxext6 && \
# cleanup
rm -rf /var/lib/apt/lists/*; \
fi
# install python dependencies
COPY --chown=$UID:$GID ./backend/requirements.txt ./requirements.txt
RUN pip3 install uv && \
if [ "$USE_CUDA" = "true" ]; then \
# If you use CUDA the whisper and embedding model will be downloaded on first use
pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/$USE_CUDA_DOCKER_VER --no-cache-dir && \
uv pip install --system -r requirements.txt --no-cache-dir && \
python -c "import os; from sentence_transformers import SentenceTransformer; SentenceTransformer(os.environ['RAG_EMBEDDING_MODEL'], device='cpu')" && \
python -c "import os; from faster_whisper import WhisperModel; WhisperModel(os.environ['WHISPER_MODEL'], device='cpu', compute_type='int8', download_root=os.environ['WHISPER_MODEL_DIR'])"; \
else \
pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu --no-cache-dir && \
uv pip install --system -r requirements.txt --no-cache-dir && \
python -c "import os; from sentence_transformers import SentenceTransformer; SentenceTransformer(os.environ['RAG_EMBEDDING_MODEL'], device='cpu')" && \
python -c "import os; from faster_whisper import WhisperModel; WhisperModel(os.environ['WHISPER_MODEL'], device='cpu', compute_type='int8', download_root=os.environ['WHISPER_MODEL_DIR'])"; \
fi; \
chown -R $UID:$GID /app/backend/data/
# copy embedding weight from build
# RUN mkdir -p /root/.cache/chroma/onnx_models/all-MiniLM-L6-v2
# COPY --from=build /app/onnx /root/.cache/chroma/onnx_models/all-MiniLM-L6-v2/onnx
# copy built frontend files
COPY --chown=$UID:$GID --from=build /app/build /app/build
COPY --chown=$UID:$GID --from=build /app/CHANGELOG.md /app/CHANGELOG.md
COPY --chown=$UID:$GID --from=build /app/package.json /app/package.json
# copy backend files
COPY --chown=$UID:$GID ./backend .
EXPOSE 8080
HEALTHCHECK CMD curl --silent --fail http://localhost:8080/health | jq -e '.status == true' || exit 1
USER $UID:$GID
CMD [ "bash", "start.sh"]
|