Spaces:
Running
Running
# # syntax=docker/dockerfile:1.4
# ARG TARGETPLATFORM
# ARG BUILDPLATFORM
# # Other build arguments
# ARG PYTHON_VERSION=3.10
# # Base stage with system dependencies
# FROM python:${PYTHON_VERSION}-slim as base
# # Declare ARG variables again within the build stage
# ARG INSTALL_TYPE=basic
# ARG ENABLE_GPU=false
# # Platform-specific labels
# LABEL maintainer="unclecode"
# LABEL description="🔥🕷️ Crawl4AI: Open-source LLM Friendly Web Crawler & scraper"
# LABEL version="1.0"
# # Environment setup
# ENV PYTHONUNBUFFERED=1 \
#     PYTHONDONTWRITEBYTECODE=1 \
#     PIP_NO_CACHE_DIR=1 \
#     PIP_DISABLE_PIP_VERSION_CHECK=1 \
#     PIP_DEFAULT_TIMEOUT=100 \
#     DEBIAN_FRONTEND=noninteractive
# # Install system dependencies
# RUN apt-get update && apt-get install -y --no-install-recommends \
#     build-essential \
#     curl \
#     wget \
#     gnupg \
#     git \
#     cmake \
#     pkg-config \
#     python3-dev \
#     libjpeg-dev \
#     libpng-dev \
#     && rm -rf /var/lib/apt/lists/*
# # Playwright system dependencies for Linux
# RUN apt-get update && apt-get install -y --no-install-recommends \
#     libglib2.0-0 \
#     libnss3 \
#     libnspr4 \
#     libatk1.0-0 \
#     libatk-bridge2.0-0 \
#     libcups2 \
#     libdrm2 \
#     libdbus-1-3 \
#     libxcb1 \
#     libxkbcommon0 \
#     libx11-6 \
#     libxcomposite1 \
#     libxdamage1 \
#     libxext6 \
#     libxfixes3 \
#     libxrandr2 \
#     libgbm1 \
#     libpango-1.0-0 \
#     libcairo2 \
#     libasound2 \
#     libatspi2.0-0 \
#     && rm -rf /var/lib/apt/lists/*
# # GPU support if enabled and architecture is supported
# RUN if [ "$ENABLE_GPU" = "true" ] && [ "$TARGETPLATFORM" = "linux/amd64" ] ; then \
#     apt-get update && apt-get install -y --no-install-recommends \
#     nvidia-cuda-toolkit \
#     && rm -rf /var/lib/apt/lists/* ; \
#     else \
#     echo "Skipping NVIDIA CUDA Toolkit installation (unsupported platform or GPU disabled)"; \
#     fi
# # Create and set working directory
# WORKDIR /app
# # Copy the entire project
# COPY . .
# # Install base requirements
# RUN pip install --no-cache-dir -r requirements.txt
# # Install required library for FastAPI
# RUN pip install fastapi uvicorn psutil
# # Install ML dependencies first for better layer caching
# RUN if [ "$INSTALL_TYPE" = "all" ] ; then \
#     pip install --no-cache-dir \
#     torch \
#     torchvision \
#     torchaudio \
#     scikit-learn \
#     nltk \
#     transformers \
#     tokenizers && \
#     python -m nltk.downloader punkt stopwords ; \
#     fi
# # Install the package
# RUN if [ "$INSTALL_TYPE" = "all" ] ; then \
#     pip install ".[all]" && \
#     python -m crawl4ai.model_loader ; \
#     elif [ "$INSTALL_TYPE" = "torch" ] ; then \
#     pip install ".[torch]" ; \
#     elif [ "$INSTALL_TYPE" = "transformer" ] ; then \
#     pip install ".[transformer]" && \
#     python -m crawl4ai.model_loader ; \
#     else \
#     pip install "." ; \
#     fi
# # Install MkDocs and required plugins
# RUN pip install --no-cache-dir \
#     mkdocs \
#     mkdocs-material \
#     mkdocs-terminal \
#     pymdown-extensions
# # Build MkDocs documentation
# RUN mkdocs build
# # Install Playwright and browsers
# RUN if [ "$TARGETPLATFORM" = "linux/amd64" ]; then \
#     playwright install chromium; \
#     elif [ "$TARGETPLATFORM" = "linux/arm64" ]; then \
#     playwright install chromium; \
#     fi
# # Expose port
# EXPOSE 8000 11235 9222 8080
# # Start the FastAPI server
# CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "11235"]
# syntax=docker/dockerfile:1.4
# NOTE(review): Docker only honours a parser directive when it precedes every
# other comment and instruction in the file; if anything sits above this line
# it is read as a plain comment. Move it to line 1 to pin the frontend.

# Platform arguments, declared in the global (pre-FROM) scope.
ARG TARGETPLATFORM
ARG BUILDPLATFORM

# Other build arguments
ARG PYTHON_VERSION=3.10
# Base stage with system dependencies
FROM python:${PYTHON_VERSION}-slim AS base

# Build arguments consumed inside this stage. They are declared after FROM
# because an ARG declared before FROM is not visible to the instructions
# that follow it.
ARG INSTALL_TYPE=basic
ARG ENABLE_GPU=false
# Image metadata labels
LABEL maintainer="unclecode" \
      description="🔥🕷️ Crawl4AI: Open-source LLM Friendly Web Crawler & scraper" \
      version="1.0"
# Environment setup: unbuffered Python output, no .pyc files, and a
# cache-free pip with no version check and a 100-second default timeout.
ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    PIP_NO_CACHE_DIR=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=1 \
    PIP_DEFAULT_TIMEOUT=100

# Build-time only: keeps apt/debconf from prompting during the RUN steps of
# this stage without leaking the setting into the running container's
# environment (which an ENV entry would do).
ARG DEBIAN_FRONTEND=noninteractive
# Install system dependencies. This runs as root: the non-root USER is only
# selected further down the file. Packages are listed alphabetically and the
# apt lists are removed in the same layer to keep the image small.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        cmake \
        curl \
        git \
        gnupg \
        libjpeg-dev \
        libpng-dev \
        pkg-config \
        python3-dev \
        wget \
    && rm -rf /var/lib/apt/lists/*
# Playwright system dependencies for Linux: the shared libraries the
# Chromium browser needs at run time. Listed alphabetically; apt lists are
# removed in the same layer.
RUN apt-get update && apt-get install -y --no-install-recommends \
        libasound2 \
        libatk-bridge2.0-0 \
        libatk1.0-0 \
        libatspi2.0-0 \
        libcairo2 \
        libcups2 \
        libdbus-1-3 \
        libdrm2 \
        libgbm1 \
        libglib2.0-0 \
        libnspr4 \
        libnss3 \
        libpango-1.0-0 \
        libx11-6 \
        libxcb1 \
        libxcomposite1 \
        libxdamage1 \
        libxext6 \
        libxfixes3 \
        libxkbcommon0 \
        libxrandr2 \
    && rm -rf /var/lib/apt/lists/*
# GPU support if enabled and architecture is supported.
# TARGETPLATFORM has to be redeclared inside the stage: an ARG declared before
# FROM is out of scope here, so without this line the comparison below always
# sees an empty string and the CUDA toolkit is never installed.
ARG TARGETPLATFORM
# NOTE(review): nvidia-cuda-toolkit is packaged in Debian's non-free component;
# confirm the base image's apt sources provide it before building with
# ENABLE_GPU=true.
RUN if [ "$ENABLE_GPU" = "true" ] && [ "$TARGETPLATFORM" = "linux/amd64" ]; then \
        apt-get update && apt-get install -y --no-install-recommends \
            nvidia-cuda-toolkit \
        && rm -rf /var/lib/apt/lists/*; \
    else \
        echo "Skipping NVIDIA CUDA Toolkit installation (unsupported platform or GPU disabled)"; \
    fi
# Add a non-root user (UID 1000, with a home directory) and run every
# following instruction, and the container itself, as that user.
RUN useradd -m -u 1000 user
USER user

# Put the user's ~/.local/bin first on PATH.
# NOTE(review): presumably this is where pip places console scripts
# (uvicorn, mkdocs, playwright) when installing as a non-root user; confirm.
ENV PATH="/home/user/.local/bin:$PATH"

# Create and set working directory
WORKDIR /app

# Copy the entire project with correct ownership
COPY --chown=user . .
# Install base requirements
RUN pip install --no-cache-dir -r requirements.txt

# Install the libraries required by the FastAPI server
# (--no-cache-dir added for consistency with the other pip steps).
RUN pip install --no-cache-dir fastapi uvicorn psutil
# Install ML dependencies (only when INSTALL_TYPE=all), then download the
# NLTK "punkt" and "stopwords" data sets. For any other INSTALL_TYPE this
# step does nothing.
RUN if [ "$INSTALL_TYPE" = "all" ]; then \
        pip install --no-cache-dir \
            nltk \
            scikit-learn \
            tokenizers \
            torch \
            torchaudio \
            torchvision \
            transformers \
        && python -m nltk.downloader punkt stopwords; \
    fi
# Install the package itself, with the extras selected by INSTALL_TYPE:
#   all         -> ".[all]" extras, then run crawl4ai.model_loader
#   torch       -> ".[torch]" extras
#   transformer -> ".[transformer]" extras, then run crawl4ai.model_loader
#   anything else (including the default "basic") -> plain install
RUN case "$INSTALL_TYPE" in \
        all) \
            pip install ".[all]" \
            && python -m crawl4ai.model_loader ;; \
        torch) \
            pip install ".[torch]" ;; \
        transformer) \
            pip install ".[transformer]" \
            && python -m crawl4ai.model_loader ;; \
        *) \
            pip install "." ;; \
    esac
# Install MkDocs and required plugins
RUN pip install --no-cache-dir \
        mkdocs \
        mkdocs-material \
        mkdocs-terminal \
        pymdown-extensions

# Build MkDocs documentation from the project copied into the working directory
RUN mkdocs build
# Install Playwright's Chromium browser.
# A single unconditional step replaces the earlier per-platform if/elif: both
# of its branches (linux/amd64 and linux/arm64) ran the same command, it
# tested a TARGETPLATFORM value that is not in scope inside the stage, and it
# was followed by an unconditional install anyway.
RUN python -m playwright install chromium

# Expose ports (documentation only; publishing still happens at `docker run`)
EXPOSE 8000 11235 9222 8080

# Start the FastAPI server on all interfaces, port 11235
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "11235"]