# Multi-stage build for a Go application with Python/Scrapling support.
#
# Stages:
#   go-builder      compiles the static facr-scraper binary
#   python-builder  builds the /opt/scrapling virtualenv and downloads browsers
#   (final)         runtime image holding the binary, the venv and the browsers

# ---------------------------------------------------------------------------
# Go build stage
# ---------------------------------------------------------------------------
FROM golang:1.24-alpine AS go-builder

# Install build dependencies
RUN apk add --no-cache git ca-certificates tzdata

WORKDIR /app

# Copy the module manifests first so the dependency download layer stays
# cached until go.mod/go.sum change.
COPY go.mod go.sum ./
RUN go mod download

COPY . .

# CGO is disabled so the binary does not link against Alpine's libc and can
# run in the Debian-based final stage.
RUN CGO_ENABLED=0 GOOS=linux go build -a -installsuffix cgo -o facr-scraper .

# ---------------------------------------------------------------------------
# Python stage for Scrapling
# ---------------------------------------------------------------------------
FROM python:3.11-slim AS python-builder

ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1

# Install system dependencies for Playwright
RUN apt-get update && apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        gnupg \
        wget \
    && rm -rf /var/lib/apt/lists/*

# Create virtual environment and install Scrapling
RUN python -m venv /opt/scrapling
ENV PATH="/opt/scrapling/bin:$PATH"

COPY requirements-scrapling.txt .
RUN pip install --no-cache-dir -r requirements-scrapling.txt

# Install Playwright browsers with deps in one layer
RUN playwright install chromium --with-deps

# Pre-download CloakBrowser patched Chromium binary so it doesn't
# download at runtime (saves ~10-20s per cold-start request).
RUN python -m cloakbrowser install

# Point the venv's python/python3 links at /usr/local/bin/python so they
# resolve after the venv is copied into the final stage (same base image).
RUN ln -sf /usr/local/bin/python /opt/scrapling/bin/python \
    && ln -sf /usr/local/bin/python /opt/scrapling/bin/python3

# ---------------------------------------------------------------------------
# Final stage
# ---------------------------------------------------------------------------
FROM python:3.11-slim

ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    PATH="/opt/scrapling/bin:$PATH"

# Install runtime dependencies for both Go and Playwright.
# curl is also required by the HEALTHCHECK below.
# NOTE(review): these package names are Debian-release specific and the base
# tag floats across releases -- confirm the list still resolves when the base
# image moves.
RUN apt-get update && apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        fontconfig \
        fonts-liberation \
        fonts-noto-color-emoji \
        fonts-noto-core \
        libasound2 \
        libatk-bridge2.0-0 \
        libatk1.0-0 \
        libatspi2.0-0 \
        libcairo2 \
        libcups2 \
        libdbus-1-3 \
        libexpat1 \
        libgbm1 \
        libglib2.0-0 \
        libgobject-2.0-0 \
        libnspr4 \
        libnss3 \
        libpango-1.0-0 \
        libx11-6 \
        libxcb1 \
        libxcomposite1 \
        libxdamage1 \
        libxext6 \
        libxfixes3 \
        libxkbcommon0 \
        libxrandr2 \
        locales \
        wget \
    && rm -rf /var/lib/apt/lists/*

# Create the non-root user and pre-create, already owned by that user, every
# directory the COPY instructions below write into. Together with
# COPY --chown this replaces a trailing `chown -R`, which would rewrite the
# copied venv and browser files into a second layer.
RUN useradd -m -u 1000 scraper \
    && install -d -o scraper -g scraper \
        /home/scraper/.cache \
        /opt/scrapling \
        /opt/scrapling/scripts

# Copy Go binary
COPY --from=go-builder /app/facr-scraper /usr/local/bin/facr-scraper

# Copy Python environment
COPY --from=python-builder --chown=scraper:scraper /opt/scrapling /opt/scrapling

# Copy Playwright browser cache
COPY --from=python-builder --chown=scraper:scraper /root/.cache/ms-playwright /home/scraper/.cache/ms-playwright

# Copy CloakBrowser patched Chromium binary cache
COPY --from=python-builder --chown=scraper:scraper /root/.cloakbrowser /home/scraper/.cloakbrowser

# Copy scrapling and cloakbrowser scripts
COPY --chown=scraper:scraper scripts/scrapling_fetch.py /opt/scrapling/scripts/scrapling_fetch.py
COPY --chown=scraper:scraper scripts/cloakbrowser_fetch.py /opt/scrapling/scripts/cloakbrowser_fetch.py

USER scraper
WORKDIR /home/scraper

# Expose port
EXPOSE 8686

# Health check
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD curl -f http://localhost:8686/ || exit 1

CMD ["facr-scraper"]