# Mirror of https://github.com/Dvorinka/facr-scraper.git
# Synced 2026-06-03 20:12:57 +00:00
# Dockerfile, 102 lines, 2.4 KiB
# Multi-stage build for Go application with Python/Scrapling support

# Stage "go-builder": compiles the Go service into a single binary at
# /app/facr-scraper, which the final stage copies out.
FROM golang:1.24-alpine AS go-builder

# Install build dependencies
RUN apk add --no-cache \
        ca-certificates \
        git \
        tzdata

WORKDIR /app

# Copy only the module manifests first so the dependency download layer is
# reused until go.mod or go.sum changes.
COPY go.mod go.sum ./
RUN go mod download

COPY . .

# CGO_ENABLED=0 yields a binary with no libc dependency, so the binary built
# on Alpine can run in the python:3.11-slim final image.
# -trimpath removes local build paths and -ldflags="-s -w" strips the symbol
# and debug tables to shrink the binary. The former "-a -installsuffix cgo"
# flags are unnecessary in a fresh build container and were dropped.
RUN CGO_ENABLED=0 GOOS=linux go build -trimpath -ldflags="-s -w" -o facr-scraper .

# Python stage for Scrapling
#
# Stage "python-builder": produces two artifacts consumed by the final stage:
#   * /opt/scrapling                - virtual environment with the packages
#                                     from requirements-scrapling.txt
#   * /root/.cache/ms-playwright    - browser download copied by the final stage
#
# The base is pinned to an explicit Debian release for reproducible builds.
# Keep the Python minor version identical to the final stage: the venv's
# interpreter links point at /usr/local/bin/python.
FROM python:3.11-slim-bookworm AS python-builder

# Download/verification tooling used while building this stage.
# --no-install-recommends keeps optional packages out of the layer.
RUN apt-get update && apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        gnupg \
        wget \
    && rm -rf /var/lib/apt/lists/*

# Create virtual environment and install Scrapling
RUN python -m venv /opt/scrapling
ENV PATH="/opt/scrapling/bin:$PATH"
COPY requirements-scrapling.txt .
RUN pip install --no-cache-dir -r requirements-scrapling.txt

# Install Playwright browsers.
# "playwright install-deps" is intentionally not run here: it installs OS
# packages into this stage's system directories, and the final stage copies
# only /opt/scrapling and the browser cache. The browser's shared libraries
# are installed with apt-get in the final stage instead.
RUN playwright install chromium

# Fix Python symlinks: point both interpreter names in the venv at the
# image's Python so they resolve after the venv is copied to the final stage.
RUN ln -sf /usr/local/bin/python /opt/scrapling/bin/python \
    && ln -sf /usr/local/bin/python /opt/scrapling/bin/python3

# Final stage
#
# Runtime image: the Go binary plus the Scrapling virtual environment and the
# Playwright browser cache, running as the unprivileged user "scraper".
# The base is pinned to an explicit Debian release so the package names below
# keep resolving when the floating "slim" tag moves to a newer release.
FROM python:3.11-slim-bookworm

# Install runtime dependencies for both Go and Playwright.
# libgobject-2.0.so.0 is shipped by libglib2.0-0; Debian has no separate
# "libgobject-2.0-0" package, so that name is not listed here.
# curl is required by the HEALTHCHECK below.
RUN apt-get update && apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        gnupg \
        libasound2 \
        libatk-bridge2.0-0 \
        libatk1.0-0 \
        libatspi2.0-0 \
        libcairo2 \
        libcups2 \
        libdbus-1-3 \
        libexpat1 \
        libgbm1 \
        libglib2.0-0 \
        libnspr4 \
        libnss3 \
        libpango-1.0-0 \
        libx11-6 \
        libxcb1 \
        libxcomposite1 \
        libxdamage1 \
        libxext6 \
        libxfixes3 \
        libxkbcommon0 \
        libxrandr2 \
        wget \
    && rm -rf /var/lib/apt/lists/*

# Create non-root user and its cache directory. The directory is created
# here, already owned by the user, so the browser cache can be copied into it
# without a recursive chown afterwards.
RUN useradd -m -u 1000 scraper \
    && mkdir -p /home/scraper/.cache \
    && chown scraper:scraper /home/scraper/.cache

# Copy Go binary
COPY --from=go-builder /app/facr-scraper /usr/local/bin/facr-scraper

# Copy Python environment
COPY --from=python-builder /opt/scrapling /opt/scrapling

# Copy Playwright browser cache.
# --chown sets ownership while copying; a later "chown -R" over the home
# directory would store every browser file a second time in a new layer.
COPY --from=python-builder --chown=scraper:scraper \
    /root/.cache/ms-playwright /home/scraper/.cache/ms-playwright

ENV PATH="/opt/scrapling/bin:$PATH"

# Copy scrapling script
COPY scripts/scrapling_fetch.py /opt/scrapling/scripts/scrapling_fetch.py

USER scraper
WORKDIR /home/scraper

# Expose port (documentation only; publish it with -p at run time)
EXPOSE 8686

# Health check: the container is reported unhealthy after three consecutive
# failed HTTP requests to the service root.
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD curl -f http://localhost:8686/ || exit 1

CMD ["facr-scraper"]