#!/usr/bin/env bash
#
# Production deployment driver for the docker compose stack.
#
# Usage: scripts/ops/deploy-prod.sh [env-file] [compose-file]
#
# Steps: preflight script -> optional image pull -> `docker compose up -d`
# (optionally with --build / --remove-orphans) -> wait for each runtime
# service to report healthy -> optional smoke script.
#
# Behaviour is tuned through the DEPLOY_* environment variables read below;
# run with --help for the list.
set -euo pipefail

# Repository root, resolved relative to this script (two directories up).
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
ENV_FILE="${1:-$ROOT_DIR/.env.production}"
COMPOSE_FILE="${2:-$ROOT_DIR/infra/docker-compose.prod.yml}"

HEALTH_TIMEOUT_SECONDS="${DEPLOY_HEALTH_TIMEOUT_SECONDS:-240}"
HEALTH_POLL_SECONDS="${DEPLOY_HEALTH_POLL_SECONDS:-2}"
DEPLOY_BUILD="${DEPLOY_BUILD:-1}"
DEPLOY_PULL="${DEPLOY_PULL:-1}"
DEPLOY_RUN_SMOKE="${DEPLOY_RUN_SMOKE:-1}"
DEPLOY_REMOVE_ORPHANS="${DEPLOY_REMOVE_ORPHANS:-1}"
DEPLOY_PRINT_LOGS_ON_FAILURE="${DEPLOY_PRINT_LOGS_ON_FAILURE:-1}"
DEPLOY_LOG_TAIL_LINES="${DEPLOY_LOG_TAIL_LINES:-200}"

if [[ "${1:-}" == "--help" || "${1:-}" == "-h" ]]; then
  cat <<'USAGE'
Usage: scripts/ops/deploy-prod.sh [env-file] [compose-file]

Examples:
  scripts/ops/deploy-prod.sh
  scripts/ops/deploy-prod.sh .env.production infra/docker-compose.prod.yml

Behavior:
  - runs production preflight checks
  - optionally pulls external images
  - performs compose deployment (with optional build)
  - waits for core services to become healthy
  - runs post-deploy smoke checks

Optional env toggles:
  DEPLOY_PULL=1|0
  DEPLOY_BUILD=1|0
  DEPLOY_RUN_SMOKE=1|0
  DEPLOY_REMOVE_ORPHANS=1|0
  DEPLOY_HEALTH_TIMEOUT_SECONDS=240
  DEPLOY_HEALTH_POLL_SECONDS=2
  DEPLOY_PRINT_LOGS_ON_FAILURE=1|0
  DEPLOY_LOG_TAIL_LINES=200
USAGE
  exit 0
fi

#######################################
# Interpret a toggle value as a boolean.
# Arguments: $1 - raw value (may be empty or omitted)
# Returns:   0 for 1/true/yes/on (case-insensitive), 1 for anything else
#######################################
is_true() {
  local raw="${1:-}"
  case "${raw,,}" in
    1|true|yes|on) return 0 ;;
    *) return 1 ;;
  esac
}

#######################################
# Print the tail of the compose service logs to help diagnose a failure.
# Does nothing when DEPLOY_PRINT_LOGS_ON_FAILURE is off or docker is not on
# PATH. Never fails: a failing `logs` call is deliberately ignored so it
# cannot mask the original error.
# Globals: DEPLOY_PRINT_LOGS_ON_FAILURE, DEPLOY_LOG_TAIL_LINES, ENV_FILE,
#          COMPOSE_FILE (all read)
#######################################
print_failure_logs() {
  if ! is_true "$DEPLOY_PRINT_LOGS_ON_FAILURE"; then
    return
  fi
  if ! command -v docker >/dev/null 2>&1; then
    return
  fi

  echo "[info] last ${DEPLOY_LOG_TAIL_LINES} log lines from compose services:"
  docker compose --env-file "$ENV_FILE" -f "$COMPOSE_FILE" logs --tail "$DEPLOY_LOG_TAIL_LINES" || true
}

#######################################
# ERR trap handler: report the failing exit code, then dump service logs.
# Arguments: $1 - exit status of the command that triggered the trap
#######################################
on_error() {
  local exit_code="$1"
  echo "[error] deployment failed (exit code ${exit_code})" >&2
  print_failure_logs
}

trap 'on_error $?' ERR

# Validate the numeric settings. The comparison and the stored value are both
# forced to base 10: a bare leading zero would otherwise be parsed as octal in
# arithmetic context ("08" is an error there, "0300" would mean 192).
if ! [[ "$HEALTH_TIMEOUT_SECONDS" =~ ^[0-9]+$ ]] || (( 10#$HEALTH_TIMEOUT_SECONDS < 5 )); then
  echo "[error] DEPLOY_HEALTH_TIMEOUT_SECONDS must be an integer >= 5" >&2
  exit 1
fi
HEALTH_TIMEOUT_SECONDS=$((10#$HEALTH_TIMEOUT_SECONDS))

if ! [[ "$HEALTH_POLL_SECONDS" =~ ^[0-9]+$ ]] || (( 10#$HEALTH_POLL_SECONDS < 1 )); then
  echo "[error] DEPLOY_HEALTH_POLL_SECONDS must be an integer >= 1" >&2
  exit 1
fi
HEALTH_POLL_SECONDS=$((10#$HEALTH_POLL_SECONDS))

bash "$ROOT_DIR/scripts/ops/preflight-prod.sh" "$ENV_FILE" "$COMPOSE_FILE"

if is_true "$DEPLOY_PULL"; then
  echo "[step] pulling external base images..."
  # Best effort: a failed pull only warns, the deployment continues.
  docker compose --env-file "$ENV_FILE" -f "$COMPOSE_FILE" pull gateway postgres rustfs rustfs-init || \
    echo "[warn] one or more image pulls failed; continuing with local cache"
fi

echo "[step] deploying compose stack..."
up_args=(--env-file "$ENV_FILE" -f "$COMPOSE_FILE" up -d)
if is_true "$DEPLOY_BUILD"; then
  up_args+=(--build)
fi
if is_true "$DEPLOY_REMOVE_ORPHANS"; then
  up_args+=(--remove-orphans)
fi

docker compose "${up_args[@]}"

#######################################
# Poll a compose service until all of its containers are healthy.
# A container counts as ready when its health status is "healthy" or, if it
# defines no health check, when its state is "running".
# Globals:   ENV_FILE, COMPOSE_FILE, HEALTH_TIMEOUT_SECONDS,
#            HEALTH_POLL_SECONDS (all read)
# Arguments: $1 - compose service name
# Outputs:   "[ok] ..." on stdout, "[error] ..." on stderr
# Returns:   0 once every container is ready; 1 if any container reports
#            unhealthy/dead/exited, or if the timeout elapses first
#######################################
wait_for_service() {
  local service="$1"
  local deadline=$((SECONDS + HEALTH_TIMEOUT_SECONDS))
  local -a container_ids=()
  local container_id status pending

  while (( SECONDS < deadline )); do
    # One ID per line; a service may have more than one container. Lookup
    # failures are treated the same as "no container yet".
    # NOTE(review): `ps -q` without --all may omit stopped containers, in which
    # case an exited service ends in the timeout path rather than the
    # terminal-state path below - confirm against the Compose version in use.
    mapfile -t container_ids < <(
      docker compose --env-file "$ENV_FILE" -f "$COMPOSE_FILE" ps -q "$service" || true
    )
    if (( ${#container_ids[@]} == 0 )); then
      sleep "$HEALTH_POLL_SECONDS"
      continue
    fi

    pending=0
    for container_id in "${container_ids[@]}"; do
      # Health status when a health check exists, plain container state otherwise.
      status="$(docker inspect -f '{{if .State.Health}}{{.State.Health.Status}}{{else}}{{.State.Status}}{{end}}' "$container_id" 2>/dev/null || true)"
      case "$status" in
        healthy|running)
          ;;
        unhealthy|dead|exited)
          echo "[error] service entered terminal unhealthy state: $service ($status)" >&2
          return 1
          ;;
        *)
          # starting, restarting, created, or inspect failed: keep polling.
          pending=1
          ;;
      esac
    done

    if (( pending == 0 )); then
      echo "[ok] service healthy: $service"
      return 0
    fi
    sleep "$HEALTH_POLL_SECONDS"
  done

  echo "[error] timed out waiting for service health: $service" >&2
  return 1
}

echo "[step] waiting for service health checks..."
# Services that must report healthy before the deployment counts as done.
# They are checked one at a time, in this order.
runtime_services=(
  postgres
  rustfs
  auth
  api
  frontend
  gateway
)

# Kept as a plain command (no `||`/`&&`) so a failed wait stops the script
# under `set -e`.
for svc in "${runtime_services[@]}"; do
  wait_for_service "$svc"
done

# Post-deploy smoke script; skipped when DEPLOY_RUN_SMOKE is not truthy.
if is_true "$DEPLOY_RUN_SMOKE"; then
  printf '%s\n' "[step] running smoke checks..."
  bash "$ROOT_DIR/scripts/ops/smoke-prod.sh" "$ENV_FILE"
fi

printf '%s\n' "[ok] production deployment completed successfully"