first commit

This commit is contained in:
Tomas Dvorak
2026-04-10 12:04:09 +02:00
commit 3cb40adb23
203 changed files with 40226 additions and 0 deletions
+165
View File
@@ -0,0 +1,165 @@
#!/usr/bin/env node
import { readFile, stat } from "node:fs/promises";
import path from "node:path";
/**
 * Reads one byte budget from the environment.
 *
 * An unset or blank variable yields `fallback`. Anything that is not a plain
 * non-negative decimal integer is rejected: `Number.parseInt` would turn
 * "abc" into NaN (and `size > NaN` is always false, silently disabling the
 * budget) and would accept "50kb" as 50.
 *
 * @param {string} name - Environment variable name.
 * @param {number} fallback - Budget in bytes used when the variable is unset or blank.
 * @param {Record<string, string | undefined>} [env] - Environment to read from.
 * @returns {number} Budget in bytes.
 * @throws {RangeError} When the variable is set to a non-integer value.
 */
function readBudgetLimit(name, fallback, env = process.env) {
  const raw = String(env[name] ?? "").trim();
  if (raw === "") {
    return fallback;
  }
  const parsed = /^\d+$/.test(raw) ? Number.parseInt(raw, 10) : Number.NaN;
  if (!Number.isSafeInteger(parsed)) {
    throw new RangeError(`${name} must be a non-negative integer number of bytes (received "${raw}")`);
  }
  return parsed;
}

/**
 * Builds the budget table, exiting with status 1 and an `[error]` line when
 * an override is malformed.
 *
 * @param {Record<string, string | undefined>} [env] - Environment to read from.
 * @returns {{maxTotalJsBytes: number, maxTotalCssBytes: number, maxEntryJsBytes: number, maxRouteJsBytes: number, maxChunkJsBytes: number}}
 */
function loadBudget(env = process.env) {
  try {
    return {
      maxTotalJsBytes: readBudgetLimit("MAX_TOTAL_JS_BYTES", 320000, env),
      maxTotalCssBytes: readBudgetLimit("MAX_TOTAL_CSS_BYTES", 50000, env),
      maxEntryJsBytes: readBudgetLimit("MAX_ENTRY_JS_BYTES", 35000, env),
      maxRouteJsBytes: readBudgetLimit("MAX_ROUTE_JS_BYTES", 45000, env),
      maxChunkJsBytes: readBudgetLimit("MAX_CHUNK_JS_BYTES", 50000, env),
    };
  } catch (error) {
    console.error(`[error] ${error instanceof Error ? error.message : String(error)}`);
    process.exit(1);
  }
}

// Manifest path comes from the first CLI argument, resolved against the current working directory.
const rootDir = process.cwd();
const manifestPath = path.resolve(rootDir, process.argv[2] || "apps/frontend/.vinxi/build/client/_build/.vite/manifest.json");
// Assets are looked up in the "assets" directory two levels above the manifest file.
const assetsDir = path.resolve(path.dirname(path.dirname(manifestPath)), "assets");
const budget = loadBudget();
/**
 * Renders a byte count for humans: plain bytes below 1 KiB, one decimal of
 * KB below 1 MiB, two decimals of MB otherwise.
 *
 * @param {number} bytes - Size in bytes.
 * @returns {string} Formatted size, e.g. "1.5 KB".
 */
function formatBytes(bytes) {
  const kib = 1024;
  const mib = kib * kib;
  return bytes < kib
    ? `${bytes} B`
    : bytes < mib
      ? `${(bytes / kib).toFixed(1)} KB`
      : `${(bytes / mib).toFixed(2)} MB`;
}
/**
 * Loads a JSON file from disk and returns the parsed value.
 *
 * @param {string} filePath - Path of the manifest file.
 * @returns {Promise<any>} Parsed JSON content.
 * @throws When the file cannot be read or does not contain valid JSON.
 */
async function readManifest(filePath) {
  return JSON.parse(await readFile(filePath, "utf8"));
}
/**
 * Extracts route chunks from a manifest object.
 *
 * An entry counts as a route when its key starts with "src/routes/", contains
 * the "?pick=default&pick=$css" marker, and its `file` is a string ending in ".js".
 *
 * @param {Record<string, {file?: unknown} | null | undefined>} manifest - Parsed manifest.
 * @returns {{routeKey: string, file: string}[]} Route entries in manifest order.
 */
function parseRouteEntries(manifest) {
  const routes = [];
  for (const [routeKey, chunk] of Object.entries(manifest)) {
    const isRouteKey = routeKey.startsWith("src/routes/") && routeKey.includes("?pick=default&pick=$css");
    if (!isRouteKey) {
      continue;
    }
    const file = chunk?.file;
    if (typeof file === "string" && file.endsWith(".js")) {
      routes.push({ routeKey, file });
    }
  }
  return routes;
}
/**
 * Returns the on-disk size of an asset, looked up by base name inside `assetsDir`.
 *
 * @param {string} assetFile - Asset path or file name; only its base name is used.
 * @returns {Promise<number>} File size in bytes.
 * @throws When the file cannot be stat'ed.
 */
async function fileSizeForAsset(assetFile) {
  const assetName = path.basename(assetFile);
  const { size } = await stat(path.resolve(assetsDir, assetName));
  return size;
}
/**
 * Measures the built frontend assets listed in the manifest, prints a size
 * report, and exits with status 1 when a budget is exceeded, the entry chunk
 * is missing from the manifest, or a listed asset cannot be measured.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const failures = [];
  const describeError = (error) => (error instanceof Error ? error.message : String(error));

  let manifest;
  try {
    manifest = await readManifest(manifestPath);
  } catch (error) {
    console.error(
      `[error] failed to read frontend build manifest at ${manifestPath}: ${describeError(error)}`,
    );
    process.exit(1);
  }

  // Each asset is stat'ed once. A missing/unreadable asset becomes a reported
  // failure (size 0) instead of an unhandled rejection that hides the report.
  const sizeByName = new Map();
  const measure = async (assetFile) => {
    const name = path.basename(assetFile);
    if (!sizeByName.has(name)) {
      try {
        sizeByName.set(name, await fileSizeForAsset(name));
      } catch (error) {
        failures.push(`failed to measure asset ${name}: ${describeError(error)}`);
        sizeByName.set(name, 0);
      }
    }
    return sizeByName.get(name);
  };

  // Collect unique JS/CSS asset names from every manifest entry (`file` plus the `css` list).
  const jsAssets = new Set();
  const cssAssets = new Set();
  for (const item of Object.values(manifest).filter(Boolean)) {
    if (typeof item?.file === "string") {
      if (item.file.endsWith(".js")) {
        jsAssets.add(path.basename(item.file));
      } else if (item.file.endsWith(".css")) {
        cssAssets.add(path.basename(item.file));
      }
    }
    if (Array.isArray(item?.css)) {
      for (const cssFile of item.css) {
        if (typeof cssFile === "string" && cssFile.endsWith(".css")) {
          cssAssets.add(path.basename(cssFile));
        }
      }
    }
  }

  let totalJsBytes = 0;
  let totalCssBytes = 0;
  let largestJsChunk = { name: "", size: 0 };
  for (const jsFile of jsAssets) {
    const size = await measure(jsFile);
    totalJsBytes += size;
    if (size > largestJsChunk.size) {
      largestJsChunk = { name: jsFile, size };
    }
  }
  for (const cssFile of cssAssets) {
    totalCssBytes += await measure(cssFile);
  }

  const entryKey = "virtual:$vinxi/handler/client";
  const entryFile = manifest[entryKey]?.file;
  if (!entryFile) {
    failures.push(`missing entry chunk in manifest: ${entryKey}`);
  }
  const entrySize = entryFile ? await measure(entryFile) : 0;

  const routeSizes = [];
  for (const route of parseRouteEntries(manifest)) {
    routeSizes.push({ ...route, size: await measure(route.file) });
  }
  routeSizes.sort((left, right) => right.size - left.size);
  const largestRoute = routeSizes[0] ?? { routeKey: "n/a", file: "", size: 0 };
  // Do not run the placeholder through path.basename(): basename("n/a") is "a".
  const largestRouteName = routeSizes.length ? path.basename(largestRoute.file) : "none";

  if (totalJsBytes > budget.maxTotalJsBytes) {
    failures.push(`total JS size ${formatBytes(totalJsBytes)} exceeds budget ${formatBytes(budget.maxTotalJsBytes)}`);
  }
  if (totalCssBytes > budget.maxTotalCssBytes) {
    failures.push(`total CSS size ${formatBytes(totalCssBytes)} exceeds budget ${formatBytes(budget.maxTotalCssBytes)}`);
  }
  if (entrySize > budget.maxEntryJsBytes) {
    failures.push(`entry chunk ${path.basename(entryFile)} is ${formatBytes(entrySize)} (budget ${formatBytes(budget.maxEntryJsBytes)})`);
  }
  if (largestJsChunk.size > budget.maxChunkJsBytes) {
    failures.push(
      `largest JS chunk ${largestJsChunk.name} is ${formatBytes(largestJsChunk.size)} (budget ${formatBytes(budget.maxChunkJsBytes)})`,
    );
  }
  if (largestRoute.size > budget.maxRouteJsBytes) {
    failures.push(
      `largest route chunk ${largestRouteName} is ${formatBytes(largestRoute.size)} (budget ${formatBytes(budget.maxRouteJsBytes)})`,
    );
  }

  console.log(`[info] frontend budget report (${manifestPath})`);
  console.log(`[info] total JS: ${formatBytes(totalJsBytes)} (${jsAssets.size} files)`);
  console.log(`[info] total CSS: ${formatBytes(totalCssBytes)} (${cssAssets.size} files)`);
  console.log(`[info] entry JS: ${formatBytes(entrySize)} (${entryFile ? path.basename(entryFile) : "missing"})`);
  console.log(`[info] largest JS chunk: ${formatBytes(largestJsChunk.size)} (${largestJsChunk.name || "none"})`);
  console.log(
    `[info] largest route chunk: ${formatBytes(largestRoute.size)} (${largestRouteName} / ${largestRoute.routeKey})`,
  );
  if (routeSizes.length) {
    console.log("[info] top route chunks:");
    for (const route of routeSizes.slice(0, 5)) {
      console.log(` - ${route.routeKey}: ${formatBytes(route.size)} (${path.basename(route.file)})`);
    }
  }

  if (failures.length) {
    for (const failure of failures) {
      console.error(`[error] ${failure}`);
    }
    process.exit(1);
  }
  console.log("[ok] frontend bundle budgets passed.");
}

// Report unexpected failures in the script's own log format rather than as an unhandled rejection.
main().catch((error) => {
  console.error(`[error] frontend budget check failed unexpectedly: ${error instanceof Error ? error.message : String(error)}`);
  process.exit(1);
});
+276
View File
@@ -0,0 +1,276 @@
#!/usr/bin/env node
import { readFile } from "node:fs/promises";
import path from "node:path";
// Env file to validate: first CLI argument, or ".env.production", resolved against the current working directory.
const targetPath = path.resolve(process.cwd(), process.argv[2] || ".env.production");
// Keys that main() reports as "<KEY> is required" when missing or blank.
const requiredKeys = [
"PUBLIC_DOMAIN",
"PUBLIC_URL",
"TLS_EMAIL",
"BETTER_AUTH_SECRET",
"MAIL_ENCRYPTION_KEY",
"CORS_ALLOW_ORIGINS",
"AUTH_MAGIC_LINK_PROVIDER",
"AUTH_MAIL_FROM",
"AUTH_SMTP_HOST",
"AUTH_SMTP_PORT",
"AUTH_SMTP_SECURE",
"AUTH_SMTP_USER",
"AUTH_SMTP_PASSWORD",
"POSTGRES_PASSWORD",
"S3_REGION",
"S3_BUCKET",
"S3_ACCESS_KEY",
"S3_SECRET_KEY"
];
// Keys whose values main() rejects when they match a known placeholder or are shorter than 16 characters.
const secretKeys = [
"BETTER_AUTH_SECRET",
"MAIL_ENCRYPTION_KEY",
"AUTH_SMTP_PASSWORD",
"POSTGRES_PASSWORD",
"S3_ACCESS_KEY",
"S3_SECRET_KEY"
];
// Same strength rule as secretKeys, but main() only applies it when a value is present.
const optionalSecretKeys = [
"METRICS_AUTH_TOKEN"
];
// Placeholder values treated as insecure. Entries are lower-case because main() lower-cases the value before lookup.
const insecureSecretValues = new Set([
"",
"changeme",
"change-me",
"replace-me",
"replace-with-a-long-random-secret",
"replace-with-a-different-long-random-secret",
"replace-with-smtp-password",
"replace-with-strong-password",
"replace-with-access-key",
"replace-with-secret-key",
"replace-with-metrics-token"
]);
/**
 * Trims a raw env value and removes one pair of matching surrounding quotes.
 *
 * The value must be at least two characters long to count as quoted: a lone
 * `"` or `'` both starts and ends with the quote character and would otherwise
 * be stripped to an empty string.
 *
 * @param {string} value - Raw text after the "=" of an env line.
 * @returns {string} Trimmed value without its surrounding quotes.
 */
function stripOptionalQuotes(value) {
  const trimmed = value.trim();
  if (trimmed.length < 2) {
    return trimmed;
  }
  const first = trimmed[0];
  const last = trimmed[trimmed.length - 1];
  const isQuoted = (first === "\"" || first === "'") && first === last;
  return isQuoted ? trimmed.slice(1, -1) : trimmed;
}
/**
 * Parses dotenv-style text into a Map of key to value.
 *
 * Blank lines and lines starting with "#" are skipped, a leading "export " is
 * dropped, and lines without "=" (or with an empty key) are ignored. Values go
 * through stripOptionalQuotes(); a later duplicate key overwrites an earlier one.
 *
 * @param {string} raw - File content.
 * @returns {Map<string, string>} Parsed entries.
 */
function parseEnv(raw) {
  const exportPrefix = "export ";
  const entries = new Map();
  for (const rawLine of raw.split(/\r?\n/)) {
    let statement = rawLine.trim();
    if (statement === "" || statement.startsWith("#")) {
      continue;
    }
    if (statement.startsWith(exportPrefix)) {
      statement = statement.slice(exportPrefix.length).trim();
    }
    const separatorAt = statement.indexOf("=");
    if (separatorAt < 1) {
      continue;
    }
    entries.set(
      statement.slice(0, separatorAt).trim(),
      stripOptionalQuotes(statement.slice(separatorAt + 1)),
    );
  }
  return entries;
}
/**
 * Tells whether a host name refers to the local machine.
 *
 * `URL.hostname` serialises IPv6 literals with brackets ("[::1]"), so the
 * brackets are removed before comparing; otherwise the IPv6 loopback is never
 * recognised for hosts taken from a parsed URL.
 *
 * @param {string | null | undefined} hostname - Host name, e.g. from `URL.hostname`.
 * @returns {boolean} True for "localhost", "127.0.0.1" and "::1" (with or without brackets).
 */
function isLocalHost(hostname) {
  const normalized = String(hostname || "")
    .trim()
    .toLowerCase()
    .replace(/^\[(.*)\]$/, "$1");
  return normalized === "localhost" || normalized === "127.0.0.1" || normalized === "::1";
}
/**
 * Parses `value` as an absolute http(s) URL.
 *
 * @param {string} value - Candidate URL text.
 * @param {string} envName - Variable name used in the error message.
 * @param {string[]} errors - Collector that receives a message on failure.
 * @returns {URL | null} The parsed URL, or null after recording an error.
 */
function parseAbsoluteURL(value, envName, errors) {
  let candidate;
  try {
    candidate = new URL(value);
  } catch {
    errors.push(`${envName} must be a valid absolute URL`);
    return null;
  }
  if (candidate.protocol === "http:" || candidate.protocol === "https:") {
    return candidate;
  }
  errors.push(`${envName} must use http or https`);
  return null;
}
/**
 * Loose shape check for an email address: a non-empty local part, "@", and a
 * domain containing a dot, with no whitespace or extra "@".
 *
 * @param {unknown} value - Candidate address; falsy values are treated as empty.
 * @returns {boolean} True when the trimmed value matches the pattern.
 */
function isLikelyEmail(value) {
  const emailShape = /^[^\s@]+@[^\s@]+\.[^\s@]+$/;
  const candidate = String(value || "").trim();
  return emailShape.test(candidate);
}
/**
 * Interprets a textual flag, ignoring case and surrounding whitespace.
 *
 * @param {unknown} value - Raw value; falsy values are treated as empty.
 * @returns {boolean | null} true for "true"/"1"/"yes", false for
 *   "false"/"0"/"no", and null for anything else.
 */
function parseBoolean(value) {
  switch (String(value || "").trim().toLowerCase()) {
    case "true":
    case "1":
    case "yes":
      return true;
    case "false":
    case "0":
    case "no":
      return false;
    default:
      return null;
  }
}
/**
 * Builds the lower-cased "protocol//host" origin string of a URL.
 *
 * @param {{protocol: string, host: string}} url - Parsed URL (or URL-like object).
 * @returns {string} Origin such as "https://example.com:8443".
 */
function normalizeOrigin(url) {
  const { protocol, host } = url;
  const origin = `${protocol}//${host}`;
  return origin.toLowerCase();
}
/**
 * Validates a production env file and exits with status 1 when any rule fails.
 *
 * Checks, in order: required keys, secret strength, PUBLIC_URL/PUBLIC_DOMAIN
 * agreement, CORS origins, magic-link/SMTP settings, email-shaped values, and
 * that the auth secret and mail encryption key differ. Warnings are printed
 * before errors.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const errors = [];
  const warnings = [];

  let raw;
  try {
    raw = await readFile(targetPath, "utf8");
  } catch (error) {
    console.error(`[error] failed to read ${targetPath}: ${error instanceof Error ? error.message : String(error)}`);
    process.exit(1);
  }
  const env = parseEnv(raw);
  // Trimmed string view of a key; missing keys read as "".
  const read = (key) => String(env.get(key) || "").trim();

  for (const key of requiredKeys) {
    if (read(key) === "") {
      errors.push(`${key} is required`);
    }
  }

  for (const key of secretKeys) {
    const value = read(key);
    if (insecureSecretValues.has(value.toLowerCase()) || value.length < 16) {
      errors.push(`${key} must be a strong non-placeholder secret (minimum 16 characters)`);
    }
  }
  for (const key of optionalSecretKeys) {
    const value = read(key);
    if (!value) {
      continue;
    }
    if (insecureSecretValues.has(value.toLowerCase()) || value.length < 16) {
      errors.push(`${key} must be a strong non-placeholder secret (minimum 16 characters) when set`);
    }
  }

  const publicURL = env.get("PUBLIC_URL");
  const publicDomain = read("PUBLIC_DOMAIN").toLowerCase();
  let normalizedPublicOrigin = "";
  if (publicURL) {
    const parsedPublicURL = parseAbsoluteURL(publicURL, "PUBLIC_URL", errors);
    if (parsedPublicURL) {
      normalizedPublicOrigin = normalizeOrigin(parsedPublicURL);
      if (!isLocalHost(parsedPublicURL.hostname) && parsedPublicURL.protocol !== "https:") {
        errors.push("PUBLIC_URL must use https for non-local deployments");
      }
      if (publicDomain && parsedPublicURL.hostname.toLowerCase() !== publicDomain) {
        errors.push(`PUBLIC_URL host (${parsedPublicURL.hostname}) must match PUBLIC_DOMAIN (${publicDomain})`);
      }
    }
  }

  const corsOrigins = env.get("CORS_ALLOW_ORIGINS");
  if (corsOrigins) {
    if (corsOrigins.includes("*")) {
      errors.push("CORS_ALLOW_ORIGINS cannot include '*'");
    }
    const normalizedCorsOrigins = new Set();
    for (const rawOrigin of corsOrigins.split(",")) {
      const origin = rawOrigin.trim();
      if (!origin) {
        continue;
      }
      const parsedOrigin = parseAbsoluteURL(origin, "CORS_ALLOW_ORIGINS", errors);
      if (!parsedOrigin) {
        continue;
      }
      if ((parsedOrigin.pathname && parsedOrigin.pathname !== "/") || parsedOrigin.search || parsedOrigin.hash) {
        errors.push(`CORS_ALLOW_ORIGINS origin must not include path/query/fragment: ${origin}`);
      }
      if (!isLocalHost(parsedOrigin.hostname) && parsedOrigin.protocol !== "https:") {
        errors.push(`CORS_ALLOW_ORIGINS origin must use https for non-local deployments: ${origin}`);
      }
      if (publicDomain !== "localhost" && isLocalHost(parsedOrigin.hostname)) {
        errors.push(`CORS_ALLOW_ORIGINS cannot include localhost in production: ${origin}`);
      }
      normalizedCorsOrigins.add(normalizeOrigin(parsedOrigin));
    }
    if (normalizedPublicOrigin && !normalizedCorsOrigins.has(normalizedPublicOrigin)) {
      errors.push(`CORS_ALLOW_ORIGINS must include PUBLIC_URL origin (${normalizedPublicOrigin})`);
    }
  }

  if (read("AUTH_MAGIC_LINK_PROVIDER").toLowerCase() !== "smtp") {
    errors.push("AUTH_MAGIC_LINK_PROVIDER must be smtp for production deployments");
  }
  // parseBoolean() accepts the same true/1/yes spellings the check used inline before.
  if (parseBoolean(env.get("AUTH_DEV_MAILBOX_ENABLED")) === true) {
    errors.push("AUTH_DEV_MAILBOX_ENABLED must be false in production");
  }

  // Require a plain decimal integer: Number.parseInt alone accepts "587abc" or "25.5".
  const smtpPortRaw = read("AUTH_SMTP_PORT");
  const smtpPort = /^\d+$/.test(smtpPortRaw) ? Number.parseInt(smtpPortRaw, 10) : Number.NaN;
  if (Number.isNaN(smtpPort) || smtpPort < 1 || smtpPort > 65535) {
    errors.push("AUTH_SMTP_PORT must be a valid TCP port");
  }
  const smtpSecure = parseBoolean(env.get("AUTH_SMTP_SECURE"));
  if (smtpSecure === null) {
    errors.push("AUTH_SMTP_SECURE must be true/false");
  } else if (smtpSecure === false && smtpPort === 465) {
    errors.push("AUTH_SMTP_SECURE should be true when AUTH_SMTP_PORT=465");
  }
  if (parseBoolean(env.get("AUTH_SMTP_SKIP_VERIFY")) === true) {
    errors.push("AUTH_SMTP_SKIP_VERIFY must not be enabled in production");
  }
  if (parseBoolean(env.get("AUTH_SMTP_TLS_REJECT_UNAUTHORIZED")) === false) {
    errors.push("AUTH_SMTP_TLS_REJECT_UNAUTHORIZED must not be false in production");
  }

  if (!isLikelyEmail(read("TLS_EMAIL"))) {
    errors.push("TLS_EMAIL must be a valid email address for ACME certificate registration");
  }
  if (!isLikelyEmail(read("AUTH_MAIL_FROM"))) {
    errors.push("AUTH_MAIL_FROM must be a valid sender email address");
  }

  // Compare only when both are set; missing secrets are already reported above
  // and two empty strings would otherwise trigger a misleading extra error.
  const authSecret = String(env.get("BETTER_AUTH_SECRET") || "");
  const mailKey = String(env.get("MAIL_ENCRYPTION_KEY") || "");
  if (authSecret !== "" && authSecret === mailKey) {
    errors.push("BETTER_AUTH_SECRET and MAIL_ENCRYPTION_KEY must be different secrets");
  }

  if (publicDomain === "localhost") {
    warnings.push("PUBLIC_DOMAIN is localhost; use a real domain for external production traffic.");
  }

  for (const warning of warnings) {
    console.warn(`[warn] ${warning}`);
  }
  if (errors.length) {
    for (const error of errors) {
      console.error(`[error] ${error}`);
    }
    process.exit(1);
  }
  console.log(`[ok] ${targetPath} passed production environment validation.`);
}

// Report unexpected failures in the script's own log format rather than as an unhandled rejection.
main().catch((error) => {
  console.error(`[error] production environment validation failed unexpectedly: ${error instanceof Error ? error.message : String(error)}`);
  process.exit(1);
});
+121
View File
@@ -0,0 +1,121 @@
#!/usr/bin/env bash
# Abort on errors, unset variables and failing pipeline stages.
set -euo pipefail
# Repository root: two directories above this script.
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
# Positional arguments with defaults: env file, backup root, number of backups to keep.
ENV_FILE="${1:-$ROOT_DIR/.env.production}"
BACKUP_ROOT="${2:-$ROOT_DIR/backups}"
KEEP_COUNT="${3:-14}"
LOCK_FILE="$BACKUP_ROOT/.backup.lock"
# Alerting options, all read from the environment.
OPS_NOTIFY_ON_SUCCESS="${OPS_NOTIFY_ON_SUCCESS:-0}"
OPS_ALERT_WEBHOOK_URL="${OPS_ALERT_WEBHOOK_URL:-}"
OPS_ALERT_TIMEOUT_SECONDS="${OPS_ALERT_TIMEOUT_SECONDS:-10}"
OPS_ALERT_WEBHOOK_BEARER_TOKEN="${OPS_ALERT_WEBHOOK_BEARER_TOKEN:-}"
# Start time and latest backup name are included in alert payloads by send_alert.
started_at_utc="$(date -u +%Y-%m-%dT%H:%M:%SZ)"
latest_backup=""
if [[ "${1:-}" == "--help" || "${1:-}" == "-h" ]]; then
cat <<'USAGE'
Usage:
scripts/ops/backup-job.sh [env-file] [backup-root] [keep-count]
Examples:
scripts/ops/backup-job.sh
scripts/ops/backup-job.sh .env.production /var/backups/productier 30
Behavior:
- Acquires a non-blocking lock to avoid overlapping runs.
- Executes production backup.
- Verifies resulting backup integrity.
- Prunes old backups using keep-count retention.
- Sends webhook alerts when OPS_ALERT_WEBHOOK_URL is configured.
USAGE
exit 0
fi
# Force base 10: a value such as "08" passes the digit regex but is invalid
# octal in bash arithmetic, which breaks the numeric comparison.
if ! [[ "$KEEP_COUNT" =~ ^[0-9]+$ ]] || (( 10#$KEEP_COUNT < 1 )); then
  echo "[error] keep-count must be a positive integer" >&2
  exit 1
fi
# Strip leading zeros so later comparisons (and prune-backups.sh) see a plain decimal.
KEEP_COUNT="$((10#$KEEP_COUNT))"
# Escapes a string for embedding inside a JSON string literal.
# Arguments: $1 - text to escape (a missing argument is treated as empty).
# Output: escaped text on stdout, without a trailing newline.
# Besides backslash, double quote, LF and CR, this also escapes tab, backspace
# and form feed; JSON forbids raw control characters inside strings, so any
# remaining control character is replaced with a space.
json_escape() {
  local value="${1-}"
  value="${value//\\/\\\\}"
  value="${value//\"/\\\"}"
  value="${value//$'\n'/\\n}"
  value="${value//$'\r'/\\r}"
  value="${value//$'\t'/\\t}"
  value="${value//$'\b'/\\b}"
  value="${value//$'\f'/\\f}"
  value="${value//[[:cntrl:]]/ }"
  printf '%s' "$value"
}
# Posts a JSON alert to OPS_ALERT_WEBHOOK_URL.
# Arguments: $1 - status label (e.g. "failure"), $2 - human-readable message.
# Does nothing when no webhook URL is configured. A missing curl or a failed
# delivery only prints a warning, so alerting never fails the job itself.
# The payload also carries started_at_utc and latest_backup from the script scope.
send_alert() {
  local status="$1"
  local message="$2"
  if [[ -z "$OPS_ALERT_WEBHOOK_URL" ]]; then
    return
  fi
  if ! command -v curl >/dev/null 2>&1; then
    echo "[warn] OPS_ALERT_WEBHOOK_URL is set but curl is unavailable; skipping alert"
    return
  fi

  local finished_at_utc payload
  finished_at_utc="$(date -u +%Y-%m-%dT%H:%M:%SZ)"
  printf -v payload \
    '{"service":"productier-backup-job","status":"%s","startedAt":"%s","finishedAt":"%s","backup":"%s","message":"%s"}' \
    "$(json_escape "$status")" \
    "$started_at_utc" \
    "$finished_at_utc" \
    "$(json_escape "${latest_backup:-}")" \
    "$(json_escape "$message")"

  # Build one argument array that is never empty: expanding an empty array with
  # "${arr[@]}" is an unbound-variable error under `set -u` on bash older than 4.4.
  local curl_args=(-fsS -m "$OPS_ALERT_TIMEOUT_SECONDS" -H "Content-Type: application/json")
  if [[ -n "$OPS_ALERT_WEBHOOK_BEARER_TOKEN" ]]; then
    curl_args+=(-H "Authorization: Bearer $OPS_ALERT_WEBHOOK_BEARER_TOKEN")
  fi
  curl_args+=(-d "$payload" "$OPS_ALERT_WEBHOOK_URL")

  if ! curl "${curl_args[@]}" >/dev/null; then
    echo "[warn] failed to deliver backup alert webhook"
  fi
}
# ERR trap handler: sends a failure alert that includes the failing exit code.
# Arguments: $1 - exit code of the command that triggered the trap.
on_error() {
  send_alert "failure" "backup job failed with exit code ${1}"
}
# Pass the failing command's status to the handler.
trap 'on_error $?' ERR
# The lock file lives inside the backup root, so make sure that directory exists first.
mkdir -p "$BACKUP_ROOT"
# When flock is available, hold a non-blocking lock on fd 9 and refuse to start
# if it cannot be acquired; without flock the job continues unprotected with a warning.
if command -v flock >/dev/null 2>&1; then
exec 9>"$LOCK_FILE"
if ! flock -n 9; then
echo "[error] backup job already running (lock file: $LOCK_FILE)" >&2
exit 1
fi
else
echo "[warn] flock not found; running without lock protection"
fi
# Run the backup, locate the directory it produced, verify it, then prune old backups.
echo "[info] starting backup job (env=$ENV_FILE root=$BACKUP_ROOT keep=$KEEP_COUNT)"
bash "$ROOT_DIR/scripts/ops/backup-prod.sh" "$ENV_FILE" "$BACKUP_ROOT"

# Newest timestamp-named directory. grep exits 1 when nothing matches; under
# `pipefail` that would abort the assignment before the explicit check below
# could run, so a no-match is tolerated here (real grep errors still fail).
latest_backup="$(find "$BACKUP_ROOT" -mindepth 1 -maxdepth 1 -type d -printf '%f\n' \
  | { grep -E '^[0-9]{8}T[0-9]{6}Z$' || [[ $? -eq 1 ]]; } \
  | sort \
  | tail -n 1)"
if [[ -z "$latest_backup" ]]; then
  echo "[error] no backup directory found after backup execution" >&2
  # An explicit `exit` does not fire the ERR trap, so alert here.
  send_alert "failure" "backup job failed: no backup directory found after backup execution"
  exit 1
fi

bash "$ROOT_DIR/scripts/ops/verify-backup.sh" "$BACKUP_ROOT/$latest_backup"
bash "$ROOT_DIR/scripts/ops/prune-backups.sh" "$BACKUP_ROOT" "$KEEP_COUNT"
echo "[ok] backup job completed"
if [[ "$OPS_NOTIFY_ON_SUCCESS" == "1" || "$OPS_NOTIFY_ON_SUCCESS" == "true" ]]; then
  send_alert "success" "backup job completed successfully"
fi
+80
View File
@@ -0,0 +1,80 @@
#!/usr/bin/env bash
# Abort on errors, unset variables and failing pipeline stages.
set -euo pipefail
# Repository root: two directories above this script.
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
ENV_FILE="${1:-$ROOT_DIR/.env.production}"
BACKUP_ROOT="${2:-$ROOT_DIR/backups}"
COMPOSE_FILE="$ROOT_DIR/infra/docker-compose.prod.yml"
if [[ "${1:-}" == "--help" || "${1:-}" == "-h" ]]; then
cat <<'USAGE'
Usage:
scripts/ops/backup-prod.sh [env-file] [backup-root]
Examples:
scripts/ops/backup-prod.sh
scripts/ops/backup-prod.sh .env.production /var/backups/productier
Behavior:
- Validates production env configuration.
- Creates a timestamped backup directory.
- Dumps PostgreSQL to postgres.sql.gz.
- Syncs S3-compatible object data to s3/ via rustfs-init container.
- Writes SHA256 checksums and metadata.json.
USAGE
exit 0
fi
if [[ ! -f "$ENV_FILE" ]]; then
  echo "[error] env file not found: $ENV_FILE" >&2
  exit 1
fi
if ! command -v docker >/dev/null 2>&1; then
  echo "[error] docker CLI is required" >&2
  exit 1
fi
node "$ROOT_DIR/scripts/check-production-env.mjs" "$ENV_FILE"

timestamp="$(date -u +%Y%m%dT%H%M%SZ)"
backup_dir="$BACKUP_ROOT/$timestamp"
# Work in a hidden temp directory and rename it at the end, so a directory
# with the final name only ever holds a complete backup.
tmp_backup_dir="$BACKUP_ROOT/.tmp-$timestamp-$$"

# Remove the temp directory on any exit. After the final `mv` it no longer
# exists and this is a no-op, so partial backups never accumulate.
cleanup_tmp_backup() {
  if [[ -n "${tmp_backup_dir:-}" && -d "$tmp_backup_dir" ]]; then
    rm -rf "$tmp_backup_dir" \
      || echo "[warn] failed to remove temporary backup directory: $tmp_backup_dir" >&2
  fi
}
trap cleanup_tmp_backup EXIT

# `mv` into an existing directory would nest the new backup inside it.
if [[ -e "$backup_dir" ]]; then
  echo "[error] backup directory already exists: $backup_dir" >&2
  exit 1
fi

mkdir -p "$tmp_backup_dir/s3"
echo "[info] writing backup to $backup_dir"

echo "[step] backing up postgres..."
docker compose --env-file "$ENV_FILE" -f "$COMPOSE_FILE" exec -T postgres \
  pg_dump -U productier -d productier --format=plain --no-owner --no-privileges \
  | gzip -9 > "$tmp_backup_dir/postgres.sql.gz"

echo "[step] backing up object storage..."
docker compose --env-file "$ENV_FILE" -f "$COMPOSE_FILE" run --rm --no-deps \
  -v "$tmp_backup_dir/s3:/backup" \
  --entrypoint /bin/sh \
  rustfs-init \
  -lc 'set -euo pipefail; endpoint="http://rustfs:9000"; aws --endpoint-url "$endpoint" s3 sync "s3://${S3_BUCKET:-productier}" /backup --no-progress'

echo "[step] writing checksums..."
(
  cd "$tmp_backup_dir"
  sha256sum postgres.sql.gz > checksums.sha256
  # Only hash the object tree when it has at least one file; xargs with no input would otherwise run sha256sum on stdin.
  if find s3 -type f -print -quit | grep -q .; then
    find s3 -type f -print0 | sort -z | xargs -0 sha256sum >> checksums.sha256
  fi
)

cat > "$tmp_backup_dir/metadata.json" <<EOF
{
"createdAt": "$timestamp",
"envFile": "$(basename "$ENV_FILE")",
"postgresDump": "postgres.sql.gz",
"objectStorageDir": "s3",
"composeFile": "infra/docker-compose.prod.yml"
}
EOF

mv "$tmp_backup_dir" "$backup_dir"
echo "[ok] backup complete: $backup_dir"
+147
View File
@@ -0,0 +1,147 @@
#!/usr/bin/env bash
# Abort on errors, unset variables and failing pipeline stages.
set -euo pipefail
# Repository root: two directories above this script.
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
# Positional arguments with defaults: env file and compose file.
ENV_FILE="${1:-$ROOT_DIR/.env.production}"
COMPOSE_FILE="${2:-$ROOT_DIR/infra/docker-compose.prod.yml}"
# Health-wait tuning, read from DEPLOY_HEALTH_* environment variables.
HEALTH_TIMEOUT_SECONDS="${DEPLOY_HEALTH_TIMEOUT_SECONDS:-240}"
HEALTH_POLL_SECONDS="${DEPLOY_HEALTH_POLL_SECONDS:-2}"
# Feature toggles; each defaults to enabled and is interpreted by is_true.
DEPLOY_BUILD="${DEPLOY_BUILD:-1}"
DEPLOY_PULL="${DEPLOY_PULL:-1}"
DEPLOY_RUN_SMOKE="${DEPLOY_RUN_SMOKE:-1}"
DEPLOY_REMOVE_ORPHANS="${DEPLOY_REMOVE_ORPHANS:-1}"
DEPLOY_PRINT_LOGS_ON_FAILURE="${DEPLOY_PRINT_LOGS_ON_FAILURE:-1}"
DEPLOY_LOG_TAIL_LINES="${DEPLOY_LOG_TAIL_LINES:-200}"
# Print usage and stop when the first argument asks for help.
if [[ "${1:-}" == "--help" || "${1:-}" == "-h" ]]; then
cat <<'USAGE'
Usage:
scripts/ops/deploy-prod.sh [env-file] [compose-file]
Examples:
scripts/ops/deploy-prod.sh
scripts/ops/deploy-prod.sh .env.production infra/docker-compose.prod.yml
Behavior:
- runs production preflight checks
- optionally pulls external images
- performs compose deployment (with optional build)
- waits for core services to become healthy
- runs post-deploy smoke checks
Optional env toggles:
DEPLOY_PULL=1|0
DEPLOY_BUILD=1|0
DEPLOY_RUN_SMOKE=1|0
DEPLOY_REMOVE_ORPHANS=1|0
DEPLOY_HEALTH_TIMEOUT_SECONDS=240
DEPLOY_HEALTH_POLL_SECONDS=2
DEPLOY_PRINT_LOGS_ON_FAILURE=1|0
DEPLOY_LOG_TAIL_LINES=200
USAGE
exit 0
fi
# Succeeds when the argument is an enabled flag: 1, true, yes or on, in any letter case.
# Arguments: $1 - flag value (a missing argument counts as disabled).
is_true() {
  local flag="${1:-}"
  flag="${flag,,}"
  [[ "$flag" == "1" || "$flag" == "true" || "$flag" == "yes" || "$flag" == "on" ]]
}
# Prints the tail of the compose service logs to help diagnose a failed deploy.
# Skipped when DEPLOY_PRINT_LOGS_ON_FAILURE is disabled or docker is not installed;
# a failing `docker compose logs` call is ignored.
print_failure_logs() {
  is_true "$DEPLOY_PRINT_LOGS_ON_FAILURE" || return 0
  command -v docker >/dev/null 2>&1 || return 0
  echo "[info] last ${DEPLOY_LOG_TAIL_LINES} log lines from compose services:"
  docker compose --env-file "$ENV_FILE" -f "$COMPOSE_FILE" logs --tail "$DEPLOY_LOG_TAIL_LINES" || true
}
# ERR trap handler: reports the failing exit code on stderr, then prints service logs.
# Arguments: $1 - exit code of the command that triggered the trap.
on_error() {
  printf '[error] deployment failed (exit code %s)\n' "$1" >&2
  print_failure_logs
}
# Pass the failing command's status to the handler.
trap 'on_error $?' ERR
# Validate the health-wait settings: digits only, with a minimum of 5 s timeout and 1 s poll interval.
if ! [[ "$HEALTH_TIMEOUT_SECONDS" =~ ^[0-9]+$ ]] || [[ "$HEALTH_TIMEOUT_SECONDS" -lt 5 ]]; then
echo "[error] DEPLOY_HEALTH_TIMEOUT_SECONDS must be an integer >= 5" >&2
exit 1
fi
if ! [[ "$HEALTH_POLL_SECONDS" =~ ^[0-9]+$ ]] || [[ "$HEALTH_POLL_SECONDS" -lt 1 ]]; then
echo "[error] DEPLOY_HEALTH_POLL_SECONDS must be an integer >= 1" >&2
exit 1
fi
# Run the preflight script first; under `set -e` a failure stops the deployment here.
bash "$ROOT_DIR/scripts/ops/preflight-prod.sh" "$ENV_FILE" "$COMPOSE_FILE"
# Pull is best-effort: a failed pull only warns and the deploy continues.
if is_true "$DEPLOY_PULL"; then
echo "[step] pulling external base images..."
docker compose --env-file "$ENV_FILE" -f "$COMPOSE_FILE" pull gateway postgres rustfs rustfs-init || \
echo "[warn] one or more image pulls failed; continuing with local cache"
fi
echo "[step] deploying compose stack..."
# Assemble the `docker compose up` arguments, adding --build / --remove-orphans per toggle.
up_args=(--env-file "$ENV_FILE" -f "$COMPOSE_FILE" up -d)
if is_true "$DEPLOY_BUILD"; then
up_args+=(--build)
fi
if is_true "$DEPLOY_REMOVE_ORPHANS"; then
up_args+=(--remove-orphans)
fi
docker compose "${up_args[@]}"
# Polls one compose service until it is up, fails, or the timeout expires.
# Arguments: $1 - compose service name.
# Returns 0 once the container reports "healthy" (or "running" when it has no
# health check); returns 1 on "unhealthy", "dead" or "exited", or after
# HEALTH_TIMEOUT_SECONDS. Polls every HEALTH_POLL_SECONDS.
wait_for_service() {
  local service="$1"
  local give_up_at=$((SECONDS + HEALTH_TIMEOUT_SECONDS))
  local cid state
  while (( SECONDS < give_up_at )); do
    cid="$(docker compose --env-file "$ENV_FILE" -f "$COMPOSE_FILE" ps -q "$service" || true)"
    state=""
    if [[ -n "$cid" ]]; then
      # Prefer the health-check status; fall back to the plain container state.
      state="$(docker inspect -f '{{if .State.Health}}{{.State.Health.Status}}{{else}}{{.State.Status}}{{end}}' "$cid" 2>/dev/null || true)"
    fi
    case "$state" in
      healthy|running)
        echo "[ok] service healthy: $service"
        return 0
        ;;
      unhealthy|dead|exited)
        echo "[error] service entered terminal unhealthy state: $service ($state)" >&2
        return 1
        ;;
    esac
    # No container yet, or a transitional state: wait and poll again.
    sleep "$HEALTH_POLL_SECONDS"
  done
  echo "[error] timed out waiting for service health: $service" >&2
  return 1
}
echo "[step] waiting for service health checks..."
# Services are checked one after another in this order; the first failure stops the script.
runtime_services=(postgres rustfs auth api frontend gateway)
for service in "${runtime_services[@]}"; do
wait_for_service "$service"
done
# Smoke checks run only when DEPLOY_RUN_SMOKE is enabled.
if is_true "$DEPLOY_RUN_SMOKE"; then
echo "[step] running smoke checks..."
bash "$ROOT_DIR/scripts/ops/smoke-prod.sh" "$ENV_FILE"
fi
echo "[ok] production deployment completed successfully"
+79
View File
@@ -0,0 +1,79 @@
#!/usr/bin/env bash
# Abort on errors, unset variables and failing pipeline stages.
set -euo pipefail
# Repository root: two directories above this script.
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
# Positional arguments with defaults: env file and compose file.
ENV_FILE="${1:-$ROOT_DIR/.env.production}"
COMPOSE_FILE="${2:-$ROOT_DIR/infra/docker-compose.prod.yml}"
# Print usage and stop when the first argument asks for help.
if [[ "${1:-}" == "--help" || "${1:-}" == "-h" ]]; then
cat <<'USAGE'
Usage:
scripts/ops/preflight-prod.sh [env-file] [compose-file]
Examples:
scripts/ops/preflight-prod.sh
scripts/ops/preflight-prod.sh .env.production infra/docker-compose.prod.yml
Checks:
- required host tools exist and docker daemon is reachable
- production environment values pass validation
- production compose file renders successfully
- expected core services are present in compose config
USAGE
exit 0
fi
# Both input files must exist before any further check runs.
if [[ ! -f "$ENV_FILE" ]]; then
echo "[error] env file not found: $ENV_FILE" >&2
exit 1
fi
if [[ ! -f "$COMPOSE_FILE" ]]; then
echo "[error] compose file not found: $COMPOSE_FILE" >&2
exit 1
fi
# Exits the script with status 1 unless the named command is available on PATH.
# Arguments: $1 - command name.
require_cmd() {
  local tool="$1"
  command -v "$tool" >/dev/null 2>&1 && return 0
  echo "[error] required command not found: $tool" >&2
  exit 1
}
# Host tooling must be present before anything else is attempted.
for tool in docker node bash; do
  require_cmd "$tool"
done
docker info >/dev/null 2>&1 || {
  echo "[error] docker daemon is not reachable" >&2
  exit 1
}

echo "[step] validating production environment values..."
node "$ROOT_DIR/scripts/check-production-env.mjs" "$ENV_FILE"

echo "[step] validating compose rendering..."
docker compose --env-file "$ENV_FILE" -f "$COMPOSE_FILE" config >/dev/null || {
  echo "[error] docker compose config validation failed" >&2
  exit 1
}

# Services the rendered compose config actually defines.
mapfile -t configured_services < <(docker compose --env-file "$ENV_FILE" -f "$COMPOSE_FILE" config --services)

# Succeeds when $1 is one of the configured compose services.
service_is_configured() {
  local wanted="$1" candidate
  for candidate in "${configured_services[@]}"; do
    [[ "$candidate" == "$wanted" ]] && return 0
  done
  return 1
}

required_services=(gateway frontend auth api postgres rustfs rustfs-init)
for service in "${required_services[@]}"; do
  if ! service_is_configured "$service"; then
    echo "[error] required service missing from compose config: $service" >&2
    exit 1
  fi
done
echo "[ok] production preflight checks passed"
+47
View File
@@ -0,0 +1,47 @@
#!/usr/bin/env bash
# Abort on errors, unset variables and failing pipeline stages.
set -euo pipefail
# Repository root: two directories above this script.
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
# Positional arguments with defaults: backup root and number of backups to keep.
BACKUP_ROOT="${1:-$ROOT_DIR/backups}"
KEEP_COUNT="${2:-14}"
if [[ "${1:-}" == "--help" || "${1:-}" == "-h" ]]; then
cat <<'USAGE'
Usage:
scripts/ops/prune-backups.sh [backup-root] [keep-count]
Examples:
scripts/ops/prune-backups.sh
scripts/ops/prune-backups.sh /var/backups/productier 30
USAGE
exit 0
fi
# Force base 10: a value such as "08" passes the digit regex but is invalid
# octal in bash arithmetic, which breaks the comparisons and the array slice below.
if ! [[ "$KEEP_COUNT" =~ ^[0-9]+$ ]] || (( 10#$KEEP_COUNT < 1 )); then
  echo "[error] keep-count must be a positive integer" >&2
  exit 1
fi
KEEP_COUNT="$((10#$KEEP_COUNT))"
if [[ ! -d "$BACKUP_ROOT" ]]; then
  echo "[info] backup root does not exist; nothing to prune"
  exit 0
fi
# Timestamp-named backup directories, newest first. Names are restricted by the
# regex, so nothing else inside the backup root is ever a removal candidate.
mapfile -t backup_dirs < <(
  find "$BACKUP_ROOT" -mindepth 1 -maxdepth 1 -type d -printf '%f\n' \
    | grep -E '^[0-9]{8}T[0-9]{6}Z$' \
    | sort -r
)
if [[ "${#backup_dirs[@]}" -le "$KEEP_COUNT" ]]; then
  echo "[info] no pruning needed (${#backup_dirs[@]} backup(s), keep=$KEEP_COUNT)"
  exit 0
fi
# Everything after the first KEEP_COUNT (newest) entries is removed.
for backup_name in "${backup_dirs[@]:$KEEP_COUNT}"; do
  target="$BACKUP_ROOT/$backup_name"
  echo "[step] removing old backup: $target"
  rm -rf "$target"
done
echo "[ok] pruned backups, kept latest $KEEP_COUNT"
+147
View File
@@ -0,0 +1,147 @@
#!/usr/bin/env bash
# Abort on errors, unset variables and failing pipeline stages.
set -euo pipefail
# Repository root: two directories above this script.
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
# Positional arguments with defaults: env file, backup reference ("latest", a directory, or a name under the root), backup root.
ENV_FILE="${1:-$ROOT_DIR/.env.production}"
BACKUP_REF="${2:-latest}"
BACKUP_ROOT="${3:-$ROOT_DIR/backups}"
COMPOSE_FILE="$ROOT_DIR/infra/docker-compose.prod.yml"
# Drill toggles from the environment: which drills run, and whether their temporary resources are kept.
DRILL_DB="${DRILL_DB:-1}"
DRILL_S3="${DRILL_S3:-1}"
KEEP_DRILL_DB="${KEEP_DRILL_DB:-0}"
KEEP_DRILL_BUCKET="${KEEP_DRILL_BUCKET:-0}"
# Print usage and stop when the first argument asks for help.
if [[ "${1:-}" == "--help" || "${1:-}" == "-h" ]]; then
cat <<'USAGE'
Usage:
scripts/ops/restore-drill.sh [env-file] [backup-dir-or-latest] [backup-root]
Examples:
scripts/ops/restore-drill.sh
scripts/ops/restore-drill.sh .env.production latest /var/backups/productier
scripts/ops/restore-drill.sh .env.production backups/20260401T120000Z
Behavior:
- Verifies selected backup.
- Imports DB dump into temporary drill database and runs sanity queries.
- Optionally syncs backup objects into a temporary drill bucket.
- Cleans up temporary DB/bucket unless KEEP_DRILL_DB=1 or KEEP_DRILL_BUCKET=1.
USAGE
exit 0
fi
if [[ ! -f "$ENV_FILE" ]]; then
echo "[error] env file not found: $ENV_FILE" >&2
exit 1
fi
if ! command -v docker >/dev/null 2>&1; then
echo "[error] docker CLI is required" >&2
exit 1
fi
# Validate the env file with the shared checker before touching any service.
node "$ROOT_DIR/scripts/check-production-env.mjs" "$ENV_FILE"
# Prints the name of the newest timestamp-named backup directory under a root.
# Arguments: $1 - backup root directory.
# Output: directory name (e.g. 20260401T120000Z) on stdout, or nothing when the
# root is missing or holds no backups.
# Always succeeds in the "nothing found" case: grep exits 1 when nothing matches,
# and under `set -e -o pipefail` that would make the caller's assignment abort
# the script before it can print its own "no backups found" error.
resolve_latest_backup() {
  local root="$1"
  [[ -d "$root" ]] || return 0
  find "$root" -mindepth 1 -maxdepth 1 -type d -printf '%f\n' \
    | { grep -E '^[0-9]{8}T[0-9]{6}Z$' || [[ $? -eq 1 ]]; } \
    | sort \
    | tail -n 1
}
# Resolve BACKUP_REF to a directory: "latest" picks the newest backup under the
# root, an existing directory path is used as-is, anything else is treated as a
# name inside the backup root.
if [[ "$BACKUP_REF" == "latest" ]]; then
latest_name="$(resolve_latest_backup "$BACKUP_ROOT")"
if [[ -z "$latest_name" ]]; then
echo "[error] no backups found in $BACKUP_ROOT" >&2
exit 1
fi
BACKUP_DIR="$BACKUP_ROOT/$latest_name"
elif [[ -d "$BACKUP_REF" ]]; then
BACKUP_DIR="$BACKUP_REF"
else
BACKUP_DIR="$BACKUP_ROOT/$BACKUP_REF"
fi
if [[ ! -d "$BACKUP_DIR" ]]; then
echo "[error] backup directory not found: $BACKUP_DIR" >&2
exit 1
fi
# Verify the selected backup before any restore work starts.
bash "$ROOT_DIR/scripts/ops/verify-backup.sh" "$BACKUP_DIR"
# Drill resources get a UTC-timestamp suffix in their names.
timestamp_compact="$(date -u +%Y%m%d%H%M%S)"
drill_db_name="productier_drill_${timestamp_compact}"
drill_bucket_name="productier-drill-${timestamp_compact}"
# Flags read by cleanup_db / cleanup_bucket to decide whether there is anything to remove.
db_created=0
bucket_created=0
# Drops the temporary drill database.
# No-op when no drill database was created; keeps it when KEEP_DRILL_DB=1.
cleanup_db() {
  [[ "$db_created" == "1" ]] || return 0
  if [[ "$KEEP_DRILL_DB" == "1" ]]; then
    echo "[info] KEEP_DRILL_DB=1, retaining drill database: $drill_db_name"
    return 0
  fi
  docker compose --env-file "$ENV_FILE" -f "$COMPOSE_FILE" exec -T postgres \
    psql -U productier -d postgres -v ON_ERROR_STOP=1 \
    -c "DROP DATABASE IF EXISTS ${drill_db_name};" >/dev/null
}
# Removes the temporary drill bucket and its contents.
# No-op when no drill bucket was created; keeps it when KEEP_DRILL_BUCKET=1.
# The `rb --force` call inside the container is best-effort (its failure is ignored there).
cleanup_bucket() {
  [[ "$bucket_created" == "1" ]] || return 0
  if [[ "$KEEP_DRILL_BUCKET" == "1" ]]; then
    echo "[info] KEEP_DRILL_BUCKET=1, retaining drill bucket: $drill_bucket_name"
    return 0
  fi
  docker compose --env-file "$ENV_FILE" -f "$COMPOSE_FILE" run --rm --no-deps \
    --entrypoint /bin/sh \
    rustfs-init \
    -lc "set -euo pipefail; endpoint='http://rustfs:9000'; aws --endpoint-url \"\$endpoint\" s3 rb \"s3://${drill_bucket_name}\" --force >/dev/null 2>&1 || true"
}
# EXIT trap handler: removes the drill database and the drill bucket.
# Each step is attempted independently; under `set -e` a failed database drop
# would otherwise end the trap early and leave the drill bucket behind.
# The script's original exit status is preserved, except that a cleanup
# failure turns an otherwise successful run into exit status 1.
cleanup() {
  local exit_code=$?
  local cleanup_failed=0
  # Avoid re-entering this handler through the explicit `exit` below.
  trap - EXIT
  if ! cleanup_db; then
    echo "[warn] failed to drop drill database: $drill_db_name" >&2
    cleanup_failed=1
  fi
  if ! cleanup_bucket; then
    echo "[warn] failed to remove drill bucket: $drill_bucket_name" >&2
    cleanup_failed=1
  fi
  if (( cleanup_failed )) && (( exit_code == 0 )); then
    exit_code=1
  fi
  exit "$exit_code"
}
trap cleanup EXIT
# Database drill: restore the dump into a throwaway database and sanity-check it.
if [[ "$DRILL_DB" == "1" ]]; then
  echo "[step] running DB restore drill..."
  docker compose --env-file "$ENV_FILE" -f "$COMPOSE_FILE" exec -T postgres \
    psql -U productier -d postgres -v ON_ERROR_STOP=1 \
    -c "CREATE DATABASE ${drill_db_name};" >/dev/null
  # From here on the EXIT trap must drop the drill database.
  db_created=1
  gunzip -c "$BACKUP_DIR/postgres.sql.gz" \
    | docker compose --env-file "$ENV_FILE" -f "$COMPOSE_FILE" exec -T postgres \
      psql -U productier -d "$drill_db_name" -v ON_ERROR_STOP=1 >/dev/null
  # The restored database must contain at least one table in the public schema.
  table_count="$(docker compose --env-file "$ENV_FILE" -f "$COMPOSE_FILE" exec -T postgres \
    psql -U productier -d "$drill_db_name" -At -c "SELECT count(*) FROM information_schema.tables WHERE table_schema='public';")"
  if ! [[ "$table_count" =~ ^[0-9]+$ ]] || [[ "$table_count" -lt 1 ]]; then
    echo "[error] DB restore drill failed: unexpected table count ($table_count)" >&2
    exit 1
  fi
  echo "[ok] DB restore drill passed (public tables: $table_count)"
fi

# Object storage drill: sync the backed-up objects into a throwaway bucket.
if [[ "$DRILL_S3" == "1" ]]; then
  if [[ -d "$BACKUP_DIR/s3" ]]; then
    echo "[step] running object storage restore drill..."
    # Mark the bucket for cleanup before the container runs: if the sync fails
    # after the bucket was created, the EXIT trap must still remove it.
    # cleanup_bucket tolerates a bucket that was never created.
    bucket_created=1
    docker compose --env-file "$ENV_FILE" -f "$COMPOSE_FILE" run --rm --no-deps \
      -v "$BACKUP_DIR/s3:/restore:ro" \
      --entrypoint /bin/sh \
      rustfs-init \
      -lc "set -euo pipefail; endpoint='http://rustfs:9000'; aws --endpoint-url \"\$endpoint\" s3api head-bucket --bucket '${drill_bucket_name}' >/dev/null 2>&1 || aws --endpoint-url \"\$endpoint\" s3api create-bucket --bucket '${drill_bucket_name}'; aws --endpoint-url \"\$endpoint\" s3 sync /restore \"s3://${drill_bucket_name}\" --no-progress; count=\$(aws --endpoint-url \"\$endpoint\" s3 ls \"s3://${drill_bucket_name}\" --recursive | wc -l); echo \"[ok] S3 restore drill passed (objects: \$count)\""
  else
    echo "[warn] skipping S3 restore drill: no s3/ directory in backup"
  fi
fi
echo "[ok] restore drill completed using backup: $BACKUP_DIR"
+86
View File
@@ -0,0 +1,86 @@
#!/usr/bin/env bash
# Restores the production Postgres database (and optionally object storage)
# from a backup directory. Destructive: refuses to run unless FORCE=1.
#
# Arguments:
#   $1  env file (default: <repo>/.env.production)
#   $2  backup directory containing postgres.sql.gz and optionally s3/
# Environment:
#   FORCE       must be 1 to proceed
#   RESET_DB    1 drops and recreates the public schema before the import
#   RESTORE_S3  1 (default) syncs s3/ from the backup into object storage
set -euo pipefail

ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
ENV_FILE="${1:-$ROOT_DIR/.env.production}"
BACKUP_DIR="${2:-}"
COMPOSE_FILE="$ROOT_DIR/infra/docker-compose.prod.yml"
RESET_DB="${RESET_DB:-0}"
RESTORE_S3="${RESTORE_S3:-1}"
FORCE="${FORCE:-0}"

if [[ "${1:-}" == "--help" || "${1:-}" == "-h" ]]; then
  cat <<'USAGE'
Usage:
scripts/ops/restore-prod.sh [env-file] <backup-dir>
Examples:
scripts/ops/restore-prod.sh .env.production backups/20260401T120000Z
FORCE=1 RESET_DB=1 RESTORE_S3=1 scripts/ops/restore-prod.sh .env.production backups/...
Safety flags:
FORCE=1 required to run restore
RESET_DB=1 drops and recreates public schema before DB import (recommended)
RESTORE_S3=1 syncs object storage from backup (default: 1)
USAGE
  exit 0
fi

# --- Preconditions: fail before anything destructive happens ----------------
if [[ "$FORCE" != "1" ]]; then
  echo "[error] restore is destructive; set FORCE=1 to continue" >&2
  exit 1
fi
if [[ ! -f "$ENV_FILE" ]]; then
  echo "[error] env file not found: $ENV_FILE" >&2
  exit 1
fi
if [[ -z "$BACKUP_DIR" ]]; then
  echo "[error] backup directory is required" >&2
  exit 1
fi
if [[ ! -d "$BACKUP_DIR" ]]; then
  echo "[error] backup directory not found: $BACKUP_DIR" >&2
  exit 1
fi

# Resolve to an absolute path. The directory is bind-mounted with `-v` below,
# and Docker treats a non-absolute source (such as the documented
# `backups/<timestamp>` form) as a volume name rather than a host path; that
# would only surface after the database had already been overwritten.
BACKUP_DIR="$(cd "$BACKUP_DIR" && pwd)"

if [[ ! -f "$BACKUP_DIR/postgres.sql.gz" ]]; then
  echo "[error] missing postgres dump: $BACKUP_DIR/postgres.sql.gz" >&2
  exit 1
fi
if ! command -v docker >/dev/null 2>&1; then
  echo "[error] docker CLI is required" >&2
  exit 1
fi

node "$ROOT_DIR/scripts/check-production-env.mjs" "$ENV_FILE"

# --- Database restore -------------------------------------------------------
echo "[step] restoring postgres from $BACKUP_DIR/postgres.sql.gz ..."
if [[ "$RESET_DB" == "1" ]]; then
  docker compose --env-file "$ENV_FILE" -f "$COMPOSE_FILE" exec -T postgres \
    psql -U productier -d productier -v ON_ERROR_STOP=1 \
    -c 'DROP SCHEMA IF EXISTS public CASCADE;' \
    -c 'CREATE SCHEMA public;'
fi
gunzip -c "$BACKUP_DIR/postgres.sql.gz" \
  | docker compose --env-file "$ENV_FILE" -f "$COMPOSE_FILE" exec -T postgres \
    psql -U productier -d productier -v ON_ERROR_STOP=1

# --- Object storage restore -------------------------------------------------
if [[ "$RESTORE_S3" == "1" ]]; then
  if [[ -d "$BACKUP_DIR/s3" ]]; then
    echo "[step] restoring object storage from $BACKUP_DIR/s3 ..."
    # The -lc script is single-quoted on purpose: $endpoint and S3_BUCKET are
    # expanded inside the container, not by this shell. `--delete` removes
    # objects that are not present in the backup.
    docker compose --env-file "$ENV_FILE" -f "$COMPOSE_FILE" run --rm --no-deps \
      -v "$BACKUP_DIR/s3:/restore:ro" \
      --entrypoint /bin/sh \
      rustfs-init \
      -lc 'set -euo pipefail; endpoint="http://rustfs:9000"; aws --endpoint-url "$endpoint" s3 sync /restore "s3://${S3_BUCKET:-productier}" --delete --no-progress'
  else
    echo "[warn] object storage restore skipped; backup directory has no s3/ folder"
  fi
fi

echo "[ok] restore complete"
+119
View File
@@ -0,0 +1,119 @@
#!/usr/bin/env bash
# Production smoke checks: setup, --help handling and env-file validation.
set -euo pipefail

ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
ENV_FILE="${1:-$ROOT_DIR/.env.production}"

# Tunables, all overridable from the environment.
VERIFY_HTTP_REDIRECT="${VERIFY_HTTP_REDIRECT:-1}"
INSECURE_TLS="${OPS_SMOKE_INSECURE_TLS:-0}"
TIMEOUT_SECONDS="${OPS_SMOKE_TIMEOUT_SECONDS:-15}"

case "${1:-}" in
  --help | -h)
    cat <<'USAGE'
Usage:
scripts/ops/smoke-prod.sh [env-file]
Examples:
scripts/ops/smoke-prod.sh
OPS_SMOKE_INSECURE_TLS=1 scripts/ops/smoke-prod.sh .env.production
Checks:
- Public homepage responds with 2xx/3xx
- /v1/health returns {"ok":true}
- Security headers exist on public response
- Optional HTTP->HTTPS redirect validation for PUBLIC_DOMAIN
USAGE
    exit 0
    ;;
esac

[[ -f "$ENV_FILE" ]] || {
  echo "[error] env file not found: $ENV_FILE" >&2
  exit 1
}

# Validate the env file with the shared checker before making any request.
node "$ROOT_DIR/scripts/check-production-env.mjs" "$ENV_FILE"
# read_env_value KEY FILE
#
# Prints, without a trailing newline, the value of the first line in FILE whose
# name part (the text before the first "=") equals KEY. Comment lines and blank
# lines are skipped, a leading "export " is dropped, surrounding whitespace is
# trimmed, and one pair of matching single or double quotes around the value is
# removed. Prints nothing when no line matches.
read_env_value() {
  local wanted="$1"
  local env_path="$2"
  local found
  found="$(
    awk -v k="$wanted" -v sq="'" '
      # Skip comment lines and blank lines.
      /^[[:space:]]*(#|$)/ { next }
      {
        entry = $0
        gsub(/^[[:space:]]+|[[:space:]]+$/, "", entry)
        if (index(entry, "export ") == 1) {
          sub(/^export[[:space:]]+/, "", entry)
        }

        # The name is everything before the first "=", or the whole entry
        # when there is no "=" at all.
        eq = index(entry, "=")
        name = (eq > 0) ? substr(entry, 1, eq - 1) : entry
        if (name != k) {
          next
        }

        val = substr(entry, eq + 1)
        gsub(/^[[:space:]]+|[[:space:]]+$/, "", val)

        # Strip one pair of matching surrounding quotes.
        first = substr(val, 1, 1)
        last = substr(val, length(val), 1)
        if ((first == "\"" && last == "\"") || (first == sq && last == sq)) {
          val = substr(val, 2, length(val) - 2)
        }

        print val
        exit
      }
    ' "$env_path"
  )"
  printf '%s' "$found"
}
# --- Resolve targets from the env file ---------------------------------------
PUBLIC_URL="$(read_env_value PUBLIC_URL "$ENV_FILE")"
PUBLIC_DOMAIN="$(read_env_value PUBLIC_DOMAIN "$ENV_FILE")"
if [[ -z "$PUBLIC_URL" || -z "$PUBLIC_DOMAIN" ]]; then
  echo "[error] missing PUBLIC_URL or PUBLIC_DOMAIN in $ENV_FILE" >&2
  exit 1
fi
# Drop one trailing slash so the paths appended below never start with "//".
base_url="${PUBLIC_URL%/}"

# -f makes curl fail on HTTP >= 400; -k is opt-in for self-signed certificates.
curl_args=(-fsS --max-time "$TIMEOUT_SECONDS")
if [[ "$INSECURE_TLS" == "1" || "$INSECURE_TLS" == "true" ]]; then
  curl_args+=(-k)
fi

# --- Homepage ----------------------------------------------------------------
echo "[step] checking homepage status..."
home_status="$(curl "${curl_args[@]}" -o /dev/null -w '%{http_code}' "$base_url/")"
if [[ ! "$home_status" =~ ^(2|3)[0-9]{2}$ ]]; then
  echo "[error] homepage status is $home_status (expected 2xx/3xx)" >&2
  exit 1
fi

# --- API health --------------------------------------------------------------
echo "[step] checking API health payload..."
health_payload="$(curl "${curl_args[@]}" "$base_url/v1/health")"
node -e '
const payload = JSON.parse(process.argv[1]);
if (!payload || payload.ok !== true) {
console.error("[error] /v1/health did not return ok=true");
process.exit(1);
}
' "$health_payload"

# --- Security headers ----------------------------------------------------------
echo "[step] checking security headers..."
headers="$(curl "${curl_args[@]}" -D - -o /dev/null "$base_url/")"
# Lowercase once, then match from a here-string. With `pipefail` enabled, a
# `... | grep -q` pipeline can report failure when grep exits early and an
# upstream stage receives SIGPIPE, which would read as a missing header.
headers_lower="$(printf '%s\n' "$headers" | tr '[:upper:]' '[:lower:]')"
for expected_header in "strict-transport-security" "x-content-type-options" "x-frame-options" "referrer-policy"; do
  if ! grep -q "^${expected_header}:" <<<"$headers_lower"; then
    echo "[error] missing security header: $expected_header" >&2
    exit 1
  fi
done

# --- HTTP -> HTTPS redirect ----------------------------------------------------
if [[ "$VERIFY_HTTP_REDIRECT" == "1" || "$VERIFY_HTTP_REDIRECT" == "true" ]]; then
  echo "[step] checking HTTP->HTTPS redirect..."
  # No -f here and `|| true`: a failed request is reported by the status check
  # below instead of aborting the script through `set -e`.
  redirect_headers="$(curl -sS --max-time "$TIMEOUT_SECONDS" -D - -o /dev/null "http://$PUBLIC_DOMAIN/" || true)"
  redirect_status="$(printf '%s\n' "$redirect_headers" | awk 'NR==1 {print $2}')"
  # Compare the header name with tolower(): IGNORECASE is a gawk extension and
  # has no effect in other awk implementations.
  redirect_location="$(printf '%s\n' "$redirect_headers" | awk 'tolower($1) == "location:" {sub(/\r$/, "", $2); print $2; exit}')"
  if [[ "$redirect_status" != "301" && "$redirect_status" != "302" && "$redirect_status" != "307" && "$redirect_status" != "308" ]]; then
    echo "[error] expected redirect status from http://$PUBLIC_DOMAIN but got '$redirect_status'" >&2
    exit 1
  fi
  if [[ "$redirect_location" != https://"$PUBLIC_DOMAIN"* ]]; then
    echo "[error] expected redirect location to https://$PUBLIC_DOMAIN but got '$redirect_location'" >&2
    exit 1
  fi
fi

echo "[ok] production smoke checks passed for $PUBLIC_URL"
+137
View File
@@ -0,0 +1,137 @@
#!/usr/bin/env bash
# Staging restore drill: configuration, --help handling and precondition checks.
#
# Arguments:
#   $1  env file (default: <repo>/.env.production)
#   $2  backup ref or "latest" (default: latest)
#   $3  backup root directory (default: <repo>/backups)
set -euo pipefail

ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
ENV_FILE="${1:-$ROOT_DIR/.env.production}"
BACKUP_REF="${2:-latest}"
BACKUP_ROOT="${3:-$ROOT_DIR/backups}"
COMPOSE_FILE="$ROOT_DIR/infra/docker-compose.prod.yml"

# A timestamped compose project name keeps the drill stack separate by name.
DRILL_PROJECT_NAME="${DRILL_PROJECT_NAME:-productier-drill-$(date -u +%Y%m%d%H%M%S)}"
DRILL_CLEANUP_ON_EXIT="${DRILL_CLEANUP_ON_EXIT:-1}"

# Webhook settings fall back to the generic OPS_ALERT_* variables.
DRILL_WEBHOOK_URL="${DRILL_WEBHOOK_URL:-${OPS_ALERT_WEBHOOK_URL:-}}"
DRILL_WEBHOOK_BEARER_TOKEN="${DRILL_WEBHOOK_BEARER_TOKEN:-${OPS_ALERT_WEBHOOK_BEARER_TOKEN:-}}"
DRILL_NOTIFY_ON_SUCCESS="${DRILL_NOTIFY_ON_SUCCESS:-1}"
DRILL_WEBHOOK_TIMEOUT_SECONDS="${DRILL_WEBHOOK_TIMEOUT_SECONDS:-10}"

# Start pessimistic: the outcome is reported from the EXIT handler, and an exit
# that never reaches the ERR trap (for example a signal) must not be reported
# as a success. The script sets "success" only after the drill has finished.
result_status="failure"
result_message="restore drill did not complete"
services_started=0

if [[ "${1:-}" == "--help" || "${1:-}" == "-h" ]]; then
  cat <<'USAGE'
Usage:
scripts/ops/staging-drill.sh [env-file] [backup-ref|latest] [backup-root]
Examples:
scripts/ops/staging-drill.sh
scripts/ops/staging-drill.sh .env.production latest /var/backups/productier
scripts/ops/staging-drill.sh .env.production 20260401T120000Z
Behavior:
- Starts isolated drill infra using a temporary COMPOSE_PROJECT_NAME.
- Runs restore-drill checks against the selected backup.
- Sends pass/fail webhook notifications when configured.
- Tears down drill infra by default (set DRILL_CLEANUP_ON_EXIT=0 to keep it).
USAGE
  exit 0
fi

if [[ ! -f "$ENV_FILE" ]]; then
  echo "[error] env file not found: $ENV_FILE" >&2
  exit 1
fi
if ! command -v docker >/dev/null 2>&1; then
  echo "[error] docker CLI is required" >&2
  exit 1
fi
# json_escape STRING
#
# Prints STRING escaped for embedding inside a double-quoted JSON string:
# backslash, double quote, newline, carriage return, tab, backspace and
# form feed are replaced by their JSON escape sequences. Backslashes are
# handled first so the escapes added afterwards are not escaped again.
# Other control characters are passed through unchanged.
json_escape() {
  local value="$1"
  value="${value//\\/\\\\}"
  value="${value//\"/\\\"}"
  value="${value//$'\n'/\\n}"
  value="${value//$'\r'/\\r}"
  value="${value//$'\t'/\\t}"
  value="${value//$'\b'/\\b}"
  value="${value//$'\f'/\\f}"
  printf '%s' "$value"
}
# send_webhook STATUS MESSAGE
#
# POSTs a JSON notification about the drill outcome to DRILL_WEBHOOK_URL.
# Does nothing when no URL is configured. Best-effort: a missing curl binary
# or a failed delivery only prints a warning and never fails the caller.
send_webhook() {
  local status="$1"
  local message="$2"

  if [[ -z "$DRILL_WEBHOOK_URL" ]]; then
    return
  fi
  if ! command -v curl >/dev/null 2>&1; then
    echo "[warn] DRILL_WEBHOOK_URL is set but curl is unavailable; skipping webhook"
    return
  fi

  local payload
  payload="$(cat <<EOF
{"service":"productier-restore-drill","status":"$(json_escape "$status")","project":"$(json_escape "$DRILL_PROJECT_NAME")","finishedAt":"$(date -u +%Y-%m-%dT%H:%M:%SZ)","message":"$(json_escape "$message")"}
EOF
)"

  # Collect every option in one array that is never empty. Expanding an empty
  # array with "${arr[@]}" aborts under `set -u` on bash older than 4.4, which
  # would hit exactly the case where no bearer token is configured.
  local curl_opts=(-fsS -m "$DRILL_WEBHOOK_TIMEOUT_SECONDS" -H "Content-Type: application/json")
  if [[ -n "$DRILL_WEBHOOK_BEARER_TOKEN" ]]; then
    curl_opts+=(-H "Authorization: Bearer $DRILL_WEBHOOK_BEARER_TOKEN")
  fi

  if ! curl "${curl_opts[@]}" \
    -d "$payload" \
    "$DRILL_WEBHOOK_URL" >/dev/null; then
    echo "[warn] failed to deliver restore drill webhook"
  fi
}
# EXIT-trap handler: tears down the drill compose project (when teardown is
# enabled and the stack was started), then reports the outcome by webhook.
# Failures are always reported; successes only when DRILL_NOTIFY_ON_SUCCESS
# is "1" or "true".
cleanup() {
  if [[ "$DRILL_CLEANUP_ON_EXIT" == "1" ]] && [[ "$services_started" == "1" ]]; then
    echo "[step] tearing down drill stack (project: $DRILL_PROJECT_NAME)"
    # Teardown is best-effort; its output and exit status are discarded.
    COMPOSE_PROJECT_NAME="$DRILL_PROJECT_NAME" \
      docker compose --env-file "$ENV_FILE" -f "$COMPOSE_FILE" down -v --remove-orphans >/dev/null 2>&1 || true
  fi

  if [[ "$result_status" != "success" ]]; then
    send_webhook "failure" "$result_message"
    return
  fi

  case "$DRILL_NOTIFY_ON_SUCCESS" in
    1 | true)
      send_webhook "success" "$result_message"
      ;;
  esac
}
# on_error EXIT_CODE
#
# ERR-trap handler: records the drill as failed, with the failing exit code in
# the message that is reported later.
on_error() {
  printf -v result_message 'restore drill failed with exit code %s' "$1"
  result_status="failure"
}
# cleanup runs on every exit; on_error records the failing exit code first
# whenever a command fails under `set -e`.
trap cleanup EXIT
trap 'on_error $?' ERR

node "$ROOT_DIR/scripts/check-production-env.mjs" "$ENV_FILE"

echo "[step] starting drill infra (project: $DRILL_PROJECT_NAME)"
# Mark the stack as started BEFORE `up`: if `up` fails part-way, some
# containers or volumes may already exist and still need the teardown in
# cleanup, which ignores errors when there is nothing to remove.
services_started=1
COMPOSE_PROJECT_NAME="$DRILL_PROJECT_NAME" \
  docker compose --env-file "$ENV_FILE" -f "$COMPOSE_FILE" up -d postgres rustfs rustfs-init >/dev/null

echo "[step] waiting for postgres readiness..."
postgres_ready=0
for _ in $(seq 1 60); do
  if COMPOSE_PROJECT_NAME="$DRILL_PROJECT_NAME" \
    docker compose --env-file "$ENV_FILE" -f "$COMPOSE_FILE" exec -T postgres \
    pg_isready -U productier -d productier >/dev/null 2>&1; then
    postgres_ready=1
    break
  fi
  sleep 1
done
if [[ "$postgres_ready" != "1" ]]; then
  # An explicit `exit` does not fire the ERR trap, so record the failure here
  # to make sure the webhook reports it.
  result_status="failure"
  result_message="restore drill failed: postgres did not become ready within 60 attempts"
  echo "[error] postgres did not become ready within 60 attempts" >&2
  exit 1
fi

echo "[step] running restore drill against backup ref: $BACKUP_REF"
COMPOSE_PROJECT_NAME="$DRILL_PROJECT_NAME" \
  bash "$ROOT_DIR/scripts/ops/restore-drill.sh" "$ENV_FILE" "$BACKUP_REF" "$BACKUP_ROOT"

result_status="success"
result_message="restore drill completed successfully using backup ref ${BACKUP_REF}"
echo "[ok] staging drill completed"
+27
View File
@@ -0,0 +1,27 @@
#!/usr/bin/env bash
# Static checks for the ops scripts: every script must parse, and every script
# must exit successfully when invoked with --help.
set -euo pipefail

ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"

scripts=(
  "$ROOT_DIR/scripts/ops/backup-prod.sh"
  "$ROOT_DIR/scripts/ops/backup-job.sh"
  "$ROOT_DIR/scripts/ops/preflight-prod.sh"
  "$ROOT_DIR/scripts/ops/deploy-prod.sh"
  "$ROOT_DIR/scripts/ops/prune-backups.sh"
  "$ROOT_DIR/scripts/ops/restore-prod.sh"
  "$ROOT_DIR/scripts/ops/restore-drill.sh"
  "$ROOT_DIR/scripts/ops/staging-drill.sh"
  "$ROOT_DIR/scripts/ops/verify-backup.sh"
  "$ROOT_DIR/scripts/ops/smoke-prod.sh"
)

echo "[step] shell syntax check..."
# Check one file per invocation: `bash -n a b c` parses only `a` and treats the
# remaining paths as positional parameters of that script.
for script in "${scripts[@]}"; do
  bash -n "$script"
done

echo "[step] help smoke check..."
for script in "${scripts[@]}"; do
  bash "$script" --help >/dev/null
done

echo "[ok] ops script checks passed"
+37
View File
@@ -0,0 +1,37 @@
#!/usr/bin/env bash
# Verifies a backup directory: the Postgres dump must be a valid gzip stream
# and every entry in checksums.sha256 must match.
#
# Arguments:
#   $1  backup directory
# Exit status:
#   0  backup verified, or --help requested
#   1  verification failed or a required file/tool is missing
#   2  wrong number of arguments
set -euo pipefail

usage() {
  cat <<'USAGE'
Usage:
scripts/ops/verify-backup.sh <backup-dir>
Checks:
- postgres.sql.gz integrity
- checksums.sha256 verification
USAGE
}

if [[ "${1:-}" == "--help" || "${1:-}" == "-h" ]]; then
  usage
  exit 0
fi
# A wrong argument count is an error, not a help request: exiting 0 here would
# let a mis-invoked job (for example one passing an empty variable) look like a
# successful verification.
if [[ $# -ne 1 ]]; then
  usage >&2
  exit 2
fi

BACKUP_DIR="$1"
if [[ ! -d "$BACKUP_DIR" ]]; then
  echo "[error] backup directory not found: $BACKUP_DIR" >&2
  exit 1
fi
if [[ ! -f "$BACKUP_DIR/postgres.sql.gz" ]]; then
  echo "[error] missing postgres.sql.gz in $BACKUP_DIR" >&2
  exit 1
fi
if [[ ! -f "$BACKUP_DIR/checksums.sha256" ]]; then
  echo "[error] missing checksums.sha256 in $BACKUP_DIR" >&2
  exit 1
fi

echo "[step] validating postgres.sql.gz stream..."
gunzip -t "$BACKUP_DIR/postgres.sql.gz"

echo "[step] validating checksums..."
# Run from inside the backup directory so the paths listed in checksums.sha256
# resolve relative to it. Fall back to `shasum -a 256` where sha256sum is not
# installed.
if command -v sha256sum >/dev/null 2>&1; then
  (cd "$BACKUP_DIR" && sha256sum -c checksums.sha256)
elif command -v shasum >/dev/null 2>&1; then
  (cd "$BACKUP_DIR" && shasum -a 256 -c checksums.sha256)
else
  echo "[error] neither sha256sum nor shasum is available" >&2
  exit 1
fi

echo "[ok] backup verified: $BACKUP_DIR"