This commit is contained in:
Tomas Dvorak
2026-03-12 19:11:08 +01:00
parent 7773947450
commit 455bf61302
7 changed files with 724 additions and 240 deletions
+79
View File
@@ -0,0 +1,79 @@
#!/usr/bin/env python3
import argparse
import contextlib
import logging
import sys
def response_body_bytes(response) -> bytes:
    """Return the payload of *response* as ``bytes``.

    Lookup order:
      1. ``response.body`` when it is ``bytes``/``bytearray`` (copied to
         ``bytes``) or ``str`` (UTF-8 encoded).
      2. ``response.text`` when it is a ``str`` (UTF-8 encoded).
      3. ``str(response)``, UTF-8 encoded, as a last resort.

    Missing attributes are treated the same as ``None``.
    """
    payload = getattr(response, "body", None)

    # Binary bodies are returned as-is (normalised to an immutable bytes).
    if isinstance(payload, (bytes, bytearray)):
        return bytes(payload)

    # Only consult ``text`` when ``body`` did not already give us a string.
    if not isinstance(payload, str):
        fallback = getattr(response, "text", None)
        payload = fallback if isinstance(fallback, str) else str(response)

    return payload.encode("utf-8")
def main() -> int:
    """Fetch a URL with Scrapling's ``StealthyFetcher`` and write the body to stdout.

    Command-line options:
        --url         URL to fetch (required).
        --referer     Optional ``Referer`` header value; no header is sent
                      when it is empty.
        --timeout-ms  Passed to the fetcher as ``timeout`` (default 45000).
        --wait-ms     Passed to the fetcher as ``wait`` (default 1000).

    Diagnostics go to stderr so that stdout carries only the response body.

    Returns:
        The process exit code:
        0 -- body written (or the reader of stdout closed the pipe),
        1 -- fetch raised, HTTP status >= 400, or the body was empty,
        2 -- Scrapling could not be imported.
    """
    # Local import: only needed for the broken-pipe cleanup at the end.
    import os

    parser = argparse.ArgumentParser()
    parser.add_argument("--url", required=True)
    parser.add_argument("--referer", default="")
    parser.add_argument("--timeout-ms", type=int, default=45000)
    parser.add_argument("--wait-ms", type=int, default=1000)
    args = parser.parse_args()

    # Imported lazily so a missing/broken install yields a clean exit code
    # instead of a traceback at module import time.
    try:
        from scrapling.fetchers import StealthyFetcher
    except Exception as exc:
        print(f"Scrapling import failed: {exc}", file=sys.stderr)
        return 2

    # Keep library log output below ERROR quiet.
    logging.getLogger().setLevel(logging.ERROR)

    extra_headers = {}
    if args.referer:
        extra_headers["Referer"] = args.referer

    fetch_kwargs = {
        "headless": True,
        "network_idle": True,
        "google_search": False,
        "solve_cloudflare": True,
        "timeout": args.timeout_ms,
        "wait": args.wait_ms,
    }
    if extra_headers:
        fetch_kwargs["extra_headers"] = extra_headers

    try:
        # Anything the fetcher prints must not pollute stdout, which is
        # reserved for the response body.
        with contextlib.redirect_stdout(sys.stderr):
            response = StealthyFetcher.fetch(args.url, **fetch_kwargs)
    except Exception as exc:
        print(f"Scrapling fetch failed: {exc}", file=sys.stderr)
        return 1

    status = getattr(response, "status", None)
    if isinstance(status, int) and status >= 400:
        print(f"Scrapling returned HTTP {status}", file=sys.stderr)
        return 1

    body = response_body_bytes(response)
    if not body:
        print("Scrapling returned an empty body", file=sys.stderr)
        return 1

    try:
        sys.stdout.buffer.write(body)
        # The write above may only fill the buffer; flush here so a closed
        # pipe is detected inside this try rather than at interpreter exit.
        sys.stdout.flush()
    except BrokenPipeError:
        # Python flushes the standard streams again on exit. Point the stdout
        # descriptor at devnull so that final flush cannot raise another
        # BrokenPipeError. Best-effort: stdout may have no real descriptor.
        with contextlib.suppress(OSError, ValueError):
            devnull_fd = os.open(os.devnull, os.O_WRONLY)
            try:
                os.dup2(devnull_fd, sys.stdout.fileno())
            finally:
                os.close(devnull_fd)
        return 0
    return 0
if __name__ == "__main__":
    # Run only when executed as a script; main()'s return value becomes the
    # process exit status.
    sys.exit(main())