mirror of
https://github.com/Dvorinka/facr-scraper.git
synced 2026-06-03 20:12:57 +00:00
80 lines
2.1 KiB
Python
80 lines
2.1 KiB
Python
#!/usr/bin/env python3
|
|
|
|
import argparse
|
|
import contextlib
|
|
import logging
|
|
import sys
|
|
|
|
|
|
def response_body_bytes(response) -> bytes:
    """Return the payload of *response* as UTF-8 encoded bytes.

    Sources are tried in order:

    1. ``response.body`` when it is bytes-like (``bytes``, ``bytearray`` or
       ``memoryview``) -- returned as an immutable ``bytes`` copy.
    2. ``response.body`` when it is a ``str`` -- encoded as UTF-8.
    3. ``response.text`` when it is a ``str`` -- encoded as UTF-8.
    4. ``str(response)`` -- encoded as UTF-8, as a last resort.

    Args:
        response: Any object; the ``body`` and ``text`` attributes are
            optional and are looked up with ``getattr``.

    Returns:
        The payload as ``bytes``. May be empty when the selected source is
        empty; no further source is tried in that case.
    """
    body = getattr(response, "body", None)
    if isinstance(body, (bytes, bytearray, memoryview)):
        return bytes(body)
    if isinstance(body, str):
        # errors="replace" keeps a lone surrogate in the markup from raising
        # UnicodeEncodeError; valid text encodes exactly as before.
        return body.encode("utf-8", errors="replace")

    text = getattr(response, "text", None)
    if isinstance(text, str):
        return text.encode("utf-8", errors="replace")

    return str(response).encode("utf-8", errors="replace")
|
|
|
|
|
|
def main() -> int:
    """Fetch one URL with Scrapling's StealthyFetcher and write its body to stdout.

    Command-line options:
        --url         URL to fetch (required).
        --referer     Value for the ``Referer`` header; no header is sent
                      when it is empty (the default).
        --timeout-ms  Passed to the fetcher as ``timeout`` (default 45000).
        --wait-ms     Passed to the fetcher as ``wait`` (default 1000).

    All diagnostics go to stderr so that stdout carries only the response
    body.

    Returns:
        0 when the body was written (or the reader closed the pipe early),
        1 when the fetch raised, the HTTP status was >= 400, or the body was
        empty, and 2 when Scrapling could not be imported.
    """
    import os  # only needed on the broken-pipe path below

    parser = argparse.ArgumentParser()
    parser.add_argument("--url", required=True)
    parser.add_argument("--referer", default="")
    parser.add_argument("--timeout-ms", type=int, default=45000)
    parser.add_argument("--wait-ms", type=int, default=1000)
    args = parser.parse_args()

    # Imported lazily so a missing or broken installation is reported with a
    # dedicated exit code instead of a traceback at module import time.
    try:
        from scrapling.fetchers import StealthyFetcher
    except Exception as exc:
        print(f"Scrapling import failed: {exc}", file=sys.stderr)
        return 2

    # Silence anything below ERROR on the root logger.
    logging.getLogger().setLevel(logging.ERROR)

    extra_headers = {}
    if args.referer:
        extra_headers["Referer"] = args.referer

    fetch_kwargs = {
        "headless": True,
        "network_idle": True,
        "google_search": False,
        "solve_cloudflare": True,
        "timeout": args.timeout_ms,
        "wait": args.wait_ms,
    }
    if extra_headers:
        fetch_kwargs["extra_headers"] = extra_headers

    try:
        # Anything printed to stdout during the fetch is diverted to stderr
        # so it cannot mix with the body written below.
        with contextlib.redirect_stdout(sys.stderr):
            response = StealthyFetcher.fetch(args.url, **fetch_kwargs)
    except Exception as exc:
        print(f"Scrapling fetch failed: {exc}", file=sys.stderr)
        return 1

    status = getattr(response, "status", None)
    if isinstance(status, int) and status >= 400:
        print(f"Scrapling returned HTTP {status}", file=sys.stderr)
        return 1

    body = response_body_bytes(response)
    if not body:
        print("Scrapling returned an empty body", file=sys.stderr)
        return 1

    try:
        sys.stdout.buffer.write(body)
        # Flush here so a closed pipe is detected inside this try block and
        # not during the interpreter's implicit flush at exit.
        sys.stdout.buffer.flush()
    except BrokenPipeError:
        # The reader went away; that is not an error for this tool. Python
        # flushes stdout again at shutdown, so point the descriptor at
        # devnull to keep that second flush from failing.
        with contextlib.suppress(OSError, ValueError):
            devnull = os.open(os.devnull, os.O_WRONLY)
            try:
                os.dup2(devnull, sys.stdout.fileno())
            finally:
                os.close(devnull)
        return 0
    return 0
|
|
|
|
|
|
# Script entry point: run main() and use its return value as the process
# exit status.
if __name__ == "__main__":
    sys.exit(main())
|