mirror of
https://github.com/Dvorinka/facr-scraper.git
synced 2026-06-03 20:12:57 +00:00
upload
This commit is contained in:
@@ -0,0 +1,79 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import argparse
|
||||
import contextlib
|
||||
import logging
|
||||
import sys
|
||||
|
||||
|
||||
def response_body_bytes(response) -> bytes:
    """Return the payload of *response* as raw bytes.

    Candidates are tried in this order:

    1. ``response.body`` -- ``bytes``/``bytearray`` is copied into ``bytes``,
       ``str`` is encoded as UTF-8.
    2. ``response.text`` -- used when it is a ``str``, encoded as UTF-8.
    3. ``str(response)`` -- last resort, encoded as UTF-8.
    """
    payload = getattr(response, "body", None)
    if isinstance(payload, str):
        return payload.encode("utf-8")
    if isinstance(payload, (bytes, bytearray)):
        return bytes(payload)

    # No usable ``body``: fall back to ``text``, then to the string form of
    # the response object itself.
    fallback = getattr(response, "text", None)
    if not isinstance(fallback, str):
        fallback = str(response)
    return fallback.encode("utf-8")
|
||||
|
||||
|
||||
def main() -> int:
    """Fetch one URL with Scrapling's ``StealthyFetcher`` and write the body to stdout.

    Command-line options:
        --url         URL to fetch (required).
        --referer     Value for a ``Referer`` header; no header is sent when empty.
        --timeout-ms  Passed to the fetcher as ``timeout`` (default 45000).
        --wait-ms     Passed to the fetcher as ``wait`` (default 1000).

    Returns:
        The process exit code:
        0 -- body written to stdout (or the reader closed the pipe early),
        1 -- the fetch raised, the status was >= 400, or the body was empty,
        2 -- ``scrapling`` could not be imported.
    """
    # Local import: only needed for the broken-pipe cleanup at the end.
    import os

    parser = argparse.ArgumentParser()
    parser.add_argument("--url", required=True)
    parser.add_argument("--referer", default="")
    parser.add_argument("--timeout-ms", type=int, default=45000)
    parser.add_argument("--wait-ms", type=int, default=1000)
    args = parser.parse_args()

    # Imported lazily so a missing/broken installation is reported with a
    # dedicated exit code instead of an import-time traceback.
    try:
        from scrapling.fetchers import StealthyFetcher
    except Exception as exc:
        print(f"Scrapling import failed: {exc}", file=sys.stderr)
        return 2

    # Raise the root logger threshold so only errors are logged.
    logging.getLogger().setLevel(logging.ERROR)

    extra_headers = {}
    if args.referer:
        extra_headers["Referer"] = args.referer

    fetch_kwargs = {
        "headless": True,
        "network_idle": True,
        "google_search": False,
        "solve_cloudflare": True,
        "timeout": args.timeout_ms,
        "wait": args.wait_ms,
    }
    if extra_headers:
        fetch_kwargs["extra_headers"] = extra_headers

    try:
        # Anything printed to stdout during the fetch is sent to stderr so
        # that stdout carries nothing but the response body.
        with contextlib.redirect_stdout(sys.stderr):
            response = StealthyFetcher.fetch(args.url, **fetch_kwargs)
    except Exception as exc:
        print(f"Scrapling fetch failed: {exc}", file=sys.stderr)
        return 1

    status = getattr(response, "status", None)
    if isinstance(status, int) and status >= 400:
        print(f"Scrapling returned HTTP {status}", file=sys.stderr)
        return 1

    body = response_body_bytes(response)
    if not body:
        print("Scrapling returned an empty body", file=sys.stderr)
        return 1

    try:
        sys.stdout.buffer.write(body)
        # Flush here, inside the try: the stream is buffered, so without an
        # explicit flush a closed pipe would only be noticed by the
        # interpreter's exit-time flush, outside this handler.
        sys.stdout.buffer.flush()
    except BrokenPipeError:
        # The reader went away; treat it as success. Point stdout at devnull
        # so the exit-time flush of leftover buffered data cannot raise again.
        with contextlib.suppress(OSError, ValueError):
            devnull = os.open(os.devnull, os.O_WRONLY)
            try:
                os.dup2(devnull, sys.stdout.fileno())
            finally:
                os.close(devnull)
        return 0
    return 0
|
||||
|
||||
|
||||
# Script entry point: exit with the status code returned by main().
if __name__ == "__main__":
    sys.exit(main())
|
||||
Reference in New Issue
Block a user