mirror of
https://github.com/Dvorinka/facr-scraper.git
synced 2026-06-03 20:12:57 +00:00
upload
This commit is contained in:
@@ -1,6 +1,4 @@
|
|||||||
export LOGOAPI_BASE_URL="https://logoapi.sportcreative.eu" # or your real logoapi base URL
|
export LOGOAPI_BASE_URL="https://logoapi.sportcreative.eu" # or your real logoapi base URL
|
||||||
export SMTP_HOST="smtp.purelymail.com"
|
# Cloudflare Browser Rendering API configuration
|
||||||
export SMTP_PORT="465"
|
export CLOUDFLARE_ACCOUNT_ID="2154bf34f65a995f9b85aa17fee9da43" # Your Cloudflare account ID
|
||||||
export SMTP_USER="info@tdvorak.dev"
|
# API token with Browser Rendering - Edit permission.
# Do not commit the real value: provide it from the calling environment or a
# secret store. The expansion below keeps an already-exported token and
# otherwise leaves the variable empty.
export CLOUDFLARE_API_TOKEN="${CLOUDFLARE_API_TOKEN:-}"
|
||||||
export SMTP_PASS="%8s3Yad*!b3*t"
|
|
||||||
export MISSING_LOGO_NOTIFY_TO="info@tdvorak.dev"
|
|
||||||
|
|||||||
@@ -0,0 +1,2 @@
|
|||||||
|
.venv-scrapling/
|
||||||
|
__pycache__/
|
||||||
@@ -11,20 +11,5 @@ require (
|
|||||||
|
|
||||||
require (
|
require (
|
||||||
github.com/andybalholm/cascadia v1.3.3 // indirect
|
github.com/andybalholm/cascadia v1.3.3 // indirect
|
||||||
github.com/chromedp/cdproto v0.0.0-20250803210736-d308e07a266d // indirect
|
|
||||||
github.com/chromedp/chromedp v0.14.1 // indirect
|
|
||||||
github.com/chromedp/sysutil v1.1.0 // indirect
|
|
||||||
github.com/go-json-experiment/json v0.0.0-20250813024750-ebf49471dced // indirect
|
|
||||||
github.com/go-rod/rod v0.108.1 // indirect
|
|
||||||
github.com/gobwas/httphead v0.1.0 // indirect
|
|
||||||
github.com/gobwas/pool v0.2.1 // indirect
|
|
||||||
github.com/gobwas/ws v1.4.0 // indirect
|
|
||||||
github.com/ledongthuc/pdf v0.0.0-20250511090121-5959a4027728 // indirect
|
|
||||||
github.com/ysmood/fetchup v0.5.2 // indirect
|
|
||||||
github.com/ysmood/goob v0.4.0 // indirect
|
|
||||||
github.com/ysmood/got v0.41.0 // indirect
|
|
||||||
github.com/ysmood/gson v0.7.3 // indirect
|
|
||||||
github.com/ysmood/leakless v0.9.0 // indirect
|
|
||||||
golang.org/x/net v0.43.0 // indirect
|
golang.org/x/net v0.43.0 // indirect
|
||||||
golang.org/x/sys v0.35.0 // indirect
|
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -1,62 +1,10 @@
|
|||||||
github.com/PuerkitoBio/goquery v1.8.1 h1:uQxhNlArOIdbrH1tr0UXwdVFgDcZDrZVdcpygAcwmWM=
|
|
||||||
github.com/PuerkitoBio/goquery v1.8.1/go.mod h1:Q8ICL1kNUJ2sXGoAhPGUdYDJvgQgHzJsnnd3H7Ho5jQ=
|
|
||||||
github.com/PuerkitoBio/goquery v1.10.3 h1:pFYcNSqHxBD06Fpj/KsbStFRsgRATgnf3LeXiUkhzPo=
|
github.com/PuerkitoBio/goquery v1.10.3 h1:pFYcNSqHxBD06Fpj/KsbStFRsgRATgnf3LeXiUkhzPo=
|
||||||
github.com/PuerkitoBio/goquery v1.10.3/go.mod h1:tMUX0zDMHXYlAQk6p35XxQMqMweEKB7iK7iLNd4RH4Y=
|
github.com/PuerkitoBio/goquery v1.10.3/go.mod h1:tMUX0zDMHXYlAQk6p35XxQMqMweEKB7iK7iLNd4RH4Y=
|
||||||
github.com/andybalholm/cascadia v1.3.1 h1:nhxRkql1kdYCc8Snf7D5/D3spOX+dBgjA6u8x004T2c=
|
|
||||||
github.com/andybalholm/cascadia v1.3.1/go.mod h1:R4bJ1UQfqADjvDa4P6HZHLh/3OxWWEqc0Sk8XGwHqvA=
|
|
||||||
github.com/andybalholm/cascadia v1.3.3 h1:AG2YHrzJIm4BZ19iwJ/DAua6Btl3IwJX+VI4kktS1LM=
|
github.com/andybalholm/cascadia v1.3.3 h1:AG2YHrzJIm4BZ19iwJ/DAua6Btl3IwJX+VI4kktS1LM=
|
||||||
github.com/andybalholm/cascadia v1.3.3/go.mod h1:xNd9bqTn98Ln4DwST8/nG+H0yuB8Hmgu1YHNnWw0GeA=
|
github.com/andybalholm/cascadia v1.3.3/go.mod h1:xNd9bqTn98Ln4DwST8/nG+H0yuB8Hmgu1YHNnWw0GeA=
|
||||||
github.com/chromedp/cdproto v0.0.0-20250803210736-d308e07a266d h1:ZtA1sedVbEW7EW80Iz2GR3Ye6PwbJAJXjv7D74xG6HU=
|
|
||||||
github.com/chromedp/cdproto v0.0.0-20250803210736-d308e07a266d/go.mod h1:NItd7aLkcfOA/dcMXvl8p1u+lQqioRMq/SqDp71Pb/k=
|
|
||||||
github.com/chromedp/chromedp v0.14.1 h1:0uAbnxewy/Q+Bg7oafVePE/6EXEho9hnaC38f+TTENg=
|
|
||||||
github.com/chromedp/chromedp v0.14.1/go.mod h1:rHzAv60xDE7VNy/MYtTUrYreSc0ujt2O1/C3bzctYBo=
|
|
||||||
github.com/chromedp/sysutil v1.1.0 h1:PUFNv5EcprjqXZD9nJb9b/c9ibAbxiYo4exNWZyipwM=
|
|
||||||
github.com/chromedp/sysutil v1.1.0/go.mod h1:WiThHUdltqCNKGc4gaU50XgYjwjYIhKWoHGPTUfWTJ8=
|
|
||||||
github.com/go-json-experiment/json v0.0.0-20250813024750-ebf49471dced h1:Q311OHjMh/u5E2TITc++WlTP5We0xNseRMkHDyvhW7I=
|
|
||||||
github.com/go-json-experiment/json v0.0.0-20250813024750-ebf49471dced/go.mod h1:TiCD2a1pcmjd7YnhGH0f/zKNcCD06B029pHhzV23c2M=
|
|
||||||
github.com/go-rod/rod v0.100.0 h1:tEKIb5wS3pGUpW4oJPYDxOKmRXaZbd6S+YVjJ6BHBBY=
|
|
||||||
github.com/go-rod/rod v0.100.0/go.mod h1:h9igqSGReLmOWyHtdf0AtUd0mdkHFu3gFwBeV+stleM=
|
|
||||||
github.com/go-rod/rod v0.108.1 h1:2lKs+v/+B/2pbGKZgNIRbURhduTKNDZ3PXIvTRAV2Mg=
|
|
||||||
github.com/go-rod/rod v0.108.1/go.mod h1:yNvL687cwcjgebRuArQN9AStFdm8iS/e/rzImrS9Pzg=
|
|
||||||
github.com/go-rod/rod v0.114.0 h1:P+zLOqsj+vKf4C86SfjP6ymyPl9VXoYKm+ceCeQms6Y=
|
|
||||||
github.com/go-rod/rod v0.114.0/go.mod h1:aiedSEFg5DwG/fnNbUOTPMTTWX3MRj6vIs/a684Mthw=
|
|
||||||
github.com/go-rod/rod v0.116.2 h1:A5t2Ky2A+5eD/ZJQr1EfsQSe5rms5Xof/qj296e+ZqA=
|
|
||||||
github.com/go-rod/rod v0.116.2/go.mod h1:H+CMO9SCNc2TJ2WfrG+pKhITz57uGNYU43qYHh438Mg=
|
|
||||||
github.com/gobwas/httphead v0.1.0 h1:exrUm0f4YX0L7EBwZHuCF4GDp8aJfVeBrlLQrs6NqWU=
|
|
||||||
github.com/gobwas/httphead v0.1.0/go.mod h1:O/RXo79gxV8G+RqlR/otEwx4Q36zl9rqC5u12GKvMCM=
|
|
||||||
github.com/gobwas/pool v0.2.1 h1:xfeeEhW7pwmX8nuLVlqbzVc7udMDrwetjEv+TZIz1og=
|
|
||||||
github.com/gobwas/pool v0.2.1/go.mod h1:q8bcK0KcYlCgd9e7WYLm9LpyS+YeLd8JVDW6WezmKEw=
|
|
||||||
github.com/gobwas/ws v1.4.0 h1:CTaoG1tojrh4ucGPcoJFiAQUAsEWekEWvLy7GsVNqGs=
|
|
||||||
github.com/gobwas/ws v1.4.0/go.mod h1:G3gNqMNtPppf5XUz7O4shetPpcZ1VJ7zt18dlUeakrc=
|
|
||||||
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
|
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
|
||||||
github.com/gorilla/mux v1.8.0 h1:i40aqfkR1h2SlN9hojwV5ZA91wcXFOvkdNIeFDP5koI=
|
|
||||||
github.com/gorilla/mux v1.8.0/go.mod h1:DVbg23sWSpFRCP0SfiEN6jmj59UnW/n46BH5rLB71So=
|
|
||||||
github.com/gorilla/mux v1.8.1 h1:TuBL49tXwgrFYWhqrNgrUNEY92u81SPhu7sTdzQEiWY=
|
github.com/gorilla/mux v1.8.1 h1:TuBL49tXwgrFYWhqrNgrUNEY92u81SPhu7sTdzQEiWY=
|
||||||
github.com/gorilla/mux v1.8.1/go.mod h1:AKf9I4AEqPTmMytcMc0KkNouC66V3BtZ4qD5fmWSiMQ=
|
github.com/gorilla/mux v1.8.1/go.mod h1:AKf9I4AEqPTmMytcMc0KkNouC66V3BtZ4qD5fmWSiMQ=
|
||||||
github.com/ledongthuc/pdf v0.0.0-20250511090121-5959a4027728 h1:QwWKgMY28TAXaDl+ExRDqGQltzXqN/xypdKP86niVn8=
|
|
||||||
github.com/ledongthuc/pdf v0.0.0-20250511090121-5959a4027728/go.mod h1:1fEHWurg7pvf5SG6XNE5Q8UZmOwex51Mkx3SLhrW5B4=
|
|
||||||
github.com/ysmood/fetchup v0.2.3/go.mod h1:xhibcRKziSvol0H1/pj33dnKrYyI2ebIvz5cOOkYGns=
|
|
||||||
github.com/ysmood/fetchup v0.5.2 h1:P9w3OIA7RSNEEFvEmOiTq09IOu42C96PMyZ1MWd8TAs=
|
|
||||||
github.com/ysmood/fetchup v0.5.2/go.mod h1:yCv8s8itjsCul1LGXJ1Q+8EQnZcVjfbZ4+l1zDm4StE=
|
|
||||||
github.com/ysmood/goob v0.3.0/go.mod h1:S3lq113Y91y1UBf1wj1pFOxeahvfKkCk6mTWTWbDdWs=
|
|
||||||
github.com/ysmood/goob v0.4.0 h1:HsxXhyLBeGzWXnqVKtmT9qM7EuVs/XOgkX7T6r1o1AQ=
|
|
||||||
github.com/ysmood/goob v0.4.0/go.mod h1:u6yx7ZhS4Exf2MwciFr6nIM8knHQIE22lFpWHnfql18=
|
|
||||||
github.com/ysmood/gop v0.0.2/go.mod h1:rr5z2z27oGEbyB787hpEcx4ab8cCiPnKxn0SUHt6xzk=
|
|
||||||
github.com/ysmood/got v0.12.0/go.mod h1:pE1l4LOwOBhQg6A/8IAatkGp7uZjnalzrZolnlhhMgY=
|
|
||||||
github.com/ysmood/got v0.31.2/go.mod h1:pE1l4LOwOBhQg6A/8IAatkGp7uZjnalzrZolnlhhMgY=
|
|
||||||
github.com/ysmood/got v0.34.1/go.mod h1:yddyjq/PmAf08RMLSwDjPyCvHvYed+WjHnQxpH851LM=
|
|
||||||
github.com/ysmood/got v0.41.0 h1:XiFH311ltTSGyxjeKcNvy7dzbJjjTzn6DBgK313JHBs=
|
|
||||||
github.com/ysmood/got v0.41.0/go.mod h1:W7DdpuX6skL3NszLmAsC5hT7JAhuLZhByVzHTq874Qg=
|
|
||||||
github.com/ysmood/gotrace v0.2.2/go.mod h1:TzhIG7nHDry5//eYZDYcTzuJLYQIkykJzCRIo4/dzQM=
|
|
||||||
github.com/ysmood/gotrace v0.6.0/go.mod h1:TzhIG7nHDry5//eYZDYcTzuJLYQIkykJzCRIo4/dzQM=
|
|
||||||
github.com/ysmood/gson v0.6.4/go.mod h1:3Kzs5zDl21g5F/BlLTNcuAGAYLKt2lV5G8D1zF3RNmg=
|
|
||||||
github.com/ysmood/gson v0.7.1/go.mod h1:3Kzs5zDl21g5F/BlLTNcuAGAYLKt2lV5G8D1zF3RNmg=
|
|
||||||
github.com/ysmood/gson v0.7.3 h1:QFkWbTH8MxyUTKPkVWAENJhxqdBa4lYTQWqZCiLG6kE=
|
|
||||||
github.com/ysmood/gson v0.7.3/go.mod h1:3Kzs5zDl21g5F/BlLTNcuAGAYLKt2lV5G8D1zF3RNmg=
|
|
||||||
github.com/ysmood/leakless v0.7.0/go.mod h1:R8iAXPRaG97QJwqxs74RdwzcRHT1SWCGTNqY8q0JvMQ=
|
|
||||||
github.com/ysmood/leakless v0.8.0/go.mod h1:R8iAXPRaG97QJwqxs74RdwzcRHT1SWCGTNqY8q0JvMQ=
|
|
||||||
github.com/ysmood/leakless v0.9.0 h1:qxCG5VirSBvmi3uynXFkcnLMzkphdh3xx5FtrORwDCU=
|
|
||||||
github.com/ysmood/leakless v0.9.0/go.mod h1:R8iAXPRaG97QJwqxs74RdwzcRHT1SWCGTNqY8q0JvMQ=
|
|
||||||
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
|
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
|
||||||
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
|
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
|
||||||
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
|
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
|
||||||
@@ -71,11 +19,8 @@ golang.org/x/mod v0.15.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
|
|||||||
golang.org/x/mod v0.17.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
|
golang.org/x/mod v0.17.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
|
||||||
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
||||||
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
|
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
|
||||||
golang.org/x/net v0.0.0-20210916014120-12bc252f5db8/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
|
|
||||||
golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
|
golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
|
||||||
golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
|
golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
|
||||||
golang.org/x/net v0.7.0 h1:rJrUqqhjsgNp7KqAIc25s9pZnjU7TUcSY7HcVZjdn1g=
|
|
||||||
golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
|
|
||||||
golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg=
|
golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg=
|
||||||
golang.org/x/net v0.15.0/go.mod h1:idbUs1IY1+zTqbi8yxTbhexhEEk5ur9LInksu6HrEpk=
|
golang.org/x/net v0.15.0/go.mod h1:idbUs1IY1+zTqbi8yxTbhexhEEk5ur9LInksu6HrEpk=
|
||||||
golang.org/x/net v0.21.0/go.mod h1:bIjVDfnllIU7BJ2DNgfnXvpSvtn8VRwhlsaeUTyUS44=
|
golang.org/x/net v0.21.0/go.mod h1:bIjVDfnllIU7BJ2DNgfnXvpSvtn8VRwhlsaeUTyUS44=
|
||||||
@@ -92,19 +37,15 @@ golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
|
|||||||
golang.org/x/sync v0.10.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
|
golang.org/x/sync v0.10.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
|
||||||
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||||
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||||
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
|
||||||
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
|
||||||
golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
||||||
golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
||||||
golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
||||||
golang.org/x/sys v0.35.0 h1:vz1N37gP5bs89s7He8XuIYXpyY0+QlsKmzipCbUtyxI=
|
|
||||||
golang.org/x/sys v0.35.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
|
|
||||||
golang.org/x/telemetry v0.0.0-20240228155512-f48c80bd79b2/go.mod h1:TeRTkGYfJXctD9OcfyVLyj2J3IxLnKwHJR8f4D8a3YE=
|
golang.org/x/telemetry v0.0.0-20240228155512-f48c80bd79b2/go.mod h1:TeRTkGYfJXctD9OcfyVLyj2J3IxLnKwHJR8f4D8a3YE=
|
||||||
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
|
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
|
||||||
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
|
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
|
||||||
@@ -116,7 +57,6 @@ golang.org/x/term v0.20.0/go.mod h1:8UkIAJTvZgivsXaD6/pH6U9ecQzZ45awqEOzuCvwpFY=
|
|||||||
golang.org/x/term v0.27.0/go.mod h1:iMsnZpn0cago0GOrHO2+Y7u7JPn5AylBrcoWkElMTSM=
|
golang.org/x/term v0.27.0/go.mod h1:iMsnZpn0cago0GOrHO2+Y7u7JPn5AylBrcoWkElMTSM=
|
||||||
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
||||||
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
||||||
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
|
||||||
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
|
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
|
||||||
golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
|
golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
|
||||||
golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=
|
golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=
|
||||||
|
|||||||
@@ -2,15 +2,17 @@ package main
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"bytes"
|
"bytes"
|
||||||
"crypto/tls"
|
"context"
|
||||||
|
_ "embed"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"log"
|
"log"
|
||||||
"net/http"
|
"net/http"
|
||||||
"net/smtp"
|
|
||||||
neturl "net/url"
|
neturl "net/url"
|
||||||
"os"
|
"os"
|
||||||
|
"os/exec"
|
||||||
|
"path/filepath"
|
||||||
"regexp"
|
"regexp"
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
@@ -30,6 +32,613 @@ type Competition struct {
|
|||||||
Table *CompetitionTable `json:"table,omitempty"`
|
Table *CompetitionTable `json:"table,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Cloudflare Browser Rendering API structures

// CloudflareCrawlRequest is the JSON body sent when a crawl job is started.
// Every field except URL is tagged omitempty, so a zero value is left out of
// the serialized request.
type CloudflareCrawlRequest struct {
	// URL is the address the crawl starts from.
	URL string `json:"url"`
	// Limit is serialized as "limit".
	Limit int `json:"limit,omitempty"`
	// Depth is serialized as "depth".
	Depth int `json:"depth,omitempty"`
	// Formats lists the requested output formats (for example "html", "markdown").
	Formats []string `json:"formats,omitempty"`
	// Render is serialized as "render".
	Render bool `json:"render,omitempty"`
	// Source is serialized as "source" (values used in this file: "all", "links").
	Source string `json:"source,omitempty"`
	// Options carries additional free-form settings under "options".
	Options map[string]interface{} `json:"options,omitempty"`
}
|
||||||
|
|
||||||
|
// CloudflareCrawlResponse is the envelope decoded from the reply to a
// start-crawl request.
type CloudflareCrawlResponse struct {
	// Success mirrors the "success" flag of the reply.
	Success bool `json:"success"`
	// Result holds the job ID.
	Result string `json:"result"`
}
|
||||||
|
|
||||||
|
type CloudflareCrawlJob struct {
|
||||||
|
ID string `json:"id"`
|
||||||
|
Status string `json:"status"`
|
||||||
|
BrowserSecondsUsed float64 `json:"browserSecondsUsed"`
|
||||||
|
Total int `json:"total"`
|
||||||
|
Finished int `json:"finished"`
|
||||||
|
Records []CloudflareCrawlRecord `json:"records"`
|
||||||
|
Cursor string `json:"cursor,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// CloudflareCrawlRecord is one entry of a crawl job's "records" array.
type CloudflareCrawlRecord struct {
	// URL is decoded from "url".
	URL string `json:"url"`
	// Status is decoded from "status".
	Status string `json:"status"`
	// Markdown is the page content in markdown form, when present.
	Markdown string `json:"markdown,omitempty"`
	// HTML is the page content as HTML, when present.
	HTML string `json:"html,omitempty"`
	// JSON holds whatever value arrived under "json"; its shape is not fixed here.
	JSON interface{} `json:"json,omitempty"`
	// Metadata holds the free-form "metadata" object.
	Metadata map[string]interface{} `json:"metadata"`
}
|
||||||
|
|
||||||
|
// CloudflareClient bundles the credentials, API base URL and HTTP client used
// for Browser Rendering API calls.
type CloudflareClient struct {
	// AccountID is interpolated into the request path.
	AccountID string
	// APIToken is sent as a Bearer token in the Authorization header.
	APIToken string
	// BaseURL is the API root that request paths are appended to.
	BaseURL string
	// Client performs the HTTP requests.
	Client *http.Client
}
|
||||||
|
|
||||||
|
// fetchOptions carries optional per-request settings for the page fetchers.
type fetchOptions struct {
	// Referer, when non-empty, is sent as the Referer request header.
	Referer string
}
|
||||||
|
|
||||||
|
// browserUserAgent is the User-Agent header value sent by the page fetchers.
const browserUserAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0 Safari/537.36"

// browserAccept is the Accept header value sent by the page fetchers.
const browserAccept = "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8"

// browserAcceptLanguage is the Accept-Language header value sent by the page fetchers.
const browserAcceptLanguage = "cs-CZ,cs;q=0.9,en;q=0.8"

// scraplingHelperPath is the relative location of the Scrapling helper script.
const scraplingHelperPath = "scripts/scrapling_fetch.py"
|
||||||
|
|
||||||
|
//go:embed scripts/scrapling_fetch.py
|
||||||
|
var embeddedScraplingHelper string
|
||||||
|
|
||||||
|
var (
|
||||||
|
embeddedScraplingHelperOnce sync.Once
|
||||||
|
embeddedScraplingHelperFile string
|
||||||
|
embeddedScraplingHelperErr error
|
||||||
|
)
|
||||||
|
|
||||||
|
// NewCloudflareClient creates a new Cloudflare Browser Rendering API client
|
||||||
|
func NewCloudflareClient() *CloudflareClient {
|
||||||
|
accountID := strings.TrimSpace(os.Getenv("CLOUDFLARE_ACCOUNT_ID"))
|
||||||
|
apiToken := strings.TrimSpace(os.Getenv("CLOUDFLARE_API_TOKEN"))
|
||||||
|
|
||||||
|
if accountID == "" || apiToken == "" {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return &CloudflareClient{
|
||||||
|
AccountID: accountID,
|
||||||
|
APIToken: apiToken,
|
||||||
|
BaseURL: "https://api.cloudflare.com/client/v4",
|
||||||
|
Client: &http.Client{
|
||||||
|
Timeout: 30 * time.Second,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// StartCrawl initiates a crawl job
|
||||||
|
func (c *CloudflareClient) StartCrawl(ctx context.Context, req CloudflareCrawlRequest) (string, error) {
|
||||||
|
if c == nil {
|
||||||
|
return "", fmt.Errorf("Cloudflare client not initialized")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Set defaults
|
||||||
|
if req.Limit == 0 {
|
||||||
|
req.Limit = 10
|
||||||
|
}
|
||||||
|
if req.Depth == 0 {
|
||||||
|
req.Depth = 1
|
||||||
|
}
|
||||||
|
if len(req.Formats) == 0 {
|
||||||
|
req.Formats = []string{"html", "markdown"}
|
||||||
|
}
|
||||||
|
if req.Source == "" {
|
||||||
|
req.Source = "all"
|
||||||
|
}
|
||||||
|
|
||||||
|
// Restrict to specific URL patterns for fotbal.cz to avoid crawling unrelated content
|
||||||
|
if req.Options == nil {
|
||||||
|
req.Options = make(map[string]interface{})
|
||||||
|
}
|
||||||
|
|
||||||
|
// Only crawl URLs from the same domain and specific paths
|
||||||
|
includePatterns := []string{
|
||||||
|
"https://www.fotbal.cz/**",
|
||||||
|
}
|
||||||
|
excludePatterns := []string{
|
||||||
|
"**/api/**",
|
||||||
|
"**/static/**",
|
||||||
|
"**/media/**",
|
||||||
|
}
|
||||||
|
|
||||||
|
req.Options["includePatterns"] = includePatterns
|
||||||
|
req.Options["excludePatterns"] = excludePatterns
|
||||||
|
req.Options["includeExternalLinks"] = false
|
||||||
|
req.Options["includeSubdomains"] = false
|
||||||
|
|
||||||
|
body, err := json.Marshal(req)
|
||||||
|
if err != nil {
|
||||||
|
return "", fmt.Errorf("failed to marshal request: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
url := fmt.Sprintf("%s/accounts/%s/browser-rendering/crawl", c.BaseURL, c.AccountID)
|
||||||
|
httpReq, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewReader(body))
|
||||||
|
if err != nil {
|
||||||
|
return "", fmt.Errorf("failed to create request: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
httpReq.Header.Set("Authorization", "Bearer "+c.APIToken)
|
||||||
|
httpReq.Header.Set("Content-Type", "application/json")
|
||||||
|
|
||||||
|
resp, err := c.Client.Do(httpReq)
|
||||||
|
if err != nil {
|
||||||
|
return "", fmt.Errorf("failed to send request: %w", err)
|
||||||
|
}
|
||||||
|
defer resp.Body.Close()
|
||||||
|
|
||||||
|
if resp.StatusCode != http.StatusOK {
|
||||||
|
body, _ := io.ReadAll(resp.Body)
|
||||||
|
return "", fmt.Errorf("API request failed with status %d: %s", resp.StatusCode, string(body))
|
||||||
|
}
|
||||||
|
|
||||||
|
var crawlResp CloudflareCrawlResponse
|
||||||
|
if err := json.NewDecoder(resp.Body).Decode(&crawlResp); err != nil {
|
||||||
|
return "", fmt.Errorf("failed to decode response: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if !crawlResp.Success {
|
||||||
|
return "", fmt.Errorf("API returned unsuccessful response")
|
||||||
|
}
|
||||||
|
|
||||||
|
return crawlResp.Result, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetCrawlResults retrieves the results of a crawl job
|
||||||
|
func (c *CloudflareClient) GetCrawlResults(ctx context.Context, jobID string, limit int) (*CloudflareCrawlJob, error) {
|
||||||
|
if c == nil {
|
||||||
|
return nil, fmt.Errorf("Cloudflare client not initialized")
|
||||||
|
}
|
||||||
|
|
||||||
|
url := fmt.Sprintf("%s/accounts/%s/browser-rendering/crawl/%s", c.BaseURL, c.AccountID, jobID)
|
||||||
|
if limit > 0 {
|
||||||
|
url += fmt.Sprintf("?limit=%d", limit)
|
||||||
|
}
|
||||||
|
|
||||||
|
httpReq, err := http.NewRequestWithContext(ctx, "GET", url, nil)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to create request: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
httpReq.Header.Set("Authorization", "Bearer "+c.APIToken)
|
||||||
|
|
||||||
|
resp, err := c.Client.Do(httpReq)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to send request: %w", err)
|
||||||
|
}
|
||||||
|
defer resp.Body.Close()
|
||||||
|
|
||||||
|
if resp.StatusCode != http.StatusOK {
|
||||||
|
body, _ := io.ReadAll(resp.Body)
|
||||||
|
return nil, fmt.Errorf("API request failed with status %d: %s", resp.StatusCode, string(body))
|
||||||
|
}
|
||||||
|
|
||||||
|
var result struct {
|
||||||
|
Success bool `json:"success"`
|
||||||
|
Result CloudflareCrawlJob `json:"result"`
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to decode response: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if !result.Success {
|
||||||
|
return nil, fmt.Errorf("API returned unsuccessful response")
|
||||||
|
}
|
||||||
|
|
||||||
|
return &result.Result, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// WaitForCrawlCompletion waits for a crawl job to complete and returns the results
|
||||||
|
func (c *CloudflareClient) WaitForCrawlCompletion(ctx context.Context, jobID string, maxAttempts int, delay time.Duration) (*CloudflareCrawlJob, error) {
|
||||||
|
if c == nil {
|
||||||
|
return nil, fmt.Errorf("Cloudflare client not initialized")
|
||||||
|
}
|
||||||
|
|
||||||
|
for i := 0; i < maxAttempts; i++ {
|
||||||
|
job, err := c.GetCrawlResults(ctx, jobID, 1) // Use limit=1 for status checks
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if job.Status != "running" {
|
||||||
|
// Get full results
|
||||||
|
fullJob, err := c.GetCrawlResults(ctx, jobID, 0) // No limit for full results
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return fullJob, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
select {
|
||||||
|
case <-ctx.Done():
|
||||||
|
return nil, ctx.Err()
|
||||||
|
case <-time.After(delay):
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil, fmt.Errorf("crawl job did not complete within timeout")
|
||||||
|
}
|
||||||
|
|
||||||
|
// CrawlURL performs a complete crawl operation for a single URL
|
||||||
|
func (c *CloudflareClient) CrawlURL(ctx context.Context, url string) (*CloudflareCrawlJob, error) {
|
||||||
|
if c == nil {
|
||||||
|
return nil, fmt.Errorf("Cloudflare client not initialized")
|
||||||
|
}
|
||||||
|
|
||||||
|
req := CloudflareCrawlRequest{
|
||||||
|
URL: url,
|
||||||
|
Limit: 1, // Only crawl the specific URL
|
||||||
|
Depth: 0, // Don't follow links
|
||||||
|
Formats: []string{"html", "markdown"},
|
||||||
|
Render: true,
|
||||||
|
Source: "links", // Only crawl the specific URL, not sitemaps
|
||||||
|
}
|
||||||
|
|
||||||
|
jobID, err := c.StartCrawl(ctx, req)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to start crawl: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Wait for completion with reasonable timeout
|
||||||
|
ctx, cancel := context.WithTimeout(ctx, 2*time.Minute)
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
|
job, err := c.WaitForCrawlCompletion(ctx, jobID, 24, 5*time.Second)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to wait for crawl completion: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return job, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func newBrowserRequest(url string, opts fetchOptions) (*http.Request, error) {
|
||||||
|
req, err := http.NewRequest("GET", url, nil)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to create request: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
req.Header.Set("User-Agent", browserUserAgent)
|
||||||
|
req.Header.Set("Accept", browserAccept)
|
||||||
|
req.Header.Set("Accept-Language", browserAcceptLanguage)
|
||||||
|
if opts.Referer != "" {
|
||||||
|
req.Header.Set("Referer", opts.Referer)
|
||||||
|
}
|
||||||
|
|
||||||
|
return req, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// looksLikeCloudflareBlock reports whether body resembles a Cloudflare
// challenge or block page. Matching is case-insensitive.
//
// A body matches when it contains any one of the fixed challenge phrases, or
// when it references /cdn-cgi/challenge-platform/ together with at least one
// of the challenge script markers. An empty body never matches.
func looksLikeCloudflareBlock(body []byte) bool {
	if len(body) == 0 {
		return false
	}

	page := strings.ToLower(string(body))
	has := func(marker string) bool {
		return strings.Contains(page, marker)
	}

	// Phrases that identify a challenge page on their own.
	switch {
	case has("<title>just a moment...</title>"),
		has("attention required!"),
		has("enable javascript and cookies to continue"),
		has("checking if the site connection is secure"),
		has("cf-browser-verification"):
		return true
	}

	// The challenge-platform path alone is not enough; it must come with one
	// of the challenge script markers.
	if !has("/cdn-cgi/challenge-platform/") {
		return false
	}
	return has("window._cf_chl_opt") || has("__cf_chl_rt_tk") || has("cf_chl_seq_")
}
|
||||||
|
|
||||||
|
func compactErrorText(s string) string {
|
||||||
|
s = strings.Join(strings.Fields(strings.TrimSpace(s)), " ")
|
||||||
|
if len(s) > 220 {
|
||||||
|
return s[:217] + "..."
|
||||||
|
}
|
||||||
|
return s
|
||||||
|
}
|
||||||
|
|
||||||
|
func fetchPageDirect(url string, opts fetchOptions) ([]byte, error) {
|
||||||
|
req, err := newBrowserRequest(url, opts)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
client := &http.Client{Timeout: 15 * time.Second}
|
||||||
|
resp, err := client.Do(req)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("direct request failed: %w", err)
|
||||||
|
}
|
||||||
|
defer resp.Body.Close()
|
||||||
|
|
||||||
|
body, err := io.ReadAll(resp.Body)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to read response body: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if resp.StatusCode != http.StatusOK {
|
||||||
|
return nil, fmt.Errorf("direct request returned HTTP %d", resp.StatusCode)
|
||||||
|
}
|
||||||
|
if looksLikeCloudflareBlock(body) {
|
||||||
|
return nil, fmt.Errorf("direct request returned a Cloudflare challenge page")
|
||||||
|
}
|
||||||
|
|
||||||
|
return body, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func fetchPageWithWget(url string, opts fetchOptions) ([]byte, error) {
|
||||||
|
if _, err := exec.LookPath("wget"); err != nil {
|
||||||
|
return nil, fmt.Errorf("wget not available: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), 20*time.Second)
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
|
args := []string{
|
||||||
|
"--quiet",
|
||||||
|
"--tries=1",
|
||||||
|
"--timeout=15",
|
||||||
|
"--max-redirect=10",
|
||||||
|
"--output-document=-",
|
||||||
|
"--user-agent=" + browserUserAgent,
|
||||||
|
"--header=Accept: " + browserAccept,
|
||||||
|
"--header=Accept-Language: " + browserAcceptLanguage,
|
||||||
|
}
|
||||||
|
if opts.Referer != "" {
|
||||||
|
args = append(args, "--header=Referer: "+opts.Referer)
|
||||||
|
}
|
||||||
|
args = append(args, url)
|
||||||
|
|
||||||
|
cmd := exec.CommandContext(ctx, "wget", args...)
|
||||||
|
var stdout bytes.Buffer
|
||||||
|
var stderr bytes.Buffer
|
||||||
|
cmd.Stdout = &stdout
|
||||||
|
cmd.Stderr = &stderr
|
||||||
|
|
||||||
|
if err := cmd.Run(); err != nil {
|
||||||
|
details := compactErrorText(stderr.String())
|
||||||
|
if details == "" {
|
||||||
|
details = compactErrorText(err.Error())
|
||||||
|
}
|
||||||
|
return nil, fmt.Errorf("wget request failed: %s", details)
|
||||||
|
}
|
||||||
|
|
||||||
|
body := stdout.Bytes()
|
||||||
|
if len(body) == 0 {
|
||||||
|
return nil, fmt.Errorf("wget returned an empty body")
|
||||||
|
}
|
||||||
|
if looksLikeCloudflareBlock(body) {
|
||||||
|
return nil, fmt.Errorf("wget returned a Cloudflare challenge page")
|
||||||
|
}
|
||||||
|
|
||||||
|
return body, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func firstExistingFile(paths ...string) string {
|
||||||
|
for _, path := range paths {
|
||||||
|
path = strings.TrimSpace(path)
|
||||||
|
if path == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if info, err := os.Stat(path); err == nil && !info.IsDir() {
|
||||||
|
return path
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
func firstExecutable(paths ...string) string {
|
||||||
|
for _, path := range paths {
|
||||||
|
path = strings.TrimSpace(path)
|
||||||
|
if path == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if strings.ContainsRune(path, os.PathSeparator) {
|
||||||
|
if info, err := os.Stat(path); err == nil && !info.IsDir() {
|
||||||
|
return path
|
||||||
|
}
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if resolved, err := exec.LookPath(path); err == nil {
|
||||||
|
return resolved
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
func ensureEmbeddedScraplingHelper() (string, error) {
|
||||||
|
embeddedScraplingHelperOnce.Do(func() {
|
||||||
|
if strings.TrimSpace(embeddedScraplingHelper) == "" {
|
||||||
|
embeddedScraplingHelperErr = fmt.Errorf("embedded Scrapling helper is empty")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
file, err := os.CreateTemp("", "facr-scrapling-*.py")
|
||||||
|
if err != nil {
|
||||||
|
embeddedScraplingHelperErr = fmt.Errorf("create embedded Scrapling helper: %w", err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
defer file.Close()
|
||||||
|
|
||||||
|
if _, err := file.WriteString(embeddedScraplingHelper); err != nil {
|
||||||
|
embeddedScraplingHelperErr = fmt.Errorf("write embedded Scrapling helper: %w", err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if err := file.Chmod(0600); err != nil {
|
||||||
|
embeddedScraplingHelperErr = fmt.Errorf("chmod embedded Scrapling helper: %w", err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
embeddedScraplingHelperFile = file.Name()
|
||||||
|
})
|
||||||
|
|
||||||
|
if embeddedScraplingHelperErr != nil {
|
||||||
|
return "", embeddedScraplingHelperErr
|
||||||
|
}
|
||||||
|
if embeddedScraplingHelperFile == "" {
|
||||||
|
return "", fmt.Errorf("embedded Scrapling helper path is empty")
|
||||||
|
}
|
||||||
|
return embeddedScraplingHelperFile, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func findScraplingHelperScript() (string, error) {
|
||||||
|
cwd, _ := os.Getwd()
|
||||||
|
|
||||||
|
exePath, _ := os.Executable()
|
||||||
|
exeDir := ""
|
||||||
|
if exePath != "" {
|
||||||
|
exeDir = filepath.Dir(exePath)
|
||||||
|
}
|
||||||
|
|
||||||
|
if path := firstExistingFile(
|
||||||
|
os.Getenv("SCRAPLING_SCRIPT"),
|
||||||
|
filepath.Join(cwd, scraplingHelperPath),
|
||||||
|
filepath.Join(exeDir, scraplingHelperPath),
|
||||||
|
); path != "" {
|
||||||
|
return path, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return ensureEmbeddedScraplingHelper()
|
||||||
|
}
|
||||||
|
|
||||||
|
func findScraplingPython() string {
|
||||||
|
cwd, _ := os.Getwd()
|
||||||
|
|
||||||
|
exePath, _ := os.Executable()
|
||||||
|
exeDir := ""
|
||||||
|
if exePath != "" {
|
||||||
|
exeDir = filepath.Dir(exePath)
|
||||||
|
}
|
||||||
|
|
||||||
|
return firstExecutable(
|
||||||
|
os.Getenv("SCRAPLING_PYTHON_BIN"),
|
||||||
|
filepath.Join(cwd, ".venv-scrapling", "bin", "python3"),
|
||||||
|
filepath.Join(cwd, ".venv-scrapling", "bin", "python"),
|
||||||
|
filepath.Join(cwd, ".venv", "bin", "python3"),
|
||||||
|
filepath.Join(cwd, ".venv", "bin", "python"),
|
||||||
|
filepath.Join(exeDir, ".venv-scrapling", "bin", "python3"),
|
||||||
|
filepath.Join(exeDir, ".venv-scrapling", "bin", "python"),
|
||||||
|
filepath.Join(exeDir, ".venv", "bin", "python3"),
|
||||||
|
filepath.Join(exeDir, ".venv", "bin", "python"),
|
||||||
|
"python3",
|
||||||
|
"python",
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
func fetchPageWithScrapling(url string, opts fetchOptions) ([]byte, error) {
|
||||||
|
pythonBin := findScraplingPython()
|
||||||
|
if pythonBin == "" {
|
||||||
|
return nil, fmt.Errorf("Scrapling skipped: no Python runtime found")
|
||||||
|
}
|
||||||
|
|
||||||
|
helperScript, err := findScraplingHelperScript()
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("Scrapling skipped: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), 90*time.Second)
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
|
args := []string{helperScript, "--url", url}
|
||||||
|
if opts.Referer != "" {
|
||||||
|
args = append(args, "--referer", opts.Referer)
|
||||||
|
}
|
||||||
|
|
||||||
|
cmd := exec.CommandContext(ctx, pythonBin, args...)
|
||||||
|
var stdout bytes.Buffer
|
||||||
|
var stderr bytes.Buffer
|
||||||
|
cmd.Stdout = &stdout
|
||||||
|
cmd.Stderr = &stderr
|
||||||
|
|
||||||
|
if err := cmd.Run(); err != nil {
|
||||||
|
details := compactErrorText(stderr.String())
|
||||||
|
if details == "" {
|
||||||
|
details = compactErrorText(err.Error())
|
||||||
|
}
|
||||||
|
return nil, fmt.Errorf("Scrapling request failed: %s", details)
|
||||||
|
}
|
||||||
|
|
||||||
|
body := stdout.Bytes()
|
||||||
|
if len(body) == 0 {
|
||||||
|
return nil, fmt.Errorf("Scrapling returned an empty body")
|
||||||
|
}
|
||||||
|
if looksLikeCloudflareBlock(body) {
|
||||||
|
return nil, fmt.Errorf("Scrapling returned a Cloudflare challenge page")
|
||||||
|
}
|
||||||
|
|
||||||
|
return body, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func fetchPageWithFallback(url string) ([]byte, error) {
|
||||||
|
return fetchPageWithFallbackOptions(url, fetchOptions{})
|
||||||
|
}
|
||||||
|
|
||||||
|
// fetchPageWithFallback tries Go HTTP first, then wget, then Scrapling, then Cloudflare Browser Rendering.
|
||||||
|
func fetchPageWithFallbackOptions(url string, opts fetchOptions) ([]byte, error) {
|
||||||
|
body, err := fetchPageDirect(url, opts)
|
||||||
|
if err == nil {
|
||||||
|
return body, nil
|
||||||
|
}
|
||||||
|
log.Printf("Direct request failed for %s: %v", url, err)
|
||||||
|
|
||||||
|
body, err = fetchPageWithWget(url, opts)
|
||||||
|
if err == nil {
|
||||||
|
log.Printf("Successfully retrieved content via wget for %s", url)
|
||||||
|
return body, nil
|
||||||
|
}
|
||||||
|
log.Printf("wget fallback failed for %s: %v", url, err)
|
||||||
|
|
||||||
|
body, err = fetchPageWithScrapling(url, opts)
|
||||||
|
if err == nil {
|
||||||
|
log.Printf("Successfully retrieved content via Scrapling for %s", url)
|
||||||
|
return body, nil
|
||||||
|
}
|
||||||
|
log.Printf("Scrapling fallback failed for %s: %v", url, err)
|
||||||
|
|
||||||
|
if cfClient := NewCloudflareClient(); cfClient != nil {
|
||||||
|
log.Printf("Attempting Cloudflare crawl fallback for %s", url)
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), 3*time.Minute)
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
|
job, err := cfClient.CrawlURL(ctx, url)
|
||||||
|
if err != nil {
|
||||||
|
log.Printf("Cloudflare crawl failed for %s: %v", url, err)
|
||||||
|
return nil, fmt.Errorf("go scraping failed, wget failed, Scrapling failed, and Cloudflare crawl failed: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(job.Records) > 0 && job.Records[0].Status == "completed" {
|
||||||
|
body := []byte(job.Records[0].HTML)
|
||||||
|
if looksLikeCloudflareBlock(body) {
|
||||||
|
return nil, fmt.Errorf("Cloudflare crawl returned a challenge page")
|
||||||
|
}
|
||||||
|
log.Printf("Successfully retrieved content via Cloudflare crawl for %s", url)
|
||||||
|
return body, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Printf("Cloudflare crawl returned no completed records for %s", url)
|
||||||
|
return nil, fmt.Errorf("Cloudflare crawl returned no completed records")
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil, fmt.Errorf("go scraping failed, wget failed, Scrapling failed, and Cloudflare client is not available")
|
||||||
|
}
|
||||||
|
|
||||||
// parseCompetitionMatchesFromFotbal scrapes matches from the public fotbal.cz
|
// parseCompetitionMatchesFromFotbal scrapes matches from the public fotbal.cz
|
||||||
// competition page (e.g., https://www.fotbal.cz/souteze/turnaje/table/{id}).
|
// competition page (e.g., https://www.fotbal.cz/souteze/turnaje/table/{id}).
|
||||||
// It filters to only include matches involving the given clubName if provided.
|
// It filters to only include matches involving the given clubName if provided.
|
||||||
@@ -38,26 +647,16 @@ func parseCompetitionMatchesFromFotbal(pageURL, clubType, clubName, clubID strin
|
|||||||
if pageURL == "" {
|
if pageURL == "" {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
// Request with browser-like headers; some fotbal.cz pages 404 without them
|
|
||||||
req, _ := http.NewRequest("GET", pageURL, nil)
|
body, err := fetchPageWithFallback(pageURL)
|
||||||
req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0 Safari/537.36")
|
|
||||||
req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8")
|
|
||||||
req.Header.Set("Accept-Language", "cs-CZ,cs;q=0.9,en;q=0.8")
|
|
||||||
client := &http.Client{Timeout: 15 * time.Second}
|
|
||||||
resp, err := client.Do(req)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Printf("fotbal.cz matches fetch error for %s: %v", pageURL, err)
|
log.Printf("fotbal.cz matches fetch failed for %s: %v", pageURL, err)
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
defer resp.Body.Close()
|
|
||||||
if resp.StatusCode != http.StatusOK {
|
// If we still don't have body content, return nil
|
||||||
log.Printf("fotbal.cz matches non-200 for %s: %d", pageURL, resp.StatusCode)
|
if len(body) == 0 {
|
||||||
return nil
|
log.Printf("No content available for %s", pageURL)
|
||||||
}
|
|
||||||
// Read body to optionally save and to allow multiple reads
|
|
||||||
body, err := io.ReadAll(resp.Body)
|
|
||||||
if err != nil {
|
|
||||||
log.Printf("fotbal.cz matches read error for %s: %v", pageURL, err)
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
// Debug: save full HTML if env toggled
|
// Debug: save full HTML if env toggled
|
||||||
@@ -376,10 +975,6 @@ func parseCompetitionMatchesFromIS(detailURL, clubType, clubName, clubID string)
|
|||||||
|
|
||||||
var logoCache = map[string]string{}
|
var logoCache = map[string]string{}
|
||||||
|
|
||||||
var missingLogoNotified = map[string]bool{}
|
|
||||||
|
|
||||||
var missingLogoMu sync.Mutex
|
|
||||||
|
|
||||||
type logoAPISearchResult struct {
|
type logoAPISearchResult struct {
|
||||||
ID string `json:"id"`
|
ID string `json:"id"`
|
||||||
Name string `json:"name"`
|
Name string `json:"name"`
|
||||||
@@ -516,7 +1111,7 @@ func getLogoFromLogoAPI(teamName string, teamID string) string {
|
|||||||
return r.LogoURL
|
return r.LogoURL
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// No strong match – treat as "no logo" so upstream can fall back to FACR and send notification.
|
// No strong match – treat as "no logo" so upstream can fall back to FACR assets.
|
||||||
logoCache[cacheKey] = ""
|
logoCache[cacheKey] = ""
|
||||||
return ""
|
return ""
|
||||||
}
|
}
|
||||||
@@ -584,93 +1179,6 @@ func getLogoBySearch(name string) string {
|
|||||||
return best
|
return best
|
||||||
}
|
}
|
||||||
|
|
||||||
func notifyMissingLogo(teamName string, teamID string) {
|
|
||||||
name := strings.ToLower(strings.TrimSpace(teamName))
|
|
||||||
id := strings.ToLower(strings.TrimSpace(teamID))
|
|
||||||
if name == "" && id == "" {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
key := name
|
|
||||||
if id != "" {
|
|
||||||
if key != "" {
|
|
||||||
key += "|"
|
|
||||||
}
|
|
||||||
key += id
|
|
||||||
}
|
|
||||||
missingLogoMu.Lock()
|
|
||||||
if missingLogoNotified[key] {
|
|
||||||
missingLogoMu.Unlock()
|
|
||||||
return
|
|
||||||
}
|
|
||||||
missingLogoNotified[key] = true
|
|
||||||
missingLogoMu.Unlock()
|
|
||||||
if err := sendMissingLogoEmail(teamName, teamID); err != nil {
|
|
||||||
log.Printf("error sending missing logo email for %s (%s): %v", teamName, teamID, err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func sendMissingLogoEmail(teamName string, teamID string) error {
|
|
||||||
host := strings.TrimSpace(os.Getenv("SMTP_HOST"))
|
|
||||||
if host == "" {
|
|
||||||
host = "smtp.purelymail.com"
|
|
||||||
}
|
|
||||||
port := strings.TrimSpace(os.Getenv("SMTP_PORT"))
|
|
||||||
if port == "" {
|
|
||||||
port = "465"
|
|
||||||
}
|
|
||||||
user := strings.TrimSpace(os.Getenv("SMTP_USER"))
|
|
||||||
pass := os.Getenv("SMTP_PASS")
|
|
||||||
if user == "" || pass == "" {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
to := strings.TrimSpace(os.Getenv("MISSING_LOGO_NOTIFY_TO"))
|
|
||||||
if to == "" {
|
|
||||||
to = "info@tdvorak.dev"
|
|
||||||
}
|
|
||||||
addr := host + ":" + port
|
|
||||||
subject := "Missing local logo for club"
|
|
||||||
body := fmt.Sprintf("A club logo is missing in logoapi.\n\nName: %s\nID: %s\nTime: %s\n", teamName, teamID, time.Now().Format(time.RFC3339))
|
|
||||||
var msg bytes.Buffer
|
|
||||||
msg.WriteString("From: " + user + "\r\n")
|
|
||||||
msg.WriteString("To: " + to + "\r\n")
|
|
||||||
msg.WriteString("Subject: " + subject + "\r\n")
|
|
||||||
msg.WriteString("MIME-Version: 1.0\r\n")
|
|
||||||
msg.WriteString("Content-Type: text/plain; charset=utf-8\r\n")
|
|
||||||
msg.WriteString("\r\n")
|
|
||||||
msg.WriteString(body)
|
|
||||||
tlsConfig := &tls.Config{ServerName: host}
|
|
||||||
conn, err := tls.Dial("tcp", addr, tlsConfig)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
defer conn.Close()
|
|
||||||
c, err := smtp.NewClient(conn, host)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
defer c.Close()
|
|
||||||
if err := c.Auth(smtp.PlainAuth("", user, pass, host)); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
if err := c.Mail(user); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
if err := c.Rcpt(to); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
w, err := c.Data()
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
if _, err := w.Write(msg.Bytes()); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
if err := w.Close(); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
return c.Quit()
|
|
||||||
}
|
|
||||||
|
|
||||||
func getLogo(teamName string, teamID string) string {
|
func getLogo(teamName string, teamID string) string {
|
||||||
placeholder := "https://www.fotbal.cz/dist/img/logo-club-empty.svg"
|
placeholder := "https://www.fotbal.cz/dist/img/logo-club-empty.svg"
|
||||||
name := strings.ToLower(strings.TrimSpace(teamName))
|
name := strings.ToLower(strings.TrimSpace(teamName))
|
||||||
@@ -680,7 +1188,6 @@ func getLogo(teamName string, teamID string) string {
|
|||||||
if logo := getLogoFromLogoAPI(teamName, teamID); logo != "" {
|
if logo := getLogoFromLogoAPI(teamName, teamID); logo != "" {
|
||||||
return logo
|
return logo
|
||||||
}
|
}
|
||||||
notifyMissingLogo(teamName, teamID)
|
|
||||||
// If we have a team ID, construct the official logo URL directly.
|
// If we have a team ID, construct the official logo URL directly.
|
||||||
// This avoids wrong matches for duplicate names (e.g., multiple "Ořechov").
|
// This avoids wrong matches for duplicate names (e.g., multiple "Ořechov").
|
||||||
if tid := strings.TrimSpace(teamID); tid != "" {
|
if tid := strings.TrimSpace(teamID); tid != "" {
|
||||||
@@ -736,26 +1243,18 @@ func getClubSearch(w http.ResponseWriter, r *http.Request) {
|
|||||||
vals.Set("q", q)
|
vals.Set("q", q)
|
||||||
searchURL := "https://www.fotbal.cz/club/hledej?" + vals.Encode()
|
searchURL := "https://www.fotbal.cz/club/hledej?" + vals.Encode()
|
||||||
|
|
||||||
req, err := http.NewRequest("GET", searchURL, nil)
|
fetchSearchPage := func(url string) ([]byte, error) {
|
||||||
if err != nil {
|
return fetchPageWithFallbackOptions(url, fetchOptions{
|
||||||
http.Error(w, fmt.Sprintf("Error creating request: %v", err), http.StatusInternalServerError)
|
Referer: "https://www.fotbal.cz/club/hledej",
|
||||||
return
|
})
|
||||||
}
|
}
|
||||||
// Set headers to mimic a browser; fotbal.cz may 404 otherwise
|
|
||||||
req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0 Safari/537.36")
|
// Try direct HTTP request first
|
||||||
req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8")
|
body, err := fetchSearchPage(searchURL)
|
||||||
req.Header.Set("Accept-Language", "cs-CZ,cs;q=0.9,en;q=0.8")
|
|
||||||
req.Header.Set("Referer", "https://www.fotbal.cz/club/hledej")
|
|
||||||
client := &http.Client{}
|
|
||||||
resp, err := client.Do(req)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
http.Error(w, fmt.Sprintf("Error fetching search page: %v", err), http.StatusInternalServerError)
|
log.Printf("Direct search request failed for %s: %v", searchURL, err)
|
||||||
return
|
|
||||||
}
|
// Retry with quoted query for short tokens
|
||||||
defer resp.Body.Close()
|
|
||||||
if resp.StatusCode != http.StatusOK {
|
|
||||||
// Retry once. If query has very short tokens, try quoting the whole query.
|
|
||||||
resp.Body.Close()
|
|
||||||
searchURL2 := searchURL
|
searchURL2 := searchURL
|
||||||
tokens := strings.Fields(q)
|
tokens := strings.Fields(q)
|
||||||
for _, t := range tokens {
|
for _, t := range tokens {
|
||||||
@@ -766,18 +1265,11 @@ func getClubSearch(w http.ResponseWriter, r *http.Request) {
|
|||||||
break
|
break
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
req2, _ := http.NewRequest("GET", searchURL2, nil)
|
|
||||||
req2.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0 Safari/537.36")
|
body, err = fetchSearchPage(searchURL2)
|
||||||
req2.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8")
|
if err != nil {
|
||||||
req2.Header.Set("Accept-Language", "en-US,en;q=0.9")
|
log.Printf("Retried search request failed for %s: %v", searchURL2, err)
|
||||||
resp2, err2 := client.Do(req2)
|
// Return empty results instead of error
|
||||||
if err2 != nil {
|
|
||||||
http.Error(w, fmt.Sprintf("Error fetching (retry): %v", err2), http.StatusBadGateway)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
defer resp2.Body.Close()
|
|
||||||
if resp2.StatusCode != http.StatusOK {
|
|
||||||
// Treat as no results instead of surfacing error to client
|
|
||||||
w.Header().Set("Content-Type", "application/json")
|
w.Header().Set("Content-Type", "application/json")
|
||||||
json.NewEncoder(w).Encode(map[string]any{
|
json.NewEncoder(w).Encode(map[string]any{
|
||||||
"query": q,
|
"query": q,
|
||||||
@@ -786,11 +1278,9 @@ func getClubSearch(w http.ResponseWriter, r *http.Request) {
|
|||||||
})
|
})
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
// replace resp with resp2 for downstream parsing
|
|
||||||
resp = resp2
|
|
||||||
}
|
}
|
||||||
|
|
||||||
doc, err := goquery.NewDocumentFromReader(resp.Body)
|
doc, err := goquery.NewDocumentFromReader(bytes.NewReader(body))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
http.Error(w, fmt.Sprintf("Error parsing HTML: %v", err), http.StatusInternalServerError)
|
http.Error(w, fmt.Sprintf("Error parsing HTML: %v", err), http.StatusInternalServerError)
|
||||||
return
|
return
|
||||||
@@ -887,19 +1377,13 @@ func getClubTables(w http.ResponseWriter, r *http.Request) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
url := fmt.Sprintf("%s/%s", baseURL, clubID)
|
url := fmt.Sprintf("%s/%s", baseURL, clubID)
|
||||||
resp, err := http.Get(url)
|
body, err := fetchPageWithFallback(url)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
http.Error(w, fmt.Sprintf("Error fetching club data: %v", err), http.StatusInternalServerError)
|
http.Error(w, fmt.Sprintf("Error fetching club data: %v", err), http.StatusInternalServerError)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
defer resp.Body.Close()
|
|
||||||
|
|
||||||
if resp.StatusCode != http.StatusOK {
|
doc, err := goquery.NewDocumentFromReader(bytes.NewReader(body))
|
||||||
http.Error(w, fmt.Sprintf("Error: received status code %d", resp.StatusCode), resp.StatusCode)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
doc, err := goquery.NewDocumentFromReader(resp.Body)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
http.Error(w, fmt.Sprintf("Error parsing HTML: %v", err), http.StatusInternalServerError)
|
http.Error(w, fmt.Sprintf("Error parsing HTML: %v", err), http.StatusInternalServerError)
|
||||||
return
|
return
|
||||||
@@ -1067,17 +1551,12 @@ func getClubInfo(w http.ResponseWriter, r *http.Request) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
url := fmt.Sprintf("%s/%s", baseURL, clubID)
|
url := fmt.Sprintf("%s/%s", baseURL, clubID)
|
||||||
resp, err := http.Get(url)
|
body, err := fetchPageWithFallback(url)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
http.Error(w, fmt.Sprintf("Error fetching club data: %v", err), http.StatusInternalServerError)
|
http.Error(w, fmt.Sprintf("Error fetching club data: %v", err), http.StatusInternalServerError)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
defer resp.Body.Close()
|
doc, err := goquery.NewDocumentFromReader(bytes.NewReader(body))
|
||||||
if resp.StatusCode != http.StatusOK {
|
|
||||||
http.Error(w, fmt.Sprintf("Error: received status code %d", resp.StatusCode), resp.StatusCode)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
doc, err := goquery.NewDocumentFromReader(resp.Body)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
http.Error(w, fmt.Sprintf("Error parsing HTML: %v", err), http.StatusInternalServerError)
|
http.Error(w, fmt.Sprintf("Error parsing HTML: %v", err), http.StatusInternalServerError)
|
||||||
return
|
return
|
||||||
|
|||||||
@@ -0,0 +1 @@
|
|||||||
|
scrapling[fetchers]==0.4.1
|
||||||
@@ -0,0 +1,79 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import contextlib
|
||||||
|
import logging
|
||||||
|
import sys
|
||||||
|
|
||||||
|
|
||||||
|
def response_body_bytes(response) -> bytes:
    """Return the payload of *response* as bytes.

    The ``body`` attribute is preferred: bytes-like values are returned as
    ``bytes`` and strings are UTF-8 encoded. Otherwise a string ``text``
    attribute is UTF-8 encoded. As a last resort the string form of the
    response object itself is encoded.
    """
    raw = getattr(response, "body", None)
    if isinstance(raw, str):
        return raw.encode("utf-8")
    if isinstance(raw, (bytes, bytearray)):
        return bytes(raw)

    fallback = getattr(response, "text", None)
    rendered = fallback if isinstance(fallback, str) else str(response)
    return rendered.encode("utf-8")
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> int:
    """Fetch one URL with Scrapling's StealthyFetcher and write it to stdout.

    Command-line arguments: ``--url`` (required), ``--referer``,
    ``--timeout-ms`` and ``--wait-ms``. Diagnostics go to stderr; stdout
    carries only the response body.

    Returns the process exit code: 0 on success (or when the reader closed
    the pipe early), 1 when the fetch fails, the response status is >= 400,
    or the body is empty, and 2 when Scrapling cannot be imported.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--url", required=True)
    parser.add_argument("--referer", default="")
    parser.add_argument("--timeout-ms", type=int, default=45000)
    parser.add_argument("--wait-ms", type=int, default=1000)
    args = parser.parse_args()

    # Imported lazily so a missing or broken installation is reported as a
    # clean error message and a distinct exit code instead of a traceback.
    try:
        from scrapling.fetchers import StealthyFetcher
    except Exception as exc:
        print(f"Scrapling import failed: {exc}", file=sys.stderr)
        return 2

    logging.getLogger().setLevel(logging.ERROR)

    fetch_kwargs = {
        "headless": True,
        "network_idle": True,
        "google_search": False,
        "solve_cloudflare": True,
        "timeout": args.timeout_ms,
        "wait": args.wait_ms,
    }
    if args.referer:
        fetch_kwargs["extra_headers"] = {"Referer": args.referer}

    try:
        # Anything the fetcher prints must not pollute stdout, which is
        # reserved for the page body.
        with contextlib.redirect_stdout(sys.stderr):
            response = StealthyFetcher.fetch(args.url, **fetch_kwargs)
    except Exception as exc:
        print(f"Scrapling fetch failed: {exc}", file=sys.stderr)
        return 1

    status = getattr(response, "status", None)
    if isinstance(status, int) and status >= 400:
        print(f"Scrapling returned HTTP {status}", file=sys.stderr)
        return 1

    body = response_body_bytes(response)
    if not body:
        print("Scrapling returned an empty body", file=sys.stderr)
        return 1

    try:
        sys.stdout.buffer.write(body)
        # Flush here so a closed pipe is detected inside this handler rather
        # than during the interpreter's exit-time flush.
        sys.stdout.buffer.flush()
    except BrokenPipeError:
        import os

        # Point stdout at devnull so the exit-time flush cannot raise again.
        devnull = os.open(os.devnull, os.O_WRONLY)
        os.dup2(devnull, sys.stdout.fileno())
        return 0
    return 0
|
||||||
|
|
||||||
|
|
||||||
|
# Script entry point: run main() and use its return value as the exit code.
if __name__ == "__main__":
    sys.exit(main())
|
||||||
Reference in New Issue
Block a user