This commit is contained in:
Tomas Dvorak
2026-03-12 19:11:08 +01:00
parent 7773947450
commit 455bf61302
7 changed files with 724 additions and 240 deletions
+3 -5
View File
@@ -1,6 +1,4 @@
export LOGOAPI_BASE_URL="https://logoapi.sportcreative.eu" # or your real logoapi base URL export LOGOAPI_BASE_URL="https://logoapi.sportcreative.eu" # or your real logoapi base URL
export SMTP_HOST="smtp.purelymail.com" # Cloudflare Browser Rendering API configuration
export SMTP_PORT="465" export CLOUDFLARE_ACCOUNT_ID="2154bf34f65a995f9b85aa17fee9da43" # Your Cloudflare account ID
export SMTP_USER="info@tdvorak.dev" export CLOUDFLARE_API_TOKEN="TdhMaQWPnxCwc-g22W9l-A26hYTdkn_9tQCUKZ0h" # API token with Browser Rendering - Edit permission
export SMTP_PASS="%8s3Yad*!b3*t"
export MISSING_LOGO_NOTIFY_TO="info@tdvorak.dev"
+2
View File
@@ -0,0 +1,2 @@
.venv-scrapling/
__pycache__/
-15
View File
@@ -11,20 +11,5 @@ require (
require ( require (
github.com/andybalholm/cascadia v1.3.3 // indirect github.com/andybalholm/cascadia v1.3.3 // indirect
github.com/chromedp/cdproto v0.0.0-20250803210736-d308e07a266d // indirect
github.com/chromedp/chromedp v0.14.1 // indirect
github.com/chromedp/sysutil v1.1.0 // indirect
github.com/go-json-experiment/json v0.0.0-20250813024750-ebf49471dced // indirect
github.com/go-rod/rod v0.108.1 // indirect
github.com/gobwas/httphead v0.1.0 // indirect
github.com/gobwas/pool v0.2.1 // indirect
github.com/gobwas/ws v1.4.0 // indirect
github.com/ledongthuc/pdf v0.0.0-20250511090121-5959a4027728 // indirect
github.com/ysmood/fetchup v0.5.2 // indirect
github.com/ysmood/goob v0.4.0 // indirect
github.com/ysmood/got v0.41.0 // indirect
github.com/ysmood/gson v0.7.3 // indirect
github.com/ysmood/leakless v0.9.0 // indirect
golang.org/x/net v0.43.0 // indirect golang.org/x/net v0.43.0 // indirect
golang.org/x/sys v0.35.0 // indirect
) )
-60
View File
@@ -1,62 +1,10 @@
github.com/PuerkitoBio/goquery v1.8.1 h1:uQxhNlArOIdbrH1tr0UXwdVFgDcZDrZVdcpygAcwmWM=
github.com/PuerkitoBio/goquery v1.8.1/go.mod h1:Q8ICL1kNUJ2sXGoAhPGUdYDJvgQgHzJsnnd3H7Ho5jQ=
github.com/PuerkitoBio/goquery v1.10.3 h1:pFYcNSqHxBD06Fpj/KsbStFRsgRATgnf3LeXiUkhzPo= github.com/PuerkitoBio/goquery v1.10.3 h1:pFYcNSqHxBD06Fpj/KsbStFRsgRATgnf3LeXiUkhzPo=
github.com/PuerkitoBio/goquery v1.10.3/go.mod h1:tMUX0zDMHXYlAQk6p35XxQMqMweEKB7iK7iLNd4RH4Y= github.com/PuerkitoBio/goquery v1.10.3/go.mod h1:tMUX0zDMHXYlAQk6p35XxQMqMweEKB7iK7iLNd4RH4Y=
github.com/andybalholm/cascadia v1.3.1 h1:nhxRkql1kdYCc8Snf7D5/D3spOX+dBgjA6u8x004T2c=
github.com/andybalholm/cascadia v1.3.1/go.mod h1:R4bJ1UQfqADjvDa4P6HZHLh/3OxWWEqc0Sk8XGwHqvA=
github.com/andybalholm/cascadia v1.3.3 h1:AG2YHrzJIm4BZ19iwJ/DAua6Btl3IwJX+VI4kktS1LM= github.com/andybalholm/cascadia v1.3.3 h1:AG2YHrzJIm4BZ19iwJ/DAua6Btl3IwJX+VI4kktS1LM=
github.com/andybalholm/cascadia v1.3.3/go.mod h1:xNd9bqTn98Ln4DwST8/nG+H0yuB8Hmgu1YHNnWw0GeA= github.com/andybalholm/cascadia v1.3.3/go.mod h1:xNd9bqTn98Ln4DwST8/nG+H0yuB8Hmgu1YHNnWw0GeA=
github.com/chromedp/cdproto v0.0.0-20250803210736-d308e07a266d h1:ZtA1sedVbEW7EW80Iz2GR3Ye6PwbJAJXjv7D74xG6HU=
github.com/chromedp/cdproto v0.0.0-20250803210736-d308e07a266d/go.mod h1:NItd7aLkcfOA/dcMXvl8p1u+lQqioRMq/SqDp71Pb/k=
github.com/chromedp/chromedp v0.14.1 h1:0uAbnxewy/Q+Bg7oafVePE/6EXEho9hnaC38f+TTENg=
github.com/chromedp/chromedp v0.14.1/go.mod h1:rHzAv60xDE7VNy/MYtTUrYreSc0ujt2O1/C3bzctYBo=
github.com/chromedp/sysutil v1.1.0 h1:PUFNv5EcprjqXZD9nJb9b/c9ibAbxiYo4exNWZyipwM=
github.com/chromedp/sysutil v1.1.0/go.mod h1:WiThHUdltqCNKGc4gaU50XgYjwjYIhKWoHGPTUfWTJ8=
github.com/go-json-experiment/json v0.0.0-20250813024750-ebf49471dced h1:Q311OHjMh/u5E2TITc++WlTP5We0xNseRMkHDyvhW7I=
github.com/go-json-experiment/json v0.0.0-20250813024750-ebf49471dced/go.mod h1:TiCD2a1pcmjd7YnhGH0f/zKNcCD06B029pHhzV23c2M=
github.com/go-rod/rod v0.100.0 h1:tEKIb5wS3pGUpW4oJPYDxOKmRXaZbd6S+YVjJ6BHBBY=
github.com/go-rod/rod v0.100.0/go.mod h1:h9igqSGReLmOWyHtdf0AtUd0mdkHFu3gFwBeV+stleM=
github.com/go-rod/rod v0.108.1 h1:2lKs+v/+B/2pbGKZgNIRbURhduTKNDZ3PXIvTRAV2Mg=
github.com/go-rod/rod v0.108.1/go.mod h1:yNvL687cwcjgebRuArQN9AStFdm8iS/e/rzImrS9Pzg=
github.com/go-rod/rod v0.114.0 h1:P+zLOqsj+vKf4C86SfjP6ymyPl9VXoYKm+ceCeQms6Y=
github.com/go-rod/rod v0.114.0/go.mod h1:aiedSEFg5DwG/fnNbUOTPMTTWX3MRj6vIs/a684Mthw=
github.com/go-rod/rod v0.116.2 h1:A5t2Ky2A+5eD/ZJQr1EfsQSe5rms5Xof/qj296e+ZqA=
github.com/go-rod/rod v0.116.2/go.mod h1:H+CMO9SCNc2TJ2WfrG+pKhITz57uGNYU43qYHh438Mg=
github.com/gobwas/httphead v0.1.0 h1:exrUm0f4YX0L7EBwZHuCF4GDp8aJfVeBrlLQrs6NqWU=
github.com/gobwas/httphead v0.1.0/go.mod h1:O/RXo79gxV8G+RqlR/otEwx4Q36zl9rqC5u12GKvMCM=
github.com/gobwas/pool v0.2.1 h1:xfeeEhW7pwmX8nuLVlqbzVc7udMDrwetjEv+TZIz1og=
github.com/gobwas/pool v0.2.1/go.mod h1:q8bcK0KcYlCgd9e7WYLm9LpyS+YeLd8JVDW6WezmKEw=
github.com/gobwas/ws v1.4.0 h1:CTaoG1tojrh4ucGPcoJFiAQUAsEWekEWvLy7GsVNqGs=
github.com/gobwas/ws v1.4.0/go.mod h1:G3gNqMNtPppf5XUz7O4shetPpcZ1VJ7zt18dlUeakrc=
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/gorilla/mux v1.8.0 h1:i40aqfkR1h2SlN9hojwV5ZA91wcXFOvkdNIeFDP5koI=
github.com/gorilla/mux v1.8.0/go.mod h1:DVbg23sWSpFRCP0SfiEN6jmj59UnW/n46BH5rLB71So=
github.com/gorilla/mux v1.8.1 h1:TuBL49tXwgrFYWhqrNgrUNEY92u81SPhu7sTdzQEiWY= github.com/gorilla/mux v1.8.1 h1:TuBL49tXwgrFYWhqrNgrUNEY92u81SPhu7sTdzQEiWY=
github.com/gorilla/mux v1.8.1/go.mod h1:AKf9I4AEqPTmMytcMc0KkNouC66V3BtZ4qD5fmWSiMQ= github.com/gorilla/mux v1.8.1/go.mod h1:AKf9I4AEqPTmMytcMc0KkNouC66V3BtZ4qD5fmWSiMQ=
github.com/ledongthuc/pdf v0.0.0-20250511090121-5959a4027728 h1:QwWKgMY28TAXaDl+ExRDqGQltzXqN/xypdKP86niVn8=
github.com/ledongthuc/pdf v0.0.0-20250511090121-5959a4027728/go.mod h1:1fEHWurg7pvf5SG6XNE5Q8UZmOwex51Mkx3SLhrW5B4=
github.com/ysmood/fetchup v0.2.3/go.mod h1:xhibcRKziSvol0H1/pj33dnKrYyI2ebIvz5cOOkYGns=
github.com/ysmood/fetchup v0.5.2 h1:P9w3OIA7RSNEEFvEmOiTq09IOu42C96PMyZ1MWd8TAs=
github.com/ysmood/fetchup v0.5.2/go.mod h1:yCv8s8itjsCul1LGXJ1Q+8EQnZcVjfbZ4+l1zDm4StE=
github.com/ysmood/goob v0.3.0/go.mod h1:S3lq113Y91y1UBf1wj1pFOxeahvfKkCk6mTWTWbDdWs=
github.com/ysmood/goob v0.4.0 h1:HsxXhyLBeGzWXnqVKtmT9qM7EuVs/XOgkX7T6r1o1AQ=
github.com/ysmood/goob v0.4.0/go.mod h1:u6yx7ZhS4Exf2MwciFr6nIM8knHQIE22lFpWHnfql18=
github.com/ysmood/gop v0.0.2/go.mod h1:rr5z2z27oGEbyB787hpEcx4ab8cCiPnKxn0SUHt6xzk=
github.com/ysmood/got v0.12.0/go.mod h1:pE1l4LOwOBhQg6A/8IAatkGp7uZjnalzrZolnlhhMgY=
github.com/ysmood/got v0.31.2/go.mod h1:pE1l4LOwOBhQg6A/8IAatkGp7uZjnalzrZolnlhhMgY=
github.com/ysmood/got v0.34.1/go.mod h1:yddyjq/PmAf08RMLSwDjPyCvHvYed+WjHnQxpH851LM=
github.com/ysmood/got v0.41.0 h1:XiFH311ltTSGyxjeKcNvy7dzbJjjTzn6DBgK313JHBs=
github.com/ysmood/got v0.41.0/go.mod h1:W7DdpuX6skL3NszLmAsC5hT7JAhuLZhByVzHTq874Qg=
github.com/ysmood/gotrace v0.2.2/go.mod h1:TzhIG7nHDry5//eYZDYcTzuJLYQIkykJzCRIo4/dzQM=
github.com/ysmood/gotrace v0.6.0/go.mod h1:TzhIG7nHDry5//eYZDYcTzuJLYQIkykJzCRIo4/dzQM=
github.com/ysmood/gson v0.6.4/go.mod h1:3Kzs5zDl21g5F/BlLTNcuAGAYLKt2lV5G8D1zF3RNmg=
github.com/ysmood/gson v0.7.1/go.mod h1:3Kzs5zDl21g5F/BlLTNcuAGAYLKt2lV5G8D1zF3RNmg=
github.com/ysmood/gson v0.7.3 h1:QFkWbTH8MxyUTKPkVWAENJhxqdBa4lYTQWqZCiLG6kE=
github.com/ysmood/gson v0.7.3/go.mod h1:3Kzs5zDl21g5F/BlLTNcuAGAYLKt2lV5G8D1zF3RNmg=
github.com/ysmood/leakless v0.7.0/go.mod h1:R8iAXPRaG97QJwqxs74RdwzcRHT1SWCGTNqY8q0JvMQ=
github.com/ysmood/leakless v0.8.0/go.mod h1:R8iAXPRaG97QJwqxs74RdwzcRHT1SWCGTNqY8q0JvMQ=
github.com/ysmood/leakless v0.9.0 h1:qxCG5VirSBvmi3uynXFkcnLMzkphdh3xx5FtrORwDCU=
github.com/ysmood/leakless v0.9.0/go.mod h1:R8iAXPRaG97QJwqxs74RdwzcRHT1SWCGTNqY8q0JvMQ=
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
@@ -71,11 +19,8 @@ golang.org/x/mod v0.15.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
golang.org/x/mod v0.17.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= golang.org/x/mod v0.17.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/net v0.0.0-20210916014120-12bc252f5db8/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
golang.org/x/net v0.7.0 h1:rJrUqqhjsgNp7KqAIc25s9pZnjU7TUcSY7HcVZjdn1g=
golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg= golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg=
golang.org/x/net v0.15.0/go.mod h1:idbUs1IY1+zTqbi8yxTbhexhEEk5ur9LInksu6HrEpk= golang.org/x/net v0.15.0/go.mod h1:idbUs1IY1+zTqbi8yxTbhexhEEk5ur9LInksu6HrEpk=
golang.org/x/net v0.21.0/go.mod h1:bIjVDfnllIU7BJ2DNgfnXvpSvtn8VRwhlsaeUTyUS44= golang.org/x/net v0.21.0/go.mod h1:bIjVDfnllIU7BJ2DNgfnXvpSvtn8VRwhlsaeUTyUS44=
@@ -92,19 +37,15 @@ golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
golang.org/x/sync v0.10.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sync v0.10.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.35.0 h1:vz1N37gP5bs89s7He8XuIYXpyY0+QlsKmzipCbUtyxI=
golang.org/x/sys v0.35.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
golang.org/x/telemetry v0.0.0-20240228155512-f48c80bd79b2/go.mod h1:TeRTkGYfJXctD9OcfyVLyj2J3IxLnKwHJR8f4D8a3YE= golang.org/x/telemetry v0.0.0-20240228155512-f48c80bd79b2/go.mod h1:TeRTkGYfJXctD9OcfyVLyj2J3IxLnKwHJR8f4D8a3YE=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
@@ -116,7 +57,6 @@ golang.org/x/term v0.20.0/go.mod h1:8UkIAJTvZgivsXaD6/pH6U9ecQzZ45awqEOzuCvwpFY=
golang.org/x/term v0.27.0/go.mod h1:iMsnZpn0cago0GOrHO2+Y7u7JPn5AylBrcoWkElMTSM= golang.org/x/term v0.27.0/go.mod h1:iMsnZpn0cago0GOrHO2+Y7u7JPn5AylBrcoWkElMTSM=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=
+639 -160
View File
@@ -2,15 +2,17 @@ package main
import ( import (
"bytes" "bytes"
"crypto/tls" "context"
_ "embed"
"encoding/json" "encoding/json"
"fmt" "fmt"
"io" "io"
"log" "log"
"net/http" "net/http"
"net/smtp"
neturl "net/url" neturl "net/url"
"os" "os"
"os/exec"
"path/filepath"
"regexp" "regexp"
"strings" "strings"
"sync" "sync"
@@ -30,6 +32,613 @@ type Competition struct {
Table *CompetitionTable `json:"table,omitempty"` Table *CompetitionTable `json:"table,omitempty"`
} }
// Cloudflare Browser Rendering API structures

// CloudflareCrawlRequest is the JSON body StartCrawl posts to the
// browser-rendering crawl endpoint. Every field except URL is tagged
// omitempty, so zero values are left out of the encoded request.
type CloudflareCrawlRequest struct {
// URL is the page the crawl starts from.
URL string `json:"url"`
// Limit: StartCrawl replaces a zero value with 10.
Limit int `json:"limit,omitempty"`
// Depth: StartCrawl replaces a zero value with 1, so a caller cannot
// request depth 0 through this struct.
Depth int `json:"depth,omitempty"`
// Formats: StartCrawl defaults an empty list to "html" and "markdown".
Formats []string `json:"formats,omitempty"`
// Render is forwarded as-is; false is omitted from the JSON.
// NOTE(review): presumably toggles browser rendering on the service side — confirm against the API docs.
Render bool `json:"render,omitempty"`
// Source: StartCrawl defaults an empty value to "all".
Source string `json:"source,omitempty"`
// Options carries free-form crawl options; StartCrawl stores URL-filter
// entries (includePatterns, excludePatterns, includeExternalLinks,
// includeSubdomains) in it before sending.
Options map[string]interface{} `json:"options,omitempty"`
}
// CloudflareCrawlResponse is the envelope StartCrawl decodes from the reply to
// the crawl-start POST.
type CloudflareCrawlResponse struct {
// Success must be true for StartCrawl to accept the reply.
Success bool `json:"success"`
Result string `json:"result"` // job ID
}
// CloudflareCrawlJob is the "result" object GetCrawlResults decodes for a
// crawl job: its status plus the records fetched so far.
type CloudflareCrawlJob struct {
ID string `json:"id"`
// Status: WaitForCrawlCompletion keeps polling while this equals "running"
// and treats every other value as finished.
Status string `json:"status"`
BrowserSecondsUsed float64 `json:"browserSecondsUsed"`
Total int `json:"total"`
Finished int `json:"finished"`
// Records holds the crawled pages; the fallback fetcher reads Records[0].
Records []CloudflareCrawlRecord `json:"records"`
// Cursor is decoded but not read anywhere in the visible code.
// NOTE(review): presumably a pagination cursor — confirm against the API docs.
Cursor string `json:"cursor,omitempty"`
}
// CloudflareCrawlRecord is one crawled page inside a CloudflareCrawlJob.
type CloudflareCrawlRecord struct {
URL string `json:"url"`
// Status: the fallback fetcher only uses a record whose status is "completed".
Status string `json:"status"`
Markdown string `json:"markdown,omitempty"`
// HTML is what the fallback fetcher returns as the page body.
HTML string `json:"html,omitempty"`
JSON interface{} `json:"json,omitempty"`
Metadata map[string]interface{} `json:"metadata"`
}
// CloudflareClient talks to the Cloudflare Browser Rendering crawl API.
// Its methods accept a nil receiver and report it as an error.
type CloudflareClient struct {
// AccountID is interpolated into the request path (/accounts/{id}/...).
AccountID string
// APIToken is sent as the "Authorization: Bearer" credential.
APIToken string
// BaseURL is the API root that request paths are appended to.
BaseURL string
// Client performs the HTTP requests.
Client *http.Client
}
// fetchOptions carries optional per-request settings for the page fetchers.
type fetchOptions struct {
// Referer, when non-empty, is sent as the Referer header (or passed to the
// wget / Scrapling helpers as their referer argument).
Referer string
}
// Header values and helper location shared by the page fetchers.
const (
// browserUserAgent is the User-Agent sent by the direct and wget fetchers.
browserUserAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0 Safari/537.36"
// browserAccept is the Accept header sent by the direct and wget fetchers.
browserAccept = "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8"
// browserAcceptLanguage is the Accept-Language header (Czech first, then English).
browserAcceptLanguage = "cs-CZ,cs;q=0.9,en;q=0.8"
// scraplingHelperPath is the helper script location, joined onto the
// working directory and the executable's directory when searching for it.
scraplingHelperPath = "scripts/scrapling_fetch.py"
)
// embeddedScraplingHelper holds the contents of scripts/scrapling_fetch.py
// compiled into the binary; it is written to a temp file when no on-disk copy
// of the helper script is found.
//
//go:embed scripts/scrapling_fetch.py
var embeddedScraplingHelper string
// State for the one-time extraction of the embedded helper script.
var (
// embeddedScraplingHelperOnce guards the extraction so it runs at most once.
embeddedScraplingHelperOnce sync.Once
// embeddedScraplingHelperFile is the temp-file path written by the extraction.
embeddedScraplingHelperFile string
// embeddedScraplingHelperErr records the extraction failure, if any.
embeddedScraplingHelperErr error
)
// NewCloudflareClient builds a Browser Rendering API client from the
// CLOUDFLARE_ACCOUNT_ID and CLOUDFLARE_API_TOKEN environment variables.
// Both values are whitespace-trimmed; if either ends up empty the function
// returns nil, which callers use as the "Cloudflare not configured" signal.
func NewCloudflareClient() *CloudflareClient {
	var (
		account = strings.TrimSpace(os.Getenv("CLOUDFLARE_ACCOUNT_ID"))
		token   = strings.TrimSpace(os.Getenv("CLOUDFLARE_API_TOKEN"))
	)
	if len(account) == 0 || len(token) == 0 {
		return nil
	}

	// Each API call is capped at 30 seconds by the HTTP client itself.
	httpClient := &http.Client{Timeout: 30 * time.Second}

	client := new(CloudflareClient)
	client.AccountID = account
	client.APIToken = token
	client.BaseURL = "https://api.cloudflare.com/client/v4"
	client.Client = httpClient
	return client
}
// StartCrawl submits a crawl job to the Browser Rendering API and returns the
// job ID reported by the service.
//
// Zero-valued request fields get defaults: Limit 10, Depth 1, Formats
// html+markdown, Source "all". URL-filter options restricted to
// www.fotbal.cz are added for any option key the caller did not set; keys the
// caller did set are sent unchanged. The caller's Options map is never
// modified.
//
// An error is returned when the receiver is nil, the request cannot be built
// or sent, the service answers with a non-200 status, the reply cannot be
// decoded, or the reply's success flag is false.
func (c *CloudflareClient) StartCrawl(ctx context.Context, req CloudflareCrawlRequest) (string, error) {
	if c == nil {
		return "", fmt.Errorf("Cloudflare client not initialized")
	}

	// Set defaults
	if req.Limit == 0 {
		req.Limit = 10
	}
	if req.Depth == 0 {
		req.Depth = 1
	}
	if len(req.Formats) == 0 {
		req.Formats = []string{"html", "markdown"}
	}
	if req.Source == "" {
		req.Source = "all"
	}

	// Restrict to specific URL patterns for fotbal.cz to avoid crawling
	// unrelated content. These are only defaults.
	defaults := map[string]interface{}{
		"includePatterns":      []string{"https://www.fotbal.cz/**"},
		"excludePatterns":      []string{"**/api/**", "**/static/**", "**/media/**"},
		"includeExternalLinks": false,
		"includeSubdomains":    false,
	}

	// req is a copy, but its Options map is shared with the caller, so the
	// merged options are built in a fresh map. Caller-supplied entries are
	// copied last so they win over the defaults.
	options := make(map[string]interface{}, len(defaults)+len(req.Options))
	for key, value := range defaults {
		options[key] = value
	}
	for key, value := range req.Options {
		options[key] = value
	}
	req.Options = options

	payload, err := json.Marshal(req)
	if err != nil {
		return "", fmt.Errorf("failed to marshal request: %w", err)
	}

	endpoint := fmt.Sprintf("%s/accounts/%s/browser-rendering/crawl", c.BaseURL, c.AccountID)
	httpReq, err := http.NewRequestWithContext(ctx, "POST", endpoint, bytes.NewReader(payload))
	if err != nil {
		return "", fmt.Errorf("failed to create request: %w", err)
	}
	httpReq.Header.Set("Authorization", "Bearer "+c.APIToken)
	httpReq.Header.Set("Content-Type", "application/json")

	resp, err := c.Client.Do(httpReq)
	if err != nil {
		return "", fmt.Errorf("failed to send request: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		// Best effort: the body is only used to enrich the error message.
		errBody, _ := io.ReadAll(resp.Body)
		return "", fmt.Errorf("API request failed with status %d: %s", resp.StatusCode, string(errBody))
	}

	var crawlResp CloudflareCrawlResponse
	if err := json.NewDecoder(resp.Body).Decode(&crawlResp); err != nil {
		return "", fmt.Errorf("failed to decode response: %w", err)
	}
	if !crawlResp.Success {
		return "", fmt.Errorf("API returned unsuccessful response")
	}
	return crawlResp.Result, nil
}
// GetCrawlResults fetches the current state of crawl job jobID. A positive
// limit is forwarded as the "limit" query parameter; zero or negative sends
// no limit. It fails on a nil receiver, on transport errors, on any non-200
// status, on an undecodable reply, and when the reply's success flag is false.
func (c *CloudflareClient) GetCrawlResults(ctx context.Context, jobID string, limit int) (*CloudflareCrawlJob, error) {
	if c == nil {
		return nil, fmt.Errorf("Cloudflare client not initialized")
	}

	endpoint := fmt.Sprintf("%s/accounts/%s/browser-rendering/crawl/%s", c.BaseURL, c.AccountID, jobID)
	if limit > 0 {
		endpoint = fmt.Sprintf("%s?limit=%d", endpoint, limit)
	}

	request, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
	if err != nil {
		return nil, fmt.Errorf("failed to create request: %w", err)
	}
	request.Header.Set("Authorization", "Bearer "+c.APIToken)

	response, err := c.Client.Do(request)
	if err != nil {
		return nil, fmt.Errorf("failed to send request: %w", err)
	}
	defer response.Body.Close()

	if code := response.StatusCode; code != http.StatusOK {
		// The body is read only to make the error message more useful.
		raw, _ := io.ReadAll(response.Body)
		return nil, fmt.Errorf("API request failed with status %d: %s", code, string(raw))
	}

	// The job is wrapped in an envelope with a success flag.
	envelope := struct {
		Success bool               `json:"success"`
		Result  CloudflareCrawlJob `json:"result"`
	}{}
	if err := json.NewDecoder(response.Body).Decode(&envelope); err != nil {
		return nil, fmt.Errorf("failed to decode response: %w", err)
	}
	if envelope.Success {
		return &envelope.Result, nil
	}
	return nil, fmt.Errorf("API returned unsuccessful response")
}
// WaitForCrawlCompletion polls job jobID up to maxAttempts times, pausing
// delay between polls. As soon as the job's status is anything other than
// "running" it fetches and returns the job again without a record limit.
// It returns the context's error if ctx is cancelled during a pause, and a
// timeout error once the attempts are used up.
func (c *CloudflareClient) WaitForCrawlCompletion(ctx context.Context, jobID string, maxAttempts int, delay time.Duration) (*CloudflareCrawlJob, error) {
	if c == nil {
		return nil, fmt.Errorf("Cloudflare client not initialized")
	}

	for attempt := 0; attempt < maxAttempts; attempt++ {
		// limit=1 keeps the status probe small.
		probe, err := c.GetCrawlResults(ctx, jobID, 1)
		if err != nil {
			return nil, err
		}

		if probe.Status == "running" {
			// Pause before the next probe, giving up early on cancellation.
			select {
			case <-ctx.Done():
				return nil, ctx.Err()
			case <-time.After(delay):
			}
			continue
		}

		// Finished (in whatever state): fetch the job again with all records.
		complete, err := c.GetCrawlResults(ctx, jobID, 0)
		if err != nil {
			return nil, err
		}
		return complete, nil
	}

	return nil, fmt.Errorf("crawl job did not complete within timeout")
}
// CrawlURL runs a full crawl cycle for one URL: it starts a rendered
// html+markdown crawl job, then polls (24 attempts, 5 seconds apart, inside
// a 2-minute deadline derived from ctx) until the job leaves the "running"
// state, and returns the finished job.
func (c *CloudflareClient) CrawlURL(ctx context.Context, url string) (*CloudflareCrawlJob, error) {
	if c == nil {
		return nil, fmt.Errorf("Cloudflare client not initialized")
	}

	const (
		pollAttempts = 24
		pollInterval = 5 * time.Second
		waitBudget   = 2 * time.Minute
	)

	jobID, err := c.StartCrawl(ctx, CloudflareCrawlRequest{
		URL:   url,
		Limit: 1, // one page only
		// NOTE(review): StartCrawl treats a zero Depth as unset and replaces
		// it with 1, so this 0 is not what gets sent. Presumably Limit: 1 is
		// what keeps the job to a single page — confirm against the API.
		Depth:   0,
		Formats: []string{"html", "markdown"},
		Render:  true,
		Source:  "links", // discover via links, not sitemaps
	})
	if err != nil {
		return nil, fmt.Errorf("failed to start crawl: %w", err)
	}

	// Bound the polling phase independently of the caller's deadline.
	waitCtx, cancel := context.WithTimeout(ctx, waitBudget)
	defer cancel()

	finished, err := c.WaitForCrawlCompletion(waitCtx, jobID, pollAttempts, pollInterval)
	if err != nil {
		return nil, fmt.Errorf("failed to wait for crawl completion: %w", err)
	}
	return finished, nil
}
// newBrowserRequest builds a GET request for url carrying the shared
// browser-like User-Agent, Accept and Accept-Language headers, plus a Referer
// header when opts.Referer is set.
func newBrowserRequest(url string, opts fetchOptions) (*http.Request, error) {
	request, err := http.NewRequest(http.MethodGet, url, nil)
	if err != nil {
		return nil, fmt.Errorf("failed to create request: %w", err)
	}

	headers := [][2]string{
		{"User-Agent", browserUserAgent},
		{"Accept", browserAccept},
		{"Accept-Language", browserAcceptLanguage},
	}
	if referer := opts.Referer; referer != "" {
		headers = append(headers, [2]string{"Referer", referer})
	}
	for _, header := range headers {
		request.Header.Set(header[0], header[1])
	}
	return request, nil
}
// looksLikeCloudflareBlock reports whether body looks like a Cloudflare
// challenge or block page rather than real content. The match is
// case-insensitive. A page is flagged when it contains any one of the
// unambiguous challenge phrases, or when it references the challenge-platform
// script path together with at least one challenge token. An empty body is
// never flagged.
func looksLikeCloudflareBlock(body []byte) bool {
	if len(body) == 0 {
		return false
	}

	page := strings.ToLower(string(body))
	has := func(marker string) bool { return strings.Contains(page, marker) }

	// Phrases that only appear on challenge/block pages.
	for _, marker := range [...]string{
		"<title>just a moment...</title>",
		"attention required!",
		"enable javascript and cookies to continue",
		"checking if the site connection is secure",
		"cf-browser-verification",
	} {
		if has(marker) {
			return true
		}
	}

	// The script path alone is not enough; it must be paired with a
	// challenge token.
	if !has("/cdn-cgi/challenge-platform/") {
		return false
	}
	return has("window._cf_chl_opt") || has("__cf_chl_rt_tk") || has("cf_chl_seq_")
}
// compactErrorText collapses every run of whitespace in s to a single space,
// drops leading and trailing whitespace, and limits the result to 220 bytes.
// Longer text is cut to at most 217 bytes and suffixed with "...". The cut is
// moved back to a UTF-8 rune boundary so a multi-byte character (common in
// localized tool output) is never split into invalid bytes.
func compactErrorText(s string) string {
	const (
		maxLen   = 220
		ellipsis = "..."
	)

	// strings.Fields already ignores leading and trailing whitespace.
	s = strings.Join(strings.Fields(s), " ")
	if len(s) <= maxLen {
		return s
	}

	cut := maxLen - len(ellipsis)
	// s[cut] is the first dropped byte. If it is a UTF-8 continuation byte
	// (10xxxxxx) the cut is inside a rune, so step back to the rune's start.
	// A rune has at most three continuation bytes, which bounds the loop even
	// for malformed input.
	for back := 0; back < 3 && cut > 0 && s[cut]&0xC0 == 0x80; back++ {
		cut--
	}
	return s[:cut] + ellipsis
}
// fetchPageDirect downloads url with a plain Go HTTP client (15-second
// timeout) using browser-like headers. It returns the body only for a 200
// response that is not a Cloudflare challenge page; every other outcome is an
// error.
func fetchPageDirect(url string, opts fetchOptions) ([]byte, error) {
	request, err := newBrowserRequest(url, opts)
	if err != nil {
		return nil, err
	}

	httpClient := http.Client{Timeout: 15 * time.Second}
	response, err := httpClient.Do(request)
	if err != nil {
		return nil, fmt.Errorf("direct request failed: %w", err)
	}
	defer response.Body.Close()

	// The body is read before the status check, so a read error takes
	// precedence over a bad status code.
	payload, readErr := io.ReadAll(response.Body)
	switch {
	case readErr != nil:
		return nil, fmt.Errorf("failed to read response body: %w", readErr)
	case response.StatusCode != http.StatusOK:
		return nil, fmt.Errorf("direct request returned HTTP %d", response.StatusCode)
	case looksLikeCloudflareBlock(payload):
		return nil, fmt.Errorf("direct request returned a Cloudflare challenge page")
	}
	return payload, nil
}
// fetchPageWithWget downloads url by running the wget binary with the same
// browser-like headers as the direct fetcher. The process is killed after 20
// seconds; wget itself is told to try once with a 15-second timeout and to
// follow at most 10 redirects.
//
// It returns an error when wget is not on PATH, when the process fails (the
// compacted stderr, or the run error if stderr is empty, becomes the
// message), when nothing is written to stdout, or when the output is a
// Cloudflare challenge page.
func fetchPageWithWget(url string, opts fetchOptions) ([]byte, error) {
	if _, err := exec.LookPath("wget"); err != nil {
		return nil, fmt.Errorf("wget not available: %w", err)
	}

	ctx, cancel := context.WithTimeout(context.Background(), 20*time.Second)
	defer cancel()

	args := []string{
		"--quiet",
		"--tries=1",
		"--timeout=15",
		"--max-redirect=10",
		"--output-document=-",
		"--user-agent=" + browserUserAgent,
		"--header=Accept: " + browserAccept,
		"--header=Accept-Language: " + browserAcceptLanguage,
	}
	if opts.Referer != "" {
		args = append(args, "--header=Referer: "+opts.Referer)
	}
	// "--" ends option parsing, so a URL that happens to start with "-" is
	// treated as the download target rather than as a wget option.
	args = append(args, "--", url)

	cmd := exec.CommandContext(ctx, "wget", args...)
	var stdout bytes.Buffer
	var stderr bytes.Buffer
	cmd.Stdout = &stdout
	cmd.Stderr = &stderr

	if err := cmd.Run(); err != nil {
		// Prefer wget's own diagnostics; fall back to the exec error.
		details := compactErrorText(stderr.String())
		if details == "" {
			details = compactErrorText(err.Error())
		}
		return nil, fmt.Errorf("wget request failed: %s", details)
	}

	body := stdout.Bytes()
	if len(body) == 0 {
		return nil, fmt.Errorf("wget returned an empty body")
	}
	if looksLikeCloudflareBlock(body) {
		return nil, fmt.Errorf("wget returned a Cloudflare challenge page")
	}
	return body, nil
}
// firstExistingFile returns the first entry of paths (after trimming
// surrounding whitespace) that exists and is not a directory. Blank entries
// are skipped. It returns "" when no entry qualifies.
func firstExistingFile(paths ...string) string {
	for i := range paths {
		candidate := strings.TrimSpace(paths[i])
		if candidate == "" {
			continue
		}
		info, err := os.Stat(candidate)
		if err != nil || info.IsDir() {
			continue
		}
		return candidate
	}
	return ""
}
// firstExecutable returns the first usable executable among paths, or "" when
// none is usable. Entries are whitespace-trimmed and blank ones are skipped.
//
// Every candidate is resolved with exec.LookPath: a bare name is searched on
// PATH, while a name containing a path separator is checked directly. Either
// way the candidate is rejected if it is missing, is a directory, or is not
// executable, so a stale or non-executable interpreter path no longer hides
// a working candidate later in the list.
func firstExecutable(paths ...string) string {
	for _, candidate := range paths {
		candidate = strings.TrimSpace(candidate)
		if candidate == "" {
			continue
		}
		if resolved, err := exec.LookPath(candidate); err == nil {
			return resolved
		}
	}
	return ""
}
// ensureEmbeddedScraplingHelper writes the embedded helper script to a temp
// file the first time it is called and returns that file's path. The outcome
// of the single attempt, path or error, is cached and returned on every later
// call.
func ensureEmbeddedScraplingHelper() (string, error) {
	embeddedScraplingHelperOnce.Do(func() {
		embeddedScraplingHelperFile, embeddedScraplingHelperErr = writeEmbeddedScraplingHelper()
	})
	if embeddedScraplingHelperErr != nil {
		return "", embeddedScraplingHelperErr
	}
	if embeddedScraplingHelperFile == "" {
		return "", fmt.Errorf("embedded Scrapling helper path is empty")
	}
	return embeddedScraplingHelperFile, nil
}

// writeEmbeddedScraplingHelper writes the embedded script to a new temp file
// with owner-only permissions and returns its path. On any failure after the
// file was created, the partial file is closed and removed so nothing is left
// behind.
func writeEmbeddedScraplingHelper() (string, error) {
	if strings.TrimSpace(embeddedScraplingHelper) == "" {
		return "", fmt.Errorf("embedded Scrapling helper is empty")
	}

	file, err := os.CreateTemp("", "facr-scrapling-*.py")
	if err != nil {
		return "", fmt.Errorf("create embedded Scrapling helper: %w", err)
	}
	// Cleanup errors are ignored: the original failure is what gets reported.
	discard := func() {
		_ = file.Close()
		_ = os.Remove(file.Name())
	}

	if _, err := file.WriteString(embeddedScraplingHelper); err != nil {
		discard()
		return "", fmt.Errorf("write embedded Scrapling helper: %w", err)
	}
	if err := file.Chmod(0600); err != nil {
		discard()
		return "", fmt.Errorf("chmod embedded Scrapling helper: %w", err)
	}
	// Close can surface a deferred write error, so it is checked explicitly.
	if err := file.Close(); err != nil {
		_ = os.Remove(file.Name())
		return "", fmt.Errorf("close embedded Scrapling helper: %w", err)
	}
	return file.Name(), nil
}
func findScraplingHelperScript() (string, error) {
cwd, _ := os.Getwd()
exePath, _ := os.Executable()
exeDir := ""
if exePath != "" {
exeDir = filepath.Dir(exePath)
}
if path := firstExistingFile(
os.Getenv("SCRAPLING_SCRIPT"),
filepath.Join(cwd, scraplingHelperPath),
filepath.Join(exeDir, scraplingHelperPath),
); path != "" {
return path, nil
}
return ensureEmbeddedScraplingHelper()
}
// findScraplingPython picks the Python interpreter used to run the Scrapling
// helper. Candidates are tried in this order: the SCRAPLING_PYTHON_BIN
// environment variable; python3 then python inside .venv-scrapling/bin and
// then .venv/bin, first under the working directory and then under the
// executable's directory; and finally python3 and python on PATH. It returns
// "" when nothing is found.
func findScraplingPython() string {
	workDir, _ := os.Getwd()
	binDir := ""
	if exe, _ := os.Executable(); exe != "" {
		binDir = filepath.Dir(exe)
	}

	interpreters := []string{"python3", "python"}

	candidates := []string{os.Getenv("SCRAPLING_PYTHON_BIN")}
	for _, root := range []string{workDir, binDir} {
		for _, venv := range []string{".venv-scrapling", ".venv"} {
			for _, name := range interpreters {
				candidates = append(candidates, filepath.Join(root, venv, "bin", name))
			}
		}
	}
	// Bare names go last so they are resolved through PATH.
	candidates = append(candidates, interpreters...)

	return firstExecutable(candidates...)
}
// fetchPageWithScrapling downloads url by running the Scrapling helper script
// under a Python interpreter, passing --url and, when set, --referer. The
// process is limited to 90 seconds. The helper's stdout is returned as the
// page body. A missing interpreter or script, a failed run, empty output, or
// a Cloudflare challenge page all produce an error.
func fetchPageWithScrapling(url string, opts fetchOptions) ([]byte, error) {
	interpreter := findScraplingPython()
	if interpreter == "" {
		return nil, fmt.Errorf("Scrapling skipped: no Python runtime found")
	}

	script, err := findScraplingHelperScript()
	if err != nil {
		return nil, fmt.Errorf("Scrapling skipped: %w", err)
	}

	argv := []string{script, "--url", url}
	if referer := opts.Referer; referer != "" {
		argv = append(argv, "--referer", referer)
	}

	runCtx, cancel := context.WithTimeout(context.Background(), 90*time.Second)
	defer cancel()

	var outBuf, errBuf bytes.Buffer
	cmd := exec.CommandContext(runCtx, interpreter, argv...)
	cmd.Stdout, cmd.Stderr = &outBuf, &errBuf

	if runErr := cmd.Run(); runErr != nil {
		// The helper's stderr is the most useful detail; fall back to the
		// exec error when it printed nothing.
		reason := compactErrorText(errBuf.String())
		if reason == "" {
			reason = compactErrorText(runErr.Error())
		}
		return nil, fmt.Errorf("Scrapling request failed: %s", reason)
	}

	page := outBuf.Bytes()
	switch {
	case len(page) == 0:
		return nil, fmt.Errorf("Scrapling returned an empty body")
	case looksLikeCloudflareBlock(page):
		return nil, fmt.Errorf("Scrapling returned a Cloudflare challenge page")
	}
	return page, nil
}
func fetchPageWithFallback(url string) ([]byte, error) {
return fetchPageWithFallbackOptions(url, fetchOptions{})
}
// fetchPageWithFallbackOptions fetches url by trying, in order: a direct Go
// HTTP request, the wget binary, the Scrapling Python helper, and finally the
// Cloudflare Browser Rendering crawl API (only when its credentials are
// configured). The first stage that yields a usable page wins.
//
// Each failed stage is logged. When every stage fails, the returned error
// lists the cause of each earlier failure so the caller's single log line is
// enough to diagnose the problem. A Cloudflare record with an empty HTML
// body or a challenge page is reported as an error, matching the wget and
// Scrapling stages.
func fetchPageWithFallbackOptions(url string, opts fetchOptions) ([]byte, error) {
	body, directErr := fetchPageDirect(url, opts)
	if directErr == nil {
		return body, nil
	}
	log.Printf("Direct request failed for %s: %v", url, directErr)

	body, wgetErr := fetchPageWithWget(url, opts)
	if wgetErr == nil {
		log.Printf("Successfully retrieved content via wget for %s", url)
		return body, nil
	}
	log.Printf("wget fallback failed for %s: %v", url, wgetErr)

	body, scraplingErr := fetchPageWithScrapling(url, opts)
	if scraplingErr == nil {
		log.Printf("Successfully retrieved content via Scrapling for %s", url)
		return body, nil
	}
	log.Printf("Scrapling fallback failed for %s: %v", url, scraplingErr)

	cfClient := NewCloudflareClient()
	if cfClient == nil {
		return nil, fmt.Errorf(
			"go scraping failed (%v), wget failed (%v), Scrapling failed (%v), and Cloudflare client is not available",
			directErr, wgetErr, scraplingErr,
		)
	}

	log.Printf("Attempting Cloudflare crawl fallback for %s", url)
	ctx, cancel := context.WithTimeout(context.Background(), 3*time.Minute)
	defer cancel()

	job, err := cfClient.CrawlURL(ctx, url)
	if err != nil {
		log.Printf("Cloudflare crawl failed for %s: %v", url, err)
		return nil, fmt.Errorf(
			"go scraping failed (%v), wget failed (%v), Scrapling failed (%v), and Cloudflare crawl failed: %w",
			directErr, wgetErr, scraplingErr, err,
		)
	}

	// Only the first record is considered, and only if it completed.
	if job == nil || len(job.Records) == 0 || job.Records[0].Status != "completed" {
		log.Printf("Cloudflare crawl returned no completed records for %s", url)
		return nil, fmt.Errorf("Cloudflare crawl returned no completed records")
	}

	body = []byte(job.Records[0].HTML)
	if len(body) == 0 {
		return nil, fmt.Errorf("Cloudflare crawl returned an empty body")
	}
	if looksLikeCloudflareBlock(body) {
		return nil, fmt.Errorf("Cloudflare crawl returned a challenge page")
	}
	log.Printf("Successfully retrieved content via Cloudflare crawl for %s", url)
	return body, nil
}
// parseCompetitionMatchesFromFotbal scrapes matches from the public fotbal.cz // parseCompetitionMatchesFromFotbal scrapes matches from the public fotbal.cz
// competition page (e.g., https://www.fotbal.cz/souteze/turnaje/table/{id}). // competition page (e.g., https://www.fotbal.cz/souteze/turnaje/table/{id}).
// It filters to only include matches involving the given clubName if provided. // It filters to only include matches involving the given clubName if provided.
@@ -38,26 +647,16 @@ func parseCompetitionMatchesFromFotbal(pageURL, clubType, clubName, clubID strin
if pageURL == "" { if pageURL == "" {
return nil return nil
} }
// Request with browser-like headers; some fotbal.cz pages 404 without them
req, _ := http.NewRequest("GET", pageURL, nil) body, err := fetchPageWithFallback(pageURL)
req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0 Safari/537.36")
req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8")
req.Header.Set("Accept-Language", "cs-CZ,cs;q=0.9,en;q=0.8")
client := &http.Client{Timeout: 15 * time.Second}
resp, err := client.Do(req)
if err != nil { if err != nil {
log.Printf("fotbal.cz matches fetch error for %s: %v", pageURL, err) log.Printf("fotbal.cz matches fetch failed for %s: %v", pageURL, err)
return nil return nil
} }
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK { // If we still don't have body content, return nil
log.Printf("fotbal.cz matches non-200 for %s: %d", pageURL, resp.StatusCode) if len(body) == 0 {
return nil log.Printf("No content available for %s", pageURL)
}
// Read body to optionally save and to allow multiple reads
body, err := io.ReadAll(resp.Body)
if err != nil {
log.Printf("fotbal.cz matches read error for %s: %v", pageURL, err)
return nil return nil
} }
// Debug: save full HTML if env toggled // Debug: save full HTML if env toggled
@@ -376,10 +975,6 @@ func parseCompetitionMatchesFromIS(detailURL, clubType, clubName, clubID string)
var logoCache = map[string]string{} var logoCache = map[string]string{}
var missingLogoNotified = map[string]bool{}
var missingLogoMu sync.Mutex
type logoAPISearchResult struct { type logoAPISearchResult struct {
ID string `json:"id"` ID string `json:"id"`
Name string `json:"name"` Name string `json:"name"`
@@ -516,7 +1111,7 @@ func getLogoFromLogoAPI(teamName string, teamID string) string {
return r.LogoURL return r.LogoURL
} }
} }
// No strong match treat as "no logo" so upstream can fall back to FACR and send notification. // No strong match treat as "no logo" so upstream can fall back to FACR assets.
logoCache[cacheKey] = "" logoCache[cacheKey] = ""
return "" return ""
} }
@@ -584,93 +1179,6 @@ func getLogoBySearch(name string) string {
return best return best
} }
// notifyMissingLogo e-mails a "missing logo" notice for a club, at most once
// per distinct club.
//
// The de-duplication key is built from the trimmed, lower-cased name and ID
// ("name", "id" or "name|id"). When both are empty nothing happens. The key is
// recorded in missingLogoNotified before the mail is sent, so a failed send is
// only logged and is not attempted again for the same key.
func notifyMissingLogo(teamName string, teamID string) {
	normName := strings.ToLower(strings.TrimSpace(teamName))
	normID := strings.ToLower(strings.TrimSpace(teamID))

	var dedupKey string
	switch {
	case normName == "" && normID == "":
		return
	case normID == "":
		dedupKey = normName
	case normName == "":
		dedupKey = normID
	default:
		dedupKey = normName + "|" + normID
	}

	// Check-and-mark under the lock; the mail itself is sent outside of it.
	alreadyNotified := func() bool {
		missingLogoMu.Lock()
		defer missingLogoMu.Unlock()
		if missingLogoNotified[dedupKey] {
			return true
		}
		missingLogoNotified[dedupKey] = true
		return false
	}()
	if alreadyNotified {
		return
	}

	err := sendMissingLogoEmail(teamName, teamID)
	if err == nil {
		return
	}
	log.Printf("error sending missing logo email for %s (%s): %v", teamName, teamID, err)
}
// sendMissingLogoEmail mails a plain-text notice that logoapi has no logo for
// the given club.
//
// Configuration is read from the environment:
//   - SMTP_HOST (default "smtp.purelymail.com") and SMTP_PORT (default "465").
//     The connection is opened with tls.Dial, i.e. TLS from the first byte
//     rather than STARTTLS.
//   - SMTP_USER and SMTP_PASS: credentials; SMTP_USER is also used as the
//     sender. If either is empty the function does nothing and returns nil.
//   - MISSING_LOGO_NOTIFY_TO: recipient (default "info@tdvorak.dev").
//
// It returns the first error met while connecting, authenticating or
// transmitting the message.
func sendMissingLogoEmail(teamName string, teamID string) error {
	// Upper bound for the whole SMTP conversation once the TLS connection is
	// up. Without it a stalled server would block the caller indefinitely.
	// Note that tls.Dial itself is not covered by this deadline.
	const smtpExchangeTimeout = 30 * time.Second

	host := strings.TrimSpace(os.Getenv("SMTP_HOST"))
	if host == "" {
		host = "smtp.purelymail.com"
	}
	port := strings.TrimSpace(os.Getenv("SMTP_PORT"))
	if port == "" {
		port = "465"
	}
	user := strings.TrimSpace(os.Getenv("SMTP_USER"))
	pass := os.Getenv("SMTP_PASS")
	if user == "" || pass == "" {
		// Mail is not configured; treat notification as a no-op.
		return nil
	}
	to := strings.TrimSpace(os.Getenv("MISSING_LOGO_NOTIFY_TO"))
	if to == "" {
		to = "info@tdvorak.dev"
	}
	addr := host + ":" + port

	now := time.Now()
	subject := "Missing local logo for club"
	body := fmt.Sprintf("A club logo is missing in logoapi.\n\nName: %s\nID: %s\nTime: %s\n", teamName, teamID, now.Format(time.RFC3339))

	var msg bytes.Buffer
	msg.WriteString("From: " + user + "\r\n")
	msg.WriteString("To: " + to + "\r\n")
	msg.WriteString("Subject: " + subject + "\r\n")
	// RFC 5322 requires an origination date on every message.
	msg.WriteString("Date: " + now.Format(time.RFC1123Z) + "\r\n")
	msg.WriteString("MIME-Version: 1.0\r\n")
	msg.WriteString("Content-Type: text/plain; charset=utf-8\r\n")
	msg.WriteString("\r\n")
	msg.WriteString(body)

	tlsConfig := &tls.Config{ServerName: host}
	conn, err := tls.Dial("tcp", addr, tlsConfig)
	if err != nil {
		return err
	}
	defer conn.Close()
	if err := conn.SetDeadline(time.Now().Add(smtpExchangeTimeout)); err != nil {
		return err
	}

	c, err := smtp.NewClient(conn, host)
	if err != nil {
		return err
	}
	defer c.Close()

	if err := c.Auth(smtp.PlainAuth("", user, pass, host)); err != nil {
		return err
	}
	if err := c.Mail(user); err != nil {
		return err
	}
	if err := c.Rcpt(to); err != nil {
		return err
	}
	w, err := c.Data()
	if err != nil {
		return err
	}
	if _, err := w.Write(msg.Bytes()); err != nil {
		return err
	}
	if err := w.Close(); err != nil {
		return err
	}
	return c.Quit()
}
func getLogo(teamName string, teamID string) string { func getLogo(teamName string, teamID string) string {
placeholder := "https://www.fotbal.cz/dist/img/logo-club-empty.svg" placeholder := "https://www.fotbal.cz/dist/img/logo-club-empty.svg"
name := strings.ToLower(strings.TrimSpace(teamName)) name := strings.ToLower(strings.TrimSpace(teamName))
@@ -680,7 +1188,6 @@ func getLogo(teamName string, teamID string) string {
if logo := getLogoFromLogoAPI(teamName, teamID); logo != "" { if logo := getLogoFromLogoAPI(teamName, teamID); logo != "" {
return logo return logo
} }
notifyMissingLogo(teamName, teamID)
// If we have a team ID, construct the official logo URL directly. // If we have a team ID, construct the official logo URL directly.
// This avoids wrong matches for duplicate names (e.g., multiple "Ořechov"). // This avoids wrong matches for duplicate names (e.g., multiple "Ořechov").
if tid := strings.TrimSpace(teamID); tid != "" { if tid := strings.TrimSpace(teamID); tid != "" {
@@ -736,26 +1243,18 @@ func getClubSearch(w http.ResponseWriter, r *http.Request) {
vals.Set("q", q) vals.Set("q", q)
searchURL := "https://www.fotbal.cz/club/hledej?" + vals.Encode() searchURL := "https://www.fotbal.cz/club/hledej?" + vals.Encode()
req, err := http.NewRequest("GET", searchURL, nil) fetchSearchPage := func(url string) ([]byte, error) {
if err != nil { return fetchPageWithFallbackOptions(url, fetchOptions{
http.Error(w, fmt.Sprintf("Error creating request: %v", err), http.StatusInternalServerError) Referer: "https://www.fotbal.cz/club/hledej",
return })
} }
// Set headers to mimic a browser; fotbal.cz may 404 otherwise
req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0 Safari/537.36") // Try direct HTTP request first
req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8") body, err := fetchSearchPage(searchURL)
req.Header.Set("Accept-Language", "cs-CZ,cs;q=0.9,en;q=0.8")
req.Header.Set("Referer", "https://www.fotbal.cz/club/hledej")
client := &http.Client{}
resp, err := client.Do(req)
if err != nil { if err != nil {
http.Error(w, fmt.Sprintf("Error fetching search page: %v", err), http.StatusInternalServerError) log.Printf("Direct search request failed for %s: %v", searchURL, err)
return
} // Retry with quoted query for short tokens
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
// Retry once. If query has very short tokens, try quoting the whole query.
resp.Body.Close()
searchURL2 := searchURL searchURL2 := searchURL
tokens := strings.Fields(q) tokens := strings.Fields(q)
for _, t := range tokens { for _, t := range tokens {
@@ -766,18 +1265,11 @@ func getClubSearch(w http.ResponseWriter, r *http.Request) {
break break
} }
} }
req2, _ := http.NewRequest("GET", searchURL2, nil)
req2.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0 Safari/537.36") body, err = fetchSearchPage(searchURL2)
req2.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8") if err != nil {
req2.Header.Set("Accept-Language", "en-US,en;q=0.9") log.Printf("Retried search request failed for %s: %v", searchURL2, err)
resp2, err2 := client.Do(req2) // Return empty results instead of error
if err2 != nil {
http.Error(w, fmt.Sprintf("Error fetching (retry): %v", err2), http.StatusBadGateway)
return
}
defer resp2.Body.Close()
if resp2.StatusCode != http.StatusOK {
// Treat as no results instead of surfacing error to client
w.Header().Set("Content-Type", "application/json") w.Header().Set("Content-Type", "application/json")
json.NewEncoder(w).Encode(map[string]any{ json.NewEncoder(w).Encode(map[string]any{
"query": q, "query": q,
@@ -786,11 +1278,9 @@ func getClubSearch(w http.ResponseWriter, r *http.Request) {
}) })
return return
} }
// replace resp with resp2 for downstream parsing
resp = resp2
} }
doc, err := goquery.NewDocumentFromReader(resp.Body) doc, err := goquery.NewDocumentFromReader(bytes.NewReader(body))
if err != nil { if err != nil {
http.Error(w, fmt.Sprintf("Error parsing HTML: %v", err), http.StatusInternalServerError) http.Error(w, fmt.Sprintf("Error parsing HTML: %v", err), http.StatusInternalServerError)
return return
@@ -887,19 +1377,13 @@ func getClubTables(w http.ResponseWriter, r *http.Request) {
} }
url := fmt.Sprintf("%s/%s", baseURL, clubID) url := fmt.Sprintf("%s/%s", baseURL, clubID)
resp, err := http.Get(url) body, err := fetchPageWithFallback(url)
if err != nil { if err != nil {
http.Error(w, fmt.Sprintf("Error fetching club data: %v", err), http.StatusInternalServerError) http.Error(w, fmt.Sprintf("Error fetching club data: %v", err), http.StatusInternalServerError)
return return
} }
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK { doc, err := goquery.NewDocumentFromReader(bytes.NewReader(body))
http.Error(w, fmt.Sprintf("Error: received status code %d", resp.StatusCode), resp.StatusCode)
return
}
doc, err := goquery.NewDocumentFromReader(resp.Body)
if err != nil { if err != nil {
http.Error(w, fmt.Sprintf("Error parsing HTML: %v", err), http.StatusInternalServerError) http.Error(w, fmt.Sprintf("Error parsing HTML: %v", err), http.StatusInternalServerError)
return return
@@ -1067,17 +1551,12 @@ func getClubInfo(w http.ResponseWriter, r *http.Request) {
} }
url := fmt.Sprintf("%s/%s", baseURL, clubID) url := fmt.Sprintf("%s/%s", baseURL, clubID)
resp, err := http.Get(url) body, err := fetchPageWithFallback(url)
if err != nil { if err != nil {
http.Error(w, fmt.Sprintf("Error fetching club data: %v", err), http.StatusInternalServerError) http.Error(w, fmt.Sprintf("Error fetching club data: %v", err), http.StatusInternalServerError)
return return
} }
defer resp.Body.Close() doc, err := goquery.NewDocumentFromReader(bytes.NewReader(body))
if resp.StatusCode != http.StatusOK {
http.Error(w, fmt.Sprintf("Error: received status code %d", resp.StatusCode), resp.StatusCode)
return
}
doc, err := goquery.NewDocumentFromReader(resp.Body)
if err != nil { if err != nil {
http.Error(w, fmt.Sprintf("Error parsing HTML: %v", err), http.StatusInternalServerError) http.Error(w, fmt.Sprintf("Error parsing HTML: %v", err), http.StatusInternalServerError)
return return
+1
View File
@@ -0,0 +1 @@
scrapling[fetchers]==0.4.1
+79
View File
@@ -0,0 +1,79 @@
#!/usr/bin/env python3
import argparse
import contextlib
import logging
import sys
def response_body_bytes(response) -> bytes:
    """Return the payload of ``response`` as ``bytes``.

    Sources are tried in this order:

    1. ``response.body`` when it is ``bytes``/``bytearray`` (copied to ``bytes``)
       or ``str`` (UTF-8 encoded);
    2. ``response.text`` when it is a ``str`` (UTF-8 encoded);
    3. the UTF-8 encoding of ``str(response)`` as a last resort.
    """

    def _utf8_or_none(value):
        # Only real strings are encoded; anything else means "try the next source".
        return value.encode("utf-8") if isinstance(value, str) else None

    payload = getattr(response, "body", None)
    if isinstance(payload, (bytes, bytearray)):
        return bytes(payload)

    encoded = _utf8_or_none(payload)
    if encoded is None:
        encoded = _utf8_or_none(getattr(response, "text", None))
    if encoded is None:
        encoded = str(response).encode("utf-8")
    return encoded
def main() -> int:
    """Fetch one URL with Scrapling's ``StealthyFetcher`` and write the body to stdout.

    Command-line arguments:
        --url         URL to fetch (required).
        --referer     Optional value sent as the ``Referer`` header.
        --timeout-ms  Passed to the fetcher as ``timeout`` (default 45000).
        --wait-ms     Passed to the fetcher as ``wait`` (default 1000).

    Returns the process exit status:
        0  the body was written to stdout (also when the reader closed the
           pipe before everything was written);
        1  the fetch raised, the response status was >= 400, or the body
           was empty;
        2  the ``scrapling`` package could not be imported.

    All diagnostics go to stderr so that stdout carries only the page body.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--url", required=True)
    parser.add_argument("--referer", default="")
    parser.add_argument("--timeout-ms", type=int, default=45000)
    parser.add_argument("--wait-ms", type=int, default=1000)
    args = parser.parse_args()

    # Imported lazily so a missing dependency yields a distinct exit status
    # instead of an import-time traceback.
    try:
        from scrapling.fetchers import StealthyFetcher
    except Exception as exc:
        print(f"Scrapling import failed: {exc}", file=sys.stderr)
        return 2

    logging.getLogger().setLevel(logging.ERROR)

    extra_headers = {}
    if args.referer:
        extra_headers["Referer"] = args.referer

    fetch_kwargs = {
        "headless": True,
        "network_idle": True,
        "google_search": False,
        "solve_cloudflare": True,
        "timeout": args.timeout_ms,
        "wait": args.wait_ms,
    }
    if extra_headers:
        fetch_kwargs["extra_headers"] = extra_headers

    try:
        # Anything the fetcher prints must not get mixed into the body on stdout.
        with contextlib.redirect_stdout(sys.stderr):
            response = StealthyFetcher.fetch(args.url, **fetch_kwargs)
    except Exception as exc:
        print(f"Scrapling fetch failed: {exc}", file=sys.stderr)
        return 1

    status = getattr(response, "status", None)
    if isinstance(status, int) and status >= 400:
        print(f"Scrapling returned HTTP {status}", file=sys.stderr)
        return 1

    body = response_body_bytes(response)
    if not body:
        print("Scrapling returned an empty body", file=sys.stderr)
        return 1

    try:
        sys.stdout.buffer.write(body)
        # Flush here so that a closed pipe is reported inside this handler.
        # Otherwise a small body stays buffered and the error only shows up at
        # interpreter shutdown, bypassing the handler below.
        sys.stdout.buffer.flush()
    except BrokenPipeError:
        # The reader went away. Point stdout at devnull so the interpreter's
        # final flush cannot raise a second BrokenPipeError on exit.
        import os

        try:
            os.dup2(os.open(os.devnull, os.O_WRONLY), sys.stdout.fileno())
        except (OSError, ValueError, AttributeError):
            # Best effort only: stdout may not be backed by a real descriptor.
            pass
        return 0
    return 0
# Script entry point: the value returned by main() becomes the exit status.
if __name__ == "__main__":
    sys.exit(main())