diff --git a/go.mod b/go.mod index 94a282e51c322449161e93d896c6645edc3b1789..a022a73e17cc8628fac0681d60e902dab534e47f 100644 --- a/go.mod +++ b/go.mod @@ -3,12 +3,11 @@ module git.autistici.org/ale/crawl go 1.15 require ( - github.com/PuerkitoBio/goquery v1.8.0 + github.com/PuerkitoBio/goquery v1.8.1 github.com/PuerkitoBio/purell v1.2.0 github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 // indirect github.com/google/go-cmp v0.5.6 github.com/google/uuid v1.1.1 // indirect github.com/pborman/uuid v1.2.1 github.com/syndtr/goleveldb v1.0.0 - golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2 // indirect ) diff --git a/go.sum b/go.sum index 773a3dd94292e95d99e8cff387484cd4bb9416a5..04e06019f56cfc3c44a479d0f3ba0dcae0f3b5c7 100644 --- a/go.sum +++ b/go.sum @@ -4,6 +4,8 @@ github.com/PuerkitoBio/goquery v1.7.1 h1:oE+T06D+1T7LNrn91B4aERsRIeCLJ/oPSa6xB9F github.com/PuerkitoBio/goquery v1.7.1/go.mod h1:XY0pP4kfraEmmV1O7Uf6XyjoslwsneBbgeDjLYuN8xY= github.com/PuerkitoBio/goquery v1.8.0 h1:PJTF7AmFCFKk1N6V6jmKfrNH9tV5pNE6lZMkG0gta/U= github.com/PuerkitoBio/goquery v1.8.0/go.mod h1:ypIiRMtY7COPGk+I/YbZLbxsxn9g5ejnI2HSMtkjZvI= +github.com/PuerkitoBio/goquery v1.8.1 h1:uQxhNlArOIdbrH1tr0UXwdVFgDcZDrZVdcpygAcwmWM= +github.com/PuerkitoBio/goquery v1.8.1/go.mod h1:Q8ICL1kNUJ2sXGoAhPGUdYDJvgQgHzJsnnd3H7Ho5jQ= github.com/PuerkitoBio/purell v0.0.0-20180310210909-975f53781597 h1:1H3FyRw7YsqIty9WHPOVEGJaFJ1sfGVZ3PPDUw3ob2w= github.com/PuerkitoBio/purell v0.0.0-20180310210909-975f53781597/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0= github.com/PuerkitoBio/purell v0.1.0 h1:N8Bcc53nei5frgNYgAKo93qMUVdU5LUGHCBv8efdVcM= @@ -50,20 +52,30 @@ github.com/syndtr/goleveldb v0.0.0-20190923125748-758128399b1d h1:OgkXbz/O0zsJoa github.com/syndtr/goleveldb v0.0.0-20190923125748-758128399b1d/go.mod h1:9OrXJhf154huy1nPWmuSrkgjPUtUNhA+Zmy+6AESzuA= github.com/syndtr/goleveldb v1.0.0 h1:fBdIW9lB4Iz0n9khmH8w27SJ3QEJ7+IgjPEwGSZiFdE= github.com/syndtr/goleveldb v1.0.0/go.mod h1:ZVVdQEZoIme9iO1Ch2Jdy24qqXrMMOU6lpPAyBWyWuQ= +github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= +golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20190926025831-c00fd9afed17 h1:qPnAdmjNA41t3QBTx2mFGf/SD1IoslhYu7AmdsVzCcs= golang.org/x/net v0.0.0-20190926025831-c00fd9afed17/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20210614182718-04defd469f4e h1:XpT3nA5TvE525Ne3hInMh6+GETgn27Zfm9dxsThnX2Q= golang.org/x/net v0.0.0-20210614182718-04defd469f4e/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20210916014120-12bc252f5db8 h1:/6y1LfuqNuQdHAm0jjtPtgRcxIxjVZgm5OTu8/QhZvk= golang.org/x/net v0.0.0-20210916014120-12bc252f5db8/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20220617184016-355a448f1bc9 h1:Yqz/iviulwKwAREEeUd3nbBFn0XuyJqkoft2IlrvOhc= golang.org/x/net v0.0.0-20220617184016-355a448f1bc9/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= +golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= +golang.org/x/net v0.7.0 h1:rJrUqqhjsgNp7KqAIc25s9pZnjU7TUcSY7HcVZjdn1g= +golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f h1:wMNYb4v58l5UBM7MYRLPG6ZhfOqbKu7X5eyFl8ZhKvA= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a h1:1BGLXjeY4akVXGgbC9HugT3Jv3hCI0z56oJR5vAMgBU= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= @@ -71,14 +83,23 @@ golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= +golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7 h1:olpwvP2KacW1ZWvsR7uQhoyTYvKAupfQrRGBFM352Gk= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= +golang.org/x/text v0.7.0 h1:4BRB4x83lYWy72KwLD/qYDuTu7q9PjSagHvijDw7cLo= +golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= diff --git a/vendor/github.com/PuerkitoBio/goquery/README.md b/vendor/github.com/PuerkitoBio/goquery/README.md index 775223401dd38af858ef7184ed4b0994cce0dab4..582ccac90fec189d9177b0e53111d8425b7c5a79 100644 --- a/vendor/github.com/PuerkitoBio/goquery/README.md +++ b/vendor/github.com/PuerkitoBio/goquery/README.md @@ -40,6 +40,7 @@ Please note that because of the net/html dependency, goquery requires Go1.1+ and **Note that goquery's API is now stable, and will not break.** +* **2023-02-18 (v1.8.1)** : Update `go.mod` dependencies, update CI workflow. * **2021-10-25 (v1.8.0)** : Add `Render` function to render a `Selection` to an `io.Writer` (thanks [@anthonygedeon](https://github.com/anthonygedeon)). * **2021-07-11 (v1.7.1)** : Update go.mod dependencies and add dependabot config (thanks [@jauderho](https://github.com/jauderho)). * **2021-06-14 (v1.7.0)** : Add `Single` and `SingleMatcher` functions to optimize first-match selection (thanks [@gdollardollar](https://github.com/gdollardollar)). @@ -154,6 +155,8 @@ func main() { - [Geziyor](https://github.com/geziyor/geziyor), a fast web crawling & scraping framework for Go. Supports JS rendering. - [Pagser](https://github.com/foolin/pagser), a simple, easy, extensible, configurable HTML parser to struct based on goquery and struct tags. - [stitcherd](https://github.com/vhodges/stitcherd), A server for doing server side includes using css selectors and DOM updates. +- [goskyr](https://github.com/jakopako/goskyr), an easily configurable command-line scraper written in Go. +- [goGetJS](https://github.com/davemolk/goGetJS), a tool for extracting, searching, and saving JavaScript files (with optional headless browser). ## Support diff --git a/vendor/github.com/PuerkitoBio/goquery/go.mod b/vendor/github.com/PuerkitoBio/goquery/go.mod index 4b5a30963a39fdcf6e9afd73fbf298801340fa81..4e18f4454158ffffc8b23e7e6f14f14a889fbf39 100644 --- a/vendor/github.com/PuerkitoBio/goquery/go.mod +++ b/vendor/github.com/PuerkitoBio/goquery/go.mod @@ -2,7 +2,7 @@ module github.com/PuerkitoBio/goquery require ( github.com/andybalholm/cascadia v1.3.1 - golang.org/x/net v0.0.0-20210916014120-12bc252f5db8 + golang.org/x/net v0.7.0 ) go 1.13 diff --git a/vendor/github.com/PuerkitoBio/goquery/go.sum b/vendor/github.com/PuerkitoBio/goquery/go.sum index 167f12d2db63d97a0dbb16fead68d9478eea6378..1b5d5bb9844fe803ed96895e45d388a770b42598 100644 --- a/vendor/github.com/PuerkitoBio/goquery/go.sum +++ b/vendor/github.com/PuerkitoBio/goquery/go.sum @@ -1,9 +1,33 @@ github.com/andybalholm/cascadia v1.3.1 h1:nhxRkql1kdYCc8Snf7D5/D3spOX+dBgjA6u8x004T2c= github.com/andybalholm/cascadia v1.3.1/go.mod h1:R4bJ1UQfqADjvDa4P6HZHLh/3OxWWEqc0Sk8XGwHqvA= -golang.org/x/net v0.0.0-20210916014120-12bc252f5db8 h1:/6y1LfuqNuQdHAm0jjtPtgRcxIxjVZgm5OTu8/QhZvk= +github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= +golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20210916014120-12bc252f5db8/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= +golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= +golang.org/x/net v0.7.0 h1:rJrUqqhjsgNp7KqAIc25s9pZnjU7TUcSY7HcVZjdn1g= +golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= +golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= +golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= diff --git a/vendor/github.com/PuerkitoBio/goquery/utilities.go b/vendor/github.com/PuerkitoBio/goquery/utilities.go index 6d243cdd63ad55f3fbb728d864fe9e6a5eaf8a02..ecd3453f7c8b0589f10a6a0a0282dddfb46a981f 100644 --- a/vendor/github.com/PuerkitoBio/goquery/utilities.go +++ b/vendor/github.com/PuerkitoBio/goquery/utilities.go @@ -58,8 +58,8 @@ func nodeName(node *html.Node) string { } } -// Render renders the html of the first element from selector and writes it to -// the writer. It behaves the same as OuterHtml but writes to w instead of +// Render renders the HTML of the first item in the selection and writes it to +// the writer. It behaves the same as OuterHtml but writes to w instead of // returning the string. func Render(w io.Writer, s *Selection) error { if s.Length() == 0 { @@ -73,7 +73,7 @@ func Render(w io.Writer, s *Selection) error { // the selection - that is, the HTML including the first element's // tag and attributes. // -// Unlike InnerHtml, this is a function and not a method on the Selection, +// Unlike Html, this is a function and not a method on the Selection, // because this is not a jQuery method (in javascript-land, this is // a property provided by the DOM). func OuterHtml(s *Selection) (string, error) { diff --git a/vendor/golang.org/x/net/AUTHORS b/vendor/golang.org/x/net/AUTHORS deleted file mode 100644 index 15167cd746c560e5b3d3b233a169aa64d3e9101e..0000000000000000000000000000000000000000 --- a/vendor/golang.org/x/net/AUTHORS +++ /dev/null @@ -1,3 +0,0 @@ -# This source code refers to The Go Authors for copyright purposes. -# The master list of authors is in the main Go distribution, -# visible at http://tip.golang.org/AUTHORS. diff --git a/vendor/golang.org/x/net/CONTRIBUTORS b/vendor/golang.org/x/net/CONTRIBUTORS deleted file mode 100644 index 1c4577e9680611383f46044d17fa343a96997c3c..0000000000000000000000000000000000000000 --- a/vendor/golang.org/x/net/CONTRIBUTORS +++ /dev/null @@ -1,3 +0,0 @@ -# This source code was written by the Go contributors. -# The master list of contributors is in the main Go distribution, -# visible at http://tip.golang.org/CONTRIBUTORS. diff --git a/vendor/golang.org/x/net/html/parse.go b/vendor/golang.org/x/net/html/parse.go index 038941d70852266a6e851493578fa528b93fee25..46a89eda6c19954b7fccde101a6c87bd225ded83 100644 --- a/vendor/golang.org/x/net/html/parse.go +++ b/vendor/golang.org/x/net/html/parse.go @@ -184,7 +184,7 @@ func (p *parser) clearStackToContext(s scope) { } } -// parseGenericRawTextElements implements the generic raw text element parsing +// parseGenericRawTextElement implements the generic raw text element parsing // algorithm defined in 12.2.6.2. // https://html.spec.whatwg.org/multipage/parsing.html#parsing-elements-that-contain-only-text // TODO: Since both RAWTEXT and RCDATA states are treated as tokenizer's part @@ -734,7 +734,7 @@ func inHeadIM(p *parser) bool { return false } -// 12.2.6.4.5. +// Section 12.2.6.4.5. func inHeadNoscriptIM(p *parser) bool { switch p.tok.Type { case DoctypeToken: diff --git a/vendor/golang.org/x/net/html/render.go b/vendor/golang.org/x/net/html/render.go index b46d81ca6d2df9d946fcd0d8ca2209f5e8a47774..497e132042b01fa0eb6062c7e8d3faf3478664d3 100644 --- a/vendor/golang.org/x/net/html/render.go +++ b/vendor/golang.org/x/net/html/render.go @@ -85,7 +85,7 @@ func render1(w writer, n *Node) error { if _, err := w.WriteString("<!--"); err != nil { return err } - if _, err := w.WriteString(n.Data); err != nil { + if err := escape(w, n.Data); err != nil { return err } if _, err := w.WriteString("-->"); err != nil { @@ -96,7 +96,7 @@ func render1(w writer, n *Node) error { if _, err := w.WriteString("<!DOCTYPE "); err != nil { return err } - if _, err := w.WriteString(n.Data); err != nil { + if err := escape(w, n.Data); err != nil { return err } if n.Attr != nil { diff --git a/vendor/golang.org/x/net/html/token.go b/vendor/golang.org/x/net/html/token.go index 877709f991b53bdefeb172a8de24bb7707ec2cf7..50f7c6aac8dea194078dc52559ddfdcd67dbebf7 100644 --- a/vendor/golang.org/x/net/html/token.go +++ b/vendor/golang.org/x/net/html/token.go @@ -110,9 +110,9 @@ func (t Token) String() string { case SelfClosingTagToken: return "<" + t.tagString() + "/>" case CommentToken: - return "<!--" + t.Data + "-->" + return "<!--" + EscapeString(t.Data) + "-->" case DoctypeToken: - return "<!DOCTYPE " + t.Data + ">" + return "<!DOCTYPE " + EscapeString(t.Data) + ">" } return "Invalid(" + strconv.Itoa(int(t.Type)) + ")" } @@ -598,6 +598,11 @@ scriptDataDoubleEscapeEnd: // readComment reads the next comment token starting with "<!--". The opening // "<!--" has already been consumed. func (z *Tokenizer) readComment() { + // When modifying this function, consider manually increasing the suffixLen + // constant in func TestComments, from 6 to e.g. 9 or more. That increase + // should only be temporary, not committed, as it exponentially affects the + // test running time. + z.data.start = z.raw.end defer func() { if z.data.end < z.data.start { @@ -605,14 +610,13 @@ func (z *Tokenizer) readComment() { z.data.end = z.data.start } }() - for dashCount := 2; ; { + + var dashCount int + beginning := true + for { c := z.readByte() if z.err != nil { - // Ignore up to two dashes at EOF. - if dashCount > 2 { - dashCount = 2 - } - z.data.end = z.raw.end - dashCount + z.data.end = z.calculateAbruptCommentDataEnd() return } switch c { @@ -620,7 +624,7 @@ func (z *Tokenizer) readComment() { dashCount++ continue case '>': - if dashCount >= 2 { + if dashCount >= 2 || beginning { z.data.end = z.raw.end - len("-->") return } @@ -628,17 +632,50 @@ func (z *Tokenizer) readComment() { if dashCount >= 2 { c = z.readByte() if z.err != nil { - z.data.end = z.raw.end + z.data.end = z.calculateAbruptCommentDataEnd() return - } - if c == '>' { + } else if c == '>' { z.data.end = z.raw.end - len("--!>") return + } else if c == '-' { + dashCount = 1 + beginning = false + continue } } } dashCount = 0 + beginning = false + } +} + +func (z *Tokenizer) calculateAbruptCommentDataEnd() int { + raw := z.Raw() + const prefixLen = len("<!--") + if len(raw) >= prefixLen { + raw = raw[prefixLen:] + if hasSuffix(raw, "--!") { + return z.raw.end - 3 + } else if hasSuffix(raw, "--") { + return z.raw.end - 2 + } else if hasSuffix(raw, "-") { + return z.raw.end - 1 + } + } + return z.raw.end +} + +func hasSuffix(b []byte, suffix string) bool { + if len(b) < len(suffix) { + return false } + b = b[len(b)-len(suffix):] + for i := range b { + if b[i] != suffix[i] { + return false + } + } + return true } // readUntilCloseAngle reads until the next ">". diff --git a/vendor/golang.org/x/text/AUTHORS b/vendor/golang.org/x/text/AUTHORS deleted file mode 100644 index 15167cd746c560e5b3d3b233a169aa64d3e9101e..0000000000000000000000000000000000000000 --- a/vendor/golang.org/x/text/AUTHORS +++ /dev/null @@ -1,3 +0,0 @@ -# This source code refers to The Go Authors for copyright purposes. -# The master list of authors is in the main Go distribution, -# visible at http://tip.golang.org/AUTHORS. diff --git a/vendor/golang.org/x/text/CONTRIBUTORS b/vendor/golang.org/x/text/CONTRIBUTORS deleted file mode 100644 index 1c4577e9680611383f46044d17fa343a96997c3c..0000000000000000000000000000000000000000 --- a/vendor/golang.org/x/text/CONTRIBUTORS +++ /dev/null @@ -1,3 +0,0 @@ -# This source code was written by the Go contributors. -# The master list of contributors is in the main Go distribution, -# visible at http://tip.golang.org/CONTRIBUTORS. diff --git a/vendor/golang.org/x/text/unicode/bidi/core.go b/vendor/golang.org/x/text/unicode/bidi/core.go index e4c0811016c2acd3f54b0ec93d59f84f8cbba190..9d2ae547b5ed4d91ba20783bd00eec10fc2d0247 100644 --- a/vendor/golang.org/x/text/unicode/bidi/core.go +++ b/vendor/golang.org/x/text/unicode/bidi/core.go @@ -193,14 +193,14 @@ func (p *paragraph) run() { // // At the end of this function: // -// - The member variable matchingPDI is set to point to the index of the -// matching PDI character for each isolate initiator character. If there is -// no matching PDI, it is set to the length of the input text. For other -// characters, it is set to -1. -// - The member variable matchingIsolateInitiator is set to point to the -// index of the matching isolate initiator character for each PDI character. -// If there is no matching isolate initiator, or the character is not a PDI, -// it is set to -1. +// - The member variable matchingPDI is set to point to the index of the +// matching PDI character for each isolate initiator character. If there is +// no matching PDI, it is set to the length of the input text. For other +// characters, it is set to -1. +// - The member variable matchingIsolateInitiator is set to point to the +// index of the matching isolate initiator character for each PDI character. +// If there is no matching isolate initiator, or the character is not a PDI, +// it is set to -1. func (p *paragraph) determineMatchingIsolates() { p.matchingPDI = make([]int, p.Len()) p.matchingIsolateInitiator = make([]int, p.Len()) @@ -435,7 +435,7 @@ func maxLevel(a, b level) level { } // Rule X10, second bullet: Determine the start-of-sequence (sos) and end-of-sequence (eos) types, -// either L or R, for each isolating run sequence. +// either L or R, for each isolating run sequence. func (p *paragraph) isolatingRunSequence(indexes []int) *isolatingRunSequence { length := len(indexes) types := make([]Class, length) @@ -495,9 +495,9 @@ func (s *isolatingRunSequence) resolveWeakTypes() { if t == NSM { s.types[i] = precedingCharacterType } else { - if t.in(LRI, RLI, FSI, PDI) { - precedingCharacterType = ON - } + // if t.in(LRI, RLI, FSI, PDI) { + // precedingCharacterType = ON + // } precedingCharacterType = t } } @@ -905,7 +905,7 @@ func (p *paragraph) getLevels(linebreaks []int) []level { // Lines are concatenated from left to right. So for example, the fifth // character from the left on the third line is // -// getReordering(linebreaks)[linebreaks[1] + 4] +// getReordering(linebreaks)[linebreaks[1] + 4] // // (linebreaks[1] is the position after the last character of the second // line, which is also the index of the first character on the third line, diff --git a/vendor/golang.org/x/text/unicode/bidi/trieval.go b/vendor/golang.org/x/text/unicode/bidi/trieval.go index 4c459c4b72e0ebc286e0426f83b68b4c4bc7478a..6a796e2214c693c9252d4c30fc61c752f9cbef8d 100644 --- a/vendor/golang.org/x/text/unicode/bidi/trieval.go +++ b/vendor/golang.org/x/text/unicode/bidi/trieval.go @@ -37,18 +37,6 @@ const ( unknownClass = ^Class(0) ) -var controlToClass = map[rune]Class{ - 0x202D: LRO, // LeftToRightOverride, - 0x202E: RLO, // RightToLeftOverride, - 0x202A: LRE, // LeftToRightEmbedding, - 0x202B: RLE, // RightToLeftEmbedding, - 0x202C: PDF, // PopDirectionalFormat, - 0x2066: LRI, // LeftToRightIsolate, - 0x2067: RLI, // RightToLeftIsolate, - 0x2068: FSI, // FirstStrongIsolate, - 0x2069: PDI, // PopDirectionalIsolate, -} - // A trie entry has the following bits: // 7..5 XOR mask for brackets // 4 1: Bracket open, 0: Bracket close diff --git a/vendor/golang.org/x/text/unicode/norm/forminfo.go b/vendor/golang.org/x/text/unicode/norm/forminfo.go index 526c7033ac464cc1fe840feac6bb84d81917514c..d69ccb4f976116769948e4127a6865bf6353a468 100644 --- a/vendor/golang.org/x/text/unicode/norm/forminfo.go +++ b/vendor/golang.org/x/text/unicode/norm/forminfo.go @@ -110,10 +110,11 @@ func (p Properties) BoundaryAfter() bool { } // We pack quick check data in 4 bits: -// 5: Combines forward (0 == false, 1 == true) -// 4..3: NFC_QC Yes(00), No (10), or Maybe (11) -// 2: NFD_QC Yes (0) or No (1). No also means there is a decomposition. -// 1..0: Number of trailing non-starters. +// +// 5: Combines forward (0 == false, 1 == true) +// 4..3: NFC_QC Yes(00), No (10), or Maybe (11) +// 2: NFD_QC Yes (0) or No (1). No also means there is a decomposition. +// 1..0: Number of trailing non-starters. // // When all 4 bits are zero, the character is inert, meaning it is never // influenced by normalization. diff --git a/vendor/golang.org/x/text/unicode/norm/normalize.go b/vendor/golang.org/x/text/unicode/norm/normalize.go index 95efcf26e81d7ab5608a42dddaa6b331200c8fbd..4747ad07a839c12bf7d40fc68d450c54e4e1afc2 100644 --- a/vendor/golang.org/x/text/unicode/norm/normalize.go +++ b/vendor/golang.org/x/text/unicode/norm/normalize.go @@ -18,16 +18,17 @@ import ( // A Form denotes a canonical representation of Unicode code points. // The Unicode-defined normalization and equivalence forms are: // -// NFC Unicode Normalization Form C -// NFD Unicode Normalization Form D -// NFKC Unicode Normalization Form KC -// NFKD Unicode Normalization Form KD +// NFC Unicode Normalization Form C +// NFD Unicode Normalization Form D +// NFKC Unicode Normalization Form KC +// NFKD Unicode Normalization Form KD // // For a Form f, this documentation uses the notation f(x) to mean // the bytes or string x converted to the given form. // A position n in x is called a boundary if conversion to the form can // proceed independently on both sides: -// f(x) == append(f(x[0:n]), f(x[n:])...) +// +// f(x) == append(f(x[0:n]), f(x[n:])...) // // References: https://unicode.org/reports/tr15/ and // https://unicode.org/notes/tn5/. diff --git a/vendor/golang.org/x/text/unicode/norm/tables13.0.0.go b/vendor/golang.org/x/text/unicode/norm/tables13.0.0.go index 96a130d30e9e2085a6ec6fbeb99c699b31070d50..9115ef257e83c28924427f08b7c4c3c035c0156f 100644 --- a/vendor/golang.org/x/text/unicode/norm/tables13.0.0.go +++ b/vendor/golang.org/x/text/unicode/norm/tables13.0.0.go @@ -7315,7 +7315,7 @@ const recompMapPacked = "" + "\x00V\x03\x03\x00\x00\x1e|" + // 0x00560303: 0x00001E7C "\x00v\x03\x03\x00\x00\x1e}" + // 0x00760303: 0x00001E7D "\x00V\x03#\x00\x00\x1e~" + // 0x00560323: 0x00001E7E - "\x00v\x03#\x00\x00\x1e\u007f" + // 0x00760323: 0x00001E7F + "\x00v\x03#\x00\x00\x1e\x7f" + // 0x00760323: 0x00001E7F "\x00W\x03\x00\x00\x00\x1e\x80" + // 0x00570300: 0x00001E80 "\x00w\x03\x00\x00\x00\x1e\x81" + // 0x00770300: 0x00001E81 "\x00W\x03\x01\x00\x00\x1e\x82" + // 0x00570301: 0x00001E82 @@ -7342,7 +7342,7 @@ const recompMapPacked = "" + "\x00t\x03\b\x00\x00\x1e\x97" + // 0x00740308: 0x00001E97 "\x00w\x03\n\x00\x00\x1e\x98" + // 0x0077030A: 0x00001E98 "\x00y\x03\n\x00\x00\x1e\x99" + // 0x0079030A: 0x00001E99 - "\x01\u007f\x03\a\x00\x00\x1e\x9b" + // 0x017F0307: 0x00001E9B + "\x01\x7f\x03\a\x00\x00\x1e\x9b" + // 0x017F0307: 0x00001E9B "\x00A\x03#\x00\x00\x1e\xa0" + // 0x00410323: 0x00001EA0 "\x00a\x03#\x00\x00\x1e\xa1" + // 0x00610323: 0x00001EA1 "\x00A\x03\t\x00\x00\x1e\xa2" + // 0x00410309: 0x00001EA2 diff --git a/vendor/golang.org/x/text/width/tables10.0.0.go b/vendor/golang.org/x/text/width/tables10.0.0.go index 186b1d4efac5ab474427370ebd23d237929066f8..cd9d91cafbb885a918f4c93e61d7a14f8fb00fcb 100644 --- a/vendor/golang.org/x/text/width/tables10.0.0.go +++ b/vendor/golang.org/x/text/width/tables10.0.0.go @@ -1146,21 +1146,31 @@ var widthIndex = [1408]uint8{ } // inverseData contains 4-byte entries of the following format: -// <length> <modified UTF-8-encoded rune> <0 padding> +// +// <length> <modified UTF-8-encoded rune> <0 padding> +// // The last byte of the UTF-8-encoded rune is xor-ed with the last byte of the // UTF-8 encoding of the original rune. Mappings often have the following // pattern: -// A -> A (U+FF21 -> U+0041) -// B -> B (U+FF22 -> U+0042) -// ... +// +// A -> A (U+FF21 -> U+0041) +// B -> B (U+FF22 -> U+0042) +// ... +// // By xor-ing the last byte the same entry can be shared by many mappings. This // reduces the total number of distinct entries by about two thirds. // The resulting entry for the aforementioned mappings is -// { 0x01, 0xE0, 0x00, 0x00 } +// +// { 0x01, 0xE0, 0x00, 0x00 } +// // Using this entry to map U+FF21 (UTF-8 [EF BC A1]), we get -// E0 ^ A1 = 41. +// +// E0 ^ A1 = 41. +// // Similarly, for U+FF22 (UTF-8 [EF BC A2]), we get -// E0 ^ A2 = 42. +// +// E0 ^ A2 = 42. +// // Note that because of the xor-ing, the byte sequence stored in the entry is // not valid UTF-8. var inverseData = [150][4]byte{ diff --git a/vendor/golang.org/x/text/width/tables11.0.0.go b/vendor/golang.org/x/text/width/tables11.0.0.go index 990f7622f175572b89d9a6db71b766f327d11eb8..327eaef9b70162f2bc96beb85586312289cf0b86 100644 --- a/vendor/golang.org/x/text/width/tables11.0.0.go +++ b/vendor/golang.org/x/text/width/tables11.0.0.go @@ -1158,21 +1158,31 @@ var widthIndex = [1408]uint8{ } // inverseData contains 4-byte entries of the following format: -// <length> <modified UTF-8-encoded rune> <0 padding> +// +// <length> <modified UTF-8-encoded rune> <0 padding> +// // The last byte of the UTF-8-encoded rune is xor-ed with the last byte of the // UTF-8 encoding of the original rune. Mappings often have the following // pattern: -// A -> A (U+FF21 -> U+0041) -// B -> B (U+FF22 -> U+0042) -// ... +// +// A -> A (U+FF21 -> U+0041) +// B -> B (U+FF22 -> U+0042) +// ... +// // By xor-ing the last byte the same entry can be shared by many mappings. This // reduces the total number of distinct entries by about two thirds. // The resulting entry for the aforementioned mappings is -// { 0x01, 0xE0, 0x00, 0x00 } +// +// { 0x01, 0xE0, 0x00, 0x00 } +// // Using this entry to map U+FF21 (UTF-8 [EF BC A1]), we get -// E0 ^ A1 = 41. +// +// E0 ^ A1 = 41. +// // Similarly, for U+FF22 (UTF-8 [EF BC A2]), we get -// E0 ^ A2 = 42. +// +// E0 ^ A2 = 42. +// // Note that because of the xor-ing, the byte sequence stored in the entry is // not valid UTF-8. var inverseData = [150][4]byte{ diff --git a/vendor/golang.org/x/text/width/tables12.0.0.go b/vendor/golang.org/x/text/width/tables12.0.0.go index 85296297e38c947d5e29d3a2aae1e600ba5cc09f..5c14ade6d9b1c2109a9fa979d54d61b41c1e6a05 100644 --- a/vendor/golang.org/x/text/width/tables12.0.0.go +++ b/vendor/golang.org/x/text/width/tables12.0.0.go @@ -1178,21 +1178,31 @@ var widthIndex = [1408]uint8{ } // inverseData contains 4-byte entries of the following format: -// <length> <modified UTF-8-encoded rune> <0 padding> +// +// <length> <modified UTF-8-encoded rune> <0 padding> +// // The last byte of the UTF-8-encoded rune is xor-ed with the last byte of the // UTF-8 encoding of the original rune. Mappings often have the following // pattern: -// A -> A (U+FF21 -> U+0041) -// B -> B (U+FF22 -> U+0042) -// ... +// +// A -> A (U+FF21 -> U+0041) +// B -> B (U+FF22 -> U+0042) +// ... +// // By xor-ing the last byte the same entry can be shared by many mappings. This // reduces the total number of distinct entries by about two thirds. // The resulting entry for the aforementioned mappings is -// { 0x01, 0xE0, 0x00, 0x00 } +// +// { 0x01, 0xE0, 0x00, 0x00 } +// // Using this entry to map U+FF21 (UTF-8 [EF BC A1]), we get -// E0 ^ A1 = 41. +// +// E0 ^ A1 = 41. +// // Similarly, for U+FF22 (UTF-8 [EF BC A2]), we get -// E0 ^ A2 = 42. +// +// E0 ^ A2 = 42. +// // Note that because of the xor-ing, the byte sequence stored in the entry is // not valid UTF-8. var inverseData = [150][4]byte{ diff --git a/vendor/golang.org/x/text/width/tables13.0.0.go b/vendor/golang.org/x/text/width/tables13.0.0.go index bac3f1aee34131bcef5f8321520f9c5bee080302..ab258e3848c586eab1f70125088316a221124d7f 100644 --- a/vendor/golang.org/x/text/width/tables13.0.0.go +++ b/vendor/golang.org/x/text/width/tables13.0.0.go @@ -1179,21 +1179,31 @@ var widthIndex = [1408]uint8{ } // inverseData contains 4-byte entries of the following format: -// <length> <modified UTF-8-encoded rune> <0 padding> +// +// <length> <modified UTF-8-encoded rune> <0 padding> +// // The last byte of the UTF-8-encoded rune is xor-ed with the last byte of the // UTF-8 encoding of the original rune. Mappings often have the following // pattern: -// A -> A (U+FF21 -> U+0041) -// B -> B (U+FF22 -> U+0042) -// ... +// +// A -> A (U+FF21 -> U+0041) +// B -> B (U+FF22 -> U+0042) +// ... +// // By xor-ing the last byte the same entry can be shared by many mappings. This // reduces the total number of distinct entries by about two thirds. // The resulting entry for the aforementioned mappings is -// { 0x01, 0xE0, 0x00, 0x00 } +// +// { 0x01, 0xE0, 0x00, 0x00 } +// // Using this entry to map U+FF21 (UTF-8 [EF BC A1]), we get -// E0 ^ A1 = 41. +// +// E0 ^ A1 = 41. +// // Similarly, for U+FF22 (UTF-8 [EF BC A2]), we get -// E0 ^ A2 = 42. +// +// E0 ^ A2 = 42. +// // Note that because of the xor-ing, the byte sequence stored in the entry is // not valid UTF-8. var inverseData = [150][4]byte{ diff --git a/vendor/golang.org/x/text/width/tables9.0.0.go b/vendor/golang.org/x/text/width/tables9.0.0.go index b3db84f6f9b6459f51fad53942e0396f83ed9a27..6781f3d960bd360ea6764cc8c1dd5eb7f9a6a1aa 100644 --- a/vendor/golang.org/x/text/width/tables9.0.0.go +++ b/vendor/golang.org/x/text/width/tables9.0.0.go @@ -1114,21 +1114,31 @@ var widthIndex = [1408]uint8{ } // inverseData contains 4-byte entries of the following format: -// <length> <modified UTF-8-encoded rune> <0 padding> +// +// <length> <modified UTF-8-encoded rune> <0 padding> +// // The last byte of the UTF-8-encoded rune is xor-ed with the last byte of the // UTF-8 encoding of the original rune. Mappings often have the following // pattern: -// A -> A (U+FF21 -> U+0041) -// B -> B (U+FF22 -> U+0042) -// ... +// +// A -> A (U+FF21 -> U+0041) +// B -> B (U+FF22 -> U+0042) +// ... +// // By xor-ing the last byte the same entry can be shared by many mappings. This // reduces the total number of distinct entries by about two thirds. // The resulting entry for the aforementioned mappings is -// { 0x01, 0xE0, 0x00, 0x00 } +// +// { 0x01, 0xE0, 0x00, 0x00 } +// // Using this entry to map U+FF21 (UTF-8 [EF BC A1]), we get -// E0 ^ A1 = 41. +// +// E0 ^ A1 = 41. +// // Similarly, for U+FF22 (UTF-8 [EF BC A2]), we get -// E0 ^ A2 = 42. +// +// E0 ^ A2 = 42. +// // Note that because of the xor-ing, the byte sequence stored in the entry is // not valid UTF-8. var inverseData = [150][4]byte{ diff --git a/vendor/modules.txt b/vendor/modules.txt index 93048d7b074537a36a34e27103cb761f1f4cd4fa..55e3cd66c66d660cf334b93d6beb34588a97b1ce 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -1,4 +1,4 @@ -# github.com/PuerkitoBio/goquery v1.8.0 +# github.com/PuerkitoBio/goquery v1.8.1 ## explicit github.com/PuerkitoBio/goquery # github.com/PuerkitoBio/purell v1.2.0 @@ -37,13 +37,11 @@ github.com/syndtr/goleveldb/leveldb/opt github.com/syndtr/goleveldb/leveldb/storage github.com/syndtr/goleveldb/leveldb/table github.com/syndtr/goleveldb/leveldb/util -# golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2 -## explicit -# golang.org/x/net v0.0.0-20220617184016-355a448f1bc9 +# golang.org/x/net v0.7.0 golang.org/x/net/html golang.org/x/net/html/atom golang.org/x/net/idna -# golang.org/x/text v0.3.7 +# golang.org/x/text v0.7.0 golang.org/x/text/secure/bidirule golang.org/x/text/transform golang.org/x/text/unicode/bidi