Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found
Select Git revision
  • better-queue
  • master
2 results

Target

Select target project
  • ale/crawl
1 result
Select Git revision
  • better-queue
  • master
2 results
Show changes
Commits on Source (2)
Showing
with 167 additions and 85 deletions
......@@ -3,12 +3,11 @@ module git.autistici.org/ale/crawl
go 1.15
require (
github.com/PuerkitoBio/goquery v1.8.0
github.com/PuerkitoBio/goquery v1.8.1
github.com/PuerkitoBio/purell v1.2.0
github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 // indirect
github.com/google/go-cmp v0.5.6
github.com/google/uuid v1.1.1 // indirect
github.com/pborman/uuid v1.2.1
github.com/syndtr/goleveldb v1.0.0
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2 // indirect
)
......@@ -4,6 +4,8 @@ github.com/PuerkitoBio/goquery v1.7.1 h1:oE+T06D+1T7LNrn91B4aERsRIeCLJ/oPSa6xB9F
github.com/PuerkitoBio/goquery v1.7.1/go.mod h1:XY0pP4kfraEmmV1O7Uf6XyjoslwsneBbgeDjLYuN8xY=
github.com/PuerkitoBio/goquery v1.8.0 h1:PJTF7AmFCFKk1N6V6jmKfrNH9tV5pNE6lZMkG0gta/U=
github.com/PuerkitoBio/goquery v1.8.0/go.mod h1:ypIiRMtY7COPGk+I/YbZLbxsxn9g5ejnI2HSMtkjZvI=
github.com/PuerkitoBio/goquery v1.8.1 h1:uQxhNlArOIdbrH1tr0UXwdVFgDcZDrZVdcpygAcwmWM=
github.com/PuerkitoBio/goquery v1.8.1/go.mod h1:Q8ICL1kNUJ2sXGoAhPGUdYDJvgQgHzJsnnd3H7Ho5jQ=
github.com/PuerkitoBio/purell v0.0.0-20180310210909-975f53781597 h1:1H3FyRw7YsqIty9WHPOVEGJaFJ1sfGVZ3PPDUw3ob2w=
github.com/PuerkitoBio/purell v0.0.0-20180310210909-975f53781597/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0=
github.com/PuerkitoBio/purell v0.1.0 h1:N8Bcc53nei5frgNYgAKo93qMUVdU5LUGHCBv8efdVcM=
......@@ -50,20 +52,30 @@ github.com/syndtr/goleveldb v0.0.0-20190923125748-758128399b1d h1:OgkXbz/O0zsJoa
github.com/syndtr/goleveldb v0.0.0-20190923125748-758128399b1d/go.mod h1:9OrXJhf154huy1nPWmuSrkgjPUtUNhA+Zmy+6AESzuA=
github.com/syndtr/goleveldb v1.0.0 h1:fBdIW9lB4Iz0n9khmH8w27SJ3QEJ7+IgjPEwGSZiFdE=
github.com/syndtr/goleveldb v1.0.0/go.mod h1:ZVVdQEZoIme9iO1Ch2Jdy24qqXrMMOU6lpPAyBWyWuQ=
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20190926025831-c00fd9afed17 h1:qPnAdmjNA41t3QBTx2mFGf/SD1IoslhYu7AmdsVzCcs=
golang.org/x/net v0.0.0-20190926025831-c00fd9afed17/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/net v0.0.0-20210614182718-04defd469f4e h1:XpT3nA5TvE525Ne3hInMh6+GETgn27Zfm9dxsThnX2Q=
golang.org/x/net v0.0.0-20210614182718-04defd469f4e/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/net v0.0.0-20210916014120-12bc252f5db8 h1:/6y1LfuqNuQdHAm0jjtPtgRcxIxjVZgm5OTu8/QhZvk=
golang.org/x/net v0.0.0-20210916014120-12bc252f5db8/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/net v0.0.0-20220617184016-355a448f1bc9 h1:Yqz/iviulwKwAREEeUd3nbBFn0XuyJqkoft2IlrvOhc=
golang.org/x/net v0.0.0-20220617184016-355a448f1bc9/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
golang.org/x/net v0.7.0 h1:rJrUqqhjsgNp7KqAIc25s9pZnjU7TUcSY7HcVZjdn1g=
golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f h1:wMNYb4v58l5UBM7MYRLPG6ZhfOqbKu7X5eyFl8ZhKvA=
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a h1:1BGLXjeY4akVXGgbC9HugT3Jv3hCI0z56oJR5vAMgBU=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
......@@ -71,14 +83,23 @@ golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7w
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.7 h1:olpwvP2KacW1ZWvsR7uQhoyTYvKAupfQrRGBFM352Gk=
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
golang.org/x/text v0.7.0 h1:4BRB4x83lYWy72KwLD/qYDuTu7q9PjSagHvijDw7cLo=
golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
......
......@@ -40,6 +40,7 @@ Please note that because of the net/html dependency, goquery requires Go1.1+ and
**Note that goquery's API is now stable, and will not break.**
* **2023-02-18 (v1.8.1)** : Update `go.mod` dependencies, update CI workflow.
* **2021-10-25 (v1.8.0)** : Add `Render` function to render a `Selection` to an `io.Writer` (thanks [@anthonygedeon](https://github.com/anthonygedeon)).
* **2021-07-11 (v1.7.1)** : Update go.mod dependencies and add dependabot config (thanks [@jauderho](https://github.com/jauderho)).
* **2021-06-14 (v1.7.0)** : Add `Single` and `SingleMatcher` functions to optimize first-match selection (thanks [@gdollardollar](https://github.com/gdollardollar)).
......@@ -154,6 +155,8 @@ func main() {
- [Geziyor](https://github.com/geziyor/geziyor), a fast web crawling & scraping framework for Go. Supports JS rendering.
- [Pagser](https://github.com/foolin/pagser), a simple, easy, extensible, configurable HTML parser to struct based on goquery and struct tags.
- [stitcherd](https://github.com/vhodges/stitcherd), A server for doing server side includes using css selectors and DOM updates.
- [goskyr](https://github.com/jakopako/goskyr), an easily configurable command-line scraper written in Go.
- [goGetJS](https://github.com/davemolk/goGetJS), a tool for extracting, searching, and saving JavaScript files (with optional headless browser).
## Support
......
......@@ -2,7 +2,7 @@ module github.com/PuerkitoBio/goquery
require (
github.com/andybalholm/cascadia v1.3.1
golang.org/x/net v0.0.0-20210916014120-12bc252f5db8
golang.org/x/net v0.7.0
)
go 1.13
github.com/andybalholm/cascadia v1.3.1 h1:nhxRkql1kdYCc8Snf7D5/D3spOX+dBgjA6u8x004T2c=
github.com/andybalholm/cascadia v1.3.1/go.mod h1:R4bJ1UQfqADjvDa4P6HZHLh/3OxWWEqc0Sk8XGwHqvA=
golang.org/x/net v0.0.0-20210916014120-12bc252f5db8 h1:/6y1LfuqNuQdHAm0jjtPtgRcxIxjVZgm5OTu8/QhZvk=
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/net v0.0.0-20210916014120-12bc252f5db8/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
golang.org/x/net v0.7.0 h1:rJrUqqhjsgNp7KqAIc25s9pZnjU7TUcSY7HcVZjdn1g=
golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
......@@ -58,7 +58,7 @@ func nodeName(node *html.Node) string {
}
}
// Render renders the html of the first element from selector and writes it to
// Render renders the HTML of the first item in the selection and writes it to
// the writer. It behaves the same as OuterHtml but writes to w instead of
// returning the string.
func Render(w io.Writer, s *Selection) error {
......@@ -73,7 +73,7 @@ func Render(w io.Writer, s *Selection) error {
// the selection - that is, the HTML including the first element's
// tag and attributes.
//
// Unlike InnerHtml, this is a function and not a method on the Selection,
// Unlike Html, this is a function and not a method on the Selection,
// because this is not a jQuery method (in javascript-land, this is
// a property provided by the DOM).
func OuterHtml(s *Selection) (string, error) {
......
# This source code refers to The Go Authors for copyright purposes.
# The master list of authors is in the main Go distribution,
# visible at http://tip.golang.org/AUTHORS.
# This source code was written by the Go contributors.
# The master list of contributors is in the main Go distribution,
# visible at http://tip.golang.org/CONTRIBUTORS.
......@@ -184,7 +184,7 @@ func (p *parser) clearStackToContext(s scope) {
}
}
// parseGenericRawTextElements implements the generic raw text element parsing
// parseGenericRawTextElement implements the generic raw text element parsing
// algorithm defined in 12.2.6.2.
// https://html.spec.whatwg.org/multipage/parsing.html#parsing-elements-that-contain-only-text
// TODO: Since both RAWTEXT and RCDATA states are treated as tokenizer's part
......@@ -734,7 +734,7 @@ func inHeadIM(p *parser) bool {
return false
}
// 12.2.6.4.5.
// Section 12.2.6.4.5.
func inHeadNoscriptIM(p *parser) bool {
switch p.tok.Type {
case DoctypeToken:
......
......@@ -85,7 +85,7 @@ func render1(w writer, n *Node) error {
if _, err := w.WriteString("<!--"); err != nil {
return err
}
if _, err := w.WriteString(n.Data); err != nil {
if err := escape(w, n.Data); err != nil {
return err
}
if _, err := w.WriteString("-->"); err != nil {
......@@ -96,7 +96,7 @@ func render1(w writer, n *Node) error {
if _, err := w.WriteString("<!DOCTYPE "); err != nil {
return err
}
if _, err := w.WriteString(n.Data); err != nil {
if err := escape(w, n.Data); err != nil {
return err
}
if n.Attr != nil {
......
......@@ -110,9 +110,9 @@ func (t Token) String() string {
case SelfClosingTagToken:
return "<" + t.tagString() + "/>"
case CommentToken:
return "<!--" + t.Data + "-->"
return "<!--" + EscapeString(t.Data) + "-->"
case DoctypeToken:
return "<!DOCTYPE " + t.Data + ">"
return "<!DOCTYPE " + EscapeString(t.Data) + ">"
}
return "Invalid(" + strconv.Itoa(int(t.Type)) + ")"
}
......@@ -598,6 +598,11 @@ scriptDataDoubleEscapeEnd:
// readComment reads the next comment token starting with "<!--". The opening
// "<!--" has already been consumed.
func (z *Tokenizer) readComment() {
// When modifying this function, consider manually increasing the suffixLen
// constant in func TestComments, from 6 to e.g. 9 or more. That increase
// should only be temporary, not committed, as it exponentially affects the
// test running time.
z.data.start = z.raw.end
defer func() {
if z.data.end < z.data.start {
......@@ -605,14 +610,13 @@ func (z *Tokenizer) readComment() {
z.data.end = z.data.start
}
}()
for dashCount := 2; ; {
var dashCount int
beginning := true
for {
c := z.readByte()
if z.err != nil {
// Ignore up to two dashes at EOF.
if dashCount > 2 {
dashCount = 2
}
z.data.end = z.raw.end - dashCount
z.data.end = z.calculateAbruptCommentDataEnd()
return
}
switch c {
......@@ -620,7 +624,7 @@ func (z *Tokenizer) readComment() {
dashCount++
continue
case '>':
if dashCount >= 2 {
if dashCount >= 2 || beginning {
z.data.end = z.raw.end - len("-->")
return
}
......@@ -628,19 +632,52 @@ func (z *Tokenizer) readComment() {
if dashCount >= 2 {
c = z.readByte()
if z.err != nil {
z.data.end = z.raw.end
z.data.end = z.calculateAbruptCommentDataEnd()
return
}
if c == '>' {
} else if c == '>' {
z.data.end = z.raw.end - len("--!>")
return
} else if c == '-' {
dashCount = 1
beginning = false
continue
}
}
}
dashCount = 0
beginning = false
}
}
func (z *Tokenizer) calculateAbruptCommentDataEnd() int {
raw := z.Raw()
const prefixLen = len("<!--")
if len(raw) >= prefixLen {
raw = raw[prefixLen:]
if hasSuffix(raw, "--!") {
return z.raw.end - 3
} else if hasSuffix(raw, "--") {
return z.raw.end - 2
} else if hasSuffix(raw, "-") {
return z.raw.end - 1
}
}
return z.raw.end
}
func hasSuffix(b []byte, suffix string) bool {
if len(b) < len(suffix) {
return false
}
b = b[len(b)-len(suffix):]
for i := range b {
if b[i] != suffix[i] {
return false
}
}
return true
}
// readUntilCloseAngle reads until the next ">".
func (z *Tokenizer) readUntilCloseAngle() {
z.data.start = z.raw.end
......
# This source code refers to The Go Authors for copyright purposes.
# The master list of authors is in the main Go distribution,
# visible at http://tip.golang.org/AUTHORS.
# This source code was written by the Go contributors.
# The master list of contributors is in the main Go distribution,
# visible at http://tip.golang.org/CONTRIBUTORS.
......@@ -495,9 +495,9 @@ func (s *isolatingRunSequence) resolveWeakTypes() {
if t == NSM {
s.types[i] = precedingCharacterType
} else {
if t.in(LRI, RLI, FSI, PDI) {
precedingCharacterType = ON
}
// if t.in(LRI, RLI, FSI, PDI) {
// precedingCharacterType = ON
// }
precedingCharacterType = t
}
}
......
......@@ -37,18 +37,6 @@ const (
unknownClass = ^Class(0)
)
var controlToClass = map[rune]Class{
0x202D: LRO, // LeftToRightOverride,
0x202E: RLO, // RightToLeftOverride,
0x202A: LRE, // LeftToRightEmbedding,
0x202B: RLE, // RightToLeftEmbedding,
0x202C: PDF, // PopDirectionalFormat,
0x2066: LRI, // LeftToRightIsolate,
0x2067: RLI, // RightToLeftIsolate,
0x2068: FSI, // FirstStrongIsolate,
0x2069: PDI, // PopDirectionalIsolate,
}
// A trie entry has the following bits:
// 7..5 XOR mask for brackets
// 4 1: Bracket open, 0: Bracket close
......
......@@ -110,6 +110,7 @@ func (p Properties) BoundaryAfter() bool {
}
// We pack quick check data in 4 bits:
//
// 5: Combines forward (0 == false, 1 == true)
// 4..3: NFC_QC Yes(00), No (10), or Maybe (11)
// 2: NFD_QC Yes (0) or No (1). No also means there is a decomposition.
......
......@@ -27,6 +27,7 @@ import (
// the bytes or string x converted to the given form.
// A position n in x is called a boundary if conversion to the form can
// proceed independently on both sides:
//
// f(x) == append(f(x[0:n]), f(x[n:])...)
//
// References: https://unicode.org/reports/tr15/ and
......
......@@ -7315,7 +7315,7 @@ const recompMapPacked = "" +
"\x00V\x03\x03\x00\x00\x1e|" + // 0x00560303: 0x00001E7C
"\x00v\x03\x03\x00\x00\x1e}" + // 0x00760303: 0x00001E7D
"\x00V\x03#\x00\x00\x1e~" + // 0x00560323: 0x00001E7E
"\x00v\x03#\x00\x00\x1e\u007f" + // 0x00760323: 0x00001E7F
"\x00v\x03#\x00\x00\x1e\x7f" + // 0x00760323: 0x00001E7F
"\x00W\x03\x00\x00\x00\x1e\x80" + // 0x00570300: 0x00001E80
"\x00w\x03\x00\x00\x00\x1e\x81" + // 0x00770300: 0x00001E81
"\x00W\x03\x01\x00\x00\x1e\x82" + // 0x00570301: 0x00001E82
......@@ -7342,7 +7342,7 @@ const recompMapPacked = "" +
"\x00t\x03\b\x00\x00\x1e\x97" + // 0x00740308: 0x00001E97
"\x00w\x03\n\x00\x00\x1e\x98" + // 0x0077030A: 0x00001E98
"\x00y\x03\n\x00\x00\x1e\x99" + // 0x0079030A: 0x00001E99
"\x01\u007f\x03\a\x00\x00\x1e\x9b" + // 0x017F0307: 0x00001E9B
"\x01\x7f\x03\a\x00\x00\x1e\x9b" + // 0x017F0307: 0x00001E9B
"\x00A\x03#\x00\x00\x1e\xa0" + // 0x00410323: 0x00001EA0
"\x00a\x03#\x00\x00\x1e\xa1" + // 0x00610323: 0x00001EA1
"\x00A\x03\t\x00\x00\x1e\xa2" + // 0x00410309: 0x00001EA2
......
......@@ -1146,21 +1146,31 @@ var widthIndex = [1408]uint8{
}
// inverseData contains 4-byte entries of the following format:
//
// <length> <modified UTF-8-encoded rune> <0 padding>
//
// The last byte of the UTF-8-encoded rune is xor-ed with the last byte of the
// UTF-8 encoding of the original rune. Mappings often have the following
// pattern:
//
// A -> A (U+FF21 -> U+0041)
// B -> B (U+FF22 -> U+0042)
// ...
//
// By xor-ing the last byte the same entry can be shared by many mappings. This
// reduces the total number of distinct entries by about two thirds.
// The resulting entry for the aforementioned mappings is
//
// { 0x01, 0xE0, 0x00, 0x00 }
//
// Using this entry to map U+FF21 (UTF-8 [EF BC A1]), we get
//
// E0 ^ A1 = 41.
//
// Similarly, for U+FF22 (UTF-8 [EF BC A2]), we get
//
// E0 ^ A2 = 42.
//
// Note that because of the xor-ing, the byte sequence stored in the entry is
// not valid UTF-8.
var inverseData = [150][4]byte{
......
......@@ -1158,21 +1158,31 @@ var widthIndex = [1408]uint8{
}
// inverseData contains 4-byte entries of the following format:
//
// <length> <modified UTF-8-encoded rune> <0 padding>
//
// The last byte of the UTF-8-encoded rune is xor-ed with the last byte of the
// UTF-8 encoding of the original rune. Mappings often have the following
// pattern:
//
// A -> A (U+FF21 -> U+0041)
// B -> B (U+FF22 -> U+0042)
// ...
//
// By xor-ing the last byte the same entry can be shared by many mappings. This
// reduces the total number of distinct entries by about two thirds.
// The resulting entry for the aforementioned mappings is
//
// { 0x01, 0xE0, 0x00, 0x00 }
//
// Using this entry to map U+FF21 (UTF-8 [EF BC A1]), we get
//
// E0 ^ A1 = 41.
//
// Similarly, for U+FF22 (UTF-8 [EF BC A2]), we get
//
// E0 ^ A2 = 42.
//
// Note that because of the xor-ing, the byte sequence stored in the entry is
// not valid UTF-8.
var inverseData = [150][4]byte{
......