Commit f4455ea6 authored by ale's avatar ale
Browse files

Update dependencies (legacy and go.mod)

parent a67708b7
Pipeline #5026 passed with stage
in 36 seconds
......@@ -3,11 +3,10 @@ module git.autistici.org/ale/crawl
require (
github.com/PuerkitoBio/goquery v1.5.0
github.com/PuerkitoBio/purell v0.0.0-20180310210909-975f53781597
github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578
github.com/andybalholm/cascadia v1.0.0
github.com/golang/snappy v0.0.1
github.com/pborman/uuid v0.0.0-20171128162732-e53336930665
github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 // indirect
github.com/google/go-cmp v0.3.1
github.com/google/uuid v1.1.1 // indirect
github.com/pborman/uuid v1.2.0
github.com/syndtr/goleveldb v0.0.0-20190923125748-758128399b1d
golang.org/x/net v0.0.0-20190926025831-c00fd9afed17
golang.org/x/text v0.3.0
golang.org/x/net v0.0.0-20190926025831-c00fd9afed17 // indirect
)
......@@ -4,20 +4,30 @@ github.com/PuerkitoBio/purell v0.0.0-20180310210909-975f53781597 h1:1H3FyRw7YsqI
github.com/PuerkitoBio/purell v0.0.0-20180310210909-975f53781597/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0=
github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 h1:d+Bc7a5rLufV/sSk/8dngufqelfh6jnri85riMAaF/M=
github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE=
github.com/andybalholm/cascadia v0.0.0-20181012154424-680b6a57bda4/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
github.com/andybalholm/cascadia v1.0.0 h1:hOCXnnZ5A+3eVDX8pvgl4kofXv2ELss0bKcqRySc45o=
github.com/andybalholm/cascadia v1.0.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
github.com/fsnotify/fsnotify v1.4.7 h1:IXs+QLmnXW2CcXuY+8Mzv/fWEsPGWxqefPtCP5CnV9I=
github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
github.com/golang/protobuf v1.2.0 h1:P3YflyNX/ehuJFLhxviNdFxQPkGK5cDcApsge1SqnvM=
github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/snappy v0.0.0-20190904063534-ff6b7dc882cf/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
github.com/golang/snappy v0.0.1 h1:Qgr9rKW7uDUkrbSmQeiDsGa8SjGyCOGtuasMWwvp2P4=
github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
github.com/google/go-cmp v0.3.1 h1:Xye71clBPdm5HgqGwUkwhbynsUJZhDbS20FvLhQ2izg=
github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
github.com/google/uuid v1.0.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/google/uuid v1.1.1 h1:Gkbcsh/GbpXz7lPftLA3P6TYMwjCLYm83jiFQZF/3gY=
github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/hpcloud/tail v1.0.0 h1:nfCOvKYfkgYP8hkirhJocXT2+zOD8yUNjXaWfTlyFKI=
github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU=
github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE=
github.com/onsi/ginkgo v1.7.0 h1:WSHQ+IS43OoUrWtD1/bbclrwK8TTH5hzp+umCiuxHgs=
github.com/onsi/ginkgo v1.7.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE=
github.com/onsi/gomega v1.4.3 h1:RE1xgDvH7imwFD45h+u2SgIfERHlS2yNG4DObb5BSKU=
github.com/onsi/gomega v1.4.3/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY=
github.com/pborman/uuid v0.0.0-20171128162732-e53336930665 h1:7G9lvlxEu1ZPLqJnsRY1MuoBaf2Mg4qbtcxNRXKdzFs=
github.com/pborman/uuid v0.0.0-20171128162732-e53336930665/go.mod h1:VyrYX9gd7irzKovcSS6BIIEwPRkP2Wm2m9ufcdFSJ34=
github.com/pborman/uuid v1.2.0 h1:J7Q5mO4ysT1dv8hyrUGHb9+ooztCXu1D8MY8DZYsu3g=
github.com/pborman/uuid v1.2.0/go.mod h1:X/NO0urCmaxf9VXbdlT7C2Yzkj2IKimNn4k+gtPdI/k=
github.com/syndtr/goleveldb v0.0.0-20190923125748-758128399b1d h1:OgkXbz/O0zsJoaB+z6n/a3bNGCbCWhBPLfGr6qaBprM=
github.com/syndtr/goleveldb v0.0.0-20190923125748-758128399b1d/go.mod h1:9OrXJhf154huy1nPWmuSrkgjPUtUNhA+Zmy+6AESzuA=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
......@@ -26,14 +36,18 @@ golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73r
golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20190926025831-c00fd9afed17 h1:qPnAdmjNA41t3QBTx2mFGf/SD1IoslhYu7AmdsVzCcs=
golang.org/x/net v0.0.0-20190926025831-c00fd9afed17/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f h1:wMNYb4v58l5UBM7MYRLPG6ZhfOqbKu7X5eyFl8ZhKvA=
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a h1:1BGLXjeY4akVXGgbC9HugT3Jv3hCI0z56oJR5vAMgBU=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/text v0.0.0-20190829152558-3d0f7978add9/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/fsnotify.v1 v1.4.7 h1:xOHLXZwVvI9hhs+cLKq5+I5onOuwQLhQwiu63xxlHs4=
gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys=
gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ=
gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw=
gopkg.in/yaml.v2 v2.2.1 h1:mUhvW9EsL+naU5Q3cakzfE91YhliOondGd6ZrsDBHQE=
gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
......@@ -138,9 +138,11 @@ func main() {
- [Goq][goq], an HTML deserialization and scraping library based on goquery and struct tags.
- [andybalholm/cascadia][cascadia], the CSS selector library used by goquery.
- [suntong/cascadia][cascadiacli], a command-line interface to the cascadia CSS selector library, useful to test selectors.
- [asciimoo/colly](https://github.com/asciimoo/colly), a lightning fast and elegant Scraping Framework
- [gocolly/colly](https://github.com/gocolly/colly), a lightning fast and elegant Scraping Framework
- [gnulnx/goperf](https://github.com/gnulnx/goperf), a website performance test tool that also fetches static assets.
- [MontFerret/ferret](https://github.com/MontFerret/ferret), declarative web scraping.
- [tacusci/berrycms](https://github.com/tacusci/berrycms), a modern simple to use CMS with easy to write plugins
- [Dataflow kit](https://github.com/slotix/dataflowkit), Web Scraping framework for Gophers.
## Support
......
......@@ -7,14 +7,16 @@ import (
"regexp"
"strconv"
"strings"
"golang.org/x/net/html"
)
// a parser for CSS selectors
type parser struct {
s string // the source text
i int // the current position
// if `false`, parsing a pseudo-element
// returns an error.
acceptPseudoElements bool
}
// parseEscape parses a backslash escape.
......@@ -56,6 +58,26 @@ func (p *parser) parseEscape() (result string, err error) {
return result, nil
}
// toLowerASCII returns s with all ASCII capital letters lowercased.
func toLowerASCII(s string) string {
var b []byte
for i := 0; i < len(s); i++ {
if c := s[i]; 'A' <= c && c <= 'Z' {
if b == nil {
b = make([]byte, len(s))
copy(b, s)
}
b[i] = s[i] + ('a' - 'A')
}
}
if b == nil {
return s
}
return string(b)
}
func hexDigit(c byte) bool {
return '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F'
}
......@@ -280,92 +302,92 @@ func (p *parser) consumeClosingParenthesis() bool {
}
// parseTypeSelector parses a type selector (one that matches by tag name).
func (p *parser) parseTypeSelector() (result Selector, err error) {
func (p *parser) parseTypeSelector() (result tagSelector, err error) {
tag, err := p.parseIdentifier()
if err != nil {
return nil, err
return
}
return typeSelector(tag), nil
return tagSelector{tag: toLowerASCII(tag)}, nil
}
// parseIDSelector parses a selector that matches by id attribute.
func (p *parser) parseIDSelector() (Selector, error) {
func (p *parser) parseIDSelector() (idSelector, error) {
if p.i >= len(p.s) {
return nil, fmt.Errorf("expected id selector (#id), found EOF instead")
return idSelector{}, fmt.Errorf("expected id selector (#id), found EOF instead")
}
if p.s[p.i] != '#' {
return nil, fmt.Errorf("expected id selector (#id), found '%c' instead", p.s[p.i])
return idSelector{}, fmt.Errorf("expected id selector (#id), found '%c' instead", p.s[p.i])
}
p.i++
id, err := p.parseName()
if err != nil {
return nil, err
return idSelector{}, err
}
return attributeEqualsSelector("id", id), nil
return idSelector{id: id}, nil
}
// parseClassSelector parses a selector that matches by class attribute.
func (p *parser) parseClassSelector() (Selector, error) {
func (p *parser) parseClassSelector() (classSelector, error) {
if p.i >= len(p.s) {
return nil, fmt.Errorf("expected class selector (.class), found EOF instead")
return classSelector{}, fmt.Errorf("expected class selector (.class), found EOF instead")
}
if p.s[p.i] != '.' {
return nil, fmt.Errorf("expected class selector (.class), found '%c' instead", p.s[p.i])
return classSelector{}, fmt.Errorf("expected class selector (.class), found '%c' instead", p.s[p.i])
}
p.i++
class, err := p.parseIdentifier()
if err != nil {
return nil, err
return classSelector{}, err
}
return attributeIncludesSelector("class", class), nil
return classSelector{class: class}, nil
}
// parseAttributeSelector parses a selector that matches by attribute value.
func (p *parser) parseAttributeSelector() (Selector, error) {
func (p *parser) parseAttributeSelector() (attrSelector, error) {
if p.i >= len(p.s) {
return nil, fmt.Errorf("expected attribute selector ([attribute]), found EOF instead")
return attrSelector{}, fmt.Errorf("expected attribute selector ([attribute]), found EOF instead")
}
if p.s[p.i] != '[' {
return nil, fmt.Errorf("expected attribute selector ([attribute]), found '%c' instead", p.s[p.i])
return attrSelector{}, fmt.Errorf("expected attribute selector ([attribute]), found '%c' instead", p.s[p.i])
}
p.i++
p.skipWhitespace()
key, err := p.parseIdentifier()
if err != nil {
return nil, err
return attrSelector{}, err
}
key = toLowerASCII(key)
p.skipWhitespace()
if p.i >= len(p.s) {
return nil, errors.New("unexpected EOF in attribute selector")
return attrSelector{}, errors.New("unexpected EOF in attribute selector")
}
if p.s[p.i] == ']' {
p.i++
return attributeExistsSelector(key), nil
return attrSelector{key: key, operation: ""}, nil
}
if p.i+2 >= len(p.s) {
return nil, errors.New("unexpected EOF in attribute selector")
return attrSelector{}, errors.New("unexpected EOF in attribute selector")
}
op := p.s[p.i : p.i+2]
if op[0] == '=' {
op = "="
} else if op[1] != '=' {
return nil, fmt.Errorf(`expected equality operator, found "%s" instead`, op)
return attrSelector{}, fmt.Errorf(`expected equality operator, found "%s" instead`, op)
}
p.i += len(op)
p.skipWhitespace()
if p.i >= len(p.s) {
return nil, errors.New("unexpected EOF in attribute selector")
return attrSelector{}, errors.New("unexpected EOF in attribute selector")
}
var val string
var rx *regexp.Regexp
......@@ -380,88 +402,84 @@ func (p *parser) parseAttributeSelector() (Selector, error) {
}
}
if err != nil {
return nil, err
return attrSelector{}, err
}
p.skipWhitespace()
if p.i >= len(p.s) {
return nil, errors.New("unexpected EOF in attribute selector")
return attrSelector{}, errors.New("unexpected EOF in attribute selector")
}
if p.s[p.i] != ']' {
return nil, fmt.Errorf("expected ']', found '%c' instead", p.s[p.i])
return attrSelector{}, fmt.Errorf("expected ']', found '%c' instead", p.s[p.i])
}
p.i++
switch op {
case "=":
return attributeEqualsSelector(key, val), nil
case "!=":
return attributeNotEqualSelector(key, val), nil
case "~=":
return attributeIncludesSelector(key, val), nil
case "|=":
return attributeDashmatchSelector(key, val), nil
case "^=":
return attributePrefixSelector(key, val), nil
case "$=":
return attributeSuffixSelector(key, val), nil
case "*=":
return attributeSubstringSelector(key, val), nil
case "#=":
return attributeRegexSelector(key, rx), nil
}
return nil, fmt.Errorf("attribute operator %q is not supported", op)
case "=", "!=", "~=", "|=", "^=", "$=", "*=", "#=":
return attrSelector{key: key, val: val, operation: op, regexp: rx}, nil
default:
return attrSelector{}, fmt.Errorf("attribute operator %q is not supported", op)
}
}
var errExpectedParenthesis = errors.New("expected '(' but didn't find it")
var errExpectedClosingParenthesis = errors.New("expected ')' but didn't find it")
var errUnmatchedParenthesis = errors.New("unmatched '('")
// parsePseudoclassSelector parses a pseudoclass selector like :not(p).
func (p *parser) parsePseudoclassSelector() (Selector, error) {
// parsePseudoclassSelector parses a pseudoclass selector like :not(p) or a pseudo-element
// For backwards compatibility, both ':' and '::' prefix are allowed for pseudo-elements.
// https://drafts.csswg.org/selectors-3/#pseudo-elements
// Returning a nil `Sel` (and a nil `error`) means we found a pseudo-element.
func (p *parser) parsePseudoclassSelector() (out Sel, pseudoElement string, err error) {
if p.i >= len(p.s) {
return nil, fmt.Errorf("expected pseudoclass selector (:pseudoclass), found EOF instead")
return nil, "", fmt.Errorf("expected pseudoclass selector (:pseudoclass), found EOF instead")
}
if p.s[p.i] != ':' {
return nil, fmt.Errorf("expected attribute selector (:pseudoclass), found '%c' instead", p.s[p.i])
return nil, "", fmt.Errorf("expected attribute selector (:pseudoclass), found '%c' instead", p.s[p.i])
}
p.i++
var mustBePseudoElement bool
if p.i >= len(p.s) {
return nil, "", fmt.Errorf("got empty pseudoclass (or pseudoelement)")
}
if p.s[p.i] == ':' { // we found a pseudo-element
mustBePseudoElement = true
p.i++
}
name, err := p.parseIdentifier()
if err != nil {
return nil, err
return
}
name = toLowerASCII(name)
if mustBePseudoElement && (name != "after" && name != "backdrop" && name != "before" &&
name != "cue" && name != "first-letter" && name != "first-line" && name != "grammar-error" &&
name != "marker" && name != "placeholder" && name != "selection" && name != "spelling-error") {
return out, "", fmt.Errorf("unknown pseudoelement :%s", name)
}
switch name {
case "not", "has", "haschild":
if !p.consumeParenthesis() {
return nil, errExpectedParenthesis
return out, "", errExpectedParenthesis
}
sel, parseErr := p.parseSelectorGroup()
if parseErr != nil {
return nil, parseErr
return out, "", parseErr
}
if !p.consumeClosingParenthesis() {
return nil, errExpectedClosingParenthesis
return out, "", errExpectedClosingParenthesis
}
switch name {
case "not":
return negatedSelector(sel), nil
case "has":
return hasDescendantSelector(sel), nil
case "haschild":
return hasChildSelector(sel), nil
}
out = relativePseudoClassSelector{name: name, match: sel}
case "contains", "containsown":
if !p.consumeParenthesis() {
return nil, errExpectedParenthesis
return out, "", errExpectedParenthesis
}
if p.i == len(p.s) {
return nil, errUnmatchedParenthesis
return out, "", errUnmatchedParenthesis
}
var val string
switch p.s[p.i] {
......@@ -471,95 +489,75 @@ func (p *parser) parsePseudoclassSelector() (Selector, error) {
val, err = p.parseIdentifier()
}
if err != nil {
return nil, err
return out, "", err
}
val = strings.ToLower(val)
p.skipWhitespace()
if p.i >= len(p.s) {
return nil, errors.New("unexpected EOF in pseudo selector")
return out, "", errors.New("unexpected EOF in pseudo selector")
}
if !p.consumeClosingParenthesis() {
return nil, errExpectedClosingParenthesis
return out, "", errExpectedClosingParenthesis
}
switch name {
case "contains":
return textSubstrSelector(val), nil
case "containsown":
return ownTextSubstrSelector(val), nil
}
out = containsPseudoClassSelector{own: name == "containsown", value: val}
case "matches", "matchesown":
if !p.consumeParenthesis() {
return nil, errExpectedParenthesis
return out, "", errExpectedParenthesis
}
rx, err := p.parseRegex()
if err != nil {
return nil, err
return out, "", err
}
if p.i >= len(p.s) {
return nil, errors.New("unexpected EOF in pseudo selector")
return out, "", errors.New("unexpected EOF in pseudo selector")
}
if !p.consumeClosingParenthesis() {
return nil, errExpectedClosingParenthesis
return out, "", errExpectedClosingParenthesis
}
switch name {
case "matches":
return textRegexSelector(rx), nil
case "matchesown":
return ownTextRegexSelector(rx), nil
}
out = regexpPseudoClassSelector{own: name == "matchesown", regexp: rx}
case "nth-child", "nth-last-child", "nth-of-type", "nth-last-of-type":
if !p.consumeParenthesis() {
return nil, errExpectedParenthesis
return out, "", errExpectedParenthesis
}
a, b, err := p.parseNth()
if err != nil {
return nil, err
return out, "", err
}
if !p.consumeClosingParenthesis() {
return nil, errExpectedClosingParenthesis
}
if a == 0 {
switch name {
case "nth-child":
return simpleNthChildSelector(b, false), nil
case "nth-of-type":
return simpleNthChildSelector(b, true), nil
case "nth-last-child":
return simpleNthLastChildSelector(b, false), nil
case "nth-last-of-type":
return simpleNthLastChildSelector(b, true), nil
}
return out, "", errExpectedClosingParenthesis
}
return nthChildSelector(a, b,
name == "nth-last-child" || name == "nth-last-of-type",
name == "nth-of-type" || name == "nth-last-of-type"),
nil
last := name == "nth-last-child" || name == "nth-last-of-type"
ofType := name == "nth-of-type" || name == "nth-last-of-type"
out = nthPseudoClassSelector{a: a, b: b, last: last, ofType: ofType}
case "first-child":
return simpleNthChildSelector(1, false), nil
out = nthPseudoClassSelector{a: 0, b: 1, ofType: false, last: false}
case "last-child":
return simpleNthLastChildSelector(1, false), nil
out = nthPseudoClassSelector{a: 0, b: 1, ofType: false, last: true}
case "first-of-type":
return simpleNthChildSelector(1, true), nil
out = nthPseudoClassSelector{a: 0, b: 1, ofType: true, last: false}
case "last-of-type":
return simpleNthLastChildSelector(1, true), nil
out = nthPseudoClassSelector{a: 0, b: 1, ofType: true, last: true}
case "only-child":
return onlyChildSelector(false), nil
out = onlyChildPseudoClassSelector{ofType: false}
case "only-of-type":
return onlyChildSelector(true), nil
out = onlyChildPseudoClassSelector{ofType: true}
case "input":
return inputSelector, nil
out = inputPseudoClassSelector{}
case "empty":
return emptyElementSelector, nil
out = emptyElementPseudoClassSelector{}
case "root":
return rootSelector, nil
out = rootPseudoClassSelector{}
case "after", "backdrop", "before", "cue", "first-letter", "first-line", "grammar-error", "marker", "placeholder", "selection", "spelling-error":
return nil, name, nil
default:
return out, "", fmt.Errorf("unknown pseudoclass or pseudoelement :%s", name)
}
return nil, fmt.Errorf("unknown pseudoclass :%s", name)
return
}
// parseInteger parses a decimal integer.
......@@ -705,8 +703,8 @@ invalid:
// parseSimpleSelectorSequence parses a selector sequence that applies to
// a single element.
func (p *parser) parseSimpleSelectorSequence() (Selector, error) {
var result Selector
func (p *parser) parseSimpleSelectorSequence() (Sel, error) {
var selectors []Sel
if p.i >= len(p.s) {
return nil, errors.New("expected selector, found EOF instead")
......@@ -723,13 +721,17 @@ func (p *parser) parseSimpleSelectorSequence() (Selector, error) {
if err != nil {
return nil, err
}
result = r
selectors = append(selectors, r)
}
var pseudoElement string
loop:
for p.i < len(p.s) {
var ns Selector
var err error
var (
ns Sel
newPseudoElement string
err error
)
switch p.s[p.i] {
case '#':
ns, err = p.parseIDSelector()
......@@ -738,44 +740,57 @@ loop:
case '[':
ns, err = p.parseAttributeSelector()
case ':':
ns, err = p.parsePseudoclassSelector()
ns, newPseudoElement, err = p.parsePseudoclassSelector()
default:
break loop
}
if err != nil {
return nil, err
}
if result == nil {
result = ns
// From https://drafts.csswg.org/selectors-3/#pseudo-elements :
// "Only one pseudo-element may appear per selector, and if present
// it must appear after the sequence of simple selectors that
// represents the subjects of the selector.""
if ns == nil { // we found a pseudo-element
if pseudoElement != "" {
return nil, fmt.Errorf("only one pseudo-element is accepted per selector, got %s and %s", pseudoElement, newPseudoElement)
}
if !p.acceptPseudoElements {
return nil, fmt.Errorf("pseudo-element %s found, but pseudo-elements support is disabled", newPseudoElement)
}
pseudoElement = newPseudoElement
} else {
result = intersectionSelector(result, ns)
if pseudoElement != "" {
return nil, fmt.Errorf("pseudo-element %s must be at the end of selector", pseudoElement)
}
selectors = append(selectors, ns)
}
}
if result == nil {
result = func(n *html.Node) bool {
return n.Type == html.ElementNode
}
}
return result, nil
if len(selectors) == 1 && pseudoElement == "" { // no need wrap the selectors in compoundSelector
return selectors[0], nil
}
return compoundSelector{selectors: selectors, pseudoElement: pseudoElement}, nil
}
// parseSelector parses a selector that may include combinators.
func (p *parser) parseSelector() (result Selector, err error) {
func (p *parser) parseSelector() (Sel, error) {
p.skipWhitespace()
result, err = p.parseSimpleSelectorSequence()
result, err := p.parseSimpleSelectorSequence()