Commit f16901a7 authored by renovate's avatar renovate
Browse files

Update module github.com/PuerkitoBio/goquery to v1.8.0

parent ef2c4100
Pipeline #22932 failed
......@@ -3,7 +3,7 @@ module git.autistici.org/ale/crawl
go 1.15
require (
github.com/PuerkitoBio/goquery v1.7.1
github.com/PuerkitoBio/goquery v1.8.0
github.com/PuerkitoBio/purell v0.1.0
github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 // indirect
github.com/google/go-cmp v0.5.6
......
......@@ -2,6 +2,8 @@ github.com/PuerkitoBio/goquery v1.5.0 h1:uGvmFXOA73IKluu/F84Xd1tt/z07GYm8X49XKHP
github.com/PuerkitoBio/goquery v1.5.0/go.mod h1:qD2PgZ9lccMbQlc7eEOjaeRlFQON7xY8kdmcsrnKqMg=
github.com/PuerkitoBio/goquery v1.7.1 h1:oE+T06D+1T7LNrn91B4aERsRIeCLJ/oPSa6xB9FPnz4=
github.com/PuerkitoBio/goquery v1.7.1/go.mod h1:XY0pP4kfraEmmV1O7Uf6XyjoslwsneBbgeDjLYuN8xY=
github.com/PuerkitoBio/goquery v1.8.0 h1:PJTF7AmFCFKk1N6V6jmKfrNH9tV5pNE6lZMkG0gta/U=
github.com/PuerkitoBio/goquery v1.8.0/go.mod h1:ypIiRMtY7COPGk+I/YbZLbxsxn9g5ejnI2HSMtkjZvI=
github.com/PuerkitoBio/purell v0.0.0-20180310210909-975f53781597 h1:1H3FyRw7YsqIty9WHPOVEGJaFJ1sfGVZ3PPDUw3ob2w=
github.com/PuerkitoBio/purell v0.0.0-20180310210909-975f53781597/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0=
github.com/PuerkitoBio/purell v0.1.0 h1:N8Bcc53nei5frgNYgAKo93qMUVdU5LUGHCBv8efdVcM=
......@@ -12,6 +14,8 @@ github.com/andybalholm/cascadia v1.0.0 h1:hOCXnnZ5A+3eVDX8pvgl4kofXv2ELss0bKcqRy
github.com/andybalholm/cascadia v1.0.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
github.com/andybalholm/cascadia v1.2.0 h1:vuRCkM5Ozh/BfmsaTm26kbjm0mIOM3yS5Ek/F5h18aE=
github.com/andybalholm/cascadia v1.2.0/go.mod h1:YCyR8vOZT9aZ1CHEd8ap0gMVm2aFgxBp0T0eFw1RUQY=
github.com/andybalholm/cascadia v1.3.1 h1:nhxRkql1kdYCc8Snf7D5/D3spOX+dBgjA6u8x004T2c=
github.com/andybalholm/cascadia v1.3.1/go.mod h1:R4bJ1UQfqADjvDa4P6HZHLh/3OxWWEqc0Sk8XGwHqvA=
github.com/fsnotify/fsnotify v1.4.7 h1:IXs+QLmnXW2CcXuY+8Mzv/fWEsPGWxqefPtCP5CnV9I=
github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
github.com/golang/protobuf v1.2.0 h1:P3YflyNX/ehuJFLhxviNdFxQPkGK5cDcApsge1SqnvM=
......@@ -48,6 +52,8 @@ golang.org/x/net v0.0.0-20190926025831-c00fd9afed17 h1:qPnAdmjNA41t3QBTx2mFGf/SD
golang.org/x/net v0.0.0-20190926025831-c00fd9afed17/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20210614182718-04defd469f4e h1:XpT3nA5TvE525Ne3hInMh6+GETgn27Zfm9dxsThnX2Q=
golang.org/x/net v0.0.0-20210614182718-04defd469f4e/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/net v0.0.0-20210916014120-12bc252f5db8 h1:/6y1LfuqNuQdHAm0jjtPtgRcxIxjVZgm5OTu8/QhZvk=
golang.org/x/net v0.0.0-20210916014120-12bc252f5db8/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f h1:wMNYb4v58l5UBM7MYRLPG6ZhfOqbKu7X5eyFl8ZhKvA=
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
......
arch:
- amd64
- ppc64le
language: go
go:
- 1.7.x
- 1.8.x
- 1.9.x
- 1.10.x
- 1.11.x
- 1.12.x
- 1.13.x
- 1.14.x
- 1.15.x
- tip
jobs:
exclude:
- arch: ppc64le
go: 1.7.x
- arch: ppc64le
go: 1.8.x
- arch: ppc64le
go: 1.9.x
- arch: ppc64le
go: 1.10.x
- arch: ppc64le
go: 1.11.x
- arch: ppc64le
go: 1.12.x
# goquery - a little like that j-thing, only in Go
[![builds.sr.ht status](https://builds.sr.ht/~mna/goquery/commits/fedora.yml.svg)](https://builds.sr.ht/~mna/goquery/commits/fedora.yml?)
[![build status](https://secure.travis-ci.org/PuerkitoBio/goquery.svg?branch=master)](http://travis-ci.org/PuerkitoBio/goquery)
[![Build Status](https://github.com/PuerkitoBio/goquery/actions/workflows/test.yml/badge.svg?branch=master)](https://github.com/PuerkitoBio/goquery/actions)
[![Go Reference](https://pkg.go.dev/badge/github.com/PuerkitoBio/goquery.svg)](https://pkg.go.dev/github.com/PuerkitoBio/goquery)
[![Sourcegraph Badge](https://sourcegraph.com/github.com/PuerkitoBio/goquery/-/badge.svg)](https://sourcegraph.com/github.com/PuerkitoBio/goquery?badge)
......@@ -41,6 +40,7 @@ Please note that because of the net/html dependency, goquery requires Go1.1+ and
**Note that goquery's API is now stable, and will not break.**
* **2021-10-25 (v1.8.0)** : Add `Render` function to render a `Selection` to an `io.Writer` (thanks [@anthonygedeon](https://github.com/anthonygedeon)).
* **2021-07-11 (v1.7.1)** : Update go.mod dependencies and add dependabot config (thanks [@jauderho](https://github.com/jauderho)).
* **2021-06-14 (v1.7.0)** : Add `Single` and `SingleMatcher` functions to optimize first-match selection (thanks [@gdollardollar](https://github.com/gdollardollar)).
* **2021-01-11 (v1.6.1)** : Fix panic when calling `{Prepend,Append,Set}Html` on a `Selection` that contains non-Element nodes.
......
module github.com/PuerkitoBio/goquery
require (
github.com/andybalholm/cascadia v1.2.0
golang.org/x/net v0.0.0-20210614182718-04defd469f4e
github.com/andybalholm/cascadia v1.3.1
golang.org/x/net v0.0.0-20210916014120-12bc252f5db8
)
go 1.13
github.com/andybalholm/cascadia v1.2.0 h1:vuRCkM5Ozh/BfmsaTm26kbjm0mIOM3yS5Ek/F5h18aE=
github.com/andybalholm/cascadia v1.2.0/go.mod h1:YCyR8vOZT9aZ1CHEd8ap0gMVm2aFgxBp0T0eFw1RUQY=
golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20210614182718-04defd469f4e h1:XpT3nA5TvE525Ne3hInMh6+GETgn27Zfm9dxsThnX2Q=
golang.org/x/net v0.0.0-20210614182718-04defd469f4e/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
github.com/andybalholm/cascadia v1.3.1 h1:nhxRkql1kdYCc8Snf7D5/D3spOX+dBgjA6u8x004T2c=
github.com/andybalholm/cascadia v1.3.1/go.mod h1:R4bJ1UQfqADjvDa4P6HZHLh/3OxWWEqc0Sk8XGwHqvA=
golang.org/x/net v0.0.0-20210916014120-12bc252f5db8 h1:/6y1LfuqNuQdHAm0jjtPtgRcxIxjVZgm5OTu8/QhZvk=
golang.org/x/net v0.0.0-20210916014120-12bc252f5db8/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
......
......@@ -2,6 +2,7 @@ package goquery
import (
"bytes"
"io"
"golang.org/x/net/html"
)
......@@ -50,13 +51,24 @@ func nodeName(node *html.Node) string {
case html.ElementNode, html.DoctypeNode:
return node.Data
default:
if node.Type >= 0 && int(node.Type) < len(nodeNames) {
if int(node.Type) < len(nodeNames) {
return nodeNames[node.Type]
}
return ""
}
}
// Render renders the html of the first element from selector and writes it to
// the writer. It behaves the same as OuterHtml but writes to w instead of
// returning the string.
func Render(w io.Writer, s *Selection) error {
if s.Length() == 0 {
return nil
}
n := s.Get(0)
return html.Render(w, n)
}
// OuterHtml returns the outer HTML rendering of the first item in
// the selection - that is, the HTML including the first element's
// tag and attributes.
......@@ -66,12 +78,7 @@ func nodeName(node *html.Node) string {
// a property provided by the DOM).
func OuterHtml(s *Selection) (string, error) {
var buf bytes.Buffer
if s.Length() == 0 {
return "", nil
}
n := s.Get(0)
if err := html.Render(&buf, n); err != nil {
if err := Render(&buf, s); err != nil {
return "", err
}
return buf.String(), nil
......
module github.com/andybalholm/cascadia
require golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01
go 1.16
go 1.13
require golang.org/x/net v0.0.0-20210916014120-12bc252f5db8
golang.org/x/net v0.0.0-20210916014120-12bc252f5db8 h1:/6y1LfuqNuQdHAm0jjtPtgRcxIxjVZgm5OTu8/QhZvk=
golang.org/x/net v0.0.0-20210916014120-12bc252f5db8/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
......@@ -36,7 +36,7 @@ func (p *parser) parseEscape() (result string, err error) {
for i = start; i < start+6 && i < len(p.s) && hexDigit(p.s[i]); i++ {
// empty
}
v, _ := strconv.ParseUint(p.s[start:i], 16, 21)
v, _ := strconv.ParseUint(p.s[start:i], 16, 64)
if len(p.s) > i {
switch p.s[i] {
case '\r':
......@@ -409,6 +409,19 @@ func (p *parser) parseAttributeSelector() (attrSelector, error) {
if p.i >= len(p.s) {
return attrSelector{}, errors.New("unexpected EOF in attribute selector")
}
// check if the attribute contains an ignore case flag
ignoreCase := false
if p.s[p.i] == 'i' || p.s[p.i] == 'I' {
ignoreCase = true
p.i++
}
p.skipWhitespace()
if p.i >= len(p.s) {
return attrSelector{}, errors.New("unexpected EOF in attribute selector")
}
if p.s[p.i] != ']' {
return attrSelector{}, fmt.Errorf("expected ']', found '%c' instead", p.s[p.i])
}
......@@ -416,15 +429,17 @@ func (p *parser) parseAttributeSelector() (attrSelector, error) {
switch op {
case "=", "!=", "~=", "|=", "^=", "$=", "*=", "#=":
return attrSelector{key: key, val: val, operation: op, regexp: rx}, nil
return attrSelector{key: key, val: val, operation: op, regexp: rx, insensitive: ignoreCase}, nil
default:
return attrSelector{}, fmt.Errorf("attribute operator %q is not supported", op)
}
}
var errExpectedParenthesis = errors.New("expected '(' but didn't find it")
var errExpectedClosingParenthesis = errors.New("expected ')' but didn't find it")
var errUnmatchedParenthesis = errors.New("unmatched '('")
var (
errExpectedParenthesis = errors.New("expected '(' but didn't find it")
errExpectedClosingParenthesis = errors.New("expected ')' but didn't find it")
errUnmatchedParenthesis = errors.New("unmatched '('")
)
// parsePseudoclassSelector parses a pseudoclass selector like :not(p) or a pseudo-element
// For backwards compatibility, both ':' and '::' prefix are allowed for pseudo-elements.
......@@ -552,6 +567,37 @@ func (p *parser) parsePseudoclassSelector() (out Sel, pseudoElement string, err
out = emptyElementPseudoClassSelector{}
case "root":
out = rootPseudoClassSelector{}
case "link":
out = linkPseudoClassSelector{}
case "lang":
if !p.consumeParenthesis() {
return out, "", errExpectedParenthesis
}
if p.i == len(p.s) {
return out, "", errUnmatchedParenthesis
}
val, err := p.parseIdentifier()
if err != nil {
return out, "", err
}
val = strings.ToLower(val)
p.skipWhitespace()
if p.i >= len(p.s) {
return out, "", errors.New("unexpected EOF in pseudo selector")
}
if !p.consumeClosingParenthesis() {
return out, "", errExpectedClosingParenthesis
}
out = langPseudoClassSelector{lang: val}
case "enabled":
out = enabledPseudoClassSelector{}
case "disabled":
out = disabledPseudoClassSelector{}
case "checked":
out = checkedPseudoClassSelector{}
case "visited", "hover", "active", "focus", "target":
// Not applicable in a static context: never match.
out = neverMatchSelector{value: ":" + name}
case "after", "backdrop", "before", "cue", "first-letter", "first-line", "grammar-error", "marker", "placeholder", "selection", "spelling-error":
return nil, name, nil
default:
......@@ -714,6 +760,9 @@ func (p *parser) parseSimpleSelectorSequence() (Sel, error) {
case '*':
// It's the universal selector. Just skip over it, since it doesn't affect the meaning.
p.i++
if p.i+2 < len(p.s) && p.s[p.i:p.i+2] == "|*" { // other version of universal selector
p.i += 2
}
case '#', '.', '[', ':':
// There's no type selector. Wait to process the other till the main loop.
default:
......
package cascadia
import (
"bytes"
"fmt"
"regexp"
"strings"
"golang.org/x/net/html"
"golang.org/x/net/html/atom"
)
// This file implements the pseudo classes selectors,
// which share the implementation of PseudoElement() and Specificity()
type abstractPseudoClass struct{}
func (s abstractPseudoClass) Specificity() Specificity {
return Specificity{0, 1, 0}
}
func (c abstractPseudoClass) PseudoElement() string {
return ""
}
type relativePseudoClassSelector struct {
name string // one of "not", "has", "haschild"
match SelectorGroup
}
func (s relativePseudoClassSelector) Match(n *html.Node) bool {
if n.Type != html.ElementNode {
return false
}
switch s.name {
case "not":
// matches elements that do not match a.
return !s.match.Match(n)
case "has":
// matches elements with any descendant that matches a.
return hasDescendantMatch(n, s.match)
case "haschild":
// matches elements with a child that matches a.
return hasChildMatch(n, s.match)
default:
panic(fmt.Sprintf("unsupported relative pseudo class selector : %s", s.name))
}
}
// hasChildMatch returns whether n has any child that matches a.
func hasChildMatch(n *html.Node, a Matcher) bool {
for c := n.FirstChild; c != nil; c = c.NextSibling {
if a.Match(c) {
return true
}
}
return false
}
// hasDescendantMatch performs a depth-first search of n's descendants,
// testing whether any of them match a. It returns true as soon as a match is
// found, or false if no match is found.
func hasDescendantMatch(n *html.Node, a Matcher) bool {
for c := n.FirstChild; c != nil; c = c.NextSibling {
if a.Match(c) || (c.Type == html.ElementNode && hasDescendantMatch(c, a)) {
return true
}
}
return false
}
// Specificity returns the specificity of the most specific selectors
// in the pseudo-class arguments.
// See https://www.w3.org/TR/selectors/#specificity-rules
func (s relativePseudoClassSelector) Specificity() Specificity {
var max Specificity
for _, sel := range s.match {
newSpe := sel.Specificity()
if max.Less(newSpe) {
max = newSpe
}
}
return max
}
func (c relativePseudoClassSelector) PseudoElement() string {
return ""
}
type containsPseudoClassSelector struct {
abstractPseudoClass
value string
own bool
}
func (s containsPseudoClassSelector) Match(n *html.Node) bool {
var text string
if s.own {
// matches nodes that directly contain the given text
text = strings.ToLower(nodeOwnText(n))
} else {
// matches nodes that contain the given text.
text = strings.ToLower(nodeText(n))
}
return strings.Contains(text, s.value)
}
type regexpPseudoClassSelector struct {
abstractPseudoClass
regexp *regexp.Regexp
own bool
}
func (s regexpPseudoClassSelector) Match(n *html.Node) bool {
var text string
if s.own {
// matches nodes whose text directly matches the specified regular expression
text = nodeOwnText(n)
} else {
// matches nodes whose text matches the specified regular expression
text = nodeText(n)
}
return s.regexp.MatchString(text)
}
// writeNodeText writes the text contained in n and its descendants to b.
func writeNodeText(n *html.Node, b *bytes.Buffer) {
switch n.Type {
case html.TextNode:
b.WriteString(n.Data)
case html.ElementNode:
for c := n.FirstChild; c != nil; c = c.NextSibling {
writeNodeText(c, b)
}
}
}
// nodeText returns the text contained in n and its descendants.
func nodeText(n *html.Node) string {
var b bytes.Buffer
writeNodeText(n, &b)
return b.String()
}
// nodeOwnText returns the contents of the text nodes that are direct
// children of n.
func nodeOwnText(n *html.Node) string {
var b bytes.Buffer
for c := n.FirstChild; c != nil; c = c.NextSibling {
if c.Type == html.TextNode {
b.WriteString(c.Data)
}
}
return b.String()
}
type nthPseudoClassSelector struct {
abstractPseudoClass
a, b int
last, ofType bool
}
func (s nthPseudoClassSelector) Match(n *html.Node) bool {
if s.a == 0 {
if s.last {
return simpleNthLastChildMatch(s.b, s.ofType, n)
} else {
return simpleNthChildMatch(s.b, s.ofType, n)
}
}
return nthChildMatch(s.a, s.b, s.last, s.ofType, n)
}
// nthChildMatch implements :nth-child(an+b).
// If last is true, implements :nth-last-child instead.
// If ofType is true, implements :nth-of-type instead.
func nthChildMatch(a, b int, last, ofType bool, n *html.Node) bool {
if n.Type != html.ElementNode {
return false
}
parent := n.Parent
if parent == nil {
return false
}
if parent.Type == html.DocumentNode {
return false
}
i := -1
count := 0
for c := parent.FirstChild; c != nil; c = c.NextSibling {
if (c.Type != html.ElementNode) || (ofType && c.Data != n.Data) {
continue
}
count++
if c == n {
i = count
if !last {
break
}
}
}
if i == -1 {
// This shouldn't happen, since n should always be one of its parent's children.
return false
}
if last {
i = count - i + 1
}
i -= b
if a == 0 {
return i == 0
}
return i%a == 0 && i/a >= 0
}
// simpleNthChildMatch implements :nth-child(b).
// If ofType is true, implements :nth-of-type instead.
func simpleNthChildMatch(b int, ofType bool, n *html.Node) bool {
if n.Type != html.ElementNode {
return false
}
parent := n.Parent
if parent == nil {
return false
}
if parent.Type == html.DocumentNode {
return false
}
count := 0
for c := parent.FirstChild; c != nil; c = c.NextSibling {
if c.Type != html.ElementNode || (ofType && c.Data != n.Data) {
continue
}
count++
if c == n {
return count == b
}
if count >= b {
return false
}
}
return false
}
// simpleNthLastChildMatch implements :nth-last-child(b).
// If ofType is true, implements :nth-last-of-type instead.
func simpleNthLastChildMatch(b int, ofType bool, n *html.Node) bool {
if n.Type != html.ElementNode {
return false
}
parent := n.Parent
if parent == nil {
return false
}
if parent.Type == html.DocumentNode {
return false
}
count := 0
for c := parent.LastChild; c != nil; c = c.PrevSibling {
if c.Type != html.ElementNode || (ofType && c.Data != n.Data) {
continue
}
count++
if c == n {
return count == b
}
if count >= b {
return false
}
}
return false
}
type onlyChildPseudoClassSelector struct {
abstractPseudoClass
ofType bool
}
// Match implements :only-child.
// If `ofType` is true, it implements :only-of-type instead.
func (s onlyChildPseudoClassSelector) Match(n *html.Node) bool {
if n.Type != html.ElementNode {
return false
}
parent := n.Parent
if parent == nil {
return false
}
if parent.Type == html.DocumentNode {
return false
}
count := 0
for c := parent.FirstChild; c != nil; c = c.NextSibling {
if (c.Type != html.ElementNode) || (s.ofType && c.Data != n.Data) {
continue
}
count++
if count > 1 {
return false
}
}
return count == 1
}
type inputPseudoClassSelector struct {
abstractPseudoClass
}