Commit ef2c4100 authored by ale
Browse files

Merge branch 'renovate/github.com-puerkitobio-goquery-1.x' into 'master'

Update module github.com/PuerkitoBio/goquery to v1.7.1

See merge request !3
parents 877afafd 557f9d88
Pipeline #18311 passed with stages
in 1 minute and 52 seconds
...@@ -3,12 +3,12 @@ module git.autistici.org/ale/crawl ...@@ -3,12 +3,12 @@ module git.autistici.org/ale/crawl
go 1.15 go 1.15
require ( require (
github.com/PuerkitoBio/goquery v1.5.0 github.com/PuerkitoBio/goquery v1.7.1
github.com/PuerkitoBio/purell v0.1.0 github.com/PuerkitoBio/purell v0.1.0
github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 // indirect github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 // indirect
github.com/google/go-cmp v0.5.6 github.com/google/go-cmp v0.5.6
github.com/google/uuid v1.1.1 // indirect github.com/google/uuid v1.1.1 // indirect
github.com/pborman/uuid v1.2.1 github.com/pborman/uuid v1.2.1
github.com/syndtr/goleveldb v0.0.0-20190923125748-758128399b1d github.com/syndtr/goleveldb v0.0.0-20190923125748-758128399b1d
golang.org/x/net v0.0.0-20190926025831-c00fd9afed17 // indirect golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2 // indirect
) )
github.com/PuerkitoBio/goquery v1.5.0 h1:uGvmFXOA73IKluu/F84Xd1tt/z07GYm8X49XKHP7EJk= github.com/PuerkitoBio/goquery v1.5.0 h1:uGvmFXOA73IKluu/F84Xd1tt/z07GYm8X49XKHP7EJk=
github.com/PuerkitoBio/goquery v1.5.0/go.mod h1:qD2PgZ9lccMbQlc7eEOjaeRlFQON7xY8kdmcsrnKqMg= github.com/PuerkitoBio/goquery v1.5.0/go.mod h1:qD2PgZ9lccMbQlc7eEOjaeRlFQON7xY8kdmcsrnKqMg=
github.com/PuerkitoBio/goquery v1.7.1 h1:oE+T06D+1T7LNrn91B4aERsRIeCLJ/oPSa6xB9FPnz4=
github.com/PuerkitoBio/goquery v1.7.1/go.mod h1:XY0pP4kfraEmmV1O7Uf6XyjoslwsneBbgeDjLYuN8xY=
github.com/PuerkitoBio/purell v0.0.0-20180310210909-975f53781597 h1:1H3FyRw7YsqIty9WHPOVEGJaFJ1sfGVZ3PPDUw3ob2w= github.com/PuerkitoBio/purell v0.0.0-20180310210909-975f53781597 h1:1H3FyRw7YsqIty9WHPOVEGJaFJ1sfGVZ3PPDUw3ob2w=
github.com/PuerkitoBio/purell v0.0.0-20180310210909-975f53781597/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0= github.com/PuerkitoBio/purell v0.0.0-20180310210909-975f53781597/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0=
github.com/PuerkitoBio/purell v0.1.0 h1:N8Bcc53nei5frgNYgAKo93qMUVdU5LUGHCBv8efdVcM= github.com/PuerkitoBio/purell v0.1.0 h1:N8Bcc53nei5frgNYgAKo93qMUVdU5LUGHCBv8efdVcM=
...@@ -8,6 +10,8 @@ github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 h1:d+Bc7a5rLufV ...@@ -8,6 +10,8 @@ github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 h1:d+Bc7a5rLufV
github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE= github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE=
github.com/andybalholm/cascadia v1.0.0 h1:hOCXnnZ5A+3eVDX8pvgl4kofXv2ELss0bKcqRySc45o= github.com/andybalholm/cascadia v1.0.0 h1:hOCXnnZ5A+3eVDX8pvgl4kofXv2ELss0bKcqRySc45o=
github.com/andybalholm/cascadia v1.0.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y= github.com/andybalholm/cascadia v1.0.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
github.com/andybalholm/cascadia v1.2.0 h1:vuRCkM5Ozh/BfmsaTm26kbjm0mIOM3yS5Ek/F5h18aE=
github.com/andybalholm/cascadia v1.2.0/go.mod h1:YCyR8vOZT9aZ1CHEd8ap0gMVm2aFgxBp0T0eFw1RUQY=
github.com/fsnotify/fsnotify v1.4.7 h1:IXs+QLmnXW2CcXuY+8Mzv/fWEsPGWxqefPtCP5CnV9I= github.com/fsnotify/fsnotify v1.4.7 h1:IXs+QLmnXW2CcXuY+8Mzv/fWEsPGWxqefPtCP5CnV9I=
github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
github.com/golang/protobuf v1.2.0 h1:P3YflyNX/ehuJFLhxviNdFxQPkGK5cDcApsge1SqnvM= github.com/golang/protobuf v1.2.0 h1:P3YflyNX/ehuJFLhxviNdFxQPkGK5cDcApsge1SqnvM=
...@@ -42,13 +46,20 @@ golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73r ...@@ -42,13 +46,20 @@ golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73r
golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20190926025831-c00fd9afed17 h1:qPnAdmjNA41t3QBTx2mFGf/SD1IoslhYu7AmdsVzCcs= golang.org/x/net v0.0.0-20190926025831-c00fd9afed17 h1:qPnAdmjNA41t3QBTx2mFGf/SD1IoslhYu7AmdsVzCcs=
golang.org/x/net v0.0.0-20190926025831-c00fd9afed17/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20190926025831-c00fd9afed17/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20210614182718-04defd469f4e h1:XpT3nA5TvE525Ne3hInMh6+GETgn27Zfm9dxsThnX2Q=
golang.org/x/net v0.0.0-20210614182718-04defd469f4e/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f h1:wMNYb4v58l5UBM7MYRLPG6ZhfOqbKu7X5eyFl8ZhKvA= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f h1:wMNYb4v58l5UBM7MYRLPG6ZhfOqbKu7X5eyFl8ZhKvA=
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a h1:1BGLXjeY4akVXGgbC9HugT3Jv3hCI0z56oJR5vAMgBU= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a h1:1BGLXjeY4akVXGgbC9HugT3Jv3hCI0z56oJR5vAMgBU=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg= golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
......
arch:
- amd64
- ppc64le
language: go language: go
go: go:
- 1.1
- 1.2.x
- 1.3.x
- 1.4.x
- 1.5.x
- 1.6.x
- 1.7.x - 1.7.x
- 1.8.x - 1.8.x
- 1.9.x - 1.9.x
- "1.10.x" - 1.10.x
- 1.11.x - 1.11.x
- 1.12.x
- 1.13.x
- 1.14.x
- 1.15.x
- tip - tip
jobs:
exclude:
- arch: ppc64le
go: 1.7.x
- arch: ppc64le
go: 1.8.x
- arch: ppc64le
go: 1.9.x
- arch: ppc64le
go: 1.10.x
- arch: ppc64le
go: 1.11.x
- arch: ppc64le
go: 1.12.x
Copyright (c) 2012-2016, Martin Angers & Contributors Copyright (c) 2012-2021, Martin Angers & Contributors
All rights reserved. All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
......
# goquery - a little like that j-thing, only in Go # goquery - a little like that j-thing, only in Go
[![build status](https://secure.travis-ci.org/PuerkitoBio/goquery.svg?branch=master)](http://travis-ci.org/PuerkitoBio/goquery) [![GoDoc](https://godoc.org/github.com/PuerkitoBio/goquery?status.png)](http://godoc.org/github.com/PuerkitoBio/goquery) [![Sourcegraph Badge](https://sourcegraph.com/github.com/PuerkitoBio/goquery/-/badge.svg)](https://sourcegraph.com/github.com/PuerkitoBio/goquery?badge)
[![builds.sr.ht status](https://builds.sr.ht/~mna/goquery/commits/fedora.yml.svg)](https://builds.sr.ht/~mna/goquery/commits/fedora.yml?)
[![build status](https://secure.travis-ci.org/PuerkitoBio/goquery.svg?branch=master)](http://travis-ci.org/PuerkitoBio/goquery)
[![Go Reference](https://pkg.go.dev/badge/github.com/PuerkitoBio/goquery.svg)](https://pkg.go.dev/github.com/PuerkitoBio/goquery)
[![Sourcegraph Badge](https://sourcegraph.com/github.com/PuerkitoBio/goquery/-/badge.svg)](https://sourcegraph.com/github.com/PuerkitoBio/goquery?badge)
goquery brings a syntax and a set of features similar to [jQuery][] to the [Go language][go]. It is based on Go's [net/html package][html] and the CSS Selector library [cascadia][]. Since the net/html parser returns nodes, and not a full-featured DOM tree, jQuery's stateful manipulation functions (like height(), css(), detach()) have been left off. goquery brings a syntax and a set of features similar to [jQuery][] to the [Go language][go]. It is based on Go's [net/html package][html] and the CSS Selector library [cascadia][]. Since the net/html parser returns nodes, and not a full-featured DOM tree, jQuery's stateful manipulation functions (like height(), css(), detach()) have been left off.
...@@ -19,7 +23,7 @@ Syntax-wise, it is as close as possible to jQuery, with the same function names ...@@ -19,7 +23,7 @@ Syntax-wise, it is as close as possible to jQuery, with the same function names
## Installation ## Installation
Please note that because of the net/html dependency, goquery requires Go1.1+. Please note that because of the net/html dependency, goquery requires Go1.1+ and is tested on Go1.7+.
$ go get github.com/PuerkitoBio/goquery $ go get github.com/PuerkitoBio/goquery
...@@ -37,6 +41,11 @@ Please note that because of the net/html dependency, goquery requires Go1.1+. ...@@ -37,6 +41,11 @@ Please note that because of the net/html dependency, goquery requires Go1.1+.
**Note that goquery's API is now stable, and will not break.** **Note that goquery's API is now stable, and will not break.**
* **2021-07-11 (v1.7.1)** : Update go.mod dependencies and add dependabot config (thanks [@jauderho](https://github.com/jauderho)).
* **2021-06-14 (v1.7.0)** : Add `Single` and `SingleMatcher` functions to optimize first-match selection (thanks [@gdollardollar](https://github.com/gdollardollar)).
* **2021-01-11 (v1.6.1)** : Fix panic when calling `{Prepend,Append,Set}Html` on a `Selection` that contains non-Element nodes.
* **2020-10-08 (v1.6.0)** : Parse html in context of the container node for all functions that deal with html strings (`AfterHtml`, `AppendHtml`, etc.). Thanks to [@thiemok][thiemok] and [@davidjwilkins][djw] for their work on this.
* **2020-02-04 (v1.5.1)** : Update module dependencies.
* **2018-11-15 (v1.5.0)** : Go module support (thanks @Zaba505). * **2018-11-15 (v1.5.0)** : Go module support (thanks @Zaba505).
* **2018-06-07 (v1.4.1)** : Add `NewDocumentFromReader` examples. * **2018-06-07 (v1.4.1)** : Add `NewDocumentFromReader` examples.
* **2018-03-24 (v1.4.0)** : Deprecate `NewDocument(url)` and `NewDocumentFromResponse(response)`. * **2018-03-24 (v1.4.0)** : Deprecate `NewDocument(url)` and `NewDocumentFromResponse(response)`.
...@@ -47,7 +56,7 @@ Please note that because of the net/html dependency, goquery requires Go1.1+. ...@@ -47,7 +56,7 @@ Please note that because of the net/html dependency, goquery requires Go1.1+.
* **2016-08-28 (v1.0.1)** : Optimize performance for large documents. * **2016-08-28 (v1.0.1)** : Optimize performance for large documents.
* **2016-07-27 (v1.0.0)** : Tag version 1.0.0. * **2016-07-27 (v1.0.0)** : Tag version 1.0.0.
* **2016-06-15** : Invalid selector strings internally compile to a `Matcher` implementation that never matches any node (instead of a panic). So for example, `doc.Find("~")` returns an empty `*Selection` object. * **2016-06-15** : Invalid selector strings internally compile to a `Matcher` implementation that never matches any node (instead of a panic). So for example, `doc.Find("~")` returns an empty `*Selection` object.
* **2016-02-02** : Add `NodeName` utility function similar to the DOM's `nodeName` property. It returns the tag name of the first element in a selection, and other relevant values of non-element nodes (see godoc for details). Add `OuterHtml` utility function similar to the DOM's `outerHTML` property (named `OuterHtml` in small caps for consistency with the existing `Html` method on the `Selection`). * **2016-02-02** : Add `NodeName` utility function similar to the DOM's `nodeName` property. It returns the tag name of the first element in a selection, and other relevant values of non-element nodes (see [doc][] for details). Add `OuterHtml` utility function similar to the DOM's `outerHTML` property (named `OuterHtml` in small caps for consistency with the existing `Html` method on the `Selection`).
* **2015-04-20** : Add `AttrOr` helper method to return the attribute's value or a default value if absent. Thanks to [piotrkowalczuk][piotr]. * **2015-04-20** : Add `AttrOr` helper method to return the attribute's value or a default value if absent. Thanks to [piotrkowalczuk][piotr].
* **2015-02-04** : Add more manipulation functions - Prepend* - thanks again to [Andrew Stone][thatguystone]. * **2015-02-04** : Add more manipulation functions - Prepend* - thanks again to [Andrew Stone][thatguystone].
* **2014-11-28** : Add more manipulation functions - ReplaceWith*, Wrap* and Unwrap - thanks again to [Andrew Stone][thatguystone]. * **2014-11-28** : Add more manipulation functions - ReplaceWith*, Wrap* and Unwrap - thanks again to [Andrew Stone][thatguystone].
...@@ -76,7 +85,7 @@ jQuery often has many variants for the same function (no argument, a selector st ...@@ -76,7 +85,7 @@ jQuery often has many variants for the same function (no argument, a selector st
Utility functions that are not in jQuery but are useful in Go are implemented as functions (that take a `*Selection` as parameter), to avoid a potential naming clash on the `*Selection`'s methods (reserved for jQuery-equivalent behaviour). Utility functions that are not in jQuery but are useful in Go are implemented as functions (that take a `*Selection` as parameter), to avoid a potential naming clash on the `*Selection`'s methods (reserved for jQuery-equivalent behaviour).
The complete [godoc reference documentation can be found here][doc]. The complete [package reference documentation can be found here][doc].
Please note that Cascadia's selectors do not necessarily match all supported selectors of jQuery (Sizzle). See the [cascadia project][cascadia] for details. Invalid selector strings compile to a `Matcher` that fails to match any node. Behaviour of the various functions that take a selector string as argument follows from that fact, e.g. (where `~` is an invalid selector string): Please note that Cascadia's selectors do not necessarily match all supported selectors of jQuery (Sizzle). See the [cascadia project][cascadia] for details. Invalid selector strings compile to a `Matcher` that fails to match any node. Behaviour of the various functions that take a selector string as argument follows from that fact, e.g. (where `~` is an invalid selector string):
...@@ -120,12 +129,11 @@ func ExampleScrape() { ...@@ -120,12 +129,11 @@ func ExampleScrape() {
} }
// Find the review items // Find the review items
doc.Find(".sidebar-reviews article .content-block").Each(func(i int, s *goquery.Selection) { doc.Find(".left-content article .post-title").Each(func(i int, s *goquery.Selection) {
// For each item found, get the band and title // For each item found, get the title
band := s.Find("a").Text() title := s.Find("a").Text()
title := s.Find("i").Text() fmt.Printf("Review %d: %s\n", i, title)
fmt.Printf("Review %d: %s - %s\n", i, band, title) })
})
} }
func main() { func main() {
...@@ -138,9 +146,14 @@ func main() { ...@@ -138,9 +146,14 @@ func main() {
- [Goq][goq], an HTML deserialization and scraping library based on goquery and struct tags. - [Goq][goq], an HTML deserialization and scraping library based on goquery and struct tags.
- [andybalholm/cascadia][cascadia], the CSS selector library used by goquery. - [andybalholm/cascadia][cascadia], the CSS selector library used by goquery.
- [suntong/cascadia][cascadiacli], a command-line interface to the cascadia CSS selector library, useful to test selectors. - [suntong/cascadia][cascadiacli], a command-line interface to the cascadia CSS selector library, useful to test selectors.
- [asciimoo/colly](https://github.com/asciimoo/colly), a lightning fast and elegant Scraping Framework - [gocolly/colly](https://github.com/gocolly/colly), a lightning fast and elegant Scraping Framework
- [gnulnx/goperf](https://github.com/gnulnx/goperf), a website performance test tool that also fetches static assets. - [gnulnx/goperf](https://github.com/gnulnx/goperf), a website performance test tool that also fetches static assets.
- [MontFerret/ferret](https://github.com/MontFerret/ferret), declarative web scraping. - [MontFerret/ferret](https://github.com/MontFerret/ferret), declarative web scraping.
- [tacusci/berrycms](https://github.com/tacusci/berrycms), a modern, simple-to-use CMS with easy-to-write plugins
- [Dataflow kit](https://github.com/slotix/dataflowkit), Web Scraping framework for Gophers.
- [Geziyor](https://github.com/geziyor/geziyor), a fast web crawling & scraping framework for Go. Supports JS rendering.
- [Pagser](https://github.com/foolin/pagser), a simple, easy, extensible, configurable HTML parser to struct based on goquery and struct tags.
- [stitcherd](https://github.com/vhodges/stitcherd), a server for doing server-side includes using CSS selectors and DOM updates.
## Support ## Support
...@@ -153,8 +166,9 @@ There are a number of ways you can support the project: ...@@ -153,8 +166,9 @@ There are a number of ways you can support the project:
* Pull requests: please discuss new code in an issue first, unless the fix is really trivial. * Pull requests: please discuss new code in an issue first, unless the fix is really trivial.
- Make sure new code is tested. - Make sure new code is tested.
- Be mindful of existing code - PRs that break existing code have a high probability of being declined, unless it fixes a serious issue. - Be mindful of existing code - PRs that break existing code have a high probability of being declined, unless it fixes a serious issue.
* Sponsor the developer
If you desperately want to send money my way, I have a BuyMeACoffee.com page: - See the GitHub Sponsor button at the top of the repo on GitHub
- or via BuyMeACoffee.com, below
<a href="https://www.buymeacoffee.com/mna" target="_blank"><img src="https://www.buymeacoffee.com/assets/img/custom_images/orange_img.png" alt="Buy Me A Coffee" style="height: 41px !important;width: 174px !important;box-shadow: 0px 3px 2px 0px rgba(190, 190, 190, 0.5) !important;-webkit-box-shadow: 0px 3px 2px 0px rgba(190, 190, 190, 0.5) !important;" ></a> <a href="https://www.buymeacoffee.com/mna" target="_blank"><img src="https://www.buymeacoffee.com/assets/img/custom_images/orange_img.png" alt="Buy Me A Coffee" style="height: 41px !important;width: 174px !important;box-shadow: 0px 3px 2px 0px rgba(190, 190, 190, 0.5) !important;-webkit-box-shadow: 0px 3px 2px 0px rgba(190, 190, 190, 0.5) !important;" ></a>
...@@ -169,11 +183,13 @@ The [BSD 3-Clause license][bsd], the same as the [Go language][golic]. Cascadia' ...@@ -169,11 +183,13 @@ The [BSD 3-Clause license][bsd], the same as the [Go language][golic]. Cascadia'
[bsd]: http://opensource.org/licenses/BSD-3-Clause [bsd]: http://opensource.org/licenses/BSD-3-Clause
[golic]: http://golang.org/LICENSE [golic]: http://golang.org/LICENSE
[caslic]: https://github.com/andybalholm/cascadia/blob/master/LICENSE [caslic]: https://github.com/andybalholm/cascadia/blob/master/LICENSE
[doc]: http://godoc.org/github.com/PuerkitoBio/goquery [doc]: https://pkg.go.dev/github.com/PuerkitoBio/goquery
[index]: http://api.jquery.com/index/ [index]: http://api.jquery.com/index/
[gonet]: https://github.com/golang/net/ [gonet]: https://github.com/golang/net/
[html]: http://godoc.org/golang.org/x/net/html [html]: https://pkg.go.dev/golang.org/x/net/html
[wiki]: https://github.com/PuerkitoBio/goquery/wiki/Tips-and-tricks [wiki]: https://github.com/PuerkitoBio/goquery/wiki/Tips-and-tricks
[thatguystone]: https://github.com/thatguystone [thatguystone]: https://github.com/thatguystone
[piotr]: https://github.com/piotrkowalczuk [piotr]: https://github.com/piotrkowalczuk
[goq]: https://github.com/andrewstuart/goq [goq]: https://github.com/andrewstuart/goq
[thiemok]: https://github.com/thiemok
[djw]: https://github.com/davidjwilkins
module github.com/PuerkitoBio/goquery module github.com/PuerkitoBio/goquery
require ( require (
github.com/andybalholm/cascadia v1.0.0 github.com/andybalholm/cascadia v1.2.0
golang.org/x/net v0.0.0-20181114220301-adae6a3d119a golang.org/x/net v0.0.0-20210614182718-04defd469f4e
) )
go 1.13
github.com/andybalholm/cascadia v1.0.0 h1:hOCXnnZ5A+3eVDX8pvgl4kofXv2ELss0bKcqRySc45o= github.com/andybalholm/cascadia v1.2.0 h1:vuRCkM5Ozh/BfmsaTm26kbjm0mIOM3yS5Ek/F5h18aE=
github.com/andybalholm/cascadia v1.0.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y= github.com/andybalholm/cascadia v1.2.0/go.mod h1:YCyR8vOZT9aZ1CHEd8ap0gMVm2aFgxBp0T0eFw1RUQY=
golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20181114220301-adae6a3d119a h1:gOpx8G595UYyvj8UK4+OFyY4rx037g3fmfhe5SasG3U= golang.org/x/net v0.0.0-20210614182718-04defd469f4e h1:XpT3nA5TvE525Ne3hInMh6+GETgn27Zfm9dxsThnX2Q=
golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20210614182718-04defd469f4e/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
...@@ -39,8 +39,15 @@ func (s *Selection) AfterSelection(sel *Selection) *Selection { ...@@ -39,8 +39,15 @@ func (s *Selection) AfterSelection(sel *Selection) *Selection {
// AfterHtml parses the html and inserts it after the set of matched elements. // AfterHtml parses the html and inserts it after the set of matched elements.
// //
// This follows the same rules as Selection.Append. // This follows the same rules as Selection.Append.
func (s *Selection) AfterHtml(html string) *Selection { func (s *Selection) AfterHtml(htmlStr string) *Selection {
return s.AfterNodes(parseHtml(html)...) return s.eachNodeHtml(htmlStr, true, func(node *html.Node, nodes []*html.Node) {
nextSibling := node.NextSibling
for _, n := range nodes {
if node.Parent != nil {
node.Parent.InsertBefore(n, nextSibling)
}
}
})
} }
// AfterNodes inserts the nodes after each element in the set of matched elements. // AfterNodes inserts the nodes after each element in the set of matched elements.
...@@ -85,8 +92,12 @@ func (s *Selection) AppendSelection(sel *Selection) *Selection { ...@@ -85,8 +92,12 @@ func (s *Selection) AppendSelection(sel *Selection) *Selection {
} }
// AppendHtml parses the html and appends it to the set of matched elements. // AppendHtml parses the html and appends it to the set of matched elements.
func (s *Selection) AppendHtml(html string) *Selection { func (s *Selection) AppendHtml(htmlStr string) *Selection {
return s.AppendNodes(parseHtml(html)...) return s.eachNodeHtml(htmlStr, false, func(node *html.Node, nodes []*html.Node) {
for _, n := range nodes {
node.AppendChild(n)
}
})
} }
// AppendNodes appends the specified nodes to each node in the set of matched elements. // AppendNodes appends the specified nodes to each node in the set of matched elements.
...@@ -123,8 +134,14 @@ func (s *Selection) BeforeSelection(sel *Selection) *Selection { ...@@ -123,8 +134,14 @@ func (s *Selection) BeforeSelection(sel *Selection) *Selection {
// BeforeHtml parses the html and inserts it before the set of matched elements. // BeforeHtml parses the html and inserts it before the set of matched elements.
// //
// This follows the same rules as Selection.Append. // This follows the same rules as Selection.Append.
func (s *Selection) BeforeHtml(html string) *Selection { func (s *Selection) BeforeHtml(htmlStr string) *Selection {
return s.BeforeNodes(parseHtml(html)...) return s.eachNodeHtml(htmlStr, true, func(node *html.Node, nodes []*html.Node) {
for _, n := range nodes {
if node.Parent != nil {
node.Parent.InsertBefore(n, node)
}
}
})
} }
// BeforeNodes inserts the nodes before each element in the set of matched elements. // BeforeNodes inserts the nodes before each element in the set of matched elements.
...@@ -184,8 +201,13 @@ func (s *Selection) PrependSelection(sel *Selection) *Selection { ...@@ -184,8 +201,13 @@ func (s *Selection) PrependSelection(sel *Selection) *Selection {
} }
// PrependHtml parses the html and prepends it to the set of matched elements. // PrependHtml parses the html and prepends it to the set of matched elements.
func (s *Selection) PrependHtml(html string) *Selection { func (s *Selection) PrependHtml(htmlStr string) *Selection {
return s.PrependNodes(parseHtml(html)...) return s.eachNodeHtml(htmlStr, false, func(node *html.Node, nodes []*html.Node) {
firstChild := node.FirstChild
for _, n := range nodes {
node.InsertBefore(n, firstChild)
}
})
} }
// PrependNodes prepends the specified nodes to each node in the set of // PrependNodes prepends the specified nodes to each node in the set of
...@@ -212,14 +234,19 @@ func (s *Selection) Remove() *Selection { ...@@ -212,14 +234,19 @@ func (s *Selection) Remove() *Selection {
return s return s
} }
// RemoveFiltered removes the set of matched elements by selector. // RemoveFiltered removes from the current set of matched elements those that
// It returns the Selection of removed nodes. // match the selector filter. It returns the Selection of removed nodes.
//
// For example if the selection s contains "<h1>", "<h2>" and "<h3>"
// and s.RemoveFiltered("h2") is called, only the "<h2>" node is removed
// (and returned), while "<h1>" and "<h3>" are kept in the document.
func (s *Selection) RemoveFiltered(selector string) *Selection { func (s *Selection) RemoveFiltered(selector string) *Selection {
return s.RemoveMatcher(compileMatcher(selector)) return s.RemoveMatcher(compileMatcher(selector))
} }
// RemoveMatcher removes the set of matched elements. // RemoveMatcher removes from the current set of matched elements those that
// It returns the Selection of removed nodes. // match the Matcher filter. It returns the Selection of removed nodes.
// See RemoveFiltered for additional information.
func (s *Selection) RemoveMatcher(m Matcher) *Selection { func (s *Selection) RemoveMatcher(m Matcher) *Selection {
return s.FilterMatcher(m).Remove() return s.FilterMatcher(m).Remove()
} }
...@@ -256,8 +283,16 @@ func (s *Selection) ReplaceWithSelection(sel *Selection) *Selection { ...@@ -256,8 +283,16 @@ func (s *Selection) ReplaceWithSelection(sel *Selection) *Selection {
// It returns the removed elements. // It returns the removed elements.
// //
// This follows the same rules as Selection.Append. // This follows the same rules as Selection.Append.
func (s *Selection) ReplaceWithHtml(html string) *Selection { func (s *Selection) ReplaceWithHtml(htmlStr string) *Selection {
return s.ReplaceWithNodes(parseHtml(html)...) s.eachNodeHtml(htmlStr, true, func(node *html.Node, nodes []*html.Node) {
nextSibling := node.NextSibling
for _, n := range nodes {
if node.Parent != nil {
node.Parent.InsertBefore(n, nextSibling)
}
}
})
return s.Remove()
} }
// ReplaceWithNodes replaces each element in the set of matched elements with // ReplaceWithNodes replaces each element in the set of matched elements with
...@@ -272,8 +307,17 @@ func (s *Selection) ReplaceWithNodes(ns ...*html.Node) *Selection { ...@@ -272,8 +307,17 @@ func (s *Selection) ReplaceWithNodes(ns ...*html.Node) *Selection {
// SetHtml sets the html content of each element in the selection to // SetHtml sets the html content of each element in the selection to
// specified html string. // specified html string.
func (s *Selection) SetHtml(html string) *Selection { func (s *Selection) SetHtml(htmlStr string) *Selection {
return setHtmlNodes(s, parseHtml(html)...) for _, context := range s.Nodes {
for c := context.FirstChild; c != nil; c = context.FirstChild {
context.RemoveChild(c)
}
}
return s.eachNodeHtml(htmlStr, false, func(node *html.Node, nodes []*html.Node) {
for _, n := range nodes {
node.AppendChild(n)
}
})
} }
// SetText sets the content of each element in the selection to specified content. // SetText sets the content of each element in the selection to specified content.
...@@ -329,8 +373,23 @@ func (s *Selection) WrapSelection(sel *Selection) *Selection { ...@@ -329,8 +373,23 @@ func (s *Selection) WrapSelection(sel *Selection) *Selection {
// most child of the given HTML. // most child of the given HTML.
// //
// It returns the original set of elements. // It returns the original set of elements.
func (s *Selection) WrapHtml(html string) *Selection { func (s *Selection) WrapHtml(htmlStr string) *Selection {
return s.wrapNodes(parseHtml(html)...) nodesMap := make(map[string][]*html.Node)
for _, context := range s.Nodes {
var parent *html.Node
if context.Parent != nil {
parent = context.Parent
} else {
parent = &html.Node{Type: html.ElementNode}
}
nodes, found := nodesMap[nodeName(parent)]
if !found {
nodes = parseHtmlWithContext(htmlStr, parent)
nodesMap[nodeName(parent)] = nodes
}
newSingleSelection(context, s.document).wrapAllNodes(cloneNodes(nodes)...)
}
return s
} }
// WrapNode wraps each element in the set of matched elements inside the inner- // WrapNode wraps each element in the set of matched elements inside the inner-
...@@ -382,8 +441,18 @@ func (s *Selection) WrapAllSelection(sel *Selection) *Selection { ...@@ -382,8 +441,18 @@ func (s *Selection) WrapAllSelection(sel *Selection) *Selection {
// document. // document.
// //
// It returns the original set of elements. // It returns the original set of elements.
func (s *Selection) WrapAllHtml(html string) *Selection { func (s *Selection) WrapAllHtml(htmlStr string) *Selection {
return s.wrapAllNodes(parseHtml(html)...) var context *html.Node
var nodes []*html.Node
if len(s.Nodes) > 0 {
context = s.Nodes[0]
if context.Parent != nil {
nodes = parseHtmlWithContext(htmlStr, context)
} else {
nodes = parseHtml(htmlStr)
}
}
return s.wrapAllNodes(nodes...)
} }
func (s *Selection) wrapAllNodes(ns ...*html.Node) *Selection { func (s *Selection) wrapAllNodes(ns ...*html.Node) *Selection {
...@@ -452,8 +521,17 @@ func (s *Selection) WrapInnerSelection(sel *Selection) *Selection { ...@@ -452,8 +521,17 @@ func (s *Selection) WrapInnerSelection(sel *Selection) *Selection {
// cloned before being inserted into the document. // cloned before being inserted into the document.
// //
// It returns the original set of elements. // It returns the original set of elements.
func (s *Selection) WrapInnerHtml(html string) *Selection { func (s *Selection) WrapInnerHtml(htmlStr string) *Selection {
return s.wrapInnerNodes(parseHtml(html)...) nodesMap := make(map[string][]*html.Node)
for _, context := range s.Nodes {
nodes, found := nodesMap[nodeName(context)]
if !found {
nodes = parseHtmlWithContext(htmlStr, context)
nodesMap[nodeName(context)] = nodes
}
newSingleSelection(context, s.document).wrapInnerNodes(cloneNodes(nodes)...)
}
return s
} }
// WrapInnerNode wraps an HTML structure, matched by the given selector, around // WrapInnerNode wraps an HTML structure, matched by the given selector, around
...@@ -493,16 +571,14 @@ func parseHtml(h string) []*html.Node { ...@@ -493,16 +571,14 @@ func parseHtml(h string) []*html.Node {
return nodes return nodes
} }
func setHtmlNodes(s *Selection, ns ...*html.Node) *Selection { func parseHtmlWithContext(h string, context *html.Node) []*html.Node {
for _, n := range s.Nodes { // Errors are only returned when the io.Reader returns any error besides
for c := n.FirstChild; c != nil; c = n.FirstChild { // EOF, but strings.Reader never will
n.RemoveChild(c) nodes, err := html.ParseFragment(strings.NewReader(h), context)
} if err != nil {
for _, c := range ns { panic("goquery: failed to parse HTML: " + err.Error())
n.AppendChild(cloneNode(c))
}
} }
return s return nodes
} }
// Get the first child that is an ElementNode // Get the first child that is an ElementNode
...@@ -572,3 +648,32 @@ func (s *Selection) manipulateNodes(ns []*html.Node, reverse bool, ...@@ -572,3 +648,32 @@ func (s *Selection) manipulateNodes(ns []*html.Node, reverse bool,
return s return s
} }