Commit 66156e2a authored by renovate's avatar renovate
Browse files

Update module github.com/PuerkitoBio/goquery to v1.7.0

parent 877afafd
Pipeline #17443 passed with stages
in 1 minute and 49 seconds
......@@ -3,12 +3,11 @@ module git.autistici.org/ale/crawl
go 1.15
require (
github.com/PuerkitoBio/goquery v1.5.0
github.com/PuerkitoBio/goquery v1.7.0
github.com/PuerkitoBio/purell v0.1.0
github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 // indirect
github.com/google/go-cmp v0.5.6
github.com/google/uuid v1.1.1 // indirect
github.com/pborman/uuid v1.2.1
github.com/syndtr/goleveldb v0.0.0-20190923125748-758128399b1d
golang.org/x/net v0.0.0-20190926025831-c00fd9afed17 // indirect
)
github.com/PuerkitoBio/goquery v1.5.0 h1:uGvmFXOA73IKluu/F84Xd1tt/z07GYm8X49XKHP7EJk=
github.com/PuerkitoBio/goquery v1.5.0/go.mod h1:qD2PgZ9lccMbQlc7eEOjaeRlFQON7xY8kdmcsrnKqMg=
github.com/PuerkitoBio/goquery v1.7.0 h1:O5SP3b9JWqMSVMG69zMfj577zwkSNpxrFf7ybS74eiw=
github.com/PuerkitoBio/goquery v1.7.0/go.mod h1:GsLWisAFVj4WgDibEWF4pvYnkVQBpKBKeU+7zCJoLcc=
github.com/PuerkitoBio/purell v0.0.0-20180310210909-975f53781597 h1:1H3FyRw7YsqIty9WHPOVEGJaFJ1sfGVZ3PPDUw3ob2w=
github.com/PuerkitoBio/purell v0.0.0-20180310210909-975f53781597/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0=
github.com/PuerkitoBio/purell v0.1.0 h1:N8Bcc53nei5frgNYgAKo93qMUVdU5LUGHCBv8efdVcM=
......@@ -8,6 +10,8 @@ github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 h1:d+Bc7a5rLufV
github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE=
github.com/andybalholm/cascadia v1.0.0 h1:hOCXnnZ5A+3eVDX8pvgl4kofXv2ELss0bKcqRySc45o=
github.com/andybalholm/cascadia v1.0.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
github.com/andybalholm/cascadia v1.1.0 h1:BuuO6sSfQNFRu1LppgbD25Hr2vLYW25JvxHs5zzsLTo=
github.com/andybalholm/cascadia v1.1.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
github.com/fsnotify/fsnotify v1.4.7 h1:IXs+QLmnXW2CcXuY+8Mzv/fWEsPGWxqefPtCP5CnV9I=
github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
github.com/golang/protobuf v1.2.0 h1:P3YflyNX/ehuJFLhxviNdFxQPkGK5cDcApsge1SqnvM=
......@@ -42,6 +46,8 @@ golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73r
golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20190926025831-c00fd9afed17 h1:qPnAdmjNA41t3QBTx2mFGf/SD1IoslhYu7AmdsVzCcs=
golang.org/x/net v0.0.0-20190926025831-c00fd9afed17/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200202094626-16171245cfb2 h1:CCH4IOTTfewWjGOlSp+zGcjutRKlBEZQ6wTn8ozI/nI=
golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f h1:wMNYb4v58l5UBM7MYRLPG6ZhfOqbKu7X5eyFl8ZhKvA=
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
......
arch:
- amd64
- ppc64le
language: go
go:
- 1.1
- 1.2.x
- 1.3.x
- 1.4.x
- 1.5.x
- 1.6.x
- 1.7.x
- 1.8.x
- 1.9.x
- "1.10.x"
- 1.10.x
- 1.11.x
- 1.12.x
- 1.13.x
- 1.14.x
- 1.15.x
- tip
jobs:
exclude:
- arch: ppc64le
go: 1.7.x
- arch: ppc64le
go: 1.8.x
- arch: ppc64le
go: 1.9.x
- arch: ppc64le
go: 1.10.x
- arch: ppc64le
go: 1.11.x
- arch: ppc64le
go: 1.12.x
Copyright (c) 2012-2016, Martin Angers & Contributors
Copyright (c) 2012-2021, Martin Angers & Contributors
All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
......
# goquery - a little like that j-thing, only in Go
[![build status](https://secure.travis-ci.org/PuerkitoBio/goquery.svg?branch=master)](http://travis-ci.org/PuerkitoBio/goquery) [![GoDoc](https://godoc.org/github.com/PuerkitoBio/goquery?status.png)](http://godoc.org/github.com/PuerkitoBio/goquery) [![Sourcegraph Badge](https://sourcegraph.com/github.com/PuerkitoBio/goquery/-/badge.svg)](https://sourcegraph.com/github.com/PuerkitoBio/goquery?badge)
[![builds.sr.ht status](https://builds.sr.ht/~mna/goquery/commits/fedora.yml.svg)](https://builds.sr.ht/~mna/goquery/commits/fedora.yml?) [![build status](https://secure.travis-ci.org/PuerkitoBio/goquery.svg?branch=master)](http://travis-ci.org/PuerkitoBio/goquery) [![Go Reference](https://pkg.go.dev/badge/github.com/PuerkitoBio/goquery.svg)](https://pkg.go.dev/github.com/PuerkitoBio/goquery) [![Sourcegraph Badge](https://sourcegraph.com/github.com/PuerkitoBio/goquery/-/badge.svg)](https://sourcegraph.com/github.com/PuerkitoBio/goquery?badge)
goquery brings a syntax and a set of features similar to [jQuery][] to the [Go language][go]. It is based on Go's [net/html package][html] and the CSS Selector library [cascadia][]. Since the net/html parser returns nodes, and not a full-featured DOM tree, jQuery's stateful manipulation functions (like height(), css(), detach()) have been left off.
......@@ -19,7 +19,7 @@ Syntax-wise, it is as close as possible to jQuery, with the same function names
## Installation
Please note that because of the net/html dependency, goquery requires Go1.1+.
Please note that because of the net/html dependency, goquery requires Go1.1+ and is tested on Go1.7+.
$ go get github.com/PuerkitoBio/goquery
......@@ -37,6 +37,10 @@ Please note that because of the net/html dependency, goquery requires Go1.1+.
**Note that goquery's API is now stable, and will not break.**
* **2021-06-14 (v1.7.0)** : Add `Single` and `SingleMatcher` functions to optimize first-match selection (thanks [@gdollardollar](https://github.com/gdollardollar)).
* **2021-01-11 (v1.6.1)** : Fix panic when calling `{Prepend,Append,Set}Html` on a `Selection` that contains non-Element nodes.
* **2020-10-08 (v1.6.0)** : Parse html in context of the container node for all functions that deal with html strings (`AfterHtml`, `AppendHtml`, etc.). Thanks to [@thiemok][thiemok] and [@davidjwilkins][djw] for their work on this.
* **2020-02-04 (v1.5.1)** : Update module dependencies.
* **2018-11-15 (v1.5.0)** : Go module support (thanks @Zaba505).
* **2018-06-07 (v1.4.1)** : Add `NewDocumentFromReader` examples.
* **2018-03-24 (v1.4.0)** : Deprecate `NewDocument(url)` and `NewDocumentFromResponse(response)`.
......@@ -47,7 +51,7 @@ Please note that because of the net/html dependency, goquery requires Go1.1+.
* **2016-08-28 (v1.0.1)** : Optimize performance for large documents.
* **2016-07-27 (v1.0.0)** : Tag version 1.0.0.
* **2016-06-15** : Invalid selector strings internally compile to a `Matcher` implementation that never matches any node (instead of a panic). So for example, `doc.Find("~")` returns an empty `*Selection` object.
* **2016-02-02** : Add `NodeName` utility function similar to the DOM's `nodeName` property. It returns the tag name of the first element in a selection, and other relevant values of non-element nodes (see godoc for details). Add `OuterHtml` utility function similar to the DOM's `outerHTML` property (named `OuterHtml` in small caps for consistency with the existing `Html` method on the `Selection`).
* **2016-02-02** : Add `NodeName` utility function similar to the DOM's `nodeName` property. It returns the tag name of the first element in a selection, and other relevant values of non-element nodes (see [doc][] for details). Add `OuterHtml` utility function similar to the DOM's `outerHTML` property (named `OuterHtml` in small caps for consistency with the existing `Html` method on the `Selection`).
* **2015-04-20** : Add `AttrOr` helper method to return the attribute's value or a default value if absent. Thanks to [piotrkowalczuk][piotr].
* **2015-02-04** : Add more manipulation functions - Prepend* - thanks again to [Andrew Stone][thatguystone].
* **2014-11-28** : Add more manipulation functions - ReplaceWith*, Wrap* and Unwrap - thanks again to [Andrew Stone][thatguystone].
......@@ -76,7 +80,7 @@ jQuery often has many variants for the same function (no argument, a selector st
Utility functions that are not in jQuery but are useful in Go are implemented as functions (that take a `*Selection` as parameter), to avoid a potential naming clash on the `*Selection`'s methods (reserved for jQuery-equivalent behaviour).
The complete [godoc reference documentation can be found here][doc].
The complete [package reference documentation can be found here][doc].
Please note that Cascadia's selectors do not necessarily match all supported selectors of jQuery (Sizzle). See the [cascadia project][cascadia] for details. Invalid selector strings compile to a `Matcher` that fails to match any node. Behaviour of the various functions that take a selector string as argument follows from that fact, e.g. (where `~` is an invalid selector string):
......@@ -120,12 +124,11 @@ func ExampleScrape() {
}
// Find the review items
doc.Find(".sidebar-reviews article .content-block").Each(func(i int, s *goquery.Selection) {
// For each item found, get the band and title
band := s.Find("a").Text()
title := s.Find("i").Text()
fmt.Printf("Review %d: %s - %s\n", i, band, title)
})
doc.Find(".left-content article .post-title").Each(func(i int, s *goquery.Selection) {
// For each item found, get the title
title := s.Find("a").Text()
fmt.Printf("Review %d: %s\n", i, title)
})
}
func main() {
......@@ -138,9 +141,14 @@ func main() {
- [Goq][goq], an HTML deserialization and scraping library based on goquery and struct tags.
- [andybalholm/cascadia][cascadia], the CSS selector library used by goquery.
- [suntong/cascadia][cascadiacli], a command-line interface to the cascadia CSS selector library, useful to test selectors.
- [asciimoo/colly](https://github.com/asciimoo/colly), a lightning fast and elegant Scraping Framework
- [gocolly/colly](https://github.com/gocolly/colly), a lightning fast and elegant Scraping Framework
- [gnulnx/goperf](https://github.com/gnulnx/goperf), a website performance test tool that also fetches static assets.
- [MontFerret/ferret](https://github.com/MontFerret/ferret), declarative web scraping.
- [tacusci/berrycms](https://github.com/tacusci/berrycms), a modern simple to use CMS with easy to write plugins
- [Dataflow kit](https://github.com/slotix/dataflowkit), Web Scraping framework for Gophers.
- [Geziyor](https://github.com/geziyor/geziyor), a fast web crawling & scraping framework for Go. Supports JS rendering.
- [Pagser](https://github.com/foolin/pagser), a simple, easy, extensible, configurable HTML parser to struct based on goquery and struct tags.
- [stitcherd](https://github.com/vhodges/stitcherd), A server for doing server side includes using css selectors and DOM updates.
## Support
......@@ -153,8 +161,9 @@ There are a number of ways you can support the project:
* Pull requests: please discuss new code in an issue first, unless the fix is really trivial.
- Make sure new code is tested.
- Be mindful of existing code - PRs that break existing code have a high probability of being declined, unless it fixes a serious issue.
If you desperately want to send money my way, I have a BuyMeACoffee.com page:
* Sponsor the developer
- See the Github Sponsor button at the top of the repo on github
- or via BuyMeACoffee.com, below
<a href="https://www.buymeacoffee.com/mna" target="_blank"><img src="https://www.buymeacoffee.com/assets/img/custom_images/orange_img.png" alt="Buy Me A Coffee" style="height: 41px !important;width: 174px !important;box-shadow: 0px 3px 2px 0px rgba(190, 190, 190, 0.5) !important;-webkit-box-shadow: 0px 3px 2px 0px rgba(190, 190, 190, 0.5) !important;" ></a>
......@@ -169,11 +178,13 @@ The [BSD 3-Clause license][bsd], the same as the [Go language][golic]. Cascadia'
[bsd]: http://opensource.org/licenses/BSD-3-Clause
[golic]: http://golang.org/LICENSE
[caslic]: https://github.com/andybalholm/cascadia/blob/master/LICENSE
[doc]: http://godoc.org/github.com/PuerkitoBio/goquery
[doc]: https://pkg.go.dev/github.com/PuerkitoBio/goquery
[index]: http://api.jquery.com/index/
[gonet]: https://github.com/golang/net/
[html]: http://godoc.org/golang.org/x/net/html
[html]: https://pkg.go.dev/golang.org/x/net/html
[wiki]: https://github.com/PuerkitoBio/goquery/wiki/Tips-and-tricks
[thatguystone]: https://github.com/thatguystone
[piotr]: https://github.com/piotrkowalczuk
[goq]: https://github.com/andrewstuart/goq
[thiemok]: https://github.com/thiemok
[djw]: https://github.com/davidjwilkins
module github.com/PuerkitoBio/goquery
require (
github.com/andybalholm/cascadia v1.0.0
golang.org/x/net v0.0.0-20181114220301-adae6a3d119a
github.com/andybalholm/cascadia v1.1.0
golang.org/x/net v0.0.0-20200202094626-16171245cfb2
)
go 1.13
github.com/andybalholm/cascadia v1.0.0 h1:hOCXnnZ5A+3eVDX8pvgl4kofXv2ELss0bKcqRySc45o=
github.com/andybalholm/cascadia v1.0.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
github.com/andybalholm/cascadia v1.1.0 h1:BuuO6sSfQNFRu1LppgbD25Hr2vLYW25JvxHs5zzsLTo=
github.com/andybalholm/cascadia v1.1.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20181114220301-adae6a3d119a h1:gOpx8G595UYyvj8UK4+OFyY4rx037g3fmfhe5SasG3U=
golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20200202094626-16171245cfb2 h1:CCH4IOTTfewWjGOlSp+zGcjutRKlBEZQ6wTn8ozI/nI=
golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
......@@ -39,8 +39,15 @@ func (s *Selection) AfterSelection(sel *Selection) *Selection {
// AfterHtml parses the html and inserts it after the set of matched elements.
//
// This follows the same rules as Selection.Append.
func (s *Selection) AfterHtml(html string) *Selection {
return s.AfterNodes(parseHtml(html)...)
func (s *Selection) AfterHtml(htmlStr string) *Selection {
return s.eachNodeHtml(htmlStr, true, func(node *html.Node, nodes []*html.Node) {
nextSibling := node.NextSibling
for _, n := range nodes {
if node.Parent != nil {
node.Parent.InsertBefore(n, nextSibling)
}
}
})
}
// AfterNodes inserts the nodes after each element in the set of matched elements.
......@@ -85,8 +92,12 @@ func (s *Selection) AppendSelection(sel *Selection) *Selection {
}
// AppendHtml parses the html and appends it to the set of matched elements.
func (s *Selection) AppendHtml(html string) *Selection {
return s.AppendNodes(parseHtml(html)...)
func (s *Selection) AppendHtml(htmlStr string) *Selection {
return s.eachNodeHtml(htmlStr, false, func(node *html.Node, nodes []*html.Node) {
for _, n := range nodes {
node.AppendChild(n)
}
})
}
// AppendNodes appends the specified nodes to each node in the set of matched elements.
......@@ -123,8 +134,14 @@ func (s *Selection) BeforeSelection(sel *Selection) *Selection {
// BeforeHtml parses the html and inserts it before the set of matched elements.
//
// This follows the same rules as Selection.Append.
func (s *Selection) BeforeHtml(html string) *Selection {
return s.BeforeNodes(parseHtml(html)...)
func (s *Selection) BeforeHtml(htmlStr string) *Selection {
return s.eachNodeHtml(htmlStr, true, func(node *html.Node, nodes []*html.Node) {
for _, n := range nodes {
if node.Parent != nil {
node.Parent.InsertBefore(n, node)
}
}
})
}
// BeforeNodes inserts the nodes before each element in the set of matched elements.
......@@ -184,8 +201,13 @@ func (s *Selection) PrependSelection(sel *Selection) *Selection {
}
// PrependHtml parses the html and prepends it to the set of matched elements.
func (s *Selection) PrependHtml(html string) *Selection {
return s.PrependNodes(parseHtml(html)...)
func (s *Selection) PrependHtml(htmlStr string) *Selection {
return s.eachNodeHtml(htmlStr, false, func(node *html.Node, nodes []*html.Node) {
firstChild := node.FirstChild
for _, n := range nodes {
node.InsertBefore(n, firstChild)
}
})
}
// PrependNodes prepends the specified nodes to each node in the set of
......@@ -212,14 +234,19 @@ func (s *Selection) Remove() *Selection {
return s
}
// RemoveFiltered removes the set of matched elements by selector.
// It returns the Selection of removed nodes.
// RemoveFiltered removes from the current set of matched elements those that
// match the selector filter. It returns the Selection of removed nodes.
//
// For example if the selection s contains "<h1>", "<h2>" and "<h3>"
// and s.RemoveFiltered("h2") is called, only the "<h2>" node is removed
// (and returned), while "<h1>" and "<h3>" are kept in the document.
func (s *Selection) RemoveFiltered(selector string) *Selection {
return s.RemoveMatcher(compileMatcher(selector))
}
// RemoveMatcher removes the set of matched elements.
// It returns the Selection of removed nodes.
// RemoveMatcher removes from the current set of matched elements those that
// match the Matcher filter. It returns the Selection of removed nodes.
// See RemoveFiltered for additional information.
func (s *Selection) RemoveMatcher(m Matcher) *Selection {
return s.FilterMatcher(m).Remove()
}
......@@ -256,8 +283,16 @@ func (s *Selection) ReplaceWithSelection(sel *Selection) *Selection {
// It returns the removed elements.
//
// This follows the same rules as Selection.Append.
func (s *Selection) ReplaceWithHtml(html string) *Selection {
return s.ReplaceWithNodes(parseHtml(html)...)
func (s *Selection) ReplaceWithHtml(htmlStr string) *Selection {
s.eachNodeHtml(htmlStr, true, func(node *html.Node, nodes []*html.Node) {
nextSibling := node.NextSibling
for _, n := range nodes {
if node.Parent != nil {
node.Parent.InsertBefore(n, nextSibling)
}
}
})
return s.Remove()
}
// ReplaceWithNodes replaces each element in the set of matched elements with
......@@ -272,8 +307,17 @@ func (s *Selection) ReplaceWithNodes(ns ...*html.Node) *Selection {
// SetHtml sets the html content of each element in the selection to
// specified html string.
func (s *Selection) SetHtml(html string) *Selection {
return setHtmlNodes(s, parseHtml(html)...)
func (s *Selection) SetHtml(htmlStr string) *Selection {
for _, context := range s.Nodes {
for c := context.FirstChild; c != nil; c = context.FirstChild {
context.RemoveChild(c)
}
}
return s.eachNodeHtml(htmlStr, false, func(node *html.Node, nodes []*html.Node) {
for _, n := range nodes {
node.AppendChild(n)
}
})
}
// SetText sets the content of each element in the selection to specified content.
......@@ -329,8 +373,23 @@ func (s *Selection) WrapSelection(sel *Selection) *Selection {
// most child of the given HTML.
//
// It returns the original set of elements.
func (s *Selection) WrapHtml(html string) *Selection {
return s.wrapNodes(parseHtml(html)...)
func (s *Selection) WrapHtml(htmlStr string) *Selection {
nodesMap := make(map[string][]*html.Node)
for _, context := range s.Nodes {
var parent *html.Node
if context.Parent != nil {
parent = context.Parent
} else {
parent = &html.Node{Type: html.ElementNode}
}
nodes, found := nodesMap[nodeName(parent)]
if !found {
nodes = parseHtmlWithContext(htmlStr, parent)
nodesMap[nodeName(parent)] = nodes
}
newSingleSelection(context, s.document).wrapAllNodes(cloneNodes(nodes)...)
}
return s
}
// WrapNode wraps each element in the set of matched elements inside the inner-
......@@ -382,8 +441,18 @@ func (s *Selection) WrapAllSelection(sel *Selection) *Selection {
// document.
//
// It returns the original set of elements.
func (s *Selection) WrapAllHtml(html string) *Selection {
return s.wrapAllNodes(parseHtml(html)...)
func (s *Selection) WrapAllHtml(htmlStr string) *Selection {
var context *html.Node
var nodes []*html.Node
if len(s.Nodes) > 0 {
context = s.Nodes[0]
if context.Parent != nil {
nodes = parseHtmlWithContext(htmlStr, context)
} else {
nodes = parseHtml(htmlStr)
}
}
return s.wrapAllNodes(nodes...)
}
func (s *Selection) wrapAllNodes(ns ...*html.Node) *Selection {
......@@ -452,8 +521,17 @@ func (s *Selection) WrapInnerSelection(sel *Selection) *Selection {
// cloned before being inserted into the document.
//
// It returns the original set of elements.
func (s *Selection) WrapInnerHtml(html string) *Selection {
return s.wrapInnerNodes(parseHtml(html)...)
func (s *Selection) WrapInnerHtml(htmlStr string) *Selection {
nodesMap := make(map[string][]*html.Node)
for _, context := range s.Nodes {
nodes, found := nodesMap[nodeName(context)]
if !found {
nodes = parseHtmlWithContext(htmlStr, context)
nodesMap[nodeName(context)] = nodes
}
newSingleSelection(context, s.document).wrapInnerNodes(cloneNodes(nodes)...)
}
return s
}
// WrapInnerNode wraps an HTML structure, matched by the given selector, around
......@@ -493,16 +571,14 @@ func parseHtml(h string) []*html.Node {
return nodes
}
func setHtmlNodes(s *Selection, ns ...*html.Node) *Selection {
for _, n := range s.Nodes {
for c := n.FirstChild; c != nil; c = n.FirstChild {
n.RemoveChild(c)
}
for _, c := range ns {
n.AppendChild(cloneNode(c))
}
func parseHtmlWithContext(h string, context *html.Node) []*html.Node {
// Errors are only returned when the io.Reader returns any error besides
// EOF, but strings.Reader never will
nodes, err := html.ParseFragment(strings.NewReader(h), context)
if err != nil {
panic("goquery: failed to parse HTML: " + err.Error())
}
return s
return nodes
}
// Get the first child that is an ElementNode
......@@ -572,3 +648,32 @@ func (s *Selection) manipulateNodes(ns []*html.Node, reverse bool,
return s
}
// eachNodeHtml parses the given html string and inserts the resulting nodes in the dom with the mergeFn.
// The parsed nodes are inserted for each element of the selection.
// isParent can be used to indicate that the elements of the selection should be treated as the parent for the parsed html.
// A cache is used to avoid parsing the html multiple times should the elements of the selection result in the same context.
func (s *Selection) eachNodeHtml(htmlStr string, isParent bool, mergeFn func(n *html.Node, nodes []*html.Node)) *Selection {
// cache to avoid parsing the html for the same context multiple times
nodeCache := make(map[string][]*html.Node)
var context *html.Node
for _, n := range s.Nodes {
if isParent {
context = n.Parent
} else {
if n.Type != html.ElementNode {
continue
}
context = n
}
if context != nil {
nodes, found := nodeCache[nodeName(context)]
if !found {
nodes = parseHtmlWithContext(htmlStr, context)
nodeCache[nodeName(context)] = nodes
}
mergeFn(n, cloneNodes(nodes))
}
}
return s
}
......@@ -7,7 +7,6 @@ import (
"net/url"
"github.com/andybalholm/cascadia"
"golang.org/x/net/html"
)
......@@ -122,6 +121,45 @@ type Matcher interface {
Filter([]*html.Node) []*html.Node
}
// Single compiles a selector string to a Matcher that stops after the first
// match is found.
//
// By default, Selection.Find and other functions that accept a selector string
// to select nodes will use all matches corresponding to that selector. By
// using the Matcher returned by Single, at most the first match will be
// selected.
//
// For example, those two statements are semantically equivalent:
//
// sel1 := doc.Find("a").First()
// sel2 := doc.FindMatcher(goquery.Single("a"))
//
// The one using Single is optimized to be potentially much faster on large
// documents.
//
// Only the behaviour of the MatchAll method of the Matcher interface is
// altered compared to standard Matchers. This means that the single-selection
// property of the Matcher only applies for Selection methods where the Matcher
// is used to select nodes, not to filter or check if a node matches the
// Matcher - in those cases, the behaviour of the Matcher is unchanged (e.g.
// FilterMatcher(Single("div")) will still result in a Selection with multiple
// "div"s if there were many "div"s in the Selection to begin with).
func Single(selector string) Matcher {
return singleMatcher{compileMatcher(selector)}
}
// SingleMatcher returns a Matcher matches the same nodes as m, but that stops
// after the first match is found.
//
// See the documentation of function Single for more details.
func SingleMatcher(m Matcher) Matcher {
if _, ok := m.(singleMatcher); ok {
// m is already a singleMatcher
return m
}
return singleMatcher{m}
}
// compileMatcher compiles the selector string s and returns
// the corresponding Matcher. If s is an invalid selector string,
// it returns a Matcher that fails all matches.
......@@ -133,6 +171,30 @@ func compileMatcher(s string) Matcher {
return cs
}
type singleMatcher struct {
Matcher
}
func (m singleMatcher) MatchAll(n *html.Node) []*html.Node {
// Optimized version - stops finding at the first match (cascadia-compiled
// matchers all use this code path).
if mm, ok := m.Matcher.(interface{ MatchFirst(*html.Node) *html.Node }); ok {
node := mm.MatchFirst(n)
if node == nil {
return nil
}
return []*html.Node{node}
}
// Fallback version, for e.g. test mocks that don't provide the MatchFirst
// method.
nodes := m.Matcher.MatchAll(n)
if len(nodes) > 0 {
return nodes[:1:1]
}
return nil
}
// invalidMatcher is a Matcher that always fails to match.
type invalidMatcher struct{}
......
......@@ -36,12 +36,22 @@ func NodeName(s *Selection) string {
if s.Length() == 0 {
return ""
}
switch n := s.Get(0); n.Type {
return nodeName(s.Get(0))
}
// nodeName returns the node name of the given html node.
// See NodeName for additional details on behaviour.
func nodeName(node *html.Node) string {
if node == nil {
return ""
}
switch node.Type {
case html.ElementNode, html.DoctypeNode:
return n.Data
return node.Data
default:
if n.Type >= 0 && int(n.Type) < len(nodeNames) {
return nodeNames[n.Type]
if node.Type >= 0 && int(node.Type) < len(nodeNames) {
return nodeNames[node.Type]
}
return ""
}
......
......@@ -5,3 +5,5 @@
The Cascadia package implements CSS selectors for use with the parse trees produced by the html package.
To test CSS selectors without writing Go code, check out [cascadia](https://github.com/suntong/cascadia) the command line tool, a thin wrapper around this package.
[Refer to godoc here](https://godoc.org/github.com/andybalholm/cascadia).
......@@ -7,8 +7,6 @@ import (
"regexp"
"strconv"
"strings"
"golang.org/x/net/html"
)
// a parser for CSS selectors
......@@ -56,6 +54,26 @@ func (p *parser) parseEscape() (result string, err error) {
return result, nil
}