Commit b3d41948 authored by ale's avatar ale

Update dependencies

parent 86a0bd2d
# goquery - a little like that j-thing, only in Go
[![build status](https://secure.travis-ci.org/PuerkitoBio/goquery.svg?branch=master)](http://travis-ci.org/PuerkitoBio/goquery) [![GoDoc](https://godoc.org/github.com/PuerkitoBio/goquery?status.png)](http://godoc.org/github.com/PuerkitoBio/goquery) [![Sourcegraph Badge](https://sourcegraph.com/github.com/PuerkitoBio/goquery/-/badge.svg)](https://sourcegraph.com/github.com/PuerkitoBio/goquery?badge)
goquery brings a syntax and a set of features similar to [jQuery][] to the [Go language][go]. It is based on Go's [net/html package][html] and the CSS Selector library [cascadia][]. Since the net/html parser returns nodes, and not a full-featured DOM tree, jQuery's stateful manipulation functions (like height(), css(), detach()) have been left off.
Also, because the net/html parser requires UTF-8 encoding, so does goquery: it is the caller's responsibility to ensure that the source document provides UTF-8 encoded HTML. See the [wiki][] for various options to do this.
......@@ -15,6 +14,7 @@ Syntax-wise, it is as close as possible to jQuery, with the same function names
* [API](#api)
* [Examples](#examples)
* [Related Projects](#related-projects)
* [Support](#support)
* [License](#license)
## Installation
......@@ -37,6 +37,10 @@ Please note that because of the net/html dependency, goquery requires Go1.1+.
**Note that goquery's API is now stable, and will not break.**
* **2018-06-07 (v1.4.1)** : Add `NewDocumentFromReader` examples.
* **2018-03-24 (v1.4.0)** : Deprecate `NewDocument(url)` and `NewDocumentFromResponse(response)`.
* **2018-01-28 (v1.3.0)** : Add `ToEnd` constant to `Slice` until the end of the selection (thanks to @davidjwilkins for raising the issue).
* **2018-01-11 (v1.2.0)** : Add `AddBack*` and deprecate `AndSelf` (thanks to @davidjwilkins).
* **2017-02-12 (v1.1.0)** : Add `SetHtml` and `SetText` (thanks to @glebtv).
* **2016-12-29 (v1.0.2)** : Optimize allocations for `Selection.Text` (thanks to @radovskyb).
* **2016-08-28 (v1.0.1)** : Optimize performance for large documents.
......@@ -92,12 +96,24 @@ package main
import (
"fmt"
"log"
"net/http"
"github.com/PuerkitoBio/goquery"
)
func ExampleScrape() {
doc, err := goquery.NewDocument("http://metalsucks.net")
// Request the HTML page.
res, err := http.Get("http://metalsucks.net")
if err != nil {
log.Fatal(err)
}
defer res.Body.Close()
if res.StatusCode != 200 {
log.Fatalf("status code error: %d %s", res.StatusCode, res.Status)
}
// Load the HTML document
doc, err := goquery.NewDocumentFromReader(res.Body)
if err != nil {
log.Fatal(err)
}
......@@ -122,6 +138,23 @@ func main() {
- [andybalholm/cascadia][cascadia], the CSS selector library used by goquery.
- [suntong/cascadia][cascadiacli], a command-line interface to the cascadia CSS selector library, useful to test selectors.
- [asciimoo/colly](https://github.com/asciimoo/colly), a lightning fast and elegant Scraping Framework
- [gnulnx/goperf](https://github.com/gnulnx/goperf), a website performance test tool that also fetches static assets.
## Support
There are a number of ways you can support the project:
* Use it, star it, build something with it, spread the word!
- If you do build something open-source or otherwise publicly-visible, let me know so I can add it to the [Related Projects](#related-projects) section!
* Raise issues to improve the project (note: doc typos and clarifications are issues too!)
- Please search existing issues before opening a new one - it may have already been adressed.
* Pull requests: please discuss new code in an issue first, unless the fix is really trivial.
- Make sure new code is tested.
- Be mindful of existing code - PRs that break existing code have a high probability of being declined, unless it fixes a serious issue.
If you desperately want to send money my way, I have a BuyMeACoffee.com page:
<a href="https://www.buymeacoffee.com/mna" target="_blank"><img src="https://www.buymeacoffee.com/assets/img/custom_images/orange_img.png" alt="Buy Me A Coffee" style="height: 41px !important;width: 174px !important;box-shadow: 0px 3px 2px 0px rgba(190, 190, 190, 0.5) !important;-webkit-box-shadow: 0px 3px 2px 0px rgba(190, 190, 190, 0.5) !important;" ></a>
## License
......
......@@ -4,6 +4,16 @@ import (
"golang.org/x/net/html"
)
const (
maxUint = ^uint(0)
maxInt = int(maxUint >> 1)
// ToEnd is a special index value that can be used as end index in a call
// to Slice so that all elements are selected until the end of the Selection.
// It is equivalent to passing (*Selection).Length().
ToEnd = maxInt
)
// First reduces the set of matched elements to the first in the set.
// It returns a new Selection object, and an empty Selection object if the
// the selection is empty.
......@@ -35,12 +45,23 @@ func (s *Selection) Eq(index int) *Selection {
}
// Slice reduces the set of matched elements to a subset specified by a range
// of indices.
// of indices. The start index is 0-based and indicates the index of the first
// element to select. The end index is 0-based and indicates the index at which
// the elements stop being selected (the end index is not selected).
//
// The indices may be negative, in which case they represent an offset from the
// end of the selection.
//
// The special value ToEnd may be specified as end index, in which case all elements
// until the end are selected. This works both for a positive and negative start
// index.
func (s *Selection) Slice(start, end int) *Selection {
if start < 0 {
start += len(s.Nodes)
}
if end < 0 {
if end == ToEnd {
end = len(s.Nodes)
} else if end < 0 {
end += len(s.Nodes)
}
return pushStack(s, s.Nodes[start:end])
......
......@@ -41,6 +41,30 @@ func (s *Selection) AddNodes(nodes ...*html.Node) *Selection {
// AndSelf adds the previous set of elements on the stack to the current set.
// It returns a new Selection object containing the current Selection combined
// with the previous one.
// Deprecated: This function has been deprecated and is now an alias for AddBack().
func (s *Selection) AndSelf() *Selection {
return s.AddBack()
}
// AddBack adds the previous set of elements on the stack to the current set.
// It returns a new Selection object containing the current Selection combined
// with the previous one.
func (s *Selection) AddBack() *Selection {
return s.AddSelection(s.prevSel)
}
// AddBackFiltered reduces the previous set of elements on the stack to those that
// match the selector string, and adds them to the current set.
// It returns a new Selection object containing the current Selection combined
// with the filtered previous one
func (s *Selection) AddBackFiltered(selector string) *Selection {
return s.AddSelection(s.prevSel.Filter(selector))
}
// AddBackMatcher reduces the previous set of elements on the stack to those that match
// the mateher, and adds them to the curernt set.
// It returns a new Selection object containing the current Selection combined
// with the filtered previous one
func (s *Selection) AddBackMatcher(m Matcher) *Selection {
return s.AddSelection(s.prevSel.FilterMatcher(m))
}
......@@ -270,13 +270,14 @@ func (s *Selection) ReplaceWithNodes(ns ...*html.Node) *Selection {
return s.Remove()
}
// Set the html content of each element in the selection to specified html string.
// SetHtml sets the html content of each element in the selection to
// specified html string.
func (s *Selection) SetHtml(html string) *Selection {
return setHtmlNodes(s, parseHtml(html)...)
}
// Set the content of each element in the selection to specified content. The
// provided text string is escaped.
// SetText sets the content of each element in the selection to specified content.
// The provided text string is escaped.
func (s *Selection) SetText(text string) *Selection {
return s.SetHtml(html.EscapeString(text))
}
......
......@@ -5,11 +5,7 @@ import "golang.org/x/net/html"
// Is checks the current matched set of elements against a selector and
// returns true if at least one of these elements matches.
func (s *Selection) Is(selector string) bool {
if len(s.Nodes) > 0 {
return s.IsMatcher(compileMatcher(selector))
}
return false
return s.IsMatcher(compileMatcher(selector))
}
// IsMatcher checks the current matched set of elements against a matcher and
......
......@@ -31,6 +31,10 @@ func NewDocumentFromNode(root *html.Node) *Document {
// NewDocument is a Document constructor that takes a string URL as argument.
// It loads the specified document, parses it, and stores the root Document
// node, ready to be manipulated.
//
// Deprecated: Use the net/http standard library package to make the request
// and validate the response before calling goquery.NewDocumentFromReader
// with the response's body.
func NewDocument(url string) (*Document, error) {
// Load the URL
res, e := http.Get(url)
......@@ -40,10 +44,10 @@ func NewDocument(url string) (*Document, error) {
return NewDocumentFromResponse(res)
}
// NewDocumentFromReader returns a Document from a generic reader.
// NewDocumentFromReader returns a Document from an io.Reader.
// It returns an error as second value if the reader's data cannot be parsed
// as html. It does *not* check if the reader is also an io.Closer, so the
// provided reader is never closed by this call, it is the responsibility
// as html. It does not check if the reader is also an io.Closer, the
// provided reader is never closed by this call. It is the responsibility
// of the caller to close it if required.
func NewDocumentFromReader(r io.Reader) (*Document, error) {
root, e := html.Parse(r)
......@@ -56,6 +60,8 @@ func NewDocumentFromReader(r io.Reader) (*Document, error) {
// NewDocumentFromResponse is another Document constructor that takes an http response as argument.
// It loads the specified response's document, parses it, and stores the root Document
// node, ready to be manipulated. The response's body is closed on return.
//
// Deprecated: Use goquery.NewDocumentFromReader with the response's body.
func NewDocumentFromResponse(res *http.Response) (*Document, error) {
if res == nil {
return nil, errors.New("Response is nil")
......
module "github.com/andybalholm/cascadia"
require "golang.org/x/net" v0.0.0-20180218175443-cbe0f9307d01
......@@ -2,10 +2,21 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package snappy implements the snappy block-based compression format.
// It aims for very high speeds and reasonable compression.
// Package snappy implements the Snappy compression format. It aims for very
// high speeds and reasonable compression.
//
// The C++ snappy implementation is at https://github.com/google/snappy
// There are actually two Snappy formats: block and stream. They are related,
// but different: trying to decompress block-compressed data as a Snappy stream
// will fail, and vice versa. The block format is the Decode and Encode
// functions and the stream format is the Reader and Writer types.
//
// The block format, the more common case, is used when the complete size (the
// number of bytes) of the original data is known upfront, at the time
// compression starts. The stream format, also known as the framing format, is
// for when that isn't always true.
//
// The canonical, C++ implementation is at https://github.com/google/snappy and
// it only implements the block format.
package snappy // import "github.com/golang/snappy"
import (
......
......@@ -35,6 +35,7 @@ type DB struct {
// Stats. Need 64-bit alignment.
cWriteDelay int64 // The cumulative duration of write delays
cWriteDelayN int32 // The cumulative number of write delays
inWritePaused int32 // The indicator whether write operation is paused by compaction
aliveSnaps, aliveIters int32
// Session.
......@@ -906,6 +907,8 @@ func (db *DB) GetSnapshot() (*Snapshot, error) {
// Returns the number of files at level 'n'.
// leveldb.stats
// Returns statistics of the underlying DB.
// leveldb.iostats
// Returns statistics of effective disk read and write.
// leveldb.writedelay
// Returns cumulative write delay caused by compaction.
// leveldb.sstables
......@@ -959,9 +962,14 @@ func (db *DB) GetProperty(name string) (value string, err error) {
level, len(tables), float64(tables.size())/1048576.0, duration.Seconds(),
float64(read)/1048576.0, float64(write)/1048576.0)
}
case p == "iostats":
value = fmt.Sprintf("Read(MB):%.5f Write(MB):%.5f",
float64(db.s.stor.reads())/1048576.0,
float64(db.s.stor.writes())/1048576.0)
case p == "writedelay":
writeDelayN, writeDelay := atomic.LoadInt32(&db.cWriteDelayN), time.Duration(atomic.LoadInt64(&db.cWriteDelay))
value = fmt.Sprintf("DelayN:%d Delay:%s", writeDelayN, writeDelay)
paused := atomic.LoadInt32(&db.inWritePaused) == 1
value = fmt.Sprintf("DelayN:%d Delay:%s Paused:%t", writeDelayN, writeDelay, paused)
case p == "sstables":
for level, tables := range v.levels {
value += fmt.Sprintf("--- level %d ---\n", level)
......@@ -990,6 +998,75 @@ func (db *DB) GetProperty(name string) (value string, err error) {
return
}
// DBStats is database statistics.
type DBStats struct {
WriteDelayCount int32
WriteDelayDuration time.Duration
WritePaused bool
AliveSnapshots int32
AliveIterators int32
IOWrite uint64
IORead uint64
BlockCacheSize int
OpenedTablesCount int
LevelSizes []int64
LevelTablesCounts []int
LevelRead []int64
LevelWrite []int64
LevelDurations []time.Duration
}
// Stats populates s with database statistics.
func (db *DB) Stats(s *DBStats) error {
err := db.ok()
if err != nil {
return err
}
s.IORead = db.s.stor.reads()
s.IOWrite = db.s.stor.writes()
s.WriteDelayCount = atomic.LoadInt32(&db.cWriteDelayN)
s.WriteDelayDuration = time.Duration(atomic.LoadInt64(&db.cWriteDelay))
s.WritePaused = atomic.LoadInt32(&db.inWritePaused) == 1
s.OpenedTablesCount = db.s.tops.cache.Size()
if db.s.tops.bcache != nil {
s.BlockCacheSize = db.s.tops.bcache.Size()
} else {
s.BlockCacheSize = 0
}
s.AliveIterators = atomic.LoadInt32(&db.aliveIters)
s.AliveSnapshots = atomic.LoadInt32(&db.aliveSnaps)
s.LevelDurations = s.LevelDurations[:0]
s.LevelRead = s.LevelRead[:0]
s.LevelWrite = s.LevelWrite[:0]
s.LevelSizes = s.LevelSizes[:0]
s.LevelTablesCounts = s.LevelTablesCounts[:0]
v := db.s.version()
defer v.release()
for level, tables := range v.levels {
duration, read, write := db.compStats.getStat(level)
if len(tables) == 0 && duration == 0 {
continue
}
s.LevelDurations = append(s.LevelDurations, duration)
s.LevelRead = append(s.LevelRead, read)
s.LevelWrite = append(s.LevelWrite, write)
s.LevelSizes = append(s.LevelSizes, tables.size())
s.LevelTablesCounts = append(s.LevelTablesCounts, len(tables))
}
return nil
}
// SizeOf calculates approximate sizes of the given key ranges.
// The length of the returned sizes are equal with the length of the given
// ranges. The returned sizes measure storage space usage, so if the user
......
......@@ -640,6 +640,16 @@ func (db *DB) tableNeedCompaction() bool {
return v.needCompaction()
}
// resumeWrite returns an indicator whether we should resume write operation if enough level0 files are compacted.
func (db *DB) resumeWrite() bool {
v := db.s.version()
defer v.release()
if v.tLen(0) < db.s.o.GetWriteL0PauseTrigger() {
return true
}
return false
}
func (db *DB) pauseCompaction(ch chan<- struct{}) {
select {
case ch <- struct{}{}:
......@@ -653,6 +663,7 @@ type cCmd interface {
}
type cAuto struct {
// Note for table compaction, an empty ackC represents it's a compaction waiting command.
ackC chan<- error
}
......@@ -765,8 +776,10 @@ func (db *DB) mCompaction() {
}
func (db *DB) tCompaction() {
var x cCmd
var ackQ []cCmd
var (
x cCmd
ackQ, waitQ []cCmd
)
defer func() {
if x := recover(); x != nil {
......@@ -778,6 +791,10 @@ func (db *DB) tCompaction() {
ackQ[i].ack(ErrClosed)
ackQ[i] = nil
}
for i := range waitQ {
waitQ[i].ack(ErrClosed)
waitQ[i] = nil
}
if x != nil {
x.ack(ErrClosed)
}
......@@ -795,12 +812,25 @@ func (db *DB) tCompaction() {
return
default:
}
// Resume write operation as soon as possible.
if len(waitQ) > 0 && db.resumeWrite() {
for i := range waitQ {
waitQ[i].ack(nil)
waitQ[i] = nil
}
waitQ = waitQ[:0]
}
} else {
for i := range ackQ {
ackQ[i].ack(nil)
ackQ[i] = nil
}
ackQ = ackQ[:0]
for i := range waitQ {
waitQ[i].ack(nil)
waitQ[i] = nil
}
waitQ = waitQ[:0]
select {
case x = <-db.tcompCmdC:
case ch := <-db.tcompPauseC:
......@@ -813,7 +843,11 @@ func (db *DB) tCompaction() {
if x != nil {
switch cmd := x.(type) {
case cAuto:
ackQ = append(ackQ, x)
if cmd.ackC != nil {
waitQ = append(waitQ, x)
} else {
ackQ = append(ackQ, x)
}
case cRange:
x.ack(db.tableRangeCompaction(cmd.level, cmd.min, cmd.max))
default:
......
......@@ -89,7 +89,11 @@ func (db *DB) flush(n int) (mdb *memDB, mdbFree int, err error) {
return false
case tLen >= pauseTrigger:
delayed = true
// Set the write paused flag explicitly.
atomic.StoreInt32(&db.inWritePaused, 1)
err = db.compTriggerWait(db.tcompCmdC)
// Unset the write paused flag.
atomic.StoreInt32(&db.inWritePaused, 0)
if err != nil {
return false
}
......@@ -146,7 +150,7 @@ func (db *DB) unlockWrite(overflow bool, merged int, err error) {
}
}
// ourBatch if defined should equal with batch.
// ourBatch is batch that we can modify.
func (db *DB) writeLocked(batch, ourBatch *Batch, merge, sync bool) error {
// Try to flush memdb. This method would also trying to throttle writes
// if it is too fast and compaction cannot catch-up.
......@@ -215,6 +219,11 @@ func (db *DB) writeLocked(batch, ourBatch *Batch, merge, sync bool) error {
}
}
// Release ourBatch if any.
if ourBatch != nil {
defer db.batchPool.Put(ourBatch)
}
// Seq number.
seq := db.seq + 1
......
......@@ -42,7 +42,7 @@ type session struct {
stTempFileNum int64
stSeqNum uint64 // last mem compacted seq; need external synchronization
stor storage.Storage
stor *iStorage
storLock storage.Locker
o *cachedOptions
icmp *iComparer
......@@ -68,7 +68,7 @@ func newSession(stor storage.Storage, o *opt.Options) (s *session, err error) {
return
}
s = &session{
stor: stor,
stor: newIStorage(stor),
storLock: storLock,
fileRef: make(map[int64]int),
}
......
package leveldb
import (
"github.com/syndtr/goleveldb/leveldb/storage"
"sync/atomic"
)
type iStorage struct {
storage.Storage
read uint64
write uint64
}
func (c *iStorage) Open(fd storage.FileDesc) (storage.Reader, error) {
r, err := c.Storage.Open(fd)
return &iStorageReader{r, c}, err
}
func (c *iStorage) Create(fd storage.FileDesc) (storage.Writer, error) {
w, err := c.Storage.Create(fd)
return &iStorageWriter{w, c}, err
}
func (c *iStorage) reads() uint64 {
return atomic.LoadUint64(&c.read)
}
func (c *iStorage) writes() uint64 {
return atomic.LoadUint64(&c.write)
}
// newIStorage returns the given storage wrapped by iStorage.
func newIStorage(s storage.Storage) *iStorage {
return &iStorage{s, 0, 0}
}
type iStorageReader struct {
storage.Reader
c *iStorage
}
func (r *iStorageReader) Read(p []byte) (n int, err error) {
n, err = r.Reader.Read(p)
atomic.AddUint64(&r.c.read, uint64(n))
return n, err
}
func (r *iStorageReader) ReadAt(p []byte, off int64) (n int, err error) {
n, err = r.Reader.ReadAt(p, off)
atomic.AddUint64(&r.c.read, uint64(n))
return n, err
}
type iStorageWriter struct {
storage.Writer
c *iStorage
}
func (w *iStorageWriter) Write(p []byte) (n int, err error) {
n, err = w.Writer.Write(p)
atomic.AddUint64(&w.c.write, uint64(n))
return n, err
}
......@@ -8,7 +8,6 @@ package storage
import (
"os"
"path/filepath"
)
type plan9FileLock struct {
......@@ -48,8 +47,7 @@ func rename(oldpath, newpath string) error {
}
}
_, fname := filepath.Split(newpath)
return os.Rename(oldpath, fname)
return os.Rename(oldpath, newpath)
}
func syncDir(name string) error {
......
......@@ -67,13 +67,25 @@ func isErrInvalid(err error) bool {
if err == os.ErrInvalid {
return true
}
// Go < 1.8
if syserr, ok := err.(*os.SyscallError); ok && syserr.Err == syscall.EINVAL {
return true
}
// Go >= 1.8 returns *os.PathError instead
if patherr, ok := err.(*os.PathError); ok && patherr.Err == syscall.EINVAL {
return true
}
return false
}
func syncDir(name string) error {
// As per fsync manpage, Linux seems to expect fsync on directory, however
// some system don't support this, so we will ignore syscall.EINVAL.
//
// From fsync(2):
// Calling fsync() does not necessarily ensure that the entry in the
// directory containing the file has also reached disk. For that an
// explicit fsync() on a file descriptor for the directory is also needed.
f, err := os.Open(name)
if err != nil {
return err
......
......@@ -12,7 +12,11 @@ import (
"sync"
)
const typeShift = 3
const typeShift = 4
// Verify at compile-time that typeShift is large enough to cover all FileType
// values by confirming that 0 == 0.
var _ [0]struct{} = [TypeAll >> typeShift]struct{}{}
type memStorageLock struct {
ms *memStorage
......@@ -143,7 +147,7 @@ func (ms *memStorage) Remove(fd FileDesc) error {
}
func (ms *memStorage) Rename(oldfd, newfd FileDesc) error {
if FileDescOk(oldfd) || FileDescOk(newfd) {
if !FileDescOk(oldfd) || !FileDescOk(newfd) {
return ErrInvalidFile
}
if oldfd == newfd {
......
......@@ -55,6 +55,14 @@ type ErrCorrupted struct {
Err error
}
func isCorrupted(err error) bool {
switch err.(type) {
case *ErrCorrupted:
return true
}
return false
}
func (e *ErrCorrupted) Error() string {
if !e.Fd.Zero() {
return fmt.Sprintf("%v [file=%v]", e.Err, e.Fd)
......
......@@ -451,7 +451,7 @@ func newTableOps(s *session) *tOps {
if !s.o.GetDisableBlockCache() {
var bcacher cache.Cacher
if s.o.GetBlockCacheCapacity() > 0 {
bcacher = cache.NewLRU(s.o.GetBlockCacheCapacity())
bcacher = s.o.GetBlockCacher().New(s.o.GetBlockCacheCapacity())
}
bcache = cache.NewCache(bcacher)
}
......
......@@ -20,7 +20,7 @@ func shorten(str string) string {
return str[:3] + ".." + str[len(str)-3:]
}
var bunits = [...]string{"", "Ki", "Mi", "Gi"}
var bunits = [...]string{"", "Ki", "Mi", "Gi", "Ti"}
func shortenb(bytes int) string {
i := 0
......
This diff is collapsed.
......@@ -4,7 +4,7 @@
package html
// Section 12.2.3.2 of the HTML5 specification says "The following elements
// Section 12.2.4.2 of the HTML5 specification says "The following elements
// have varying levels of special parsing rules".
// https://html.spec.whatwg.org/multipage/syntax.html#the-stack-of-open-elements
var isSpecialElementMap = map[string]bool{
......
This diff is collapsed.
......@@ -67,7 +67,7 @@ func mathMLTextIntegrationPoint(n *Node) bool {
return false
}
// Section 12.2.5.5.
// Section 12.2.6.5.
var breakout = map[string]bool{
"b": true,
"big": true,
......@@ -115,7 +115,7 @@ var breakout = map[string]bool{
"var": true,
}
// Section 12.2.5.5.