diff --git a/cmd/crawl/crawl.go b/cmd/crawl/crawl.go index de45494df16400321901c666b347b44041b307ce..3954682378f7980d2f92e06c7872c2b951f1ba91 100644 --- a/cmd/crawl/crawl.go +++ b/cmd/crawl/crawl.go @@ -99,7 +99,7 @@ func (h *warcSaveHandler) Handle(c *crawl.Crawler, u string, depth int, resp *ht return extractLinks(c, u, depth, resp, err) } -func NewSaveHandler(w *warc.Writer) crawl.Handler { +func newWarcSaveHandler(w *warc.Writer) crawl.Handler { info := strings.Join([]string{ "Software: crawl/1.0\r\n", "Format: WARC File Format 1.0\r\n", @@ -206,7 +206,7 @@ func main() { w := warc.NewWriter(outf) defer w.Close() - saver := NewSaveHandler(w) + saver := newWarcSaveHandler(w) crawler, err := crawl.NewCrawler(*dbPath, seeds, scope, crawl.FetcherFunc(fetch), crawl.NewRedirectHandler(saver)) if err != nil { diff --git a/crawler.go b/crawler.go index d162330660f5fd43448805499dede451360f81fe..f2a89689267e017f182e549ccf5bc1093c64e0d0 100644 --- a/crawler.go +++ b/crawler.go @@ -319,6 +319,7 @@ func (c *Crawler) Run(concurrency int) { wg.Wait() } +// Close closes the database and releases the resources associated with the crawler state. func (c *Crawler) Close() { c.db.Close() } diff --git a/warc/warc.go b/warc/warc.go index 0739fb93013421b0b92b3d24d86b2be5311f0687..7f2b03b1cf22647185e0e373e0f1e618b09a289f 100644 --- a/warc/warc.go +++ b/warc/warc.go @@ -23,7 +23,7 @@ var ( } ) -// A WARC header. Header field names are case-sensitive. +// Header for a WARC record. Header field names are case-sensitive. type Header map[string]string // Set a header to the specified value. Multiple values are not