Commit b06e5a29 authored by ale

clean up the state directory when done

parent 9fbc656c
......@@ -24,6 +24,7 @@ import (
var (
dbPath = flag.String("state", "crawldb", "crawl state database path")
keepDb = flag.Bool("keep", false, "keep the state database when done")
concurrency = flag.Int("c", 10, "concurrent workers")
depth = flag.Int("depth", 10, "maximum link depth")
validSchemes = flag.String("schemes", "http,https", "comma-separated list of allowed protocols")
......@@ -207,9 +208,14 @@ func main() {
saver := NewSaveHandler(w)
crawler, err := crawl.NewCrawler("crawldb", seeds, scope, crawl.FetcherFunc(fetch), crawl.NewRedirectHandler(saver))
crawler, err := crawl.NewCrawler(*dbPath, seeds, scope, crawl.FetcherFunc(fetch), crawl.NewRedirectHandler(saver))
if err != nil {
log.Fatal(err)
}
crawler.Run(*concurrency)
crawler.Close()
if !*keepDb {
os.RemoveAll(*dbPath)
}
}
......@@ -319,6 +319,10 @@ func (c *Crawler) Run(concurrency int) {
wg.Wait()
}
// Close calls Close on the crawler's db field. The method itself returns
// nothing, so whatever that call yields is not reported to the caller.
// NOTE(review): presumably db is the crawl state database opened by
// NewCrawler, so Close should run after Run has finished and before the
// state directory is deleted — confirm against the constructor.
func (c *Crawler) Close() {
c.db.Close()
}
// redirectHandler holds a single Handler in its h field.
// NOTE(review): presumably this is the type built by NewRedirectHandler,
// with h being the handler it delegates to — confirm against the
// constructor and methods, which are not shown here.
type redirectHandler struct {
h Handler
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment