Commit b06e5a29 authored by ale

clean up the state directory when done

parent 9fbc656c
...@@ -24,6 +24,7 @@ import ( ...@@ -24,6 +24,7 @@ import (
var ( var (
dbPath = flag.String("state", "crawldb", "crawl state database path") dbPath = flag.String("state", "crawldb", "crawl state database path")
keepDb = flag.Bool("keep", false, "keep the state database when done")
concurrency = flag.Int("c", 10, "concurrent workers") concurrency = flag.Int("c", 10, "concurrent workers")
depth = flag.Int("depth", 10, "maximum link depth") depth = flag.Int("depth", 10, "maximum link depth")
validSchemes = flag.String("schemes", "http,https", "comma-separated list of allowed protocols") validSchemes = flag.String("schemes", "http,https", "comma-separated list of allowed protocols")
...@@ -207,9 +208,14 @@ func main() { ...@@ -207,9 +208,14 @@ func main() {
saver := NewSaveHandler(w) saver := NewSaveHandler(w)
crawler, err := crawl.NewCrawler("crawldb", seeds, scope, crawl.FetcherFunc(fetch), crawl.NewRedirectHandler(saver)) crawler, err := crawl.NewCrawler(*dbPath, seeds, scope, crawl.FetcherFunc(fetch), crawl.NewRedirectHandler(saver))
if err != nil { if err != nil {
log.Fatal(err) log.Fatal(err)
} }
crawler.Run(*concurrency) crawler.Run(*concurrency)
crawler.Close()
if !*keepDb {
os.RemoveAll(*dbPath)
}
} }
...@@ -319,6 +319,10 @@ func (c *Crawler) Run(concurrency int) { ...@@ -319,6 +319,10 @@ func (c *Crawler) Run(concurrency int) {
wg.Wait() wg.Wait()
} }
// Close closes the crawler's database handle by calling Close on c.db.
// It returns nothing; NOTE(review): if c.db.Close reports an error it is
// dropped here — confirm against the db type whether that matters.
func (c *Crawler) Close() {
c.db.Close()
}
type redirectHandler struct { type redirectHandler struct {
h Handler h Handler
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment