diff --git a/cmd/crawl/crawl.go b/cmd/crawl/crawl.go
index 63a592481b6f4ecbd38d395959f6e70942b7ef03..d68ac5e0a34f64ca05dd23ca3298171941ef9b90 100644
--- a/cmd/crawl/crawl.go
+++ b/cmd/crawl/crawl.go
@@ -13,6 +13,9 @@ import (
 	"os"
 	"strconv"
 	"strings"
+	"sync"
+	"sync/atomic"
+	"time"
 
 	"git.autistici.org/ale/crawl"
 	"git.autistici.org/ale/crawl/analysis"
@@ -115,6 +118,74 @@ func NewSaveHandler(w *warc.Writer) crawl.Handler {
 	}
 }
 
+type crawlStats struct {
+	bytes int64
+	start time.Time
+
+	lock   sync.Mutex
+	states map[int]int
+}
+
+func (c *crawlStats) Update(resp *http.Response) {
+	c.lock.Lock()
+	defer c.lock.Unlock()
+
+	c.states[resp.StatusCode]++
+	resp.Body = &byteCounter{resp.Body}
+}
+
+func (c *crawlStats) UpdateBytes(n int64) {
+	atomic.AddInt64(&c.bytes, n)
+}
+
+func (c *crawlStats) Dump() {
+	c.lock.Lock()
+	defer c.lock.Unlock()
+	rate := float64(c.bytes) / time.Since(c.start).Seconds() / 1000
+	fmt.Fprintf(os.Stderr, "stats: downloaded %d bytes (%.4g KB/s), status: %v\n", c.bytes, rate, c.states)
+}
+
+var (
+	stats *crawlStats
+
+	client *http.Client
+)
+
+func fetch(urlstr string) (*http.Response, error) {
+	resp, err := client.Get(urlstr)
+	if err == nil {
+		stats.Update(resp)
+	}
+	return resp, err
+}
+
+func init() {
+	client = &http.Client{}
+
+	stats = &crawlStats{
+		states: make(map[int]int),
+		start:  time.Now(),
+	}
+
+	go func() {
+		for range time.Tick(10 * time.Second) {
+			stats.Dump()
+		}
+	}()
+}
+
+type byteCounter struct {
+	io.ReadCloser
+}
+
+func (b *byteCounter) Read(buf []byte) (int, error) {
+	n, err := b.ReadCloser.Read(buf)
+	if n > 0 {
+		stats.UpdateBytes(int64(n))
+	}
+	return n, err
+}
+
 func main() {
 	flag.Parse()
 
@@ -128,6 +199,7 @@ func main() {
 		crawl.NewSchemeScope(strings.Split(*validSchemes, ",")),
 		crawl.NewDepthScope(*depth),
 		crawl.NewSeedScope(seeds),
+		crawl.NewRegexpIgnoreScope(nil),
 	}
 
 	w := warc.NewWriter(outf)
@@ -135,7 +207,7 @@ func main() {
 
 	saver := NewSaveHandler(w)
 
-	crawler, err := crawl.NewCrawler("crawldb", seeds, scope, crawl.FetcherFunc(http.Get), crawl.NewRedirectHandler(saver))
+	crawler, err := crawl.NewCrawler("crawldb", seeds, scope, crawl.FetcherFunc(fetch), crawl.NewRedirectHandler(saver))
 	if err != nil {
 		log.Fatal(err)
 	}
diff --git a/gen-ignores.py b/gen-ignores.py
new file mode 100755
index 0000000000000000000000000000000000000000..25b3cacf70ab4f86db74f1310191544d8aae2fbe
--- /dev/null
+++ b/gen-ignores.py
@@ -0,0 +1,31 @@
+#!/usr/bin/python
+#
+# Parse ArchiveBot ignore regexp patterns and generate a Go source
+# file with a global variable including all of them.
+#
+# Invoke with a single argument, the location of a checked-out copy of
+# https://github.com/ArchiveTeam/ArchiveBot/tree/master/db/ignore_patterns.
+#
+
+import glob
+import json
+import os
+import sys
+
+archivebot_ignore_path = sys.argv[1]
+print 'package crawl\n\nvar defaultIgnorePatterns = []string{'
+for fn in glob.glob(os.path.join(archivebot_ignore_path, '*.json')):
+    try:
+        with open(fn) as fd:
+            print '\n\t// %s' % os.path.basename(fn)
+            for p in json.load(fd)['patterns']:
+                if '\\\\1' in p or '(?!' in p:
+                    # RE2 does not support backreferences or other
+                    # fancy PCRE constructs. This excludes <10
+                    # patterns from the ignore list.
+                    continue
+                print '\t%s,' % json.dumps(p)
+    except Exception, e:
+        print >>sys.stderr, 'error in %s: %s' % (fn, e)
+print '}'
+
diff --git a/ignore_patterns.go b/ignore_patterns.go
new file mode 100644
index 0000000000000000000000000000000000000000..2c6d949d4606340bcf43a3c3ce0fca64e68d6de4
--- /dev/null
+++ b/ignore_patterns.go
@@ -0,0 +1,453 @@
+package crawl
+
+var defaultIgnorePatterns = []string{
+
+	// WordPress.
+	"wp-login\\.php",
+	"/wp-admin/",
+	"/xmlrpc\\.php",
+
+	// googleplus.json
+	"^https?://accounts\\.google\\.com/ServiceLogin",
+	"^https?://accounts\\.google\\.com/SignUp",
+	"^https?://lh4\\.googleusercontent\\.com/proxy/[^/]+",
+	"^https?://plus\\.google\\.com/_/scs/apps-static/",
+
+	// mediawiki.json
+	"[\\?&]oldid=\\d+",
+	"[\\?&]curid=\\d+",
+	"[\\?&]limit=(20|100|250|500)",
+	"[\\?&]hide(minor|bots|anons|liu|myself|redirs|links|trans|patrolled)=",
+	"([\\?&]title=|/)Special:(UserLogin|UserLogout|Translate|MobileFeedback|MobileOptions|RecentChangesLinked|Diff|MobileDiff)",
+	"([\\?&]title=|/)Special:RecentChanges&from=\\d+",
+	"([\\?&]title=|/)Special:ListFiles&dir=prev&offset=\\d+",
+	"([\\?&]title=|/)Special:(ListFiles|PrefixIndex).*&amp;",
+	"([\\?&]title=|/)Special:ListFiles.*&user=",
+	"([\\?&]title=|/)Special:Log/",
+	"[\\?&]action=edit&section=(\\d+|new)",
+	"[\\?&]feed(format)?=atom",
+	"[\\?&]redlink=1",
+	"[\\?&]printable=yes",
+	"[\\?&]mobileaction=",
+	"[\\?&]undo(after)?=\\d+",
+	"^http://a\\.wikia-beacon\\.com/__track/",
+	"/User_talk:.+/User_talk:",
+	"/User_blog:.+/User_blog:",
+	"/User:.+/User:",
+
+	// nosortedindex.json
+	"\\?C=[NMSD];O=[AD]$",
+
+	// coppermine.json
+	"(?:displayimage|thumbnails)\\.php[?&]album=(?:topn|toprated|lastcom|lastup|lastupby|random|lastcomby)",
+	"ratepic\\.php",
+	"addfav\\.php\\?.*ref=displayimage\\.php",
+	"displayimage\\.php\\?.*slideshow=\\d+",
+
+	// youtube.json
+	"^https?://accounts\\.google\\.com/ServiceLogin",
+	"\\.?youtube\\.com/user/[^/]+/(playlists|channels|videos)\\?(flow|view|sort|live_view)=",
+
+	// reddit.json
+	"^https?://www\\.reddit\\.com/gold\\?goldtype=",
+	"^https?://www\\.reddit\\.com/r/[^/]+/comments/[a-z0-9]+/[^/]+/[a-z0-9]+",
+	"^https?://www\\.reddit\\.com/r/[^/]+/comments/[a-z0-9]+.*\\?sort=",
+	"^https?://www\\.reddit\\.com/r/[^/]+/comments/[a-z0-9]+/[^/]+/\\.compact",
+	"^https?://www\\.reddit\\.com/r/[^/]+/(top|new|rising|controversial|gilded|ads)/.+[\\?&]after=",
+	"^https?://www\\.reddit\\.com/r/[^/]+/related/",
+	"^https?://www\\.reddit\\.com/r/[^/]+/(gilded)?\\.mobile\\?",
+	"^https?://www\\.reddit\\.com/r/[^/]+/search/?\\?",
+	"^https?://www\\.reddit\\.com/r/[^/]+/wiki/(revisions|discussions)/user/.+",
+	"^https?://www\\.reddit\\.com/user/[^/]+/(comments/)?.+[\\?&]sort=",
+	"^https?://www\\.reddit\\.com/.+/\\.rss$",
+	"^https?://simple\\.reddit\\.com/",
+	"^https?://pixel\\.redditmedia\\.com/pixel/",
+	"\\.reddit\\.com/message/compose/?\\?",
+	"^https?://m\\.reddit\\.com/",
+
+	// nogravatar.json
+	"^https?://(\\d|secure)\\.gravatar\\.com/avatar/",
+
+	// meetupeverywhere.json
+	"^https?://.*\\.meetup\\.com/login/",
+
+	// pinterest.json
+	"^https?://www\\.pinterest\\.com/[^/]+/\\^/[^/]+/",
+	"^https?://www\\.pinterest\\.com/[^/]+/[^/]+/\\^/[^/]+/",
+	"^https?://www\\.pinterest\\.com/[^/]+/[^/]+\\.[^/]+",
+	"^https?://www\\.pinterest\\.com/[^/]+/[^/]+/[^/]+\\.[^/]+",
+	"^https?://www\\.pinterest\\.com/[^/]+/webapp/js/app/(desktop|common)/bundle-(jcrop|mapbox)\\.js",
+	"^https?://www\\.pinterest\\.com/[^/]+/[^/]+/webapp/js/app/(desktop|common)/bundle-(jcrop|mapbox)\\.js",
+
+	// noonion.json
+
+	// blogs.json
+	"[\\?&]replytocom=",
+	"[\\?&]share=",
+	"/page/%d/$",
+	"\\?showComment(=|%5C)",
+	"/quote-comment-\\d+/$",
+	"/wp-login\\.php\\?",
+	"^https?://r\\-login\\.wordpress\\.com/remote\\-login\\.php",
+	"'\\%20\\+\\%20liker\\.(avatar|profile)_URL\\%20\\+\\%20'",
+	"\\%22\\%20\\+\\%20$wrapper\\.data\\(",
+	"^http://.+\\.blogspot\\.(com|in|com\\.au|co\\.uk|jp|co\\.nz|ca|de|it|fr|se|sg|es|pt|com\\.br|ar|mx|kr)/(search(\\?|/label/)|\\d{4,4}/\\d{2,2}/CSI/$)",
+	"livejournal\\.com/ljcounter/?\\?",
+	"\\?replyto=[0-9]+",
+	"[\\?&]mode=reply",
+	"xiti\\.com/hit\\.xiti\\?",
+	"/stats\\.g\\.doubleclick\\.net/dc\\.js$",
+	"/jetpack-comment/\\?",
+	"\\?like_comment=\\d+",
+	"^https?://.+/.+/disqus\\.com/forums/$",
+	"(\\?|%5Cx26)route=(/page/:page|/archive/:year/:month|/tagged/:tag|/post/:id|/image/:post_id)",
+	"%5Cx26route=/archive",
+	"^http://\\d+\\.media\\.tumblr\\.com/avatar_.+_16\\.png$",
+	"^http://www\\.livejournal\\.com/(tools/memadd|update|login)\\.bml\\?",
+	"^http://[^\\.]+\\.livejournal\\.com/.+[\\?&]mode=reply",
+	"^http://[^\\.]+\\.livejournal\\.com/.+/\\*sup_ru/ru/UTF-8/",
+	"^http://[^\\.]+\\.livejournal\\.com/.+http://[^\\.]+\\.livejournal\\.com/",
+	"^http://[^\\.]+\\.livejournal\\.com/.+/stats\\.g\\.doubleclick\\.net/dc\\.js$",
+	"^https?://www\\.dreamwidth\\.org/tools/(memadd|tellafriend)\\?",
+	"^https?://[^\\.]+\\.dreamwidth\\.org/.+[\\?&]mode=reply",
+
+	// global.json
+	// Disabled, RE2 has no backreferences: "/(.*)/(\\1/){3,}",
+	"%25252525",
+	"/App_Themes/.+/App_Themes/",
+	"/bxSlider/.+/bxSlider/",
+	"/bxSlider/bxSlider/",
+	"/slides/slides/.+/slides/",
+	"/slides/.+/slides/slides/",
+	"/slides/slides/slides/",
+	"/js/js/.+/js/",
+	"/js/.+/js/js/",
+	"/js/js/js/",
+	"/css/css/.+/css/",
+	"/css/.+/css/css/",
+	"/css/css/css/",
+	"/styles/styles/.+/styles/",
+	"/styles/.+/styles/styles/",
+	"/styles/styles/styles/",
+	"/scripts/scripts/.+/scripts/",
+	"/scripts/.+/scripts/scripts/",
+	"/scripts/scripts/scripts/",
+	"/images/images/.+/images/",
+	"/images/.+/images/images/",
+	"/images/images/images/",
+	"/img/img/.+/img/",
+	"/img/.+/img/img/",
+	"/img/img/img/",
+	"/clientscript/clientscript/.+/clientscript/",
+	"/clientscript/.+/clientscript/clientscript/",
+	"/clientscript/clientscript/clientscript/",
+	"/lib/exe/.*lib[-_]exe[-_]lib[-_]exe[-_]",
+	"/(%5C)+(%22|%27)",
+	"/%5C/%5C/",
+	"/%27\\+[^/]+\\+%27",
+	"/%22\\+[^/]+\\+%22",
+	"/%27%20\\+[^/]+\\+%20%27",
+	"/%22%20\\+[^/]+\\+%20%22",
+	"/\\\\+(%22|%27)",
+	"/\\\\+[\"']",
+	"/\\\\/\\\\/",
+	"/'\\+[^/]+\\+'",
+	"^https?://localhost(:\\d+)?/",
+	"^https?://(127|10)\\.\\d+\\.\\d+\\.\\d+(:\\d+)?/",
+	"^https?://172\\.(1[6-9]|2\\d|3[01])\\.\\d+\\.\\d+(:\\d+)?/",
+	"^https?://192\\.168\\.\\d+\\.\\d+(:\\d+)?/",
+	"^https?://www\\.google\\.com/recaptcha/api",
+	"^https?://geo\\.yahoo\\.com/b\\?",
+	"^https?://((s-)?static\\.ak\\.fbcdn\\.net|(connect\\.|www\\.)?facebook\\.com)/connect\\.php/js/.*rsrc\\.php",
+	"^https?://www\\.flickr\\.com/change_language\\.gne",
+	"^https?://((www|web|web-beta|wayback)\\.)?archive\\.org/",
+	"^https?://www\\.google\\.((com|ad|ae|al|am|as|at|az|ba|be|bf|bg|bi|bj|bs|bt|by|ca|cd|cf|cg|ch|ci|cl|cm|cn|cv|cz|de|dj|dk|dm|dz|ee|es|fi|fm|fr|ga|ge|gg|gl|gm|gp|gr|gy|hn|hr|ht|hu|ie|im|iq|is|it|je|jo|ki|kg|kz|la|li|lk|lt|lu|lv|md|me|mg|mk|ml|mn|ms|mu|mv|mw|ne|nl|no|nr|nu|pl|pn|ps|pt|ro|ru|rw|sc|se|sh|si|sk|sn|so|sm|sr|st|td|tg|tk|tl|tm|tn|to|tt|vg|vu|ws|rs|cat)|(com\\.(af|ag|ai|ar|au|bd|bh|bn|bo|br|bz|co|cu|cy|do|ec|eg|et|fj|gh|gi|gt|hk|jm|kh|kw|lb|ly|mm|mt|mx|my|na|nf|ng|ni|np|om|pa|pe|pg|ph|pk|pr|py|qa|sa|sb|sg|sl|sv|tj|tr|tw|ua|uy|vc|vn))|(co\\.(ao|bw|ck|cr|id|il|in|jp|ke|kr|ls|ma|mz|nz|th|tz|ug|uk|uz|ve|vi|za|zm|zw)))/finance\\?noIL=1&q=[^&]+&ei=",
+	"^https?://upload\\.wikimedia\\.org/wikipedia/[^/]+/thumb/",
+	"^http://b\\.scorecardresearch\\.com/",
+	"^http://i\\.dev\\.cdn\\.turner\\.com/",
+	"^http://video-subtitle\\.tedcdn\\.com/",
+	"^http://download\\.ted\\.com/",
+	"^http://msft\\.digitalrivercontent\\.net/win/.+\\.iso",
+	"^https?://tmz\\.vo\\.llnwd\\.net/",
+	"^https?://(www\\.)?megaupload\\.com/",
+	"^https?://(www\\.)?filesonic\\.com/",
+	"^https?://(www\\.)?wupload\\.com/",
+	"^https?://prod-preview\\.wired\\.com/",
+	"^http://([^\\./]+\\.)?stream\\.publicradio\\.org/",
+	"^http://icecast\\.streaming\\.castor\\.nl/",
+	"^http://wm1\\.streaming\\.castor\\.nl:8000/",
+	"^http://icecast\\.databoss\\.nl:8000/",
+	"^http://stream\\.rynothebearded\\.com:8000/",
+	"^http://mp3\\.live\\.tv-radio\\.com/",
+	"^http://av\\.rasset\\.ie/av/live/",
+	"^http://gcnplayer\\.gcnlive\\.com/.+",
+	"^http://streaming\\.radionomy\\.com/",
+	"^http://mp3\\.ffh\\.de/",
+	"^http://(www\\.)?theradio\\.cc\\:8000/",
+	"^http://(audio\\d?|nfw)\\.video\\.ria\\.ru/",
+	"^http://eu1\\.fastcast4u\\.com:3048/",
+	"^http://[^\\./]+\\.radioscoop\\.(com|net):\\d+/",
+	"^http://[^\\./]+\\.streamchan\\.org:\\d+/",
+	"^http://[^/]*musicproxy\\.s12\\.de/",
+	"^http://stream\\.rfi\\.fr/",
+	"^http://striiming\\d?\\.trio\\.ee/",
+	"^http://streamer\\.radiocampus\\.be(:\\d+)?/",
+	"^http://relay\\.broadcastify\\.com/",
+	"^http://audio\\d?\\.radioreference\\.com/",
+	"^http://[^/]+\\.akadostream\\.ru(:\\d+)?/",
+	"^http://radio\\.silver\\.ru(:\\d+)?/",
+	"^http://icecast\\.szwoelf\\.com:8000/",
+	"^http://altair\\.micronick\\.com:8080/\\?action=stream",
+	"^http://94\\.25\\.53\\.13[1-4]/.+\\.mp3$",
+	"^http://server\\.lradio\\.ru:\\d+/",
+	"^http://188\\.93\\.17\\.201:8080/",
+	"^http://81\\.19\\.85\\.19[56]/.+\\.mp3$",
+	"^http://81\\.19\\.85\\.203/.+\\.mp3$",
+	"^http://play(\\d+)?\\.radio13\\.ru:8000/",
+	"^http://stream(\\d+)?\\.media\\.rambler\\.ru/",
+	"^http://pub(\\d+)?\\.di\\.fm/",
+	"^http://vostok\\.fmtuner\\.ru/",
+	"^http://109\\.120\\.141\\.181:8000/",
+	"^http://195\\.88\\.63\\.114:8000/",
+	"^http://radiosilver\\.corbina\\.net:8000/",
+	"^http://89\\.251\\.147\\.100/",
+	"^http://bcs\\d?\\.fontanka\\.fm:8000/",
+	"^http://stream2\\.cnmns\\.net/",
+	"^http://[^/]+\\.streamtheworld\\.com/",
+	"^http://[^/]+\\.gaduradio\\.pl/",
+	"^http://anka\\.org:8080/",
+	"^http://radio\\.visionotaku\\.com:8000/",
+	"^http://stream\\.r-a-d\\.io/",
+	"^http://r-a-d\\.io/.+\\.mp3$",
+	"^http://95\\.81\\.155\\.17/",
+	"^https?://icecast\\.rtl2?\\.fr/",
+	"^http://mp3tslg\\.tdf-cdn\\.com/",
+	"^http://[^/]+/anony/mjpg\\.cgi$",
+	"^https?://air\\.radiorecord\\.ru(:\\d+)?/",
+	"^https?://[^/]+\\.rastream\\.com(:\\d+)?/",
+	"^https?://audiots\\.scdn\\.arkena\\.com/",
+	"^https?://(www|draft)\\.blogger\\.com/(navbar\\.g|post-edit\\.g|delete-comment\\.g|comment-iframe\\.g|share-post\\.g|email-post\\.g|blog-this\\.g|delete-backlink\\.g|rearrange|blog_this\\.pyra)\\?",
+	"^https?://www\\.tumblr\\.com/(impixu\\?|share(/link/?)?\\?|reblog/)",
+	"^https?://plus\\.google\\.com/share\\?",
+	"^https?://(apis|plusone)\\.google\\.com/_/\\+1/",
+	"^https?://(ssl\\.|www\\.)?reddit\\.com/(login\\?dest=|submit\\?|static/button/button)",
+	"^https?://digg\\.com/submit\\?",
+	"^https?://(www\\.)?facebook\\.com/(plugins/like(box)?\\.php|sharer/sharer\\.php|sharer?\\.php|dialog/(feed|share))\\?",
+	"^https?://(www\\.)?twitter\\.com/(share\\?|intent/((re)?tweet|favorite)|home/?\\?status=|\\?status=)",
+	"^https?://platform\\d?\\.twitter\\.com/widgets/tweet_button.html\\?",
+	"^https?://www\\.newsvine\\.com/_wine/save\\?",
+	"^https?://www\\.netvibes\\.com/subscribe\\.php\\?",
+	"^https?://add\\.my\\.yahoo\\.com/(rss|content)\\?",
+	"^http://www\\.addtoany\\.com/(add_to/|share_save\\?)",
+	"^https?://www\\.addthis\\.com/bookmark\\.php\\?",
+	"^https?://(www\\.)?pinterest\\.com/pin/create/",
+	"^https?://www\\.linkedin\\.com/(cws/share|shareArticle)\\?",
+	"^https?://(www\\.)?stumbleupon\\.com/(submit\\?|badge/embed/)",
+	"^https?://csp\\.cyworld\\.com/bi/bi_recommend_pop\\.php\\?",
+	"^https://share\\.flipboard\\.com/bookmarklet/popout\\?",
+	"^https?://flattr.com/submit/auto\\?",
+	"^https?://(www\\.)?myspace\\.com/Modules/PostTo/",
+	"^https?://www\\.google\\.com/bookmarks/mark\\?",
+	"^http://myweb2\\.search\\.yahoo\\.com/myresults/bookmarklet\\?",
+	"^http://vuible\\.com/pins-settings/",
+	"^https?://news\\.ycombinator\\.com/submitlink\\?",
+	"^http://reporter\\.es\\.msn\\.com/\\?fn=contribute",
+	"^http://www\\.blinklist\\.com/index\\.php\\?Action=Blink/addblink\\.php",
+	"^http://sphinn\\.com/index\\.php\\?c=post&m=submit&",
+	"^http://posterous\\.com/share\\?",
+	"^http://del\\.icio\\.us/post\\?",
+	"^https?://delicious\\.com/(save|post)\\?",
+	"^https?://(www\\.)?friendfeed\\.com/share\\?",
+	"^https?://(www\\.)?xing\\.com/(app/user\\?op=share|social_plugins/share\\?)",
+	"^http://iwiw\\.hu/pages/share/share\\.jsp\\?",
+	"^http://memori(\\.qip)?\\.ru/link/\\?",
+	"^http://wow\\.ya\\.ru/posts_(add|share)_link\\.xml\\?",
+	"^https?://connect\\.mail\\.ru/share\\?",
+	"^http://zakladki\\.yandex\\.ru/newlink\\.xml\\?",
+	"^https?://vkontakte\\.ru/share\\.php\\?",
+	"^https?://www\\.odnoklassniki\\.ru/dk\\?st\\.cmd=addShare",
+	"^https?://www\\.google\\.com/(reader/link\\?|buzz/post\\?)",
+	"^https?://service\\.weibo\\.com/share/share\\.php\\?",
+	"^https?://(www\\.)?technorati\\.com/faves/?\\?add=",
+	"^https?://bufferapp\\.com/add\\?",
+	"^https?://b\\.hatena\\.ne\\.jp/add\\?",
+	"^https?://api\\.addthis\\.com/",
+	"^https?://bookmark\\.naver\\.com/post\\?",
+	"^https?://mail\\.google\\.com/mail/",
+	"^http://pixel\\.blog\\.hu/",
+	"^https?://pixel\\.quantserve\\.com/",
+	"^http://b\\.scorecardresearch\\.com/",
+	"^https?://(www|ssl)\\.google-analytics\\.com/(r/)?(__utm\\.gif|collect\\?)",
+	"^https?://p\\.opt\\.fimserve\\.com/",
+	"^https?://(\\d|www|secure)\\.gravatar\\.com/avatar/ad516503a11cd5ca435acc9bb6523536",
+	"^https?://imageshack\\.com/lost$",
+	"^https?://[^/]+\\.corp\\.ne1\\.yahoo\\.com/",
+	"^https?://.+/js-agent\\.newrelic\\.com/nr-\\d{3,3}(\\.min)?\\.js$",
+	"^https?://.+/stats\\.g\\.doubleclick\\.net/dc\\.js$",
+	"^https?://.+/js/chartbeat\\.js$",
+	"^http://www\\.khaleejtimes\\.com/.+/kt_.+/kt_",
+	"^http://www\\.khaleejtimes\\.com/.+/images/.+/images/",
+	"^http://www\\.khaleejtimes\\.com/.+/imgactv/.+/imgactv/",
+	"^http://photobucket\\.com/.+/albums/.+/albums/",
+	"^https?://([^/]+\\.)?gdcvault\\.com(/.*/|/)(fonts(/.*/|/)fonts/|css(/.*/|/)css/|img(/.*/|/)img/)",
+	"^https://static\\.licdn\\.com/sc/p/com\\.linkedin\\.nux(:|%3A)nux-static-content(\\+|%2B)[\\d\\.]+/f/",
+	"^https?://www\\.flickr\\.com/(explore/|photos/[^/]+/(sets/\\d+/(page\\d+/)?)?)\\d+_[a-f0-9]+(_[a-z])?\\.jpg$",
+	"^https?://static\\.licdn\\.com/sc/p/.+/f//",
+	"^http://www\\.warnerbros\\.com/\\d+$",
+	"^https?://tm\\.uol\\.com\\.br/h/.+/h/",
+	"^https?://media\\.opb\\.org/clips/embed/.+\\.js$",
+
+	// twitter.json
+	"^https?://((?:www|mobile)\\.)?twitter\\.com/.+\\?(?:id|lang|locale|screen_name)=",
+	"^https?://mobile\\.twitter\\.com/i/anonymize\\?data=",
+
+	// imdb.json
+	"^http://b\\.scorecardresearch\\.com/",
+	"^http://ad\\.doubleclick\\.net/",
+	"^http://www\\.imdb\\.com/rd/",
+	"^http://www\\.imdb\\.com/.+\\?ref_=",
+	"^http://www\\.imdb\\.com/.+/board/flat/",
+	"^http://www\\.imdb\\.com/.+/board/inline/",
+	"^http://www\\.imdb\\.com/.+/board/thread/",
+	"^http://www\\.imdb\\.com/help/boards_posting\\.html",
+	"^http://www\\.imdb\\.com/register/",
+	"^http://www\\.imdb\\.com/.+/board/.+/\\d+\\?d=",
+	"^http://www\\.imdb\\.com/.+/videogallery/.+/.+/",
+
+	// facebook.json
+	"^https?://error\\.facebook\\.com/common/scribe_endpoint\\.php\\?c=",
+	"^https?://www\\.facebook\\.com/[^/]+/(posts/|app_)[^/]+\\?(ref=page_internal&)?_fb_noscript=",
+	"^https?://www\\.facebook\\.com/[^/]+/photos/(pb|a)\\.[^/]+/[^/]+/.{4,4}/",
+	"^https?://www\\.facebook\\.com/[^/]+/photos/(pb|a)\\.[^/]+/[^/]+/\\?type=",
+
+	// internetcentrum.json
+	"%3Bamp%3Bamp",
+	"&action=edit",
+	"action=(?:komentar|send)",
+	"action=(?:multiple_products_add_product|notify|add_product|buy_now)",
+	"&action=submit",
+	"&amp;action=edit",
+	"amp;amp;",
+	"answer=.+?&anksent=true",
+	"[a-z0-9]=(?:off|on)",
+	"blog=1&disp=msgform",
+	"\\?cal=",
+	"calendar_menu/calendar\\.php",
+	"calendar_menu/event\\.php",
+	"calendar\\.php",
+	"calendar_scheduler\\.php",
+	"captcha.php",
+	"cas12&cas12",
+	"comment\\.php\\?akce=new",
+	"/comment/reply/\\d+",
+	"cPath=.+&sort=.+",
+	"destination=node/%2F\\d+",
+	"destination=node/\\d+",
+	"(?:displayimage|thumbnails)\\.php\\?pos=-\\d+",
+	"file=posting.+mode=quote",
+	"&highlight=&",
+	"^http://harizzzma\\.com",
+	"^http://www.nahraj.net/",
+	"index.*\\.php\\?option=com_eventcal",
+	"index.php\\?site=calendar",
+	"index\\.php\\?site=guestbook&type=(?:ASC|DESC)",
+	"index.php/Speci%C3%A1ln%C3%AD",
+	"index.php\\?title=Diskuse:",
+	"index.php\\?title=MediaWiki_diskuse:",
+	"index.php\\?title=Soubor_diskuse",
+	"index.php\\?title=Speci%C3%A1ln%C3%AD",
+	"index\\.php\\?\\w+&rok=(1995|2016)&mesic=\\d+&autor=\\d+$",
+	"index\\.php\\?.+year=198.",
+	"index\\.php\\?.+year=203.",
+	"kalendar-akci",
+	"kalendar\\.php",
+	"kalendarrok=\\d{4}",
+	// Disabled, RE2 has no lookahead: "lang=(?!czech|english)",
+	// Disabled, RE2 has no lookahead: "language=(?!cs|en)",
+	"LightNEasy\\.php\\?do=login",
+	"limit=.+limit=.+",
+	"login=",
+	"login\\.php",
+	"(?:login|registrace|live\\?)",
+	"mact=Calendar",
+	"main_page=(?:product_reviews_write|login|cookie_usage)",
+	"memberlist\\.php\\?mode=email",
+	"memberlist\\.php\\?mode=.+order=",
+	"(?:memberlist|viewprofile|viewtopic)\\.php\\?.*sk=.&sd=.",
+	"mini.+calendar",
+	"mm=\\d+.+yy=\\d{4}",
+	"mode=(?:lostpassword|sendpassword)",
+	"modules.+name=Forums.+view=(?:next|previous)",
+	"modules\\.php\\?name=coppermine.*file=displayimage.+&slideshow=\\d+",
+	"modules\\.php\\?name=coppermine.*meta=(?:topn|toprated|lastcom|lastup|lastupby|random|lastcomby)",
+	"modules\\.php\\?name=Statistics",
+	"mo=\\d+.+ye=\\d{4}",
+	"name=Kalender",
+	"name=Statistics",
+	"option=com_jcalpro.+date=\\d{4}-",
+	"\\?option=com.+&month=.+&year=\\d{4}",
+	"option=&Itemid=.+&date=\\d{4}-",
+	// Disabled, RE2 has no lookahead: "order=(?!1)",
+	"orderby=(?:name|note|count|news)",
+	"photo.php\\?i=-\\d+",
+	"/photos.+\\?url=",
+	".*\\..*\\..*\\.pl",
+	"p=ordersBasket.+sOption=add",
+	"portal\\.php\\?month=[\\d]+",
+	"postdays=0&postorder=asc",
+	"prev_next=(?:prev|next)",
+	"/calendar/",
+	"product_reviews_write\\.php\\?",
+	"profile\\.php\\?mode=email",
+	"profile\\.php\\?mode=register",
+	"\\?q=event.+/(?:day|list|month|table|week)/all/all",
+	"random_num=\\d+",
+	"Recentchangeslinked/",
+	"report\\.php\\?f=.+",
+	"search_id=mini_cal&d=\\d+",
+	"SESSION_ID=",
+	"showcal\\.php",
+	"site=guestbook.+type=(?:ASC|DESC)",
+	// Disabled, RE2 has no backreferences: "/sites/all/(sites|modules|libraries|scripts|themes)/.+/\\1",
+	"Souprava=.+Souprava=.+",
+	"Special:Whatlinkshere",
+	"start-index=-\\d+",
+	"/switchuilocale/",
+	"target[xy]=.+target[xy]=.+",
+	"tellafriend\\.php",
+	":Userlogin&",
+	"user/(?:register|login)",
+	"viewtopic\\.php\\?.*highlight=",
+	"viewtopic\\.php\\?p=\\d+",
+	"viewtopic\\.php\\?.+view=print",
+	"y=\\d{4}&m=\\d+",
+
+	// forums.json
+	"/cron\\.php\\?",
+	"/external\\.php\\?type=rss",
+	"/login\\.php\\?",
+	"/newreply\\.php\\?",
+	"/private\\.php\\?",
+	"/privmsg\\.php\\?",
+	"/register\\.php\\?",
+	"/sendmessage\\.php\\?",
+	"/subscription\\.php\\?",
+	"/posting\\.php\\?",
+	"/viewtopic\\.php\\?.+&view=(next|previous)",
+	"/viewtopic\\.php\\?.+&hilit=",
+	"/feed\\.php\\?",
+	"/index\\.php\\?option=com_mailto",
+	"&view=login&return=",
+	"&format=opensearch",
+	"/misc\\.php\\?do=whoposted",
+	"/newthread\\.php\\?",
+	"/post_thanks\\.php\\?",
+	"/blog_post\\.php\\?do=newblog",
+	"/forumdisplay\\.php.*[\\?&]do=markread",
+	"/userpoll/vote\\.php\\?",
+	"/showthread\\.php.*[\\?&]goto=(next(old|new)est|newpost)",
+	"/editpost\\.php\\?",
+	"/\\?view=getlastpost$",
+	"/index\\.php\\?sharelink=",
+	"/ucp\\.php\\?mode=delete_cookies",
+}
diff --git a/scope.go b/scope.go
index a2c06b626cea622a236cbc52ca0293a8b71e7d54..ccba5f585154794a0211a8b5259a6a215a6eefe9 100644
--- a/scope.go
+++ b/scope.go
@@ -3,6 +3,7 @@ package crawl
 import (
 	"fmt"
 	"net/url"
+	"regexp"
 	"strings"
 )
 
@@ -95,3 +96,30 @@ func NewSeedScope(seeds []*url.URL) Scope {
 	}
 	return NewURLPrefixScope(pfx)
 }
+
+type regexpIgnoreScope struct {
+	ignores []*regexp.Regexp
+}
+
+func (s *regexpIgnoreScope) Check(uri *url.URL, depth int) bool {
+	uriStr := uri.String()
+	for _, i := range s.ignores {
+		if i.MatchString(uriStr) {
+			return false
+		}
+	}
+	return true
+}
+
+func NewRegexpIgnoreScope(ignores []string) Scope {
+	if ignores == nil {
+		ignores = defaultIgnorePatterns
+	}
+	r := regexpIgnoreScope{
+		ignores: make([]*regexp.Regexp, 0, len(ignores)),
+	}
+	for _, i := range ignores {
+		r.ignores = append(r.ignores, regexp.MustCompile(i))
+	}
+	return &r
+}