Skip to content
Snippets Groups Projects
Commit efe98903 authored by ale
Browse files

relax the CSS url() regexp

parent f0c14e5e
No related branches found
No related tags found
No related merge requests found
......@@ -14,7 +14,7 @@ import (
)
var (
urlcssRx = regexp.MustCompile(`background.*:.*url\(["']?([^'"\)]+)["']?\)`)
urlcssRx = regexp.MustCompile(`.*:.*url\(["']?([^'"\)]+)["']?\)`)
linkMatches = []struct {
tag string
......@@ -32,6 +32,9 @@ func GetLinks(resp *http.Response) ([]*url.URL, error) {
ctype := resp.Header.Get("Content-Type")
if strings.HasPrefix(ctype, "text/html") {
// Use goquery to extract links from the parsed HTML
// contents (query patterns are described in the
// linkMatches table).
doc, err := goquery.NewDocumentFromResponse(resp)
if err != nil {
return nil, err
......@@ -44,6 +47,8 @@ func GetLinks(resp *http.Response) ([]*url.URL, error) {
})
}
} else if strings.HasPrefix(ctype, "text/css") {
// Use a simple (and actually quite bad) regular
// expression to extract "url()" links from CSS.
if data, err := ioutil.ReadAll(resp.Body); err == nil {
for _, val := range urlcssRx.FindAllStringSubmatch(string(data), -1) {
outlinks = append(outlinks, val[1])
......@@ -51,7 +56,8 @@ func GetLinks(resp *http.Response) ([]*url.URL, error) {
}
}
// Uniquify and parse outbound links.
// Parse outbound links relative to the request URI, and
// return unique results.
var result []*url.URL
links := make(map[string]*url.URL)
for _, val := range outlinks {
......@@ -59,9 +65,8 @@ func GetLinks(resp *http.Response) ([]*url.URL, error) {
links[linkurl.String()] = linkurl
}
}
for _, link := range links {
result = append(result, link)
for _, u := range links {
result = append(result, u)
}
return result, nil
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment