Commit f74df9f4 by ale

add command to create sitemap.xml file

1 parent cb8a01e4
Pipeline #250 passed
in 1 minute 47 seconds
Showing with 135 additions and 0 deletions
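// Command mksitemap generates a sitemap.xml document from the all_pages.json
// file. The input is read from the file named on the command line, or from
// standard input when no file is given; the sitemap is written to standard
// output.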
package main
import (
"encoding/json"
"encoding/xml"
"flag"
"fmt"
"io"
"log"
"os"
"regexp"
"sort"
"strings"
)
// baseURL holds the value of the -base-url flag: the prefix that fullURL
// prepends to every page path to build the URLs written to the sitemap.
var baseURL = flag.String("base-url", "https://www.autistici.org", "base for output URLs")
// page is one entry of the all_pages.json input; its fields are filled from
// the "url", "lang" and "title" keys of the corresponding JSON object.
type page struct {
// URL is the page path; makeSitemap strips the suffix matched by extRx
// from it to obtain the language-independent location.
URL string `json:"url"`
// Lang is the page language, emitted as the hreflang attribute and as the
// value of the "hl" query parameter of the alternate link.
Lang string `json:"lang"`
// Title is decoded from the input but is not read by the code in this file.
Title string `json:"title"`
}
// sitemapLink is marshaled as an <xhtml:link> element describing one
// language-specific alternate of a page.
type sitemapLink struct {
XMLName xml.Name `xml:"xhtml:link"`
// Rel becomes the rel attribute; makeSitemap always sets it to "alternate".
Rel string `xml:"rel,attr"`
// Lang becomes the hreflang attribute.
Lang string `xml:"hreflang,attr"`
// URL becomes the href attribute.
URL string `xml:"href,attr"`
}
// sitemapPage is marshaled as a <url> element: one language-independent
// location together with the links to its language-specific alternates.
type sitemapPage struct {
XMLName xml.Name `xml:"url"`
// Loc becomes the <loc> child element.
Loc string `xml:"loc"`
// Links has no tag of its own; each entry is emitted under the element
// name declared by sitemapLink.XMLName.
Links []*sitemapLink
}
// sitemapTop is the root <urlset> element of the generated document.
type sitemapTop struct {
XMLName xml.Name `xml:"urlset"`
// NS becomes the xmlns attribute of the root element.
NS string `xml:"xmlns,attr"`
// NSxhtml becomes the xmlns:xhtml attribute, declaring the prefix used by
// the <xhtml:link> elements.
NSxhtml string `xml:"xmlns:xhtml,attr"`
// Pages has no tag of its own; each entry is emitted under the element
// name declared by sitemapPage.XMLName.
Pages []*sitemapPage
}
// sitemapPageList adapts a slice of sitemap pages to sort.Interface, ordering
// the entries by their Loc string.
type sitemapPageList []*sitemapPage

// Len reports the number of pages in the list.
func (l sitemapPageList) Len() int {
	return len(l)
}

// Swap exchanges the pages at positions i and j.
func (l sitemapPageList) Swap(i, j int) {
	first, second := l[i], l[j]
	l[i] = second
	l[j] = first
}

// Less reports whether the location of page i sorts strictly before the
// location of page j.
func (l sitemapPageList) Less(i, j int) bool {
	a, b := l[i], l[j]
	return strings.Compare(a.Loc, b.Loc) < 0
}
// readAllPages decodes a JSON object from r whose values are page records and
// returns those records as a slice.
//
// The records are returned in ascending order of their JSON object key, so
// the result is the same on every run for the same input (Go randomizes map
// iteration order). Entries whose value is JSON null are dropped, so the
// returned slice never contains a nil *page.
//
// It returns the decoder's error, and a nil slice, if r does not contain a
// valid JSON object of the expected shape.
func readAllPages(r io.Reader) ([]*page, error) {
	var m map[string]*page
	if err := json.NewDecoder(r).Decode(&m); err != nil {
		return nil, err
	}

	keys := make([]string, 0, len(m))
	for k, p := range m {
		// A null value decodes to a nil pointer; callers dereference every
		// element, so skip it here.
		if p == nil {
			continue
		}
		keys = append(keys, k)
	}
	sort.Strings(keys)

	var out []*page
	for _, k := range keys {
		out = append(out, m[k])
	}
	return out, nil
}
// extRx matches the tail of a page path that makeSitemap removes to obtain
// the language-independent URL: an optional "index", followed by a dot, two
// lowercase letters and ".html" at the very end of the string.
var extRx = regexp.MustCompile(`(index)?\.[a-z]{2}\.html$`)
// fullURL returns the path u prefixed with the -base-url flag value.
//
// When the base already ends in "/", a single leading "/" is removed from u
// so the result does not contain a doubled slash. Only an actual leading
// slash is removed: an empty u, or one that does not start with "/", is
// appended unchanged instead of panicking or losing its first character.
func fullURL(u string) string {
	base := *baseURL
	if strings.HasSuffix(base, "/") {
		u = strings.TrimPrefix(u, "/")
	}
	return base + u
}
// makeSitemap writes an XML sitemap for pages to standard output.
//
// Pages are grouped by their language-independent path, i.e. their URL with
// the suffix matched by extRx removed. Each group becomes one <url> entry
// whose <loc> is the absolute URL of that path, with one "alternate"
// <xhtml:link> per translation pointing at the same location with an
// "?hl=<lang>" query string.
//
// The output is deterministic: entries are sorted by location and the links
// inside an entry by language (then by source URL), regardless of the order
// of pages. Nil elements of pages are ignored. Any marshaling or write error
// terminates the program through log.Fatal.
func makeSitemap(pages []*page) {
	byURL := make(map[string][]*page)
	for _, p := range pages {
		if p == nil {
			continue
		}
		realURL := extRx.ReplaceAllLiteralString(p.URL, "")
		byURL[realURL] = append(byURL[realURL], p)
	}

	sitemap := &sitemapTop{
		NS:      "http://www.sitemaps.org/schemas/sitemap/0.9",
		NSxhtml: "http://www.w3.org/1999/xhtml",
		Pages:   make([]*sitemapPage, 0, len(byURL)),
	}
	for realURL, pp := range byURL {
		// The incoming page order is not guaranteed to be stable, so fix the
		// order of the alternates explicitly.
		sort.Slice(pp, func(i, j int) bool {
			if pp[i].Lang != pp[j].Lang {
				return pp[i].Lang < pp[j].Lang
			}
			return pp[i].URL < pp[j].URL
		})

		sp := &sitemapPage{
			Loc:   fullURL(realURL),
			Links: make([]*sitemapLink, 0, len(pp)),
		}
		for _, p := range pp {
			// Make a public URL using query-string-based language fixation.
			langURL := fmt.Sprintf("%s?hl=%s", sp.Loc, p.Lang)
			sp.Links = append(sp.Links, &sitemapLink{
				Rel:  "alternate",
				Lang: p.Lang,
				URL:  langURL,
			})
		}
		sitemap.Pages = append(sitemap.Pages, sp)
	}
	sort.Sort(sitemapPageList(sitemap.Pages))

	out, err := xml.MarshalIndent(sitemap, "", " ")
	if err != nil {
		log.Fatal(err)
	}

	// Emit the XML declaration, the document and a final newline, failing
	// loudly if standard output cannot be written (e.g. a full disk).
	for _, chunk := range []string{xml.Header, string(out), "\n"} {
		if _, err := io.WriteString(os.Stdout, chunk); err != nil {
			log.Fatal(err)
		}
	}
}
// main parses the command line, selects the input (the single optional
// positional argument names a file; without it standard input is used),
// loads the page list and prints the sitemap. Any failure, including more
// than one positional argument, aborts the program via log.Fatal.
func main() {
	flag.Parse()

	var input io.Reader
	switch flag.NArg() {
	case 0:
		input = os.Stdin
	case 1:
		f, err := os.Open(flag.Arg(0))
		if err != nil {
			log.Fatal(err)
		}
		defer f.Close()
		input = f
	default:
		log.Fatal("too many arguments")
	}

	pages, err := readAllPages(input)
	if err != nil {
		log.Fatal(err)
	}
	makeSitemap(pages)
}
......@@ -26,6 +26,7 @@ override_dh_install:
install -m 755 -o root -g root build/bin/jsonsubst $(CURDIR)/debian/ai-webtools/usr/bin/jsonsubst
install -m 755 -o root -g root build/bin/faq2md $(CURDIR)/debian/ai-webtools/usr/bin/faq2md
install -m 755 -o root -g root build/bin/sitesearch $(CURDIR)/debian/ai-webtools/usr/sbin/sitesearch
install -m 755 -o root -g root build/bin/mksitemap $(CURDIR)/debian/ai-webtools/usr/bin/mksitemap
override_dh_installinit:
dh_installinit --name=sitesearch
......
Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!