Skip to content
Snippets Groups Projects
Commit a5e06fb5 authored by ale's avatar ale
Browse files

Autodetect ISBN numbers in the text if not found in metadata

parent 825467fc
No related branches found
No related tags found
No related merge requests found
package liber
import (
"io"
"io/ioutil"
"regexp"
"strconv"
"strings"
"git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/meskio/epubgo"
)
// isbnRx matches candidate ISBN-10 / ISBN-13 numbers embedded in text.
//
// The pattern is made of:
//   - an optional "ISBN", "ISBN-10" or "ISBN-13" label, optionally followed
//     by whitespace and a colon;
//   - capture group 1: either 9 or 12 digits, each optionally followed by a
//     hyphen, then a final character that is a digit or 'x'/'X';
//   - a trailing character that is neither a hyphen nor a digit (or the end
//     of the input), so that a match cannot stop in the middle of a longer
//     digit sequence.
//
// A match only identifies a candidate: the check digit is not verified here.
var isbnRx = regexp.MustCompile(`(?:ISBN(?:-10|-13)?\s*:?\s*)?((?:(?:[\d]-?){9}|(?:[\d]-?){12})[\dxX])(?:[^-\d]|$)`)
// findISBNInEpub scans every document in the spine of the given EPUB and
// returns all valid ISBN numbers found in their contents, in reading order.
// It returns nil if the spine cannot be obtained. Spine items that cannot be
// opened are skipped (best effort).
func findISBNInEpub(epub *epubgo.Epub) []string {
	spine, err := epub.Spine()
	if err != nil {
		return nil
	}

	var isbn []string
	for {
		// Spine() hands back an iterator that is already positioned on the
		// first item, so the current item must be processed before calling
		// Next(); advancing first would silently skip the first document.
		if r, err := spine.Open(); err == nil {
			isbn = append(isbn, findISBNInPage(r)...)
			r.Close()
		}
		// Next() returns an error once the last item has been reached.
		if spine.Next() != nil {
			break
		}
	}
	return isbn
}
// findISBNInPage reads r to the end and returns every substring that both
// matches isbnRx and passes validateIsbn, in order of appearance. The
// returned strings keep the hyphens found in the text. It returns nil when
// reading fails or when nothing valid is found.
func findISBNInPage(r io.Reader) []string {
	data, err := ioutil.ReadAll(r)
	if err != nil {
		return nil
	}

	var result []string
	matches := isbnRx.FindAllSubmatch(data, -1)
	for _, m := range matches {
		if len(m) < 2 {
			continue
		}
		// m[1] is the capture group holding the number itself, without the
		// optional "ISBN" label.
		candidate := string(m[1])
		if !validateIsbn(candidate) {
			continue
		}
		result = append(result, candidate)
	}
	return result
}
// validateIsbn10 reports whether isbn is a valid ISBN-10.
//
// Hyphens are ignored. After removing them the input must consist of exactly
// ten characters: digits, except that the last one may be 'X' or 'x'
// (standing for the value 10). The number is valid when the sum of each
// value multiplied by its weight (10 for the first character down to 1 for
// the last) is divisible by 11.
func validateIsbn10(isbn string) bool {
	var sum int
	// pos counts the significant (non-hyphen) characters seen so far, so the
	// weights and the check-digit position do not depend on hyphen placement.
	var pos int
	for _, v := range isbn {
		if v == '-' {
			continue
		}
		if pos >= 10 {
			return false
		}
		digit := 10
		// Only the check digit (last position) may be 'X'; the candidate
		// regexp accepts both cases, so accept both here too.
		if !(pos == 9 && (v == 'X' || v == 'x')) {
			d, err := strconv.Atoi(string(v))
			if err != nil {
				return false
			}
			digit = d
		}
		sum += (10 - pos) * digit
		pos++
	}
	return pos == 10 && sum%11 == 0
}
// validateIsbn13 reports whether isbn is a valid ISBN-13.
//
// Hyphens are ignored, consistently with validateIsbn10. After removing them
// the input must consist of exactly thirteen digits. The number is valid
// when the sum of the digits, weighted alternately by 1 and 3 starting with
// 1 for the first digit, is divisible by 10.
func validateIsbn13(isbn string) bool {
	var sum int
	// pos counts digits only, so the alternating weights are not thrown off
	// by hyphens.
	var pos int
	for _, v := range isbn {
		if v == '-' {
			continue
		}
		digit, err := strconv.Atoi(string(v))
		if err != nil {
			return false
		}
		weight := 1
		if pos%2 == 1 {
			weight = 3
		}
		sum += weight * digit
		pos++
	}
	return pos == 13 && sum%10 == 0
}
// validateIsbn reports whether isbn is a valid ISBN. Hyphens are stripped
// first; the remaining length then selects the check to apply: 10 characters
// go to validateIsbn10, 13 to validateIsbn13, and any other length is
// rejected.
func validateIsbn(isbn string) bool {
	digits := strings.Replace(isbn, "-", "", -1)
	if n := len(digits); n == 10 {
		return validateIsbn10(digits)
	} else if n == 13 {
		return validateIsbn13(digits)
	}
	return false
}
package liber
import (
"reflect"
"strings"
"testing"
)
// isbnTestData is the table driving TestISBN_Detect. Each entry pairs a
// snippet of text (input) with the ISBN strings that findISBNInPage is
// expected to return for it (expected); nil means nothing should be found.
var isbnTestData = []struct {
input string
expected []string
}{
// Inputs that must not yield any ISBN.
{"not-an-isbn", nil},
{"123456780-330-28498-312345678", nil},
// Valid ISBN numbers.
{"ISBN: 0-330-28498-3", []string{"0-330-28498-3"}},
{"ISBN : 1-58182-008-9", []string{"1-58182-008-9"}},
{"ISBN-10: 2-226-05257-7", []string{"2-226-05257-7"}},
{"ISBN 3-7965-1900-8", []string{"3-7965-1900-8"}},
{"ISBN 4-19-830127-1", []string{"4-19-830127-1"}},
{"ISBN 5-85270-001-0", []string{"5-85270-001-0"}},
{"ISBN 978-600-119-125-1", []string{"978-600-119-125-1"}},
{"978-601-7151-13-3", []string{"978-601-7151-13-3"}},
{"ISBN-13: 978-602-8328-22-7", []string{"978-602-8328-22-7"}},
{"ISBN 978-603-500-045-1", []string{"978-603-500-045-1"}},
{"ISBN 605-384-057-2", []string{"605-384-057-2"}},
{"ISBN 978-606-8126-35-7", []string{"978-606-8126-35-7"}},
{"ISBN 978-607-455-035-1", []string{"978-607-455-035-1"}},
{"ISBN: 978-608-203-023-4", []string{"978-608-203-023-4"}},
{"ISBN 978-612-45165-9-7", []string{"978-612-45165-9-7"}},
{"ISBN 978-614-404-018-8", []string{"978-614-404-018-8"}},
{"ISBN 978-615-5014-99-4", []string{"978-615-5014-99-4"}},
{"ISBN 7-301-10299-2", []string{"7-301-10299-2"}},
{"ISBN 80-85983-44-3", []string{"80-85983-44-3"}},
{"81-7215-399-6", []string{"81-7215-399-6"}},
{"82-530-0983-6", []string{"82-530-0983-6"}},
{"eISBN : 83-08-01587-5", []string{"83-08-01587-5"}},
{"eISBN : 84-86546-08-7", []string{"84-86546-08-7"}},
{"ISBN 85-7531-015-1", []string{"85-7531-015-1"}},
{"ISBN 86-341-0846-5", []string{"86-341-0846-5"}},
{"ISBN 87-595-2277-1", []string{"87-595-2277-1"}},
{"88-04-47328-2", []string{"88-04-47328-2"}},
{"ISBN 90-5691-187-2", []string{"90-5691-187-2"}},
{"ISBN 91-1-811692-2", []string{"91-1-811692-2"}},
{"ISBN 92-67-10370-9", []string{"92-67-10370-9"}},
{"ISBN 93-5025-214-7", []string{"93-5025-214-7"}},
{"ISBN 950-04-0442-7", []string{"950-04-0442-7"}},
{"ISBN 951-0-11369-7", []string{"951-0-11369-7"}},
{"ISBN 952-471-294-6", []string{"952-471-294-6"}},
{"ISBN 953-157-105-8", []string{"953-157-105-8"}},
{"ISBN 954-430-603-X", []string{"954-430-603-X"}},
{"ISBN 955-20-3051-X", []string{"955-20-3051-X"}},
{"ISBN 956-7291-48-9", []string{"956-7291-48-9"}},
{"ISBN 957-01-7429-3", []string{"957-01-7429-3"}},
{"ISBN 958-04-6278-X", []string{"958-04-6278-X"}},
{"ISBN 959-10-0363-3", []string{"959-10-0363-3"}},
{"ISBN 961-6403-23-0", []string{"961-6403-23-0"}},
{"ISBN 962-04-0195-6", []string{"962-04-0195-6"}},
{"ISBN 978-988-00-3827-3", []string{"978-988-00-3827-3"}},
{"ISBN: 978-9928400529", []string{"978-9928400529"}},
{"ISBN: 978-9929801646", []string{"978-9929801646"}},
{"ISBN: 978-9930943106", []string{"978-9930943106"}},
// Multiple results.
{"ISBN: 0-330-28498-3 and ISBN : 1-58182-008-9",
[]string{"0-330-28498-3", "1-58182-008-9"}},
// Invalid ISBN numbers (wrong check digit).
{"ISBN 961-6403-23-1", nil},
{"ISBN 962-04-0195-1", nil},
{"ISBN: 978-9928400521", nil},
{"ISBN: 978-9929801641", nil},
{"ISBN: 978-993094310X", nil},
}
// TestISBN_Detect runs findISBNInPage over every entry of isbnTestData and
// reports an error for each input whose detected ISBNs differ from the
// expected list.
func TestISBN_Detect(t *testing.T) {
	for _, td := range isbnTestData {
		got := findISBNInPage(strings.NewReader(td.input))
		if reflect.DeepEqual(got, td.expected) {
			continue
		}
		t.Errorf("input: %s, got: %v, expected: %v", td.input, got, td.expected)
	}
}
......@@ -3,6 +3,7 @@ package liber
import (
"errors"
"fmt"
"log"
"path/filepath"
"regexp"
"strings"
......@@ -192,6 +193,14 @@ func parseEpub(filename string) (*Metadata, error) {
}
}
// If we haven't found an ISBN, look for it in the book text.
if len(m.ISBN) == 0 {
if isbn := findISBNInEpub(e); len(isbn) > 0 {
log.Printf("found ISBN in book text: %s", strings.Join(isbn, ", "))
m.ISBN = isbn
}
}
m.Sources = []MetadataSource{{
Name: "epub",
ID: filename,
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment