Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
liber
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
ale
liber
Commits
0910a960
Commit
0910a960
authored
10 years ago
by
ale
Browse files
Options
Downloads
Patches
Plain Diff
streamline metadata processing using interfaces
parent
30145e43
No related branches found
No related tags found
No related merge requests found
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
googlebooks.go
+8
-2
8 additions, 2 deletions
googlebooks.go
metadata.go
+55
-9
55 additions, 9 deletions
metadata.go
opf.go
+25
-0
25 additions, 0 deletions
opf.go
update.go
+92
-35
92 additions, 35 deletions
update.go
update_test.go
+40
-20
40 additions, 20 deletions
update_test.go
with
220 additions
and
66 deletions
googlebooks.go
+
8
−
2
View file @
0910a960
...
@@ -14,6 +14,12 @@ import (
...
@@ -14,6 +14,12 @@ import (
"time"
"time"
)
)
// googleBooksRefiner is the metadata refiner that talks to Google
// Books. It carries no state of its own.
type googleBooksRefiner struct{}

// Name returns the short identifier of this refiner.
func (r *googleBooksRefiner) Name() string {
	return "gbooks"
}
type
atomResultEntry
struct
{
type
atomResultEntry
struct
{
GoogleId
string
`xml:"id"`
GoogleId
string
`xml:"id"`
Title
string
`xml:"http://purl.org/dc/terms title"`
Title
string
`xml:"http://purl.org/dc/terms title"`
...
@@ -142,7 +148,7 @@ func googleBooksGet(uri string) (*http.Response, error) {
...
@@ -142,7 +148,7 @@ func googleBooksGet(uri string) (*http.Response, error) {
return
nil
,
errors
.
New
(
"deadline exceeded"
)
return
nil
,
errors
.
New
(
"deadline exceeded"
)
}
}
func
LookupG
oogleBooks
(
m
*
Metadata
)
([]
*
Metadata
,
error
)
{
func
(
r
*
g
oogleBooks
Refiner
)
Lookup
(
m
*
Metadata
)
([]
*
Metadata
,
error
)
{
qstr
:=
googleBooksQuery
(
m
)
qstr
:=
googleBooksQuery
(
m
)
if
qstr
==
""
{
if
qstr
==
""
{
return
nil
,
errors
.
New
(
"insufficient metadata for query"
)
return
nil
,
errors
.
New
(
"insufficient metadata for query"
)
...
@@ -173,7 +179,7 @@ var imageUnavailableMD5 = [16]byte{
...
@@ -173,7 +179,7 @@ var imageUnavailableMD5 = [16]byte{
0x0d
,
0xe4
,
0x38
,
0x3e
,
0xba
,
0xd0
,
0xad
,
0xad
,
0x5e
,
0xeb
,
0x89
,
0x75
,
0xcd
,
0x79
,
0x66
,
0x57
,
0x0d
,
0xe4
,
0x38
,
0x3e
,
0xba
,
0xd0
,
0xad
,
0xad
,
0x5e
,
0xeb
,
0x89
,
0x75
,
0xcd
,
0x79
,
0x66
,
0x57
,
}
}
func
GetG
oogleBooksCover
(
m
*
Metadata
)
([]
byte
,
error
)
{
func
(
r
*
g
oogleBooks
Refiner
)
GetBook
Cover
(
m
*
Metadata
)
([]
byte
,
error
)
{
gbid
:=
getGoogleBooksId
(
m
)
gbid
:=
getGoogleBooksId
(
m
)
if
gbid
==
""
{
if
gbid
==
""
{
return
nil
,
errors
.
New
(
"no ID"
)
return
nil
,
errors
.
New
(
"no ID"
)
...
...
This diff is collapsed.
Click to expand it.
metadata.go
+
55
−
9
View file @
0910a960
...
@@ -11,6 +11,22 @@ import (
...
@@ -11,6 +11,22 @@ import (
"github.com/meskio/epubgo"
"github.com/meskio/epubgo"
)
)
// A metadata provider generates metadata from the local filesystem.
type
MetadataProvider
interface
{
Name
()
string
Lookup
(
*
FileStorage
,
string
,
string
)
(
*
Metadata
,
error
)
GetBookCover
(
*
FileStorage
,
string
)
(
string
,
error
)
}
// A metadata refiner improves on existing metadata and may provide
// more than one result to choose from. It usually involves talking to
// a remote service.
type
MetadataRefiner
interface
{
Name
()
string
Lookup
(
*
Metadata
)
([]
*
Metadata
,
error
)
GetBookCover
(
*
Metadata
)
([]
byte
,
error
)
}
type
MetadataSource
struct
{
type
MetadataSource
struct
{
Name
string
Name
string
ID
string
ID
string
...
@@ -30,10 +46,18 @@ type Metadata struct {
...
@@ -30,10 +46,18 @@ type Metadata struct {
}
}
// Sufficient returns true if the object contains enough information.
// Sufficient returns true if the object contains enough information.
// If this check does not pass, the book won't be added to the database.
func
(
m
*
Metadata
)
Sufficient
()
bool
{
func
(
m
*
Metadata
)
Sufficient
()
bool
{
return
m
.
Title
!=
""
return
m
.
Title
!=
""
}
}
// Complete returns true if we're satisfied with the quality of the
// information about this book. If this returns true, remote checks
// will be skipped.
func
(
m
*
Metadata
)
Complete
()
bool
{
return
(
m
.
Title
!=
""
&&
len
(
m
.
Creator
)
>
0
&&
len
(
m
.
ISBN
)
>
0
)
}
// Uniques returns the list of possible unique tokens for this book.
// Uniques returns the list of possible unique tokens for this book.
func
(
m
*
Metadata
)
Uniques
()
[]
string
{
func
(
m
*
Metadata
)
Uniques
()
[]
string
{
var
out
[]
string
var
out
[]
string
...
@@ -220,23 +244,45 @@ func parseAnything(filename string) (*Metadata, error) {
...
@@ -220,23 +244,45 @@ func parseAnything(filename string) (*Metadata, error) {
},
nil
},
nil
}
}
func
Parse
(
filename
string
)
(
*
Book
,
string
,
error
)
{
type
fileProvider
struct
{}
func
(
p
*
fileProvider
)
Lookup
(
storage
*
FileStorage
,
path
,
filetype
string
)
(
*
Metadata
,
error
)
{
path
=
storage
.
Abs
(
path
)
var
m
*
Metadata
var
m
*
Metadata
var
err
error
var
err
error
ext
:=
strings
.
ToLower
(
filepath
.
Ext
(
filename
))
switch
filetype
{
switch
ext
{
case
".epub"
:
case
".epub"
:
m
,
err
=
parseEpub
(
filename
)
m
,
err
=
parseEpub
(
path
)
case
".mobi"
:
case
".mobi"
:
m
,
err
=
parseMobi
(
filename
)
m
,
err
=
parseMobi
(
path
)
case
".pdf"
:
case
".pdf"
:
m
,
err
=
parseAnything
(
filename
)
m
,
err
=
parseAnything
(
path
)
default
:
default
:
return
nil
,
""
,
errors
.
New
(
"unsupported file format"
)
return
nil
,
errors
.
New
(
"unsupported file format"
)
}
}
if
err
!=
nil
{
if
err
!=
nil
{
return
nil
,
""
,
err
return
nil
,
err
}
}
return
&
Book
{
Metadata
:
m
},
ext
,
nil
return
m
,
nil
}
func
(
p
*
fileProvider
)
GetBookCover
(
storage
*
FileStorage
,
path
string
)
(
string
,
error
)
{
coverPath
:=
path
+
".cover.png"
if
storage
.
Exists
(
coverPath
)
{
return
coverPath
,
nil
}
return
""
,
nil
}
func
(
p
*
fileProvider
)
Name
()
string
{
return
"file"
}
// GetFileType returns the lower-cased extension of path, including the
// leading dot, when it is one of the supported ebook formats: ".epub",
// ".mobi" or ".pdf". Any other extension, or no extension at all,
// yields an empty string and an "unsupported file format" error.
func GetFileType(path string) (string, error) {
	filetype := strings.ToLower(filepath.Ext(path))
	switch filetype {
	case ".epub", ".mobi", ".pdf":
		return filetype, nil
	default:
		return "", errors.New("unsupported file format")
	}
}
}
This diff is collapsed.
Click to expand it.
opf.go
+
25
−
0
View file @
0910a960
...
@@ -82,3 +82,28 @@ func opfMetadataPath(epubPath string) string {
...
@@ -82,3 +82,28 @@ func opfMetadataPath(epubPath string) string {
func
opfCoverPath
(
epubPath
string
)
string
{
func
opfCoverPath
(
epubPath
string
)
string
{
return
filepath
.
Join
(
filepath
.
Dir
(
epubPath
),
"cover.jpg"
)
return
filepath
.
Join
(
filepath
.
Dir
(
epubPath
),
"cover.jpg"
)
}
}
// opfProvider is the metadata provider that reads the OPF metadata
// file and cover image found alongside a book. It carries no state.
type opfProvider struct{}
func
(
p
*
opfProvider
)
Lookup
(
storage
*
FileStorage
,
path
,
filetype
string
)
(
*
Metadata
,
error
)
{
if
!
storage
.
Exists
(
opfMetadataPath
(
path
))
{
return
nil
,
nil
}
m
,
err
:=
opfOpen
(
opfMetadataPath
(
storage
.
Abs
(
path
)))
if
err
!=
nil
{
return
nil
,
err
}
return
m
,
err
}
func
(
p
*
opfProvider
)
GetBookCover
(
storage
*
FileStorage
,
path
string
)
(
string
,
error
)
{
coverPath
:=
opfCoverPath
(
path
)
if
storage
.
Exists
(
coverPath
)
{
return
coverPath
,
nil
}
return
""
,
nil
}
func
(
p
*
opfProvider
)
Name
()
string
{
return
"opf"
}
This diff is collapsed.
Click to expand it.
update.go
+
92
−
35
View file @
0910a960
...
@@ -42,9 +42,11 @@ type fileAndBook struct {
...
@@ -42,9 +42,11 @@ type fileAndBook struct {
}
}
type
updateContext
struct
{
type
updateContext
struct
{
db
*
Database
db
*
Database
storage
*
FileStorage
storage
*
FileStorage
chooser
MetadataChooserFunc
chooser
MetadataChooserFunc
providers
[]
MetadataProvider
refiners
[]
MetadataRefiner
}
}
func
(
uc
*
updateContext
)
dbFileScanner
(
fileCh
chan
fileData
)
{
func
(
uc
*
updateContext
)
dbFileScanner
(
fileCh
chan
fileData
)
{
...
@@ -151,52 +153,94 @@ func (uc *updateContext) extractor(fileCh chan fileData, outCh chan fileAndBook)
...
@@ -151,52 +153,94 @@ func (uc *updateContext) extractor(fileCh chan fileData, outCh chan fileAndBook)
}
}
func
(
uc
*
updateContext
)
parseMeta
(
f
fileData
)
(
*
Book
,
string
,
error
)
{
func
(
uc
*
updateContext
)
parseMeta
(
f
fileData
)
(
*
Book
,
string
,
error
)
{
// Attempt direct metadata extraction.
filetype
,
err
:=
GetFileType
(
f
.
path
)
book
,
filetype
,
err
:=
Parse
(
uc
.
storage
.
Abs
(
f
.
path
))
if
err
!=
nil
{
if
err
!=
nil
{
return
nil
,
""
,
err
return
nil
,
""
,
err
}
}
// Check if a Calibre OPF file exists.
// Attempt metadata extraction from the providers. The first
if
opfmeta
,
err
:=
opfOpen
(
opfMetadataPath
(
uc
.
storage
.
Abs
(
f
.
path
)));
err
==
nil
{
// match returned stops the iteration. At the same time, look
book
.
Metadata
.
Merge
(
opfmeta
)
// for a cover image until one is found.
}
else
{
var
meta
*
Metadata
// No local metadata, use Google Books to retrieve
var
coverPath
string
// more information on the book. Ask the user to
// choose in case there are multiple results.
for
_
,
provider
:=
range
uc
.
providers
{
candidates
,
err
:=
LookupGoogleBooks
(
book
.
Metadata
)
if
meta
==
nil
{
if
err
==
nil
&&
len
(
candidates
)
>
0
{
meta
,
err
=
provider
.
Lookup
(
uc
.
storage
,
f
.
path
,
filetype
)
if
len
(
candidates
)
==
1
{
if
err
!=
nil
{
log
.
Printf
(
"found Google Books match: %s"
,
candidates
[
0
]
.
String
())
log
.
Printf
(
"%s: %s: could not parse: %v"
,
f
.
path
,
provider
.
Name
(),
err
)
book
.
Metadata
.
Merge
(
candidates
[
0
])
}
else
if
meta
!=
nil
{
}
else
if
uc
.
chooser
!=
nil
{
log
.
Printf
(
"%s: identified by: %s"
,
f
.
path
,
provider
.
Name
())
if
userchoice
:=
uc
.
chooser
(
f
.
path
,
candidates
);
userchoice
!=
nil
{
}
book
.
Metadata
.
Merge
(
userchoice
)
}
if
coverPath
==
""
{
coverPath
,
err
=
provider
.
GetBookCover
(
uc
.
storage
,
f
.
path
)
if
err
!=
nil
{
log
.
Printf
(
"%s: %s: could not fetch cover image at %s"
,
f
.
path
,
provider
.
Name
(),
err
)
}
else
if
coverPath
!=
""
{
log
.
Printf
(
"%s: cover image found by: %s"
,
f
.
path
,
provider
.
Name
())
}
}
}
if
meta
==
nil
{
return
nil
,
""
,
errors
.
New
(
"no metadata could be identified"
)
}
// If the book cover couldn't be found locally, prepare to
// download it. It's possible that we've already done this, so
// check in the storage first (TODO: this check isn't useful,
// if the cover exists it should have been emitted by the
// fileProvider above).
localCoverPath
:=
f
.
path
+
".cover.png"
if
coverPath
==
""
&&
uc
.
storage
.
Exists
(
localCoverPath
)
{
coverPath
=
localCoverPath
}
// Only run remote checks if the metadata isn't complete.
if
!
meta
.
Complete
()
{
// Integrate metadata using the refiners. We check them all,
// and merge their results into the metadata object. The user
// is prompted if a choice is necessary. Search for a book
// cover only until one is found.
for
_
,
refiner
:=
range
uc
.
refiners
{
candidates
,
err
:=
refiner
.
Lookup
(
meta
)
if
err
==
nil
&&
len
(
candidates
)
>
0
{
if
len
(
candidates
)
==
1
{
log
.
Printf
(
"found match from %s: %s"
,
refiner
.
Name
(),
candidates
[
0
]
.
String
())
meta
.
Merge
(
candidates
[
0
])
}
else
if
uc
.
chooser
!=
nil
{
if
userchoice
:=
uc
.
chooser
(
f
.
path
,
candidates
);
userchoice
!=
nil
{
meta
.
Merge
(
userchoice
)
}
}
}
if
coverPath
==
""
{
if
coverData
,
err
:=
refiner
.
GetBookCover
(
meta
);
err
==
nil
{
if
imgf
,
err
:=
os
.
Create
(
uc
.
storage
.
Abs
(
localCoverPath
));
err
!=
nil
{
log
.
Printf
(
"Error saving cover image: %v"
,
err
)
}
else
{
imgf
.
Write
(
coverData
)
imgf
.
Close
()
coverPath
=
localCoverPath
}
}
}
}
}
}
}
}
}
// Check if the book metadata looks ok. If not, don't even
// Check if the book metadata looks ok. If not, don't even
// bother looking for a cover image.
// bother looking for a cover image.
if
!
book
.
Metada
ta
.
Sufficient
()
{
if
!
me
ta
.
Sufficient
()
{
return
nil
,
""
,
errors
.
New
(
"insufficient metadata"
)
return
nil
,
""
,
errors
.
New
(
"insufficient metadata"
)
}
}
// Try to find a cover image. Look on the local filesystem
// Create a Book with no ID (yet).
// first, otherwise check Google Books.
book
:=
&
Book
{
localCoverPath
:=
opfCoverPath
(
f
.
path
)
Metadata
:
meta
,
if
uc
.
storage
.
Exists
(
localCoverPath
)
{
CoverPath
:
coverPath
,
book
.
CoverPath
=
localCoverPath
}
else
if
imageData
,
err
:=
GetGoogleBooksCover
(
book
.
Metadata
);
err
==
nil
{
imageFileName
:=
f
.
path
+
".cover.png"
if
imgf
,
err
:=
os
.
Create
(
uc
.
storage
.
Abs
(
imageFileName
));
err
!=
nil
{
log
.
Printf
(
"Could not save cover image for %d: %v"
,
book
.
Id
,
err
)
}
else
{
imgf
.
Write
(
imageData
)
imgf
.
Close
()
book
.
CoverPath
=
imageFileName
}
}
}
return
book
,
filetype
,
nil
return
book
,
filetype
,
nil
...
@@ -251,6 +295,19 @@ func (db *Database) Update(dir string, chooser MetadataChooserFunc) {
...
@@ -251,6 +295,19 @@ func (db *Database) Update(dir string, chooser MetadataChooserFunc) {
db
:
db
,
db
:
db
,
chooser
:
chooser
,
chooser
:
chooser
,
storage
:
NewFileStorage
(
dir
),
storage
:
NewFileStorage
(
dir
),
// Calibre/OPF must be first, so we don't attempt to
// parse the file itself.
providers
:
[]
MetadataProvider
{
&
opfProvider
{},
&
fileProvider
{},
},
// Check Google Books when the metadata is not
// sufficient to fully describe the book.
refiners
:
[]
MetadataRefiner
{
&
googleBooksRefiner
{},
},
}
}
var
wg
sync
.
WaitGroup
var
wg
sync
.
WaitGroup
...
...
This diff is collapsed.
Click to expand it.
update_test.go
+
40
−
20
View file @
0910a960
...
@@ -20,6 +20,30 @@ func createTestFs(fs map[string]string) string {
...
@@ -20,6 +20,30 @@ func createTestFs(fs map[string]string) string {
return
base
return
base
}
}
func
checkDbPathIntegrity
(
t
*
testing
.
T
,
db
*
Database
)
{
// Files should have relative paths.
for
i
:=
db
.
Scan
(
FileBucket
);
i
.
Valid
();
i
.
Next
()
{
var
f
File
if
err
:=
i
.
Value
(
&
f
);
err
!=
nil
{
t
.
Fatal
(
err
)
}
if
strings
.
HasPrefix
(
f
.
Path
,
"/"
)
{
t
.
Errorf
(
"file has absolute path: %v"
,
f
.
Path
)
}
}
// Book cover images should have relative paths.
for
i
:=
db
.
Scan
(
BookBucket
);
i
.
Valid
();
i
.
Next
()
{
var
b
Book
if
err
:=
i
.
Value
(
&
b
);
err
!=
nil
{
t
.
Fatal
(
err
)
}
if
b
.
CoverPath
!=
""
&&
strings
.
HasPrefix
(
b
.
CoverPath
,
"/"
)
{
t
.
Errorf
(
"file has absolute path: %v"
,
b
.
CoverPath
)
}
}
}
func
TestDatabase_Update
(
t
*
testing
.
T
)
{
func
TestDatabase_Update
(
t
*
testing
.
T
)
{
util
.
WalkerDefaultMinSize
=
0
util
.
WalkerDefaultMinSize
=
0
...
@@ -59,30 +83,26 @@ func TestDatabase_Update(t *testing.T) {
...
@@ -59,30 +83,26 @@ func TestDatabase_Update(t *testing.T) {
db
.
Update
(
tmpdir
,
chooser
)
db
.
Update
(
tmpdir
,
chooser
)
testDb
(
"second update"
)
testDb
(
"second update"
)
if
chooserCalled
{
t
.
Errorf
(
"chooser function was called"
)
}
// Check that the test file is there.
// Check that the test file is there.
if
_
,
err
:=
db
.
GetFile
(
"book/Test Ebook.pdf"
);
err
!=
nil
{
if
_
,
err
:=
db
.
GetFile
(
"book/Test Ebook.pdf"
);
err
!=
nil
{
t
.
Errorf
(
"test file is not in the database"
)
t
.
Errorf
(
"test file is not in the database"
)
}
}
// Files should have relative paths.
checkDbPathIntegrity
(
t
,
db
)
for
i
:=
db
.
Scan
(
FileBucket
);
i
.
Valid
();
i
.
Next
()
{
}
var
f
File
if
err
:=
i
.
Value
(
&
f
);
err
!=
nil
{
t
.
Fatal
(
err
)
}
if
strings
.
HasPrefix
(
f
.
Path
,
"/"
)
{
t
.
Errorf
(
"file has absolute path: %v"
,
f
.
Path
)
}
}
// Book cover images should have relative paths.
func
TestDatabase_UpdateEpub
(
t
*
testing
.
T
)
{
for
i
:=
db
.
Scan
(
BookBucket
);
i
.
Valid
();
i
.
Next
()
{
util
.
WalkerDefaultMinSize
=
0
var
b
Book
if
err
:=
i
.
Value
(
&
b
);
err
!=
nil
{
td
,
db
:=
newTestDatabase
(
t
)
t
.
Fatal
(
err
)
defer
td
.
Close
(
)
}
if
b
.
CoverPath
!=
""
&&
strings
.
HasPrefix
(
b
.
CoverPath
,
"/"
)
{
// Read the test epub from testdata/.
t
.
Errorf
(
"file has absolute path: %v"
,
b
.
CoverPath
)
db
.
Update
(
"testdata"
,
nil
)
}
}
checkDbPathIntegrity
(
t
,
db
)
}
}
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment