1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 """
18 This module provides functionality to fetch information on a book (like
19 author and title) when given an isbn number.
20
21 Example usage::
22
23 import isbn
24 isbn.config() # read keys from keys.conf
25 result=isbn.fetch('978123456789X') # get info
26
27 At this moment it provides classes to retrieve information from
28 Amazon via I{ecs} (you need a (free) license key), from libraries via
29 I{sru} (at this moment: Library of Congress, British Library (Copac),
30 and the dutch Koninklijke Bibliotheek) or from isbndb.com (you need a (free)
31 license key) via an ad-hoc webservice.
32
33 To implement this functionality an extensible hierarchy of classes is provided.
34 The base class is called L{SRU} and most classes based on it merely override the
35 C{__init__()} method to set a different url to retrieve the information. The L{Amazon} class
36 is slightly more complicated as it has to encrypt the url with a key.
37 Once a L{SRU} derived class is instantiated you can call the instance's C{fetch()} method
38 to try and retrieve the information. The result (either a dictionary with entries or None)
39 is stored in the result instance variable.
40
41 Together with the L{SRU} class a number of L{ContentHandler} derived classes are defined to
42 interpret the xml data that is returned.
43
44 For convenience the isbn module provides the following toplevel functions:
45
46 - C{fetch(isbnnumber)} will try all sources defined in this module (excluding the ones without proper keys)
47 - C{awskeypair(id,key)} set an amazon AWS keypair
48 - C{isbndbdotcomkey(key)} set an isbndb.com key
49 - C{getproviders()} get a list of classes that can be used to fetch info
50 - C{enable(klass)} lets fetch() use this class
51 - C{disable(klass)} prohibit fetch() using this class
52 - C{config(filename)} configure AWS and/or isbndb keys from a config file (default keys.conf)
53 """
54 from AWSQuerySigner import AWSQS
55
56
AWSId='No AWSId specified'
AWSKey='No AWSKey specified'

def awskeypair(Id=None,Key=None):
    """
    Get or set the Amazon AWS keypair used by L{Amazon} instances.

    Called without an Id the currently stored pair is returned and nothing
    is changed. Called with an Id both module level values are replaced and
    lookup via the L{Amazon} class is enabled.

    @param Id: AWS Id, or None to query the stored pair.
    @type Id: string.
    @param Key: AWS Key, or None.
    @type Key: string.
    @return: a tuple (AWSId,AWSKey) if Id is None, nothing otherwise.
    """
    global AWSId,AWSKey
    if Id is None:
        return (AWSId,AWSKey)
    AWSId=Id
    AWSKey=Key
    # a keypair is now available, so let fetch() use Amazon
    enable(Amazon)
75
76
isbndbkey='No ISBNDB.com key specified'

def isbndbdotcomkey(key=None):
    """
    Get or set the isbndb.com key and enable lookup via an L{isbndbdotcom} class instance.

    Called without a key the currently stored key is returned and nothing
    is changed.

    @param key: isbndb.com key, or None to query the stored key.
    @type key: string.
    @return: a string if key is None, nothing otherwise.
    """
    global isbndbkey
    if key is None:
        return isbndbkey
    isbndbkey=key
    # a key is now available, so let fetch() use isbndb.com
    enable(isbndbdotcom)
91
92 from urllib import urlopen,urlretrieve
93 import xml.sax
94 from xml.sax.handler import ContentHandler
95
96 import string
97
def isbnchecksum(line):
    """
    Calculate the checksum for an isbn-10 number.

    @param line: 9 or 10 digit number.
    @type line: string.
    @return: 10 digit isbnnumber, or a string starting with '#' if the
        input does not consist of 9 (or 10) characters.
    @rtype: string.
    @raise ValueError: if one of the first 9 characters is not a digit.

    If you pass in a 10 digit number the last digit is ignored. This is to facilitate the conversion
    from an isbn-13 number (aka EAN code, 13 digits starting w. 978) to an isbn-10 number by
    simply chopping off the first 3 digits and passing it to isbnchecksum().

    Example::

        isbn10 = isbnchecksum(isbn13[3:])
    """
    if len(line) == 10:
        line = line[0:9]
    if len(line) != 9:
        # callers receive this marker string instead of an exception
        return '# ISBN should be 9 digits, excluding checksum!'
    # weights run from 10 for the first digit down to 2 for the ninth
    total = sum((10 - position) * int(digit) for position, digit in enumerate(line))
    check = (11 - total % 11) % 11
    if check == 10:
        return line + 'X'
    return line + str(check)
132
class DCHandler(ContentHandler):
    """
    Simple SAX content handler to process xml retrieved from an
    SRU server. It is nothing more than a hack really since it does
    not use information about the scheme used (like Dublin Core or
    Bath) but just sniffs for tags that are relevant and stores them in
    the instance variable I{self.r}. See L{isbn.fetch} for
    information about the contents.
    """
    def __init__(self):
        ContentHandler.__init__(self)
        self.r={'authors':[]}   # collected result
        self.data=''            # text of the element currently being read
        self.tags=[]            # stack of open element names

    def startElement(self,name,attr):
        """Start collecting text for a new element."""
        self.data=''
        self.tags.append(name)

    def characters(self,chars):
        """Accumulate text; the parser may deliver it in several chunks."""
        self.data+=chars

    def endElement(self,name):
        """Store the collected text if the element is one we care about."""
        self.tags.pop(-1)
        # ignore the dc: and dcterms: prefixes
        if name.startswith('dc:'):
            name = name[3:]
        if name.startswith('dcterms:'):
            name = name[8:]

        if name in ('date','publisher','title'):
            self.r[name]=self.data
        if name in ('year','issued','dateIssued'):
            self.r['date']=self.data
        elif name in ('creator','author'):
            self.r['authors'].append(self.data)
170
class COPACHandler(ContentHandler):
    """
    Simple SAX content handler to process xml retrieved from an
    SRU server delivering xml using the mods schema. The result is stored
    in the instance variable I{self.r}. See L{isbn.fetch} for information
    about the contents.
    """
    def __init__(self):
        ContentHandler.__init__(self)
        self.r={'authors':[]}   # collected result
        self.data=''            # text of the element currently being read
        self.tags=[]            # stack of open element names

    def startElement(self,name,attr):
        """Start collecting text for a new element."""
        self.data=''
        self.tags.append(name)

    def characters(self,chars):
        """Accumulate text; the parser may deliver it in several chunks."""
        self.data+=chars

    def endElement(self,name):
        """Store the collected text if the element is one we care about."""
        self.tags.pop(-1)
        # After the pop tags[-1] is the parent and tags[-2] the grandparent.
        # Guard the length so shallow documents do not raise IndexError.
        belowmods = len(self.tags) >= 2 and self.tags[-2] == 'mods'

        if name == 'publisher':
            self.r[name]=self.data
        elif name == 'namePart' and belowmods:
            self.r['authors'].append(self.data)
        elif name == 'dateIssued':
            self.r['date']=self.data
        elif name == 'title' and belowmods:
            self.r['title'] = self.data
205
class AWSHandler(ContentHandler):
    """
    Simple SAX content handler to process xml retrieved from an
    AWS server. See L{isbn.fetch} for information about the
    contents.

    The number of C{Error} elements seen is counted in I{self.error}.
    """
    def __init__(self):
        ContentHandler.__init__(self)
        self.r={'authors':[]}   # collected result
        self.data=''            # text of the element currently being read
        self.error=0
        # last seen values of child elements, consumed when the enclosing
        # ListPrice / *Image element ends
        self.amount=None
        self.crcode=None
        self.url=None

    def startElement(self,name,attr):
        """Start collecting text for a new element."""
        self.data=''

    def characters(self,chars):
        """Accumulate text; the parser may deliver it in several chunks."""
        self.data+=chars

    def endElement(self,name):
        """Store the collected text if the element is one we care about."""
        if name in ('Date','Publisher','Title','ISBN','Binding'):
            self.r[name.lower()]=self.data
        elif name == 'Amount':
            self.amount=self.data
        elif name == 'CurrencyCode':
            self.crcode=self.data
        elif name == 'ListPrice':
            # the amount is divided by 100 to get the price
            try:
                price=float(self.amount)/100.0
            except (TypeError,ValueError):
                # no (usable) Amount seen: leave the price out
                price=None
            if price is not None:
                self.r['price']=price
                self.r['currency']=self.crcode
        elif name == 'URL':
            self.url=self.data
        elif name in ('SmallImage','MediumImage','LargeImage'):
            if self.url is not None:
                self.r[name.lower()]=self.url
        elif name == 'PublicationDate':
            self.r['date']=self.data
        elif name in ('Author','Creator'):
            self.r['authors'].append(self.data)
        elif name == 'Error':
            self.error+=1
251
253 """
254 Simple SAX content handler to process xml retrieved from
255 U{isbndb.com} See L{isbn.fetch} for information about the contents.
256
257 """
259 ContentHandler.__init__(self)
260 self.r={'authors':[]}
261 self.data=''
262 self.error=0
263 self.tags=[]
264
266 self.data=''
267 if 'book_id' in attr: self.r['isbndb_book_id'] = attr['book_id']
268 self.tags.append(name)
269
270
273
274
def endElement(self,name):
    """
    Store the text collected for a C{Title} or C{Person} element and
    remove the element from the stack of open tags.

    @param name: name of the element that ends.
    @type name: string.
    """
    # Compare with ==. C{name in ('Title')} is a substring test on a plain
    # string, so names like 'Tit' or 'e' would match as well.
    if name == 'Title':
        self.r['title']=self.data
    elif name == 'Person':
        self.r['authors'].append(self.data)

    self.tags.pop(-1)
285
286
288
class SRUError(Exception):
    """Raised when a repository did not return a usable result."""
    pass


class SRU(object):
    """
    Base class for all classes that implement retrieval of book information via a webservice.
    """
    def __init__(self,url,contenthandlerfactory,name):
        """
        @param url: an url that will be used to retrieve information. Should contain a single '%s' placeholder to fit an isbn number.
        @type url: string.
        @param contenthandlerfactory: a reference to a ContentHandler derived class.
        @type contenthandlerfactory: ContentHandler.
        @param name: the visible name for this class, stored in the result on success.
        @type name: string.
        @return: an SRU instance.
        """
        self.url=url
        self.factory=contenthandlerfactory
        self.name=name
        self.result=None

    def fetch(self,isbn):
        """
        Retrieve information about a book based on isbn number.

        @param isbn: an isbn-10 or isbn-13 number.
        @type isbn: string.
        @return: the result, which is also stored in I{result}.
        @raise SRUError: if no result could be retrieved.
        """
        f=urlopen(self.url%isbn)
        try:
            return self.retrieve(f)
        finally:
            # do not leak the connection, whether or not parsing succeeded
            f.close()

    def retrieve(self,f):
        """
        Parse results in xml from an open file descriptor.

        @param f: a file descriptor supporting a C{read()} method as e.g. returned from C{urlopen()}.
        @return: the result, which is also stored in I{result}.
        @raise SRUError: if no result could be retrieved.

        L{fetch} and L{retrieve} are separated to facilitate derived classes that need to do something
        to the url before sending it and retrieving the results. They still can reuse the actual
        handling of the returned xml. See L{Amazon} class for an example.
        """
        self.result=None
        parser=xml.sax.make_parser()
        # the xml comes from the network: never resolve external entities
        parser.setFeature(xml.sax.handler.feature_external_ges,False)
        handler=self.factory()
        parser.setContentHandler(handler)
        parser.parse(f)
        # a record without a title is considered to be no result at all
        if 'title' not in handler.r:
            raise SRUError('no result from %s'%self.getName())
        handler.r['repository']=self.getName()
        self.result=dict(handler.r)
        return self.result

    def getName(self):
        """
        @return: the visible name of this repository.
        """
        return self.name
class Amazon(SRU):
    """
    Fetch bookinformation from Amazon using I{ecs}. See U{http://aws.amazon.com/}

    It depends on the aws querysigner code from U{http://sowacs.appspot.com/AWS/}

    An instance of L{Amazon} may on success hold the following keys in its result variable:

        - title: the book title.
        - authors: a list of full names of the author(s).
        - date: the date of publication.
        - publisher: the name of the publisher.
        - binding: the type of binding, e.g. 'hardcover'.
        - price: the list price (a float).
        - currency: the currency code, e.g. 'GBP'.
        - cover: the filename of the downloaded cover image.
    """

    def __init__(self,AWSId=None,AWSKey=None,endpoint="ecs.amazonaws.co.uk",uri="/onca/xml"):
        """
        @param AWSId: an Amazon AWS Id.
        @param AWSKey: an Amazon AWS Key.
        @param endpoint: fully qualified domain name of ecs server. defaults to uk but .com may be appropriate as well.
        @param uri: path to service.
        @type AWSId: string.
        @type AWSKey: string.
        @type endpoint: string.
        @type uri: string.

        Note: if AWSId is None, the id and key are set from the globally saved variables. This makes it
        possible not to use keys in the code but read them from a config file with config().

        Example::
            import isbn
            isbn.config('keys.ini')
            a=isbn.Amazon()
            a.fetch('9780596000851')
            print a.result
        """
        # the url is not known yet, it is signed per request in fetch()
        super(Amazon,self).__init__(None,AWSHandler,self.__class__.__name__)
        if AWSId is None:
            (AWSId,AWSKey)=awskeypair()
        self.AWSId=AWSId
        self.AWSKey=AWSKey
        self.endpoint=endpoint
        self.uri=uri

    def fetch(self,isbn):
        """
        Retrieve information about a book and, if available, its cover image.

        @param isbn: an isbn-10 or isbn-13 number.
        @type isbn: string.
        @return: the result, which is also stored in I{result}.
        @raise SRUError: if no result could be retrieved.
        """
        query = {
            "Keywords" : isbn,
            "Version" : "2009-03-31",
            "Service" : "AWSECommerceService",
            "AWSAccessKeyId" : self.AWSId,
            "Operation" : "ItemSearch",
            "SearchIndex":"Books",
            "ResponseGroup" : "Medium"
        }
        awsqs = AWSQS( 'GET', self.endpoint, self.uri, query, self.AWSKey )
        self.url = awsqs.signedrequest
        f=urlopen(self.url)
        try:
            self.retrieve(f)
        finally:
            f.close()
        r=self.result
        # download the largest cover image on offer; failing to get one is
        # not fatal, the other information is still returned
        for size in ('largeimage','mediumimage','smallimage'):
            if size in r:
                try:
                    filename,headers=urlretrieve(r[size])
                    r['cover']=filename
                except IOError as e:
                    print('exception retrieving cover %s'%e)
                break
        return r
417
419 """
420 Fetch bookinformation from the dutch Koninklijke Bibliotheek using I{sru}.
421
422 KB uses the dc (dublin core) schema and requires no key.
423
424 An instance of KoninklijkeBibliotheek may on success hold the following keys in its result variable:
425
426 - title: the book title.
427 - authors: a list of full names of the author(s).
428 - date: the date of publication.
429 - publisher: the name of the publisher.
430 """
434
class BritishLibrary(SRU):
    """
    Fetch bookinformation from the British Library (or actually Copac) using I{sru}.

    BL uses the mods schema and requires no key.

    An instance of BritishLibrary may on success hold the following keys in its result variable:

        - title: the book title.
        - authors: a list of full names of the author(s).
        - date: the date of publication.
        - publisher: the name of the publisher.
    """

    def __init__(self):
        # %s is replaced by the isbn number in SRU.fetch()
        self.url='http://copac.ac.uk:3000/copac?version=1.1&operation=searchRetrieve&query=%s&maximumRecords=1'
        super(BritishLibrary,self).__init__(self.url,COPACHandler,self.__class__.__name__)
453
class LibraryOfCongress(SRU):
    """
    Fetch bookinformation from the Library of Congress using I{sru}.

    LibCon uses the dc (dublin core) schema and requires no key.

    An instance of class LibraryOfCongress may on success hold the following keys in its result variable:

        - title: the book title.
        - authors: a list of full names of the author(s).
        - date: the date of publication.
        - publisher: the name of the publisher.
    """
    def __init__(self):
        # %s is replaced by the isbn number in SRU.fetch()
        self.url='http://z3950.loc.gov:7090/voyager?version=1.1&operation=searchRetrieve&query=bath.isbn+any+%s&maximumRecords=1&recordSchema=dc'
        super(LibraryOfCongress,self).__init__(self.url,DCHandler,self.__class__.__name__)
470
486
487
488 _providers = {Amazon:False,BritishLibrary:True,LibraryOfCongress:True,KoninklijkeBibliotheek:True,isbndbdotcom:False}
489
def getproviders():
    """
    Return a dictionary of possible providers of information.

    Keys are the classes, values are True or False depending on whether they will be used by L{isbn.fetch}.
    The dictionary is a copy: changing it does not enable or disable
    anything, use L{enable} and L{disable} for that.
    """
    return dict(_providers)
497
def enable(p):
    """
    Indicate that class p should be used by L{isbn.fetch}.

    @param p: an SRU derived class.
    @type p: SRU.
    @raise KeyError: if p is not a known provider.
    """
    if p not in _providers:
        # name the offending class so the error is not an empty KeyError
        raise KeyError(p)
    _providers[p]=True
507
def disable(p):
    """
    Indicate that class p should NOT be used by L{isbn.fetch}.

    @param p: an SRU derived class.
    @type p: SRU.
    @raise KeyError: if p is not a known provider.
    """
    if p not in _providers:
        # name the offending class so the error is not an empty KeyError
        raise KeyError(p)
    _providers[p]=False
517
def fetch(isbn):
    """
    Get bookinformation from a number of sources.

    @param isbn: a 10 or 13 digit isbn number.
    @type isbn: string.
    @return: a dictionary w. book information or None.
    @rtype: dict.

    If a 13 digit isbn/ean code is given (i.e. a number starting w. 978),
    I{fetch} will try to find information first by trying this 13 digit number
    then by using the derived 10 digit number.

    The dictionary returned contains the following keys:

        - authors: a list of authors
        - title: the book title
        - repository: the source of the info

    It may contain extra information:

        - date: the publication date
        - publisher: the books publisher
        - binding: info on the physical form of the book, e.g. 'paperback'
        - price: the current listprice of the book
        - currency: the currency code, e.g. 'GBP'
        - cover: a file name containing a cover image
    """
    p = getproviders()
    isbnfinders=[]
    # enabled providers in order of preference first ...
    for f in (Amazon,BritishLibrary,LibraryOfCongress,KoninklijkeBibliotheek,isbndbdotcom):
        if p.pop(f,False):
            isbnfinders.append(f())
    # ... followed by any other enabled provider
    for f in p:
        if p[f]:
            isbnfinders.append(f())

    candidates = [isbn]
    # Only a real 13 digit number has an isbn-10 equivalent. Without the
    # length check a 10 digit isbn that happens to start with 978 would be
    # mangled.
    if len(isbn) == 13 and isbn[0:3] == '978':
        candidates.append(isbnchecksum(isbn[3:12]))
    for number in candidates:
        for finder in isbnfinders:
            try:
                finder.fetch(number)
            except (SRUError,IOError,xml.sax.SAXException) as e:
                # this source failed, report it and try the next one
                print(e)
                continue
            if finder.result is not None:
                return finder.result
    return None
576
_configfile='keys.conf'

def configfile(filename=None):
    """
    Get or set the name of the configfile used to retrieve keys from.

    @param filename: the new filename, or None to query the current one.
    @type filename: string.
    @return: the current filename if filename is None, nothing otherwise.
    """
    global _configfile
    if filename is None:
        return _configfile
    _configfile = filename
585
def config(filename=None):
    """
    Read and set keys from a configfile.

    Any L{ConfigParser.ConfigParser} compatible file (generally a .ini file) may be given as argument. The default is
    C{keys.conf}.

    To be useful it should contain the section C{[keys]}. Currently only Amazon AWS keypairs and
    isbndb.com keys are recognized. The file may contain other sections, these are ignored.
    Double quotes around a value are removed. Keys that are not present
    are left as they were.

    Example config file::

        [keys]
        AWSId = "ABCDEFGHIJKLMNOP"
        AWSKey = "gYGyGyGyHJDJHSGGSGFSHGDVfffDH"
        isbndbkey = "HSUIUJSJ21"

    @param filename: name of the configfile, or None to use the current one.
    @type filename: string.
    """
    try:
        from configparser import ConfigParser, Error as ConfigError
    except ImportError:
        # Python 2 name of the module
        from ConfigParser import ConfigParser, Error as ConfigError
    if filename is not None:
        configfile(filename)
    cf=ConfigParser()
    try:
        cf.read(_configfile)
        values = dict((name,cf.get('keys',name).strip('"'))
                      for name in ('AWSId','AWSKey','isbndbkey')
                      if cf.has_option('keys',name))
    except ConfigError as e:
        # an unreadable configfile is reported but is not fatal
        print(e)
        return
    # each kind of key is optional and set independently of the other
    if 'AWSId' in values and 'AWSKey' in values:
        awskeypair(values['AWSId'],values['AWSKey'])
    if 'isbndbkey' in values:
        isbndbdotcomkey(values['isbndbkey'])
614
if __name__ == '__main__' :

    # simple smoke test: look up a fixed list of isbn numbers, with the
    # keys read from books.conf
    config('books.conf')

    isbns = ['9781921573132','9780809556649','9789021520919','9789041413680','9780593061732','9780330262132','9780450531507','9780297825036','9789067899192','9780764571411','9780521354653','9780596005771','9781930110595','9789055480869',
             '9789055480616','9789043002042','9780596000264','9781928994558','9789039512272','9781565924949',
             '9781565921498','9781565927162','9781565920415','9781565923980','9780596002053','9780201688146',
             '9780596000851','9780470102602']

    for i in isbns:
        # a single formatted string prints the same under Python 2 and 3
        print('%s %s \n%s' % (i,fetch(i),'-'*20))
627