##    module isbn, retrieves information about books from various sources
##    Copyright 2009 Michel. J. Anders
##
##    This program is free software: you can redistribute it and/or modify
##    it under the terms of the GNU General Public License as published by
##    the Free Software Foundation, either version 3 of the License, or
##    (at your option) any later version.
##
##    This program is distributed in the hope that it will be useful,
##    but WITHOUT ANY WARRANTY; without even the implied warranty of
##    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
##    GNU General Public License for more details.
##
##    You should have received a copy of the GNU General Public License
##    along with this program.  If not, see <http://www.gnu.org/licenses/>.

"""
This module provides functionality to fetch information on a book (like
author and title) when given an isbn number.

Example usage::

    import isbn
    isbn.config()                                   # read keys from keys.conf
    result=isbn.fetch('978123456789X')              # get info

At this moment it provides classes to retrieve information from
Amazon via I{ecs} (you need a (free) license key), from libraries via
I{sru} (at this moment: Library of Congress, British Library (Copac),
and the Dutch Koninklijke Bibliotheek) or from isbndb.com (you need a (free)
license key) via an ad-hoc webservice.

To implement this functionality an extensible hierarchy of classes is provided.
The base class is called L{SRU} and most classes based on it merely override the
C{__init__()} method to set a different url to retrieve the information. The L{Amazon} class
is slightly more complicated as it has to encrypt the url with a key.
Once a L{SRU} derived class is instantiated you can call the instance's C{fetch()} method
to try and retrieve the information. The result (either a dictionary with entries or None)
is stored in the result instance variable.

Together with the L{SRU} class a number of L{ContentHandler} derived classes are defined to
interpret the xml data that is returned.

For convenience the isbn module provides the following toplevel functions:

    - C{fetch(isbnnumber)}           will try all sources defined in this module (excluding the ones without proper keys)
    - C{awskeypair(id,key)}          set an amazon AWS keypair
    - C{isbndbdotcomkey(key)}    set an isbndb.com key
    - C{getproviders()}                 get a list of classes that can be used to fetch info
    - C{enable(klass)}                  lets fetch() use this class
    - C{disable(klass)}                 prohibit fetch() using this class
    - C{config(filename)}              configure AWS and/or isbndb keys from a config file (default keys.conf)
"""
from AWSQuerySigner import AWSQS

# Module-wide Amazon AWS credentials; these placeholders stay in place until
# awskeypair() is called with real values.
AWSId = 'No AWSId specified'
AWSKey = 'No AWSKey specified'

def awskeypair(Id=None, Key=None):
    """
    Read or replace the module-wide Amazon AWS keypair.

    Called without an Id this acts as a getter. Called with an Id it stores
    both values and switches the L{Amazon} provider on through L{enable}.

    @param Id: AWS Id to store, or None to query the current pair.
    @type Id: string.
    @param Key: AWS Key stored together with Id; ignored when Id is None.
    @type Key: string.
    @return: the tuple (AWSId,AWSKey) when Id is None, otherwise None.
    """
    global AWSId, AWSKey
    if Id is not None:
        AWSId, AWSKey = Id, Key
        enable(Amazon)
        return None
    return (AWSId, AWSKey)

# Module-wide isbndb.com access key; this placeholder stays in place until
# isbndbdotcomkey() is called with a real key.
isbndbkey = 'No ISBNDB.com key specified'

def isbndbdotcomkey(key=None):
    """
    Read or replace the module-wide isbndb.com access key.

    Called without a key this acts as a getter. Called with a key it stores
    the key and switches the L{isbndbdotcom} provider on through L{enable}.

    @param key: isbndb.com key to store, or None to query the current key.
    @type key: string.
    @return: the current key when key is None, otherwise None.
    """
    global isbndbkey
    if key is not None:
        isbndbkey = key
        enable(isbndbdotcom)
        return None
    return isbndbkey
    
from urllib import urlopen,urlretrieve
import xml.sax
from xml.sax.handler import ContentHandler

import string

def isbnchecksum(line):
    """
    Calculate the checksum for an isbn-10 number.

    @param line: 9 or 10 digit number.
    @type line: string.
    @return: 10 character isbn number, or the message
        C{'# ISBN should be 9 digits, excluding checksum!'} when the input
        is not 9 or 10 characters long.
    @rtype: string.
    @raise ValueError: if one of the first 9 characters is not a digit.

    If you pass in a 10 digit number the last digit is ignored. This is to facilitate the conversion
    from an isbn-13 number (aka EAN code, 13 digits starting w. 978) to an isbn-10 number by
    simply chopping off the first 3 digits and passing it to isbnchecksum().

    Example::

        isbn10 = isbnchecksum(isbn13[3:])
    """
    if len(line) == 10:
        # drop the existing check character, it is recomputed below
        line = line[:9]
    if len(line) != 9:
        return '# ISBN should be 9 digits, excluding checksum!'
    # the nine digits are weighted 10, 9, ..., 2 from left to right
    total = 0
    for weight, digit in zip(range(10, 1, -1), line):
        total += weight * int(digit)
    # the check value brings the weighted sum up to a multiple of 11
    check = (11 - total % 11) % 11
    if check == 10:
        return line + 'X'
    return line + str(check)

class DCHandler(ContentHandler):
    """
    Minimal SAX content handler for xml returned by an SRU server.

    It is a hack: no knowledge of the record schema (Dublin Core, Bath, ...)
    is used. The handler only looks at element names, after removing a
    leading C{dc:} and/or C{dcterms:} prefix, and copies the text of the
    interesting ones into the instance variable I{self.r}. See L{isbn.fetch}
    for information about the contents.
    """
    def __init__(self):
        ContentHandler.__init__(self)
        self.tags = []                # names of the currently open elements
        self.data = ''                # text collected since the last start tag
        self.r = {'authors': []}      # the harvested record

    def startElement(self, name, attr):
        # every start tag discards the text gathered so far
        self.tags.append(name)
        self.data = ''

    def characters(self, ch):
        self.data = self.data + ch

    def endElement(self, name):
        self.tags.pop()
        # strip the prefixes one after the other, in this fixed order
        for prefix in ('dc:', 'dcterms:'):
            if name.startswith(prefix):
                name = name[len(prefix):]

        text = self.data
        if name in ('date', 'publisher', 'title'):
            self.r[name] = text
        elif name in ('year', 'issued', 'dateIssued'):
            # alternative spellings of the publication date
            self.r['date'] = text
        elif name in ('creator', 'author'):
            self.r['authors'].append(text)

class COPACHandler(ContentHandler):
    """
    Simple SAX content handler to process xml retrieved from an
    SRU server delivering xml using the mods schema and stores it in the instance variable
    I{self.r}. See L{isbn.fetch} for information about the
    contents.

    C{title} and C{namePart} elements are only accepted when their
    grandparent element is named C{mods}; C{publisher} and C{dateIssued}
    are accepted at any depth.
    """
    def __init__(self):
        ContentHandler.__init__(self)
        self.r={'authors':[]}
        self.data=''
        self.tags=[]

    def startElement(self,name,attr):
        # every start tag discards the text gathered so far
        self.data=''
        self.tags.append(name)

    def characters(self,ch):
        self.data+=ch

    def endElement(self,name):
        self.tags.pop()
        # After the pop tags[-1] is the parent and tags[-2] the grandparent of
        # the element being closed. Shallow elements have no grandparent;
        # indexing blindly would raise IndexError and abort the whole parse.
        if len(self.tags) >= 2:
            grandparent=self.tags[-2]
        else:
            grandparent=None

        if name == 'publisher':
            self.r[name]=self.data
        elif name == 'namePart' and grandparent == 'mods':
            self.r['authors'].append(self.data)
        elif name == 'dateIssued':
            self.r['date']=self.data
        elif name == 'title' and grandparent == 'mods':
            self.r['title']=self.data

class AWSHandler(ContentHandler):
    """
    Simple SAX content handler to process xml retrieved from an
    AWS server.  See L{isbn.fetch} for information about the
    contents.

    Besides the record in I{self.r} the handler counts C{Error} elements in
    I{self.error}.
    """
    def __init__(self):
        ContentHandler.__init__(self)
        self.r={'authors':[]}
        self.data=''
        self.error=0
        # Most recent Amount / CurrencyCode / URL text seen. They start as
        # None so that a ListPrice or image element lacking these children
        # is skipped instead of raising AttributeError.
        self.amount=None
        self.crcode=None
        self.url=None

    def startElement(self,name,attr):
        # every start tag discards the text gathered so far
        self.data=''

    def characters(self,ch):
        self.data+=ch

    def endElement(self,name):
        if name in ('Date','Publisher','Title','ISBN','Binding'):
            self.r[name.lower()]=self.data
        elif name == 'Amount':
            self.amount=self.data
        elif name == 'CurrencyCode':
            self.crcode=self.data
        elif name == 'ListPrice':
            # Amount is divided by 100 to obtain the price
            if self.amount is not None:
                self.r['price']=float(self.amount)/100.0
            if self.crcode is not None:
                self.r['currency']=self.crcode
        elif name == 'URL':
            self.url=self.data
        elif name in ('SmallImage','MediumImage','LargeImage'):
            # the image element is stored under its lowercased name, using the last URL seen
            if self.url is not None:
                self.r[name.lower()]=self.url
        elif name == 'PublicationDate':
            self.r['date']=self.data
        elif name in ('Author','Creator'):
            self.r['authors'].append(self.data)
        elif name == 'Error':
            self.error+=1
            
class ISBNDBHandler(ContentHandler):
    """
    Simple SAX content handler to process xml retrieved from
    U{isbndb.com} See L{isbn.fetch} for information about the contents.

    The text of C{Title} elements is stored as the title, the text of
    C{Person} elements is appended to the authors, and a C{book_id}
    attribute on any element is stored as C{isbndb_book_id}.
    """
    def __init__(self):
        ContentHandler.__init__(self)
        self.r={'authors':[]}
        self.data=''
        self.error=0
        self.tags=[]

    def startElement(self,name,attr):
        # every start tag discards the text gathered so far
        self.data=''
        if 'book_id' in attr: self.r['isbndb_book_id'] = attr['book_id']
        self.tags.append(name)

    def characters(self,ch):
        self.data+=ch

    def endElement(self,name):
        # Compare with == : the former "name in ('Title')" tested against a
        # plain string, so any substring of the tag name (e.g. 'son') matched.
        if name == 'Title':
            self.r['title']=self.data
        elif name == 'Person':
            self.r['authors'].append(self.data)
        self.tags.pop(-1)
                    

class SRUError(Exception):
    """Raised by L{SRU.retrieve} when a response contains no title, i.e. no usable result."""

class SRU(object):
    """
    Base class for all classes that implement retrieval of book information via a webservice.

    The outcome of the last L{fetch} or L{retrieve} call is kept in the
    instance variable I{result}: a dictionary on success, None otherwise.
    """
    def __init__(self,url,contenthandlerfactory,name):
        """
        @param url: an url that will be used to retrieve information. Should contain a single '%s' placeholder to fit an isbn number.
        @type url: string.
        @param contenthandlerfactory: a reference to a ContentHandler derived class. Instances must expose their findings in a dictionary attribute I{r}.
        @type contenthandlerfactory: ContentHandler.
        @param name: the visible name for this class, stored in the result on success.
        @type name: string.
        @return: an SRU instance.

        """
        self.url=url
        self.factory=contenthandlerfactory
        self.name=name
        self.result=None

    def fetch(self,isbn):
        """
        Retrieve information about a book based on isbn number.

        @param isbn: an isbn-10 or isbn-13 number.
        @type isbn: string.
        @return: the result dictionary, which is also stored in I{result}.
        @raise SRUError: if no result could be retrieved.
        """
        response=urlopen(self.url%isbn)
        try:
            return self.retrieve(response)
        finally:
            # release the connection even when parsing fails or no title is found
            response.close()

    def retrieve(self,f):
        """
        Parse results in xml from an open file descriptor.

        @param f: a file descriptor supporting a C{read()} method as e.g. returned from C{urlopen()}.
        @return: the result dictionary, which is also stored in I{result}.
        @raise SRUError: if no result could be retrieved.

        L{fetch} and L{retrieve} are separated to facilitate derived classes that need to do something
        to the url before sending it and retrieving the results. They still can reuse the actual
        handling of the returned xml. See L{Amazon} class for an example.
        """
        # reset first so a failed lookup never leaves a stale result behind
        self.result=None
        parser=xml.sax.make_parser()
        handler=self.factory()
        parser.setContentHandler(handler)
        parser.parse(f)
        # a record without a title is treated as "nothing found"
        if 'title' in handler.r:
            handler.r['repository']=self.getName()
            self.result=dict(handler.r)
            return self.result
        raise SRUError('no result from %s'%self.getName())

    def getName(self):
        """
        @return: the visible name passed to the constructor.
        """
        return self.name
    
class Amazon(SRU):
    """
    Fetch bookinformation from Amazon using I{ecs}. See U{http://aws.amazon.com/}

    It depends on the aws querysigner code from U{http://sowacs.appspot.com/AWS/}

    An instance of L{Amazon} may on success hold the following keys in its result variable:

       - title: the book title.
       - authors: a list of full names of the author(s).
       - date: the date of publication.
       - publisher: the name of the publisher.
       - binding: the type of binding, e.g. 'hardcover'.
       - price: the list price (a float).
       - currency: the currency code, e.g. 'GBP'.
       - smallimage, mediumimage, largeimage: urls of cover images.
       - cover: the filename of the downloaded cover image.
    """

    def __init__(self,AWSId=None,AWSKey=None,endpoint="ecs.amazonaws.co.uk",uri="/onca/xml"):
        """
        @param AWSId: an Amazon AWS Id.
        @param AWSKey: an Amazon AWS Key.
        @param endpoint: fully qualified domain name of ecs server. defaults to uk but .com may be appropriate as well.
        @param uri: path to service.
        @type AWSId: string.
        @type AWSKey: string.
        @type endpoint: string.
        @type uri: string.

        Note: if AWSId is  None, the id and key are set from the globally saved variables. This makes it
        possible not to use keys in the code but read them from a config file with config().

        Example::
            import isbn
            isbn.config('keys.ini')
            a=isbn.Amazon()
            a.fetch('9780596000851')
            print a.result
        """
        # the url is built and signed per request in fetch(), hence None here
        super(Amazon,self).__init__(None,AWSHandler,self.__class__.__name__)
        if AWSId is None:
            (AWSId,AWSKey)=awskeypair()
        self.AWSId=AWSId
        self.AWSKey=AWSKey
        self.endpoint=endpoint
        self.uri=uri

    def fetch(self,isbn):
        """
        Look up a book at Amazon and try to download its cover image.

        @param isbn: an isbn-10 or isbn-13 number, used as search keyword.
        @type isbn: string.
        @return: the result dictionary, which is also stored in I{result}.
        @raise SRUError: if no result could be retrieved.

        Downloading the cover is best effort: a failure is printed and the
        result is returned without a C{cover} entry.
        """
        query = {
            "Keywords" : isbn,
            "Version" : "2009-03-31",
            "Service" : "AWSECommerceService",
            "AWSAccessKeyId" : self.AWSId,
            "Operation" : "ItemSearch",
            "SearchIndex":"Books",
            "ResponseGroup" : "Medium"
        }
        awsqs = AWSQS( 'GET', self.endpoint, self.uri, query, self.AWSKey )
        self.url = awsqs.signedrequest
        response = urlopen(self.url)
        try:
            self.retrieve(response)
        finally:
            # release the connection even when parsing fails
            response.close()
        r=self.result
        # Use the largest image available and only attempt a download when
        # the response actually contained an image url.
        for size in ('largeimage','mediumimage','smallimage'):
            if size in r:
                try:
                    filename,headers=urlretrieve(r[size])
                    r['cover']=filename
                except Exception as e:
                    # the cover is optional, never let it spoil a good result
                    print('exception retrieving cover %s' % (e,))
                break
        return r

class KoninklijkeBibliotheek(SRU):
    """
    Provider for the Dutch Koninklijke Bibliotheek, queried over I{sru}.

    KB uses the dc (dublin core) schema and requires no key. The response
    is parsed by L{DCHandler}, so on success the result variable may hold:

       - title: the book title.
       - authors: a list of full names of the author(s).
       - date: the date of publication.
       - publisher: the name of the publisher.
       - repository: the name of this class.
    """
    def __init__(self):
        endpoint='http://www2.kb.nl/cgi-zoek/sru.pl?version=1.1&operation=searchRetrieve&query=%s'
        label=self.__class__.__name__
        super(KoninklijkeBibliotheek,self).__init__(url=endpoint,contenthandlerfactory=DCHandler,name=label)

class BritishLibrary(SRU):
    """
    Provider for the British Library (in fact Copac), queried over I{sru}.

    BL uses the mods schema and requires no key. The response is parsed by
    L{COPACHandler}, so on success the result variable may hold:

       - title: the book title.
       - authors: a list of full names of the author(s).
       - date: the date of publication.
       - publisher: the name of the publisher.
       - repository: the name of this class.
    """
    # The previous endpoint was
    # 'http://herbie.bl.uk:9080/cgi-bin/blils.cgi?version=1.1&operation=searchRetrieve&query=%s&maximumRecords=1&recordSchema=dc'%isbn
    # The current endpoint returns the mods schema only (not dc).
    def __init__(self):
        endpoint='http://copac.ac.uk:3000/copac?version=1.1&operation=searchRetrieve&query=%s&maximumRecords=1'
        label=self.__class__.__name__
        super(BritishLibrary,self).__init__(url=endpoint,contenthandlerfactory=COPACHandler,name=label)

class LibraryOfCongress(SRU):
    """
    Provider for the Library of Congress, queried over I{sru}.

    LibCon uses the dc (dublin core) schema and requires no key. The
    response is parsed by L{DCHandler}, so on success the result variable
    may hold:

       - title: the book title.
       - authors: a list of full names of the author(s).
       - date: the date of publication.
       - publisher: the name of the publisher.
       - repository: the name of this class.
    """
    def __init__(self):
        endpoint='http://z3950.loc.gov:7090/voyager?version=1.1&operation=searchRetrieve&query=bath.isbn+any+%s&maximumRecords=1&recordSchema=dc'
        label=self.__class__.__name__
        super(LibraryOfCongress,self).__init__(url=endpoint,contenthandlerfactory=DCHandler,name=label)

class isbndbdotcom(SRU):
    """
    Provider for isbndb.com, queried through I{a webservice}.

    See U{www.isbndb.com}. The response is parsed by L{ISBNDBHandler}, so
    on success the result variable may hold:

       - title: the book title.
       - authors: a list of full names of the author(s).
       - isbndb_book_id: the value of a C{book_id} attribute in the response.
       - repository: the name of this class.
    """
    def __init__(self,key=None):
        """
        @param key: an isbndb.com access key; when None the key stored with L{isbndbdotcomkey} is used.
        @type key: string.
        """
        if key is None:
            key=isbndbdotcomkey()
        # the access key is embedded in the url, the isbn placeholder is left open
        endpoint='http://isbndb.com/api/books.xml?access_key='+key+'&results=authors&index1=isbn&value1=%s'
        label=self.__class__.__name__
        super(isbndbdotcom,self).__init__(url=endpoint,contenthandlerfactory=ISBNDBHandler,name=label)

# Private registry mapping each provider class to a flag; fetch() only uses
# the classes flagged True. Amazon and isbndbdotcom start out False and are
# enabled by awskeypair() and isbndbdotcomkey() once a key is set.
_providers = {
    Amazon: False,
    BritishLibrary: True,
    LibraryOfCongress: True,
    KoninklijkeBibliotheek: True,
    isbndbdotcom: False,
}

def getproviders():
    """
    Give an overview of the providers known to this module.

    @return: a new dictionary (a copy, so changing it does not affect the
        module) with the provider classes as keys and True or False as
        values, depending on whether L{isbn.fetch} will use the class.
    """
    return _providers.copy()

def enable(p):
    """
    Mark provider class p as usable by L{isbn.fetch}.

    @param p: an SRU derived class.
    @type p: SRU.
    @raise KeyError: if p is not a known provider class.
    """
    if p in _providers:
        _providers[p] = True
        return
    raise KeyError()
    
def disable(p):
    """
    Mark provider class p as NOT to be used by L{isbn.fetch}.

    @param p: an SRU derived class.
    @type p: SRU.
    @raise KeyError: if p is not a known provider class.
    """
    if p in _providers:
        _providers[p] = False
        return
    raise KeyError()

def fetch(isbn):
    """
    Get bookinformation from a number of sources.

    @param isbn: a 10 or 13 digit isbn number.
    @type isbn: string.
    @return: a dictionary w. book information or None.

    If a 13 digit isbn/ean code is given (i.e. a number starting w. 978),
    I{fetch} will try to find information first by trying this 13 digit number
    then by using the derived 10 digit number.

    All enabled providers are tried in turn; a provider that fails (no
    result, network error or unparsable xml) is reported with a print and
    skipped. The first result found is returned.

    The dictionary returned contains the following keys:

        - authors: a list of 1 or more authors
        - title: the book title
        - repository: the source of the info

    It may contain extra information:

        - date: the publication date
        - publisher: the books publisher
        - binding: info on the physical form of the book, e.g. 'paperback'
        - price: the current listprice of the book
        - currency: the currency code, e.g. 'GBP'
        - cover: a file name containing a cover image
    """
    # this is my preferred order (amazon is quick)
    preferred = (Amazon,BritishLibrary,LibraryOfCongress,KoninklijkeBibliotheek,isbndbdotcom)
    p = getproviders()
    isbnfinders = [f() for f in preferred if p.get(f)]
    # enabled providers that are not in the preferred list come last
    isbnfinders.extend(f() for f in p if p[f] and f not in preferred)

    candidates = [isbn]
    # Only a full 13 digit 978 code has an isbn-10 equivalent. Without the
    # length check a shorter number starting with 978 made isbnchecksum()
    # return its error message, which was then sent out as a query.
    if len(isbn) == 13 and isbn.startswith('978'):
        candidates.append(isbnchecksum(isbn[3:12]))

    for number in candidates:
        for finder in isbnfinders:
            try:
                finder.fetch(number)
            except (SRUError, IOError, xml.sax.SAXException) as e:
                # a failing provider must not prevent trying the next one
                print(e)
                continue
            if finder.result is not None:
                return finder.result
    return None

# name of the file config() reads keys from; changed through configfile()
_configfile = 'keys.conf'
def configfile(filename=None):
    """
    Read or replace the name of the config file used to retrieve keys from.

    @param filename: the new file name, or None to query the current one.
    @type filename: string.
    @return: the current file name when filename is None, otherwise None.
    """
    global _configfile
    if filename is not None:
        _configfile = filename
        return None
    return _configfile

def config(filename=None):
    """
    Read and set keys from a configfile.

    Any L{ConfigParser.ConfigParser} compatible file (generally a .ini file) may be given as argument. The default is
    C{keys.conf}.

    To be useful it should contain the section C{[keys]}. Currently only Amazon AWS keypairs and
    isbndb.com keys are recognized. The file may contain other sections, these are ignored.
    Double quotes surrounding a value are removed.

    Example config file::

        [keys]
        AWSId = "ABCDEFGHIJKLMNOP"
        AWSKey = "gYGyGyGyHJDJHSGGSGFSHGDVfffDH"
        isbndbkey = "HSUIUJSJ21"

    @param filename: the config file to read; when given it also becomes the new default (see L{configfile}).
    @type filename: string.
    @return: nothing. Problems are printed, never raised.
    """
    from ConfigParser import ConfigParser
    if filename is not None:
        configfile(filename)
    cf=ConfigParser()
    try:
        cf.read(_configfile)
    except Exception as e:
        # an unreadable config file is reported but never fatal
        print(e)
        return
    # The two services are configured independently, so a missing AWS entry
    # does not prevent the isbndb.com key from being picked up (and vice versa).
    try:
        awskeypair(cf.get('keys','AWSId').strip('"'),cf.get('keys','AWSKey').strip('"'))
    except Exception as e:
        print(e)
    try:
        # quotes are stripped here as well, the example file quotes every value
        isbndbdotcomkey(cf.get('keys','isbndbkey').strip('"'))
    except Exception as e:
        print(e)

if __name__ == '__main__' :
    # Manual smoke test: look up a fixed list of isbn-13 numbers with fetch()
    # and print each number followed by the result (a dictionary or None).
    
    # keys are read from books.conf rather than the default keys.conf
    config('books.conf')
    
    isbns = ['9781921573132','9780809556649','9789021520919','9789041413680','9780593061732','9780330262132','9780450531507','9780297825036','9789067899192','9780764571411','9780521354653','9780596005771','9781930110595','9789055480869',
    '9789055480616','9789043002042','9780596000264','9781928994558','9789039512272','9781565924949',
    '9781565921498','9781565927162','9781565920415','9781565923980','9780596002053','9780201688146',
    '9780596000851','9780470102602']
    
    # each lookup is followed by a line of 20 dashes as separator
    for i in isbns:
        print i,fetch(i),'\n','-'*20


