Commit a3d85998 authored by Dmitry Shelepnev
Browse files

Add __detect_namespaces to FB2 class

parent 52886546
Loading
Loading
Loading
Loading
+5 −3
Original line number Diff line number Diff line
@@ -2,6 +2,7 @@
import os
import zipfile
from xml import sax
from io import BytesIO

from book_tools.format.mimetype import Mimetype

@@ -42,9 +43,9 @@ class __detector:
        else:
            return Mimetype.OCTET_STREAM

def detect_mime(file):
def detect_mime(file, original_filename):
    FB2_ROOT = 'FictionBook'
    mime = __detector.file(file.name)
    mime = __detector.file(original_filename)

    try:
        if mime == Mimetype.XML or mime == Mimetype.FB2:
@@ -76,7 +77,8 @@ def detect_mime(file):
def create_bookfile(file, original_filename):
    if isinstance(file, str):
        file = open(file, 'rb')
    mimetype = detect_mime(file)
    file = BytesIO(file.read())
    mimetype = detect_mime(file,original_filename)
    if mimetype == Mimetype.EPUB:
        return EPub(file, original_filename)
    elif mimetype == Mimetype.FB2:
+13 −7
Original line number Diff line number Diff line
@@ -13,15 +13,17 @@ class FB2StructureException(Exception):
            print(traceback.print_exc())

class Namespace(object):
    """XML namespace URIs used when building XPath queries for FB2 documents."""
    # Same URI as FICTION_BOOK20 below.
    # NOTE(review): in this diff view this looks like the pre-change,
    # version-less name - confirm whether anything still references it.
    FICTION_BOOK = 'http://www.gribuser.ru/xml/fictionbook/2.0'
    # FictionBook 2.0 namespace URI.
    FICTION_BOOK20 = 'http://www.gribuser.ru/xml/fictionbook/2.0'
    # FictionBook 2.1 namespace URI.
    FICTION_BOOK21 = 'http://www.gribuser.ru/xml/fictionbook/2.1'
    # XLink namespace URI; used to read the 'href' attribute of cover images.
    XLINK = 'http://www.w3.org/1999/xlink'

class FB2Base(BookFile):
    def __init__(self, file, original_filename, mimetype):
        BookFile.__init__(self, file, original_filename, mimetype)
        self.__namespaces = {'fb': Namespace.FICTION_BOOK, 'xlink': Namespace.XLINK}
        self.__namespaces = {'xlink': Namespace.XLINK}
        try:
            tree = self.__create_tree__()
            self.__detect_namespaces(tree)
            self.__detect_title(tree)
            self.__detect_authors(tree)
            self.__detect_tags(tree)
@@ -45,7 +47,7 @@ class FB2Base(BookFile):
            tree = self.__create_tree__()
            res = tree.xpath('/fb:FictionBook/fb:description/fb:title-info/fb:coverpage/fb:image', namespaces=self.__namespaces)
            cover_id = res[0].get('{' + Namespace.XLINK + '}href')[1:]
            res = tree.xpath('//fb:binary[@id="%s"]' % cover_id, namespaces=self.__namespaces)
            res = tree.xpath('/fb:binary[@id="%s"]' % cover_id, namespaces=self.__namespaces)
            content = base64.b64decode(res[0].text)
            with open(os.path.join(working_dir, 'cover.jpeg'), 'wb') as cover_file:
                cover_file.write(content)
@@ -58,17 +60,21 @@ class FB2Base(BookFile):
            tree = self.__create_tree__()
            res = tree.xpath('/fb:FictionBook/fb:description/fb:title-info/fb:coverpage/fb:image', namespaces=self.__namespaces)
            cover_id = res[0].get('{' + Namespace.XLINK + '}href')[1:]
            res = tree.xpath('//fb:binary[@id="%s"]' % cover_id, namespaces=self.__namespaces)
            res = tree.xpath('/fb:binary[@id="%s"]' % cover_id, namespaces=self.__namespaces)
            content = base64.b64decode(res[0].text)
            return content
        except Exception as err:
            print(err)
            return None

    def __detect_namespaces(self, tree):
        """Choose the FictionBook namespace bound to the ``fb`` XPath prefix.

        Inspects the tag of the root element of *tree*. When the tag contains
        the FictionBook 2.0 namespace URI, ``fb`` is mapped to
        ``Namespace.FICTION_BOOK20``; in every other case it is mapped to
        ``Namespace.FICTION_BOOK21``. The mapping is stored in
        ``self.__namespaces`` so that later XPath queries can use it.

        :param tree: parsed document exposing ``getroot()``.
        :return: None
        """
        root_tag = tree.getroot().tag
        # Plain substring test instead of ``find(...) > 0``: a match at
        # index 0 has to count as a match as well.
        # NOTE(review): tags presumably look like '{namespace-uri}local-name'
        # - confirm against the parser in use.
        if Namespace.FICTION_BOOK20 in root_tag:
            fb_namespace = Namespace.FICTION_BOOK20
        else:
            # Anything that is not 2.0 (including a root element without a
            # namespace) is treated as FictionBook 2.1.
            fb_namespace = Namespace.FICTION_BOOK21
        self.__namespaces['fb'] = fb_namespace

    def __detect_title(self, tree):
        """Find the book title in the parsed document and store it.

        The title is looked up first with the namespace-aware path
        ``/fb:FictionBook/fb:description/fb:title-info/fb:book-title`` using
        the prefixes in ``self.__namespaces``. If that yields nothing, the
        same path is retried matching elements by local name only, so that
        documents whose namespace differs from the configured one (or that
        have none) are still handled. When a match exists, the text of the
        first one is passed to ``self.__set_title__``; otherwise nothing is
        set.

        :param tree: parsed document exposing ``xpath()``.
        :return: None
        """
        res = tree.xpath('/fb:FictionBook/fb:description/fb:title-info/fb:book-title', namespaces=self.__namespaces)
        if not res:
            # Namespace-agnostic fallback. The earlier un-prefixed query
            # ('/FictionBook/description/...') was dropped: its result was
            # overwritten by this query before ever being read.
            res = tree.xpath('/*[local-name() = "FictionBook"]/*[local-name() = "description"]/*[local-name() = "title-info"]/*[local-name() = "book-title"]')
        if res:
            self.__set_title__(res[0].text)

+1 −1
Original line number Diff line number Diff line
@@ -260,7 +260,7 @@ class BookMobi(object):
            f = open(file, 'rb')
        else:
            f = file
        self.filename = f.name

        self.f = f
        self.f.seek(0,0)
        # palm database header
+3 −3
Original line number Diff line number Diff line
@@ -5,13 +5,13 @@ import time
import datetime
import logging
import re

from book_tools.format import create_bookfile

from django.db import transaction

from opds_catalog import fb2parse, opdsdb
from opds_catalog import inpx_parser
#from opds_catalog import settings
import opds_catalog.zipf as zipfile

from constance import config
@@ -207,9 +207,9 @@ class opdsScanner:

                try:
                    book_data = create_bookfile(file, name)
                except:
                except Exception as err:
                    book_data = None
                    self.logger.warning(rel_path + ' - ' + name + ' Book parse error, skipping...')
                    self.logger.warning(rel_path + ' - ' + name + ' Book parse error, skipping... (Error: %s)'%err)
                    self.bad_books += 1

                if book_data:
+2 −2
Original line number Diff line number Diff line
@@ -65,7 +65,7 @@ class scanTestCase(TestCase):
        self.assertEqual(book.catalog.path, self.test_zip)
        self.assertEqual(book.catalog.cat_name, self.test_zip)
        self.assertEqual(book.catalog.cat_type, 1)
        self.assertEqual(book.docdate, "130552595662030000")
        self.assertEqual(book.docdate, "2014-09-15")
        self.assertEqual(book.title, "Любовь в жизни Обломова")
        self.assertEqual(book.avail, 2)
        self.assertEqual(book.authors.count(), 1)
@@ -86,7 +86,7 @@ class scanTestCase(TestCase):
        self.assertEqual(book.path, self.test_zip)
        self.assertEqual(book.cat_type, 1)
        self.assertEqual(book.title, "Драконьи Услуги")
        self.assertEqual(book.authors.get(full_name="Куприянов Денис").search_full_name, "КУПРИЯНОВ ДЕНИС")
        self.assertEqual(book.authors.get(full_name="Куприянов Денис Валерьевич").search_full_name, "КУПРИЯНОВ ДЕНИС ВАЛЕРЬЕВИЧ")

    def test_scanall(self):
        """ Тестирование процедуры scanall (извлекает метаданные из книг и помещает в БД) """