Loading book_tools/format/__init__.py +5 −3 Original line number Diff line number Diff line Loading @@ -2,6 +2,7 @@ import os import zipfile from xml import sax from io import BytesIO from book_tools.format.mimetype import Mimetype Loading Loading @@ -42,9 +43,9 @@ class __detector: else: return Mimetype.OCTET_STREAM def detect_mime(file): def detect_mime(file, original_filename): FB2_ROOT = 'FictionBook' mime = __detector.file(file.name) mime = __detector.file(original_filename) try: if mime == Mimetype.XML or mime == Mimetype.FB2: Loading Loading @@ -76,7 +77,8 @@ def detect_mime(file): def create_bookfile(file, original_filename): if isinstance(file, str): file = open(file, 'rb') mimetype = detect_mime(file) file = BytesIO(file.read()) mimetype = detect_mime(file,original_filename) if mimetype == Mimetype.EPUB: return EPub(file, original_filename) elif mimetype == Mimetype.FB2: Loading book_tools/format/fb2.py +13 −7 Original line number Diff line number Diff line Loading @@ -13,15 +13,17 @@ class FB2StructureException(Exception): print(traceback.print_exc()) class Namespace(object): FICTION_BOOK = 'http://www.gribuser.ru/xml/fictionbook/2.0' FICTION_BOOK20 = 'http://www.gribuser.ru/xml/fictionbook/2.0' FICTION_BOOK21 = 'http://www.gribuser.ru/xml/fictionbook/2.1' XLINK = 'http://www.w3.org/1999/xlink' class FB2Base(BookFile): def __init__(self, file, original_filename, mimetype): BookFile.__init__(self, file, original_filename, mimetype) self.__namespaces = {'fb': Namespace.FICTION_BOOK, 'xlink': Namespace.XLINK} self.__namespaces = {'xlink': Namespace.XLINK} try: tree = self.__create_tree__() self.__detect_namespaces(tree) self.__detect_title(tree) self.__detect_authors(tree) self.__detect_tags(tree) Loading @@ -45,7 +47,7 @@ class FB2Base(BookFile): tree = self.__create_tree__() res = tree.xpath('/fb:FictionBook/fb:description/fb:title-info/fb:coverpage/fb:image', namespaces=self.__namespaces) cover_id = res[0].get('{' + Namespace.XLINK + '}href')[1:] res = tree.xpath('//fb:binary[@id="%s"]' % cover_id, namespaces=self.__namespaces) res = tree.xpath('/fb:binary[@id="%s"]' % cover_id, namespaces=self.__namespaces) content = base64.b64decode(res[0].text) with open(os.path.join(working_dir, 'cover.jpeg'), 'wb') as cover_file: cover_file.write(content) Loading @@ -58,17 +60,21 @@ class FB2Base(BookFile): tree = self.__create_tree__() res = tree.xpath('/fb:FictionBook/fb:description/fb:title-info/fb:coverpage/fb:image', namespaces=self.__namespaces) cover_id = res[0].get('{' + Namespace.XLINK + '}href')[1:] res = tree.xpath('//fb:binary[@id="%s"]' % cover_id, namespaces=self.__namespaces) res = tree.xpath('/fb:binary[@id="%s"]' % cover_id, namespaces=self.__namespaces) content = base64.b64decode(res[0].text) return content except Exception as err: print(err) return None def __detect_namespaces(self, tree): tag = tree.getroot().tag self.__namespaces['fb'] = Namespace.FICTION_BOOK20 if tag.find(Namespace.FICTION_BOOK20)>0 else Namespace.FICTION_BOOK21 return None def __detect_title(self, tree): res = tree.xpath('/fb:FictionBook/fb:description/fb:title-info/fb:book-title', namespaces=self.__namespaces) if len(res) == 0: res = tree.xpath('/FictionBook/description/title-info/book-title') res = tree.xpath('/*[local-name() = "FictionBook"]/*[local-name() = "description"]/*[local-name() = "title-info"]/*[local-name() = "book-title"]') if len(res) > 0: self.__set_title__(res[0].text) Loading book_tools/pymobi/mobi.py +1 −1 Original line number Diff line number Diff line Loading @@ -260,7 +260,7 @@ class BookMobi(object): f = open(file, 'rb') else: f = file self.filename = f.name self.f = f self.f.seek(0,0) # palm database header Loading opds_catalog/sopdscan.py +3 −3 Original line number Diff line number Diff line Loading @@ -5,13 +5,13 @@ import time import datetime import logging import re from book_tools.format import create_bookfile from django.db import transaction from opds_catalog import fb2parse, opdsdb from opds_catalog import inpx_parser #from opds_catalog import settings import opds_catalog.zipf as zipfile from constance import config Loading Loading @@ -207,9 +207,9 @@ class opdsScanner: try: book_data = create_bookfile(file, name) except: except Exception as err: book_data = None self.logger.warning(rel_path + ' - ' + name + ' Book parse error, skipping...') self.logger.warning(rel_path + ' - ' + name + ' Book parse error, skipping... (Error: %s)'%err) self.bad_books += 1 if book_data: Loading opds_catalog/tests/test_scan.py +2 −2 Original line number Diff line number Diff line Loading @@ -65,7 +65,7 @@ class scanTestCase(TestCase): self.assertEqual(book.catalog.path, self.test_zip) self.assertEqual(book.catalog.cat_name, self.test_zip) self.assertEqual(book.catalog.cat_type, 1) self.assertEqual(book.docdate, "130552595662030000") self.assertEqual(book.docdate, "2014-09-15") self.assertEqual(book.title, "Любовь в жизни Обломова") self.assertEqual(book.avail, 2) self.assertEqual(book.authors.count(), 1) Loading @@ -86,7 +86,7 @@ class scanTestCase(TestCase): self.assertEqual(book.path, self.test_zip) self.assertEqual(book.cat_type, 1) self.assertEqual(book.title, "Драконьи Услуги") self.assertEqual(book.authors.get(full_name="Куприянов Денис").search_full_name, "КУПРИЯНОВ ДЕНИС") self.assertEqual(book.authors.get(full_name="Куприянов Денис Валерьевич").search_full_name, "КУПРИЯНОВ ДЕНИС ВАЛЕРЬЕВИЧ") def test_scanall(self): """ Тестирование процедуры scanall (извлекает метаданные из книг и помещает в БД) """ Loading Loading
book_tools/format/__init__.py +5 −3 Original line number Diff line number Diff line Loading @@ -2,6 +2,7 @@ import os import zipfile from xml import sax from io import BytesIO from book_tools.format.mimetype import Mimetype Loading Loading @@ -42,9 +43,9 @@ class __detector: else: return Mimetype.OCTET_STREAM def detect_mime(file): def detect_mime(file, original_filename): FB2_ROOT = 'FictionBook' mime = __detector.file(file.name) mime = __detector.file(original_filename) try: if mime == Mimetype.XML or mime == Mimetype.FB2: Loading Loading @@ -76,7 +77,8 @@ def detect_mime(file): def create_bookfile(file, original_filename): if isinstance(file, str): file = open(file, 'rb') mimetype = detect_mime(file) file = BytesIO(file.read()) mimetype = detect_mime(file,original_filename) if mimetype == Mimetype.EPUB: return EPub(file, original_filename) elif mimetype == Mimetype.FB2: Loading
book_tools/format/fb2.py +13 −7 Original line number Diff line number Diff line Loading @@ -13,15 +13,17 @@ class FB2StructureException(Exception): print(traceback.print_exc()) class Namespace(object): FICTION_BOOK = 'http://www.gribuser.ru/xml/fictionbook/2.0' FICTION_BOOK20 = 'http://www.gribuser.ru/xml/fictionbook/2.0' FICTION_BOOK21 = 'http://www.gribuser.ru/xml/fictionbook/2.1' XLINK = 'http://www.w3.org/1999/xlink' class FB2Base(BookFile): def __init__(self, file, original_filename, mimetype): BookFile.__init__(self, file, original_filename, mimetype) self.__namespaces = {'fb': Namespace.FICTION_BOOK, 'xlink': Namespace.XLINK} self.__namespaces = {'xlink': Namespace.XLINK} try: tree = self.__create_tree__() self.__detect_namespaces(tree) self.__detect_title(tree) self.__detect_authors(tree) self.__detect_tags(tree) Loading @@ -45,7 +47,7 @@ class FB2Base(BookFile): tree = self.__create_tree__() res = tree.xpath('/fb:FictionBook/fb:description/fb:title-info/fb:coverpage/fb:image', namespaces=self.__namespaces) cover_id = res[0].get('{' + Namespace.XLINK + '}href')[1:] res = tree.xpath('//fb:binary[@id="%s"]' % cover_id, namespaces=self.__namespaces) res = tree.xpath('/fb:binary[@id="%s"]' % cover_id, namespaces=self.__namespaces) content = base64.b64decode(res[0].text) with open(os.path.join(working_dir, 'cover.jpeg'), 'wb') as cover_file: cover_file.write(content) Loading @@ -58,17 +60,21 @@ class FB2Base(BookFile): tree = self.__create_tree__() res = tree.xpath('/fb:FictionBook/fb:description/fb:title-info/fb:coverpage/fb:image', namespaces=self.__namespaces) cover_id = res[0].get('{' + Namespace.XLINK + '}href')[1:] res = tree.xpath('//fb:binary[@id="%s"]' % cover_id, namespaces=self.__namespaces) res = tree.xpath('/fb:binary[@id="%s"]' % cover_id, namespaces=self.__namespaces) content = base64.b64decode(res[0].text) return content except Exception as err: print(err) return None def __detect_namespaces(self, tree): tag = tree.getroot().tag self.__namespaces['fb'] = Namespace.FICTION_BOOK20 if tag.find(Namespace.FICTION_BOOK20)>0 else Namespace.FICTION_BOOK21 return None def __detect_title(self, tree): res = tree.xpath('/fb:FictionBook/fb:description/fb:title-info/fb:book-title', namespaces=self.__namespaces) if len(res) == 0: res = tree.xpath('/FictionBook/description/title-info/book-title') res = tree.xpath('/*[local-name() = "FictionBook"]/*[local-name() = "description"]/*[local-name() = "title-info"]/*[local-name() = "book-title"]') if len(res) > 0: self.__set_title__(res[0].text) Loading
book_tools/pymobi/mobi.py +1 −1 Original line number Diff line number Diff line Loading @@ -260,7 +260,7 @@ class BookMobi(object): f = open(file, 'rb') else: f = file self.filename = f.name self.f = f self.f.seek(0,0) # palm database header Loading
opds_catalog/sopdscan.py +3 −3 Original line number Diff line number Diff line Loading @@ -5,13 +5,13 @@ import time import datetime import logging import re from book_tools.format import create_bookfile from django.db import transaction from opds_catalog import fb2parse, opdsdb from opds_catalog import inpx_parser #from opds_catalog import settings import opds_catalog.zipf as zipfile from constance import config Loading Loading @@ -207,9 +207,9 @@ class opdsScanner: try: book_data = create_bookfile(file, name) except: except Exception as err: book_data = None self.logger.warning(rel_path + ' - ' + name + ' Book parse error, skipping...') self.logger.warning(rel_path + ' - ' + name + ' Book parse error, skipping... (Error: %s)'%err) self.bad_books += 1 if book_data: Loading
opds_catalog/tests/test_scan.py +2 −2 Original line number Diff line number Diff line Loading @@ -65,7 +65,7 @@ class scanTestCase(TestCase): self.assertEqual(book.catalog.path, self.test_zip) self.assertEqual(book.catalog.cat_name, self.test_zip) self.assertEqual(book.catalog.cat_type, 1) self.assertEqual(book.docdate, "130552595662030000") self.assertEqual(book.docdate, "2014-09-15") self.assertEqual(book.title, "Любовь в жизни Обломова") self.assertEqual(book.avail, 2) self.assertEqual(book.authors.count(), 1) Loading @@ -86,7 +86,7 @@ class scanTestCase(TestCase): self.assertEqual(book.path, self.test_zip) self.assertEqual(book.cat_type, 1) self.assertEqual(book.title, "Драконьи Услуги") self.assertEqual(book.authors.get(full_name="Куприянов Денис").search_full_name, "КУПРИЯНОВ ДЕНИС") self.assertEqual(book.authors.get(full_name="Куприянов Денис Валерьевич").search_full_name, "КУПРИЯНОВ ДЕНИС ВАЛЕРЬЕВИЧ") def test_scanall(self): """ Тестирование процедуры scanall (извлекает метаданные из книг и помещает в БД) """ Loading