Commit 985c872a authored by mitshel's avatar mitshel
Browse files

В первом приближении сделана Демонизация сканера книг

parent 5bfaf4c0
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -4,7 +4,7 @@ py/__pycache__/*
py/ziptest.py
py/fb2parse.py
py/b64decode.py
py/*.log
*.log
fb2toepub/*
!fb2toepub/*.zip
fb2conv/*
+25 −0
Original line number Diff line number Diff line
@@ -126,6 +126,31 @@ book_shelf = yes
logfile=sopds.log
loglevel=info

[daemon]
# pid_file должен содержать путь к pid-файлу процесса. Для демона sopdsd.py по указанному пути должен быть доступ на запись
# по умолчанию pid_file=/tmp/sopds.pid
pid_file = /tmp/sopds.pid

# scan_day_of_week содержит день недели (1=пн, 7=вс, 0=каждый день) когда должен запускаться процесс сканирования
# по умолчанию scan_day_of_week=0
scan_day_of_week = 0

# scan_time содержит время в формате HH:MM когда необходимо запустить сканирование
# по умолчанию scan_time=00:00
scan_time = 00:00

# scan_interval содержит интервал (в минутах) между сканированиями, например если scan_interval=120, а scan_time=00:00,
# то сканирование будет запускаться каждые 2 часа начиная с 00:00 (т.е. в 00:00, 02:00, 04:00, 06:00, и т.д.)
# если сканирование не будет завершено в течение указанного интервала, то новое сканирование запустится сразу после окончания предыдущего
# Установка scan_interval=0 приводит к однократному выполнению сканирования в установленное опцией scan_time время
# по умолчанию scan_interval = 0
scan_interval = 240

# scan_on_start определяет необходимость запуска сканирования при старте sopdsd.py
# если scan_on_start = yes , то при запуске sopdsd.py сразу же запустится сканирование
# по умолчанию scan_on_start = yes
scan_on_start = yes

[site]
id=http://sopds.ru/
title=SOPDS.RU | OPDS Catalog
+68 −101
Original line number Diff line number Diff line
INFO     [2014-04-14 21:25:34,904] Books added      : 0
INFO     [2014-04-14 21:25:34,935] Books skipped    : 5569
INFO     [2014-04-14 21:25:34,936] Books DB entries deleted : 0
INFO     [2014-04-14 21:25:34,936] Books in archives: 0
INFO     [2014-04-14 21:25:34,936] Archives scanned : 35
INFO     [2014-04-14 21:25:34,936] Archives skipped : 127
INFO     [2014-04-14 21:25:34,936] Bad archives     : 0
INFO     [2014-04-14 21:25:34,937] Time estimated:0 hours, 3 minutes, 43 seconds.
2014-04-14 21:48:44,661 INFO     Books added      : 0
2014-04-14 21:48:44,701 INFO     Books skipped    : 5569
2014-04-14 21:48:44,701 INFO     Books DB entries deleted : 0
2014-04-14 21:48:44,702 INFO     Books in archives: 0
2014-04-14 21:48:44,702 INFO     Archives scanned : 35
2014-04-14 21:48:44,703 INFO     Archives skipped : 127
2014-04-14 21:48:44,703 INFO     Bad archives     : 0
2014-04-14 21:48:44,704 INFO     Time estimated:0 hours, 3 minutes, 43 seconds.
2014-04-14 22:22:24,055 INFO     Starting sopds-scan...
2014-04-14 22:25:31,667 INFO     Books added      : 0
2014-04-14 22:25:31,668 INFO     Books skipped    : 5569
2014-04-14 22:25:31,668 INFO     Books DB entries deleted : 0
2014-04-14 22:25:31,668 INFO     Books in archives: 0
2014-04-14 22:25:31,669 INFO     Archives scanned : 35
2014-04-14 22:25:31,669 INFO     Archives skipped : 127
2014-04-14 22:25:31,669 INFO     Bad archives     : 0
2014-04-14 22:25:31,670 INFO     Time estimated:0 hours, 3 minutes, 7 seconds.
2014-04-14 22:26:55,890 INFO      ***** Starting sopds-scan...
2014-04-14 22:29:43,030 INFO     Books added      : 0
2014-04-14 22:29:43,031 INFO     Books skipped    : 5569
2014-04-14 22:29:43,031 INFO     Books DB entries deleted : 0
2014-04-14 22:29:43,031 INFO     Books in archives: 0
2014-04-14 22:29:43,031 INFO     Archives scanned : 35
2014-04-14 22:29:43,032 INFO     Archives skipped : 127
2014-04-14 22:29:43,032 INFO     Bad archives     : 0
2014-04-14 22:29:43,032 INFO     Time estimated:0 hours, 2 minutes, 47 seconds.
2014-04-15 01:00:03,585 INFO      ***** Starting sopds-scan...
2014-04-15 01:03:13,938 INFO     Books added      : 0
2014-04-15 01:03:13,955 INFO     Books skipped    : 5569
2014-04-15 01:03:13,955 INFO     Books DB entries deleted : 0
2014-04-15 01:03:13,955 INFO     Books in archives: 0
2014-04-15 01:03:13,956 INFO     Archives scanned : 35
2014-04-15 01:03:13,956 INFO     Archives skipped : 127
2014-04-15 01:03:13,956 INFO     Bad archives     : 0
2014-04-15 01:03:13,956 INFO     Time estimated:0 hours, 3 minutes, 10 seconds.
2014-04-16 01:00:03,093 INFO      ***** Starting sopds-scan...
2014-04-16 01:03:09,771 INFO     Books added      : 0
2014-04-16 01:03:09,791 INFO     Books skipped    : 5569
2014-04-16 01:03:09,791 INFO     Books DB entries deleted : 0
2014-04-16 01:03:09,792 INFO     Books in archives: 0
2014-04-16 01:03:09,792 INFO     Archives scanned : 35
2014-04-16 01:03:09,792 INFO     Archives skipped : 127
2014-04-16 01:03:09,792 INFO     Bad archives     : 0
2014-04-16 01:03:09,792 INFO     Time estimated:0 hours, 3 minutes, 6 seconds.
2014-04-16 20:46:00,223 INFO      ***** Starting sopds-scan...
2014-04-16 20:49:32,500 INFO     Books added      : 0
2014-04-16 20:49:32,520 INFO     Books skipped    : 5569
2014-04-16 20:49:32,520 INFO     Books DB entries deleted : 0
2014-04-16 20:49:32,521 INFO     Books in archives: 0
2014-04-16 20:49:32,521 INFO     Archives scanned : 35
2014-04-16 20:49:32,521 INFO     Archives skipped : 127
2014-04-16 20:49:32,522 INFO     Bad archives     : 0
2014-04-16 20:49:32,522 INFO     Time estimated:0 hours, 3 minutes, 32 seconds.
2014-04-16 21:10:36,116 INFO      ***** Starting sopds-scan...
2014-04-16 21:13:31,660 INFO     Books added      : 0
2014-04-16 21:13:31,660 INFO     Books skipped    : 5569
2014-04-16 21:13:31,661 INFO     Books DB entries deleted : 0
2014-04-16 21:13:31,661 INFO     Books in archives: 0
2014-04-16 21:13:31,662 INFO     Archives scanned : 35
2014-04-16 21:13:31,662 INFO     Archives skipped : 127
2014-04-16 21:13:31,662 INFO     Bad archives     : 0
2014-04-16 21:13:31,663 INFO     Time estimated:0 hours, 2 minutes, 55 seconds.
2014-04-17 01:00:03,900 INFO      ***** Starting sopds-scan...
2014-04-17 01:03:08,967 INFO     Books added      : 0
2014-04-17 01:03:08,982 INFO     Books skipped    : 5569
2014-04-17 01:03:08,982 INFO     Books DB entries deleted : 0
2014-04-17 01:03:08,982 INFO     Books in archives: 0
2014-04-17 01:03:08,982 INFO     Archives scanned : 35
2014-04-17 01:03:08,983 INFO     Archives skipped : 127
2014-04-17 01:03:08,983 INFO     Bad archives     : 0
2014-04-17 01:03:08,983 INFO     Time estimated:0 hours, 3 minutes, 5 seconds.
2014-04-18 01:00:05,077 INFO      ***** Starting sopds-scan...
2014-04-18 01:03:07,394 INFO     Books added      : 0
2014-04-18 01:03:07,419 INFO     Books skipped    : 5569
2014-04-18 01:03:07,419 INFO     Books DB entries deleted : 0
2014-04-18 01:03:07,419 INFO     Books in archives: 0
2014-04-18 01:03:07,420 INFO     Archives scanned : 35
2014-04-18 01:03:07,420 INFO     Archives skipped : 127
2014-04-18 01:03:07,420 INFO     Bad archives     : 0
2014-04-18 01:03:07,420 INFO     Time estimated:0 hours, 3 minutes, 2 seconds.
2014-04-18 20:51:20,884 INFO      ***** Starting sopds-scan...
2014-04-18 20:54:39,417 INFO     Books added      : 0
2014-04-18 20:54:39,456 INFO     Books skipped    : 3321
2014-04-18 20:59:05,436 INFO      ***** Starting sopds-scan...
2014-04-18 21:02:06,398 INFO     Books added      : 0
2014-04-18 21:02:06,398 INFO     Books skipped    : 3321
2014-04-18 21:02:06,399 INFO     Books DB entries deleted : 0
2014-04-18 21:02:06,399 INFO     Books in archives: 0
2014-04-18 21:02:06,400 INFO     Archives scanned : 35
2014-04-18 21:02:06,400 INFO     Archives skipped : 127
2014-04-18 21:02:06,401 INFO     Bad archives     : 0
2014-04-18 21:02:06,401 INFO     Time estimated:0 hours, 3 minutes, 1 seconds.
2014-04-18 21:42:46,144 INFO      ***** Starting sopds-scan...
Traceback (most recent call last):
  File "./sopdsd.py", line 184, in <module>
    daemon.start()
  File "./sopdsd.py", line 90, in start
    self.run()
  File "./sopdsd.py", line 170, in run
    scanner.log_options()
NameError: global name 'scanner' is not defined
2014-04-20 21:17:10,523 INFO      ***** Starting sopds-scan...
2014-04-20 21:20:17,078 INFO     Books added      : 0
2014-04-20 21:20:17,078 INFO     Books skipped    : 5569
2014-04-20 21:20:17,079 INFO     Books DB entries deleted : 0
2014-04-20 21:20:17,079 INFO     Books in archives: 0
2014-04-20 21:20:17,079 INFO     Archives scanned : 35
2014-04-20 21:20:17,080 INFO     Archives skipped : 127
2014-04-20 21:20:17,080 INFO     Bad archives     : 0
2014-04-20 21:20:17,081 INFO     Time estimated:0 hours, 3 minutes, 6 seconds.
2014-04-20 21:27:44,940 INFO      ***** Starting sopds-scan...
2014-04-20 21:31:54,480 INFO     Books added      : 0
2014-04-20 21:31:54,480 INFO     Books skipped    : 5569
2014-04-20 21:31:54,481 INFO     Books DB entries deleted : 0
2014-04-20 21:31:54,481 INFO     Books in archives: 0
2014-04-20 21:31:54,482 INFO     Archives scanned : 35
2014-04-20 21:31:54,482 INFO     Archives skipped : 127
2014-04-20 21:31:54,483 INFO     Bad archives     : 0
2014-04-20 21:31:54,483 INFO     Time estimated:0 hours, 4 minutes, 11 seconds.
2014-04-20 21:35:45,579 INFO      ***** Starting sopds-scan...
2014-04-20 21:39:27,670 INFO      ***** Starting sopds-scan...
2014-04-20 21:40:07,558 INFO      ***** Starting sopds-scan...
2014-04-20 21:41:09,482 INFO      ***** Starting sopds-scan...
2014-04-20 21:41:49,286 INFO      ***** Starting sopds-scan...
Traceback (most recent call last):
  File "./sopdsd.py", line 184, in <module>
    daemon.start()
  File "./sopdsd.py", line 90, in start
    self.run()
  File "./sopdsd.py", line 171, in run
    self.scanner.scan_all()
  File "/home/www/opds/py/sopdscan.py", line 88, in scan_all
    self.opdsdb.avail_check_prepare()
  File "/home/www/opds/py/sopdsdb.py", line 820, in avail_check_prepare
    cursor.execute(sql)
  File "/usr/lib/python3.3/site-packages/mysql/connector/cursor.py", line 508, in execute
    self._handle_result(self._connection.cmd_query(stmt))
  File "/usr/lib/python3.3/site-packages/mysql/connector/connection.py", line 636, in cmd_query
    result = self._handle_result(self._send_cmd(ServerCmd.QUERY, query))
  File "/usr/lib/python3.3/site-packages/mysql/connector/connection.py", line 554, in _handle_result
    raise errors.get_exception(packet)
mysql.connector.errors.DatabaseError: 1205 (HY000): Lock wait timeout exceeded; try restarting transaction
2014-04-20 21:43:41,567 INFO      ***** Starting sopds-scan...
2014-04-20 21:47:08,035 INFO     Books added      : 0
2014-04-20 21:47:08,036 INFO     Books skipped    : 5569
2014-04-20 21:47:08,036 INFO     Books DB entries deleted : 0
2014-04-20 21:47:08,037 INFO     Books in archives: 0
2014-04-20 21:47:08,037 INFO     Archives scanned : 35
2014-04-20 21:47:08,037 INFO     Archives skipped : 127
2014-04-20 21:47:08,038 INFO     Bad archives     : 0
2014-04-20 21:47:08,038 INFO     Time estimated:0 hours, 3 minutes, 26 seconds.
2014-04-20 21:47:38,067 INFO      ***** Starting sopds-scan...
2014-04-20 21:51:13,077 INFO     Books added      : 0
2014-04-20 21:51:13,078 INFO     Books skipped    : 11138
2014-04-20 21:51:13,084 INFO     Books DB entries deleted : 0
2014-04-20 21:51:13,085 INFO     Books in archives: 0
2014-04-20 21:51:13,085 INFO     Archives scanned : 70
2014-04-20 21:51:13,086 INFO     Archives skipped : 254
2014-04-20 21:51:13,086 INFO     Bad archives     : 0
2014-04-20 21:51:13,086 INFO     Time estimated:0 hours, 7 minutes, 31 seconds.
2014-04-20 21:52:11,941 INFO      ***** Starting sopds-scan...
+1 −224
Original line number Diff line number Diff line
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import os
import sopdsdb
import sopdsparse
import time
import datetime
import sopdscfg
import base64
import zipf
import logging
from sopdscan import opdsScanner
from optparse import OptionParser
from sys import argv

class opdsScanner:
    """Library scanner: walks cfg.ROOT_LIB, processes plain book files and
    ZIP archives, optionally parses fb2 metadata (title, language,
    annotation, authors, genres, series, cover image) and stores results
    in the MySQL database through sopdsdb.opdsDatabase.
    """

    def __init__(self, cfg, verbose=False):
        """cfg: sopdscfg configuration object (DB credentials, paths, flags).
        verbose: when True, duplicate log output to the console at DEBUG level.
        """
        self.VERBOSE = verbose
        self.cfg = cfg
        self.opdsdb = None
        self.fb2parser = None
        self.init_stats()
        self.init_logger()
        zipf.ZIP_CODEPAGE = self.cfg.ZIP_CODEPAGE
        # Recognized book extensions (as configured, e.g. '.fb2'), used by processfile()
        self.extensions_set = set(self.cfg.EXT_LIST)

    def init_logger(self):
        """Configure root logging.

        A file handler is added only when a log level is configured; a DEBUG
        console handler is added in verbose mode.

        Fix: the original passed ``self.fh`` to basicConfig unconditionally,
        which raised AttributeError when LOGLEVEL was NOTSET (fh never created).
        Handlers are now collected conditionally.
        """
        handlers = []
        if self.cfg.LOGLEVEL != logging.NOTSET:
            # Handler writing log records to the configured log file
            self.fh = logging.FileHandler(self.cfg.LOGFILE)
            self.fh.setLevel(self.cfg.LOGLEVEL)
            handlers.append(self.fh)

        if self.VERBOSE:
            # Console handler with maximum verbosity
            self.ch = logging.StreamHandler()
            self.ch.setLevel(logging.DEBUG)
            handlers.append(self.ch)

        logformat = '%(asctime)s %(levelname)-8s %(message)s'
        level = logging.DEBUG if self.VERBOSE else logging.INFO
        logging.basicConfig(format=logformat, level=level, handlers=tuple(handlers))

    def init_stats(self):
        """Reset all scan counters and start the wall-clock timer."""
        self.t1 = datetime.timedelta(seconds=time.time())
        self.t2 = self.t1
        self.books_added = 0
        self.books_skipped = 0
        self.books_deleted = 0
        self.arch_scanned = 0
        self.arch_skipped = 0
        self.bad_archives = 0
        self.books_in_archives = 0

    def log_options(self):
        """Log the scan banner and (at DEBUG level) the effective options."""
        logging.info(' ***** Starting sopds-scan...')
        logging.debug('OPTIONS SET')
        if self.cfg.CONFIGFILE != None:     logging.debug('configfile = ' + self.cfg.CONFIGFILE)
        if self.cfg.ROOT_LIB != None:       logging.debug('root_lib = ' + self.cfg.ROOT_LIB)
        if self.cfg.FB2TOEPUB_PATH != None: logging.debug('fb2toepub = ' + self.cfg.FB2TOEPUB_PATH)
        if self.cfg.FB2TOMOBI_PATH != None: logging.debug('fb2tomobi = ' + self.cfg.FB2TOMOBI_PATH)
        if self.cfg.TEMP_DIR != None:       logging.debug('temp_dir = ' + self.cfg.TEMP_DIR)

    def log_stats(self):
        """Stop the timer and log the accumulated scan statistics."""
        self.t2 = datetime.timedelta(seconds=time.time())
        logging.info('Books added      : ' + str(self.books_added))
        logging.info('Books skipped    : ' + str(self.books_skipped))
        if self.cfg.DELETE_LOGICAL:
            logging.info('Books deleted    : ' + str(self.books_deleted))
        else:
            logging.info('Books DB entries deleted : ' + str(self.books_deleted))
        logging.info('Books in archives: ' + str(self.books_in_archives))
        logging.info('Archives scanned : ' + str(self.arch_scanned))
        logging.info('Archives skipped : ' + str(self.arch_skipped))
        logging.info('Bad archives     : ' + str(self.bad_archives))

        t = self.t2 - self.t1
        seconds = t.seconds % 60
        minutes = ((t.seconds - seconds) // 60) % 60
        hours = t.seconds // 3600
        logging.info('Time estimated:' + str(hours) + ' hours, ' + str(minutes) + ' minutes, ' + str(seconds) + ' seconds.')

    def scan_all(self):
        """Full library scan: open the DB, walk ROOT_LIB, process every file,
        then purge unavailable books and deduplicate.

        Opens and closes its own opdsDatabase connection (self.opdsdb is
        non-None only for the duration of the scan).
        """
        self.opdsdb = sopdsdb.opdsDatabase(self.cfg.DB_NAME, self.cfg.DB_USER, self.cfg.DB_PASS, self.cfg.DB_HOST, self.cfg.ROOT_LIB)
        self.opdsdb.openDB()
        self.opdsdb.avail_check_prepare()

        if self.cfg.COVER_EXTRACT:
            if not os.path.isdir(sopdscfg.COVER_PATH):
                os.mkdir(sopdscfg.COVER_PATH)

        self.fb2parser = sopdsparse.fb2parser(self.cfg.COVER_EXTRACT)

        for full_path, dirs, files in os.walk(self.cfg.ROOT_LIB):
            for name in files:
                file = os.path.join(full_path, name)
                (n, e) = os.path.splitext(name)
                if e.lower() == '.zip':
                    if self.cfg.ZIPSCAN:
                        self.processzip(name, full_path, file)
                else:
                    file_size = os.path.getsize(file)
                    self.processfile(name, full_path, file, 0, file_size)

        self.opdsdb.commit()
        # Books no longer present on disk are either flagged or physically removed
        if self.cfg.DELETE_LOGICAL:
            self.books_deleted = self.opdsdb.books_del_logical()
        else:
            self.books_deleted = self.opdsdb.books_del_phisical()
        self.opdsdb.update_double()
        self.opdsdb.closeDB()
        self.opdsdb = None

    def processzip(self, name, full_path, file):
        """Scan one ZIP archive: process each member as a book file unless the
        archive was already scanned (and ZIPRESCAN is off).

        Errors in a single member are logged and skipped; a corrupt archive is
        counted in bad_archives. Narrowed except clauses so that
        KeyboardInterrupt/SystemExit still propagate.
        """
        rel_file = os.path.relpath(file, self.cfg.ROOT_LIB)
        if self.cfg.ZIPRESCAN or self.opdsdb.zipisscanned(rel_file, 1) == 0:
            cat_id = self.opdsdb.addcattree(rel_file, 1)
            try:
                z = zipf.ZipFile(file, 'r', allowZip64=True)
                filelist = z.namelist()
                for n in filelist:
                    try:
                        logging.debug('Start process ZIP file = ' + file + ' book file = ' + n)
                        file_size = z.getinfo(n).file_size
                        self.processfile(n, file, z.open(n), 1, file_size, cat_id=cat_id)
                    except Exception:
                        logging.error('Error processing ZIP file = ' + file + ' book file = ' + n)
                z.close()
                self.arch_scanned += 1
            except Exception:
                logging.error('Error while read ZIP archive. File ' + file + ' corrupt.')
                self.bad_archives += 1
        else:
            self.arch_skipped += 1
            logging.debug('Skip ZIP archive ' + rel_file + '. Already scanned.')

    def processfile(self, name, full_path, file, archive=0, file_size=0, cat_id=0):
        """Register one book file in the database.

        name: file name; full_path: directory (or archive path when archive=1);
        file: filesystem path (str) or an open file object from a ZIP member;
        archive: 1 when the file comes from an archive; cat_id: category id of
        the containing archive (computed here for plain files).

        Skips files whose extension is not in extensions_set, and books already
        present in the DB. For .fb2 files (when FB2PARSE is on) extracts
        metadata and, optionally, the cover image.
        """
        (n, e) = os.path.splitext(name)
        if e.lower() in self.extensions_set:
            rel_path = os.path.relpath(full_path, self.cfg.ROOT_LIB)
            logging.debug("Attempt to add book " + rel_path + "/" + name)
            self.fb2parser.reset()
            if self.opdsdb.findbook(name, rel_path, 1) == 0:
                if archive == 0:
                    cat_id = self.opdsdb.addcattree(rel_path, archive)
                title = ''
                lang = ''
                annotation = ''
                docdate = ''

                if e.lower() == '.fb2' and self.cfg.FB2PARSE:
                    if isinstance(file, str):
                        f = open(file, 'rb')
                    else:
                        f = file
                    self.fb2parser.parse(f, self.cfg.FB2HSIZE)
                    f.close()

                    if len(self.fb2parser.lang.getvalue()) > 0:
                        lang = self.fb2parser.lang.getvalue()[0].strip(' \'\"')
                    if len(self.fb2parser.book_title.getvalue()) > 0:
                        title = self.fb2parser.book_title.getvalue()[0].strip(' \'\"\&-.#\\\`')
                    if len(self.fb2parser.annotation.getvalue()) > 0:
                        # Annotation is capped at 10000 characters for the DB column
                        annotation = ('\n'.join(self.fb2parser.annotation.getvalue()))[:10000]
                    if len(self.fb2parser.docdate.getvalue()) > 0:
                        docdate = self.fb2parser.docdate.getvalue()[0].strip()

                    if self.fb2parser.parse_error != 0:
                        logging.warning(rel_path + ' - ' + name + ' fb2 parse warning [' + self.fb2parser.parse_errormsg + ']')

                if title == '': title = n

                book_id = self.opdsdb.addbook(name, rel_path, cat_id, e, title, annotation, docdate, lang, file_size, archive, self.cfg.DUBLICATES_FIND)
                self.books_added += 1

                if e.lower() == '.fb2' and self.cfg.FB2PARSE and self.cfg.COVER_EXTRACT:
                    try:
                        # Fix: was a bare create_cover(book_id) call, which raised
                        # NameError (method must be called on self) and was then
                        # swallowed by the except clause below.
                        self.create_cover(book_id)
                    except Exception:
                        logging.error('Error extract cover from file ' + name)

                if archive == 1:
                    self.books_in_archives += 1
                logging.debug("Book " + rel_path + "/" + name + " Added ok.")

                # Authors: last/first names come from parallel parser arrays
                idx = 0
                for l in self.fb2parser.author_last.getvalue():
                    last_name = l.strip(' \'\"\&-.#\\\`')
                    first_name = self.fb2parser.author_first.getvalue()[idx].strip(' \'\"\&-.#\\\`')
                    author_id = self.opdsdb.addauthor(first_name, last_name)
                    self.opdsdb.addbauthor(book_id, author_id)
                    idx += 1
                for l in self.fb2parser.genre.getvalue():
                    self.opdsdb.addbgenre(book_id, self.opdsdb.addgenre(l.lower().strip(' \'\"')))
                for l in self.fb2parser.series.getattrs('name'):
                    self.opdsdb.addbseries(book_id, self.opdsdb.addseries(l.strip()))
                if not self.cfg.SINGLE_COMMIT: self.opdsdb.commit()

            else:
                self.books_skipped += 1
                logging.debug("Book " + rel_path + "/" + name + " Already in DB.")

    def create_cover(self, book_id):
        """Write the cover image extracted by the fb2 parser to
        sopdscfg.COVER_PATH/<book_id>.<ext> and register it in the DB.

        The file extension is chosen from the image content-type, falling back
        to the extension of the cover id attribute, then to '.img'. When the
        content-type is missing, only a DB record with empty name/type is added.
        """
        ictype = self.fb2parser.cover_image.getattr('content-type')
        coverid = self.fb2parser.cover_image.getattr('id')
        fn = ''
        if ictype == None:
            ictype = ''
        else:
            ictype = ictype.lower()
            if ictype == 'image/jpeg' or ictype == 'image/jpg':
                fn = str(book_id) + '.jpg'
            elif ictype == 'image/png':
                fn = str(book_id) + '.png'
            else:
                if coverid != None:
                    (f, e) = os.path.splitext(coverid)
                else:
                    e = '.img'
                fn = str(book_id) + e

            fp = os.path.join(sopdscfg.COVER_PATH, fn)
            if len(self.fb2parser.cover_image.cover_data) > 0:
                # Cover payload is base64-encoded inside the fb2 binary element
                img = open(fp, 'wb')
                s = self.fb2parser.cover_image.cover_data
                dstr = base64.b64decode(s)
                img.write(dstr)
                img.close()
        self.opdsdb.addcover(book_id, fn, ictype)

if (__name__=="__main__"):
    parser=OptionParser(conflict_handler="resolve", version="sopds-scan.py. Version "+sopdscfg.VERSION, add_help_option=True, usage='sopds-scan.py [options]',description='sopds-scan.py: Simple OPDS Scanner - programm for scan your e-books directory and store data to MYSQL database.')
    parser.add_option('-v','--verbose', action='store_true', dest='verbose', default=False, help='Enable verbose output')
Loading