Commit 770ce17f authored by mitshel's avatar mitshel
Browse files

Исправлена ошибка сканирования, приводящая к пропуску книг вне архивов

parent 6b7f0e87
Loading
Loading
Loading
Loading
+59 −59
Original line number Diff line number Diff line
@@ -154,7 +154,7 @@ class opdsScanner:
               annotation=''
               docdate=''

            if e.lower()=='.fb2' and cfg.FB2PARSE:
               if e.lower()=='.fb2' and self.cfg.FB2PARSE:
                  if isinstance(file, str):
                     f=open(file,'rb')
                  else:
@@ -187,28 +187,28 @@ class opdsScanner:

               if archive==1:
                  self.books_in_archives+=1
            logging.debug('Added ok.')
               logging.debug("Book "+rel_path+"/"+name+" Added ok.")

               idx=0
            for l in self.fb2parse.author_last.getvalue():
               for l in self.fb2parser.author_last.getvalue():
                   last_name=l.strip(' \'\"\&-.#\\\`')
                   first_name=self.fb2parser.author_first.getvalue()[idx].strip(' \'\"\&-.#\\\`')
                   author_id=self.opdsdb.addauthor(first_name,last_name)
                   self.opdsdb.addbauthor(book_id,author_id)
                   idx+=1
            for l in self.fb2parse.genre.getvalue():
               for l in self.fb2parser.genre.getvalue():
                   self.opdsdb.addbgenre(book_id,self.opdsdb.addgenre(l.lower().strip(' \'\"')))
            for l in self.fb2parse.series.getattrs('name'):
               for l in self.fb2parser.series.getattrs('name'):
                   self.opdsdb.addbseries(book_id,self.opdsdb.addseries(l.strip()))
               if not self.cfg.SINGLE_COMMIT: self.opdsdb.commit()

            else:
               self.books_skipped+=1
            logging.debug('Already in DB.')
               logging.debug("Book "+rel_path+"/"+name+" Already in DB.")

    def create_cover(self,book_id):
        ictype=self.fb2parse.cover_image.getattr('content-type')
        coverid=self.fb2parse.cover_image.getattr('id')
        ictype=self.fb2parser.cover_image.getattr('content-type')
        coverid=self.fb2parser.cover_image.getattr('id')
        fn=''
        if ictype==None:
           ictype=''
@@ -227,9 +227,9 @@ class opdsScanner:
                 fn=str(book_id)+e

           fp=os.path.join(sopdscfg.COVER_PATH,fn)
           if len(self.fb2parse.cover_image.cover_data)>0:
           if len(self.fb2parser.cover_image.cover_data)>0:
              img=open(fp,'wb')
              s=self.fb2parse.cover_image.cover_data
              s=self.fb2parser.cover_image.cover_data
              dstr=base64.b64decode(s)
              img.write(dstr)
              img.close()

py/sopdsd.py

100644 → 100755
+136 −77
Original line number Diff line number Diff line
#!/usr/bin/env python
#http://www.jejik.com/articles/2007/02/a_simple_unix_linux_daemon_in_python/
#!/usr/bin/env python3

import sys, os, time, atexit
from signal import SIGTERM
 
class Daemon:
class Daemon(object):
    """
A generic daemon class.

Usage: subclass the Daemon class and override the run() method
    Subclass Daemon class and override the run() method.
    """
    def __init__(self, pidfile, stdin='/dev/null', stdout='/dev/null', stderr='/dev/null'):
        self.stdin = stdin
@@ -18,115 +15,177 @@ Usage: subclass the Daemon class and override the run() method
 
    def daemonize(self):
        """
do the UNIX double-fork magic, see Stevens' "Advanced
Programming in the UNIX Environment" for details (ISBN 0201563177)
http://www.erlenstar.demon.co.uk/unix/faq_2.html#SEC16
        Daemonize, do double-fork magic.
        """
        try:
            pid = os.fork()
            if pid > 0:
                # exit first parent
                # Exit first parent.
                sys.exit(0)
        except OSError, e:
            sys.stderr.write("fork #1 failed: %d (%s)\n" % (e.errno, e.strerror))
        except OSError as e:
            message = "Fork #1 failed: {}\n".format(e)
            sys.stderr.write(message)
            sys.exit(1)
 
        # decouple from parent environment
        # Decouple from parent environment.
        os.chdir("/")
        os.setsid()
        os.umask(0)
 
        # do second fork
        # Do second fork.
        try:
            pid = os.fork()
            if pid > 0:
                # exit from second parent
                # Exit from second parent.
                sys.exit(0)
        except OSError, e:
            sys.stderr.write("fork #2 failed: %d (%s)\n" % (e.errno, e.strerror))
        except OSError as e:
            message = "Fork #2 failed: {}\n".format(e)
            sys.stderr.write(message)
            sys.exit(1)
 
        # redirect standard file descriptors
        print('daemon going to background, PID: {}'.format(os.getpid(),end="\r"))
 
        # Redirect standard file descriptors.
        sys.stdout.flush()
        sys.stderr.flush()
        si = file(self.stdin, 'r')
        so = file(self.stdout, 'a+')
        se = file(self.stderr, 'a+', 0)
        si = open(self.stdin, 'r')
        so = open(self.stdout, 'a+')
        se = open(self.stderr, 'a+')
        os.dup2(si.fileno(), sys.stdin.fileno())
        os.dup2(so.fileno(), sys.stdout.fileno())
        os.dup2(se.fileno(), sys.stderr.fileno())
 
        # write pidfile
        atexit.register(self.delpid)
        # Write pidfile.
        pid = str(os.getpid())
        file(self.pidfile,'w+').write("%s\n" % pid)
        open(self.pidfile,'w+').write("{}\n".format(pid))
 
        # Register a function to clean up.
        atexit.register(self.delpid)
 
    def delpid(self):
        os.remove(self.pidfile)
 
    def start(self):
        """
Start the daemon
        Start daemon.
        """
        # Check for a pidfile to see if the daemon already runs
        # Check pidfile to see if the daemon already runs.
        try:
            pf = file(self.pidfile,'r')
            pf = open(self.pidfile,'r')
            pid = int(pf.read().strip())
            pf.close()
        except IOError:
            pid = None
 
        if pid:
            message = "pidfile %s already exist. Daemon already running?\n"
            sys.stderr.write(message % self.pidfile)
            message = "Pidfile {} already exist. Daemon already running?\n".format(self.pidfile)
            sys.stderr.write(message)
            sys.exit(1)
 
        # Start the daemon
        # Start daemon.
        self.daemonize()
        self.run()
 
    def stop(self):
    def status(self):
        """
Stop the daemon
        Get status of daemon.
        """
        # Get the pid from the pidfile
        try:
            pf = file(self.pidfile,'r')
            pf = open(self.pidfile,'r')
            pid = int(pf.read().strip())
            pf.close()
        except IOError:
            pid = None
            message = "There is not PID file. Daemon already running?\n"
            sys.stderr.write(message)
            sys.exit(1)
 
        if not pid:
            message = "pidfile %s does not exist. Daemon not running?\n"
            sys.stderr.write(message % self.pidfile)
            return # not an error in a restart
        try:
            procfile = open("/proc/{}/status".format(pid), 'r')
            procfile.close()
            message = "There is a process with the PID {}\n".format(pid)
            sys.stdout.write(message)
        except IOError:
            message = "There is not a process with the PID {}\n".format(self.pidfile)
            sys.stdout.write(message)
 
        # Try killing the daemon process
    def stop(self):
        """
        Stop the daemon.
        """
        # Get the pid from pidfile.
        try:
            pf = open(self.pidfile,'r')
            pid = int(pf.read().strip())
            pf.close()
        except IOError as e:
            message = str(e) + "\nDaemon not running?\n"
            sys.stderr.write(message)
            sys.exit(1)
 
        # Try killing daemon process.
        try:
            while 1:
            os.kill(pid, SIGTERM)
                time.sleep(0.1)
        except OSError, err:
            #FIX for Ru_ru locale
            # Anton Fischer <a.fschr@gmail.com>
            #err = str(err)
            #if err.find("No such process") > 0:
            time.sleep(1)
        except OSError as e:
            print(str(e))
            sys.exit(1)
 
        try:
            if os.path.exists(self.pidfile):
                os.remove(self.pidfile)
            #else:
            # print str(err)
            # sys.exit(1)
        except IOError as e:
            message = str(e) + "\nCan not remove pid file {}".format(self.pidfile)
            sys.stderr.write(message)
            sys.exit(1)
 
    def restart(self):
        """
Restart the daemon
        Restart daemon.
        """
        self.stop()
        time.sleep(1)
        self.start()
 
    def run(self):
        """
You should override this method when you subclass Daemon. It will be called after the process has been
daemonized by start() or restart().
        You should override this method when you subclass Daemon.
        It will be called after the process has been daemonized by start() or restart().
        """
 
class MyDaemon(Daemon):
    # Minimal example subclass: run() never returns, it just sleeps in
    # one-second steps.
    def run(self):
        while True:
            time.sleep(1)
 
if __name__ == "__main__":
    daemon = MyDaemon('/tmp/python-daemon.pid')

    # Exactly one command word is expected after the script name.
    if len(sys.argv) != 2:
        print ('show cmd deamon usage')
        print ("Usage: {} start|stop|restart|status".format(sys.argv[0]))
        sys.exit(2)

    script_name, command = sys.argv
    print('{} {}'.format(script_name, command))

    # Map each supported command word onto the daemon method that serves it.
    actions = {
        'start': daemon.start,
        'stop': daemon.stop,
        'restart': daemon.restart,
        'status': daemon.status,
    }
    handler = actions.get(command)
    if handler is None:
        print ("Unknown command")
        sys.exit(2)

    handler()
    sys.exit(0)


#scanner=opdsScanner()
#scanner.log_options()
#scanner.scan_all()
#scanner.log_stats()


#print('Start daemon...')
#d=Daemon('/var/run/sopds.pid')
#d.start()

py/sopdsd1.py

0 → 100755
+143 −0
Original line number Diff line number Diff line
#!/usr/bin/env python3
#http://www.jejik.com/articles/2007/02/a_simple_unix_linux_daemon_in_python/

import sys, os, time, atexit
from signal import SIGTERM
from sopdsscanner import opdsScanner

class Daemon:
    """
    A generic daemon class.

    Usage: subclass the Daemon class and override the run() method.

    The constructor stores the pidfile path and the three paths that the
    standard streams are redirected to once the process is daemonized.
    """
    def __init__(self, pidfile, stdin='/dev/null', stdout='/dev/null', stderr='/dev/null'):
        self.stdin = stdin
        self.stdout = stdout
        self.stderr = stderr
        self.pidfile = pidfile

    def _read_pid(self):
        """
        Return the PID stored in the pidfile.

        Returns None when the pidfile cannot be opened or does not contain
        an integer.
        """
        try:
            with open(self.pidfile, 'r') as pf:
                return int(pf.read().strip())
        except (IOError, ValueError):
            return None

    def daemonize(self):
        """
        Do the UNIX double-fork magic, see Stevens' "Advanced
        Programming in the UNIX Environment" for details (ISBN 0201563177)
        http://www.erlenstar.demon.co.uk/unix/faq_2.html#SEC16

        Both parents exit with status 0; a failed fork writes a message to
        stderr and exits with status 1. The surviving process redirects its
        standard streams, registers delpid() with atexit and writes its PID
        to the pidfile.
        """
        try:
            pid = os.fork()
            if pid > 0:
                # exit first parent
                sys.exit(0)
        except OSError as e:
            sys.stderr.write("fork #1 failed: %d (%s)\n" % (e.errno, e.strerror))
            sys.exit(1)

        # decouple from parent environment
        os.chdir("/")
        os.setsid()
        os.umask(0)

        # do second fork
        try:
            pid = os.fork()
            if pid > 0:
                # exit from second parent
                sys.exit(0)
        except OSError as e:
            sys.stderr.write("fork #2 failed: %d (%s)\n" % (e.errno, e.strerror))
            sys.exit(1)

        # redirect standard file descriptors
        sys.stdout.flush()
        sys.stderr.flush()
        si = open(self.stdin, 'r')
        so = open(self.stdout, 'a+')
        se = open(self.stderr, 'a+')
        os.dup2(si.fileno(), sys.stdin.fileno())
        os.dup2(so.fileno(), sys.stdout.fileno())
        os.dup2(se.fileno(), sys.stderr.fileno())

        # write pidfile
        atexit.register(self.delpid)
        pid = str(os.getpid())
        # open() instead of the Python 2 only file() builtin; the context
        # manager makes sure the PID is flushed to disk and the handle closed.
        with open(self.pidfile, 'w+') as pf:
            pf.write("%s\n" % pid)

    def delpid(self):
        """Remove the pidfile (registered as an atexit hook by daemonize())."""
        os.remove(self.pidfile)

    def start(self):
        """
        Start the daemon.

        Exits with status 1 if the pidfile already holds a PID; otherwise
        daemonizes the process and calls run().
        """
        # Check for a pidfile to see if the daemon already runs
        pid = self._read_pid()

        if pid:
            message = "pidfile %s already exist. Daemon already running?\n"
            sys.stderr.write(message % self.pidfile)
            sys.exit(1)

        # Start the daemon
        self.daemonize()
        self.run()

    def stop(self):
        """
        Stop the daemon.

        Sends SIGTERM to the PID from the pidfile every 0.1 s until the
        process is gone, then removes the pidfile. Returns silently (after a
        message on stderr) when no usable pidfile exists, so restart() still
        works. Exits with status 1 if the signal cannot be delivered for any
        other reason.
        """
        # Get the pid from the pidfile
        pid = self._read_pid()

        if not pid:
            message = "pidfile %s does not exist. Daemon not running?\n"
            sys.stderr.write(message % self.pidfile)
            return # not an error in a restart

        # Try killing the daemon process
        try:
            while 1:
                os.kill(pid, SIGTERM)
                time.sleep(0.1)
        except ProcessLookupError:
            # The process no longer exists. Matching on the exception type
            # (errno ESRCH) works in every locale, unlike comparing the
            # message text with "No such process".
            if os.path.exists(self.pidfile):
                os.remove(self.pidfile)
        except OSError as err:
            # Any other failure (e.g. missing permission) means the daemon
            # may still be alive, so keep its pidfile and report the error.
            sys.stderr.write("%s\n" % err)
            sys.exit(1)

    def restart(self):
        """
        Restart the daemon: stop() followed by start().
        """
        self.stop()
        self.start()

    def run(self):
        """
        You should override this method when you subclass Daemon. It will be
        called after the process has been daemonized by start() or restart().
        """

# Run one complete library scan in the foreground as soon as this file is
# executed (or imported): log the options, scan, then log the statistics.
scanner=opdsScanner()
scanner.log_options()
scanner.scan_all()
scanner.log_stats()


# Afterwards detach into the background.
# NOTE(review): Daemon.run() has an empty body in this file, so the
# daemonized process has nothing left to do after start() - confirm intent.
print('Start daemon...')
d=Daemon('/var/run/sopds.pid')
d.start()

py/sopdsscanner.py

0 → 100755
+251 −0
Original line number Diff line number Diff line
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import os
import sopdsdb
import sopdsparse
import time
import datetime
import sopdscfg
import base64
import zipf
import logging
from optparse import OptionParser
from sys import argv

class opdsScanner:
    """
    Scan the library directory and register the books found in the database.

    The scanner reads its settings through sopdscfg.cfgreader, walks
    cfg.ROOT_LIB, optionally looks inside ZIP archives, parses FB2 metadata
    with sopdsparse.fb2parser and stores the results through
    sopdsdb.opdsDatabase. Counters describing the run are kept on the
    instance and reported by log_stats().
    """

    # Characters stripped from both ends of book titles and author names:
    # space, quotes, backslash, ampersand, dash, dot, hash and backtick.
    _NAME_STRIP = ' \'"\\&-.#`'
    # Characters stripped from both ends of language codes and genre names.
    _QUOTE_STRIP = ' \'"'

    def __init__(self, configfile='', verbose=False):
        """
        Load the configuration and set up logging.

        configfile -- path of the config file; '' lets cfgreader pick its default.
        verbose    -- when true, log at DEBUG level and add a console handler.
        """
        self.VERBOSE=verbose
        self.CONFIGFILE=configfile
        self.cfg=None
        self.opdsdb=None
        self.fb2parser=None
        self.init_stats()
        self.init_config()
        self.init_logger()
        zipf.ZIP_CODEPAGE=self.cfg.ZIP_CODEPAGE
        self.extensions_set=set(self.cfg.EXT_LIST)

    def init_logger(self):
        """
        Configure the root logger.

        A file handler is attached unless cfg.LOGLEVEL is logging.NOTSET, and
        a console handler is attached in verbose mode. Only the handlers that
        were actually created are handed to logging.basicConfig().
        """
        handlers=[]
        if self.cfg.LOGLEVEL!=logging.NOTSET:
            # Create a handler that writes the log to a file
            self.fh = logging.FileHandler(self.cfg.LOGFILE)
            self.fh.setLevel(self.cfg.LOGLEVEL)
            handlers.append(self.fh)

        if self.VERBOSE:
            # Create a handler that prints the log to the console at the most verbose level
            self.ch = logging.StreamHandler()
            self.ch.setLevel(logging.DEBUG)
            handlers.append(self.ch)

        logformat='%(asctime)s %(levelname)-8s %(message)s'
        level=logging.DEBUG if self.VERBOSE else logging.INFO
        logging.basicConfig(format = logformat, level = level, handlers=handlers)

    def init_stats(self):
        """Record the start time and reset every run counter to zero."""
        self.t1=datetime.timedelta(seconds=time.time())
        self.t2=self.t1
        self.books_added   = 0
        self.books_skipped = 0
        self.books_deleted = 0
        self.arch_scanned = 0
        self.arch_skipped = 0
        self.bad_archives = 0
        self.books_in_archives = 0

    def init_config(self):
        """Create self.cfg, passing the config file path only when one was given."""
        if self.CONFIGFILE=='':
            self.cfg=sopdscfg.cfgreader()
        else:
            self.cfg=sopdscfg.cfgreader(self.CONFIGFILE)

    def log_options(self):
        """Log the start banner (INFO) and the configured paths (DEBUG)."""
        logging.info(' ***** Starting sopds-scan...')
        logging.debug('OPTIONS SET')
        if self.cfg.CONFIGFILE is not None:     logging.debug('configfile = '+self.cfg.CONFIGFILE)
        if self.cfg.ROOT_LIB is not None:       logging.debug('root_lib = '+self.cfg.ROOT_LIB)
        if self.cfg.FB2TOEPUB_PATH is not None: logging.debug('fb2toepub = '+self.cfg.FB2TOEPUB_PATH)
        if self.cfg.FB2TOMOBI_PATH is not None: logging.debug('fb2tomobi = '+self.cfg.FB2TOMOBI_PATH)
        if self.cfg.TEMP_DIR is not None:       logging.debug('temp_dir = '+self.cfg.TEMP_DIR)

    def log_stats(self):
        """Record the end time and log all counters plus the elapsed time."""
        self.t2=datetime.timedelta(seconds=time.time())
        logging.info('Books added      : '+str(self.books_added))
        logging.info('Books skipped    : '+str(self.books_skipped))
        if self.cfg.DELETE_LOGICAL:
            logging.info('Books deleted    : '+str(self.books_deleted))
        else:
            logging.info('Books DB entries deleted : '+str(self.books_deleted))
        logging.info('Books in archives: '+str(self.books_in_archives)) 
        logging.info('Archives scanned : '+str(self.arch_scanned))
        logging.info('Archives skipped : '+str(self.arch_skipped))
        logging.info('Bad archives     : '+str(self.bad_archives))

        # total_seconds() keeps whole days, which timedelta.seconds drops.
        elapsed=int((self.t2-self.t1).total_seconds())
        hours,remainder=divmod(elapsed,3600)
        minutes,seconds=divmod(remainder,60)
        logging.info('Time estimated:'+str(hours)+' hours, '+str(minutes)+' minutes, '+str(seconds)+' seconds.')

    def scan_all(self):
        """
        Scan every file below cfg.ROOT_LIB and synchronise the database.

        ZIP archives are handed to processzip() when cfg.ZIPSCAN is set and
        ignored otherwise; every other file is handed to processfile(). After
        the walk the pending changes are committed, books that were not seen
        are deleted (logically or physically, depending on
        cfg.DELETE_LOGICAL) and the database is closed.
        """
        self.opdsdb=sopdsdb.opdsDatabase(self.cfg.DB_NAME,self.cfg.DB_USER,self.cfg.DB_PASS,self.cfg.DB_HOST,self.cfg.ROOT_LIB)
        self.opdsdb.openDB()
        self.opdsdb.avail_check_prepare()

        if self.cfg.COVER_EXTRACT:
            if not os.path.isdir(sopdscfg.COVER_PATH):
                os.mkdir(sopdscfg.COVER_PATH)

        self.fb2parser=sopdsparse.fb2parser(self.cfg.COVER_EXTRACT)

        for full_path, dirs, files in os.walk(self.cfg.ROOT_LIB):
            for name in files:
                file=os.path.join(full_path,name)
                (n,e)=os.path.splitext(name)
                if e.lower() == '.zip':
                    if self.cfg.ZIPSCAN:
                        self.processzip(name,full_path,file)
                else:
                    # The else belongs to the extension test, not to ZIPSCAN:
                    # otherwise books lying outside archives are never scanned.
                    file_size=os.path.getsize(file)
                    self.processfile(name,full_path,file,0,file_size)

        self.opdsdb.commit()
        if self.cfg.DELETE_LOGICAL:
            self.books_deleted=self.opdsdb.books_del_logical()
        else:
            self.books_deleted=self.opdsdb.books_del_phisical()
        self.opdsdb.update_double()
        self.opdsdb.closeDB()
        self.opdsdb=None

    def processzip(self,name,full_path,file):
        """
        Register every member of one ZIP archive.

        name      -- file name of the archive (not used by this method).
        full_path -- directory containing the archive (not used by this method).
        file      -- full path of the archive.

        The archive is skipped when it was already scanned and cfg.ZIPRESCAN
        is off. A failure on one member is logged and the remaining members
        are still processed; a failure on the archive itself counts it as bad.
        """
        rel_file=os.path.relpath(file,self.cfg.ROOT_LIB)
        if not (self.cfg.ZIPRESCAN or self.opdsdb.zipisscanned(rel_file,1)==0):
            self.arch_skipped+=1
            logging.debug('Skip ZIP archive '+rel_file+'. Already scanned.')
            return

        cat_id=self.opdsdb.addcattree(rel_file,1)
        z=None
        try:
            z = zipf.ZipFile(file, 'r', allowZip64=True)
            for n in z.namelist():
                try:
                    logging.debug('Start process ZIP file = '+file+' book file = '+n)
                    file_size=z.getinfo(n).file_size
                    self.processfile(n,file,z.open(n),1,file_size,cat_id=cat_id)
                except Exception:
                    logging.error('Error processing ZIP file = '+file+' book file = '+n)
            self.arch_scanned+=1
        except Exception:
            logging.error('Error while read ZIP archive. File '+file+' corrupt.')
            self.bad_archives+=1
        finally:
            # Release the archive handle even when scanning failed half-way.
            if z is not None:
                z.close()

    def processfile(self,name,full_path,file,archive=0,file_size=0,cat_id=0):
        """
        Register one book in the database unless it is already known.

        name      -- file name, or member name when the book is inside an archive.
        full_path -- directory of the file, or the archive path for archive members.
        file      -- path string, or an already opened binary file object.
        archive   -- 1 when the book is inside a ZIP archive, 0 otherwise.
        file_size -- size of the book in bytes.
        cat_id    -- catalog id; looked up here for books outside archives.

        Files whose extension is not in self.extensions_set are ignored.
        Books already present in the database only increase books_skipped.
        """
        (n,e)=os.path.splitext(name)
        ext=e.lower()
        if ext not in self.extensions_set:
            return

        rel_path=os.path.relpath(full_path,self.cfg.ROOT_LIB)
        logging.debug("Attempt to add book "+rel_path+"/"+name)
        self.fb2parser.reset()

        if self.opdsdb.findbook(name,rel_path,1)!=0:
            self.books_skipped+=1
            logging.debug('Already in DB.')
            return

        if archive==0:
            cat_id=self.opdsdb.addcattree(rel_path,archive)
        title=''
        lang=''
        annotation=''
        docdate=''

        is_fb2=(ext=='.fb2' and self.cfg.FB2PARSE)
        if is_fb2:
            if isinstance(file, str):
                f=open(file,'rb')
            else:
                f=file
            try:
                self.fb2parser.parse(f,self.cfg.FB2HSIZE)
            finally:
                f.close()

            if len(self.fb2parser.lang.getvalue())>0:
                lang=self.fb2parser.lang.getvalue()[0].strip(self._QUOTE_STRIP)
            if len(self.fb2parser.book_title.getvalue())>0:
                title=self.fb2parser.book_title.getvalue()[0].strip(self._NAME_STRIP)
            if len(self.fb2parser.annotation.getvalue())>0:
                annotation=('\n'.join(self.fb2parser.annotation.getvalue()))[:10000]
            if len(self.fb2parser.docdate.getvalue())>0:
                docdate=self.fb2parser.docdate.getvalue()[0].strip()

            if self.fb2parser.parse_error!=0:
                logging.warning(rel_path+' - '+name+' fb2 parse warning ['+self.fb2parser.parse_errormsg+']')

        # Fall back to the file name (without extension) when no title was parsed.
        if title=='':
            title=n

        book_id=self.opdsdb.addbook(name,rel_path,cat_id,e,title,annotation,docdate,lang,file_size,archive,self.cfg.DUBLICATES_FIND)
        self.books_added+=1

        if is_fb2 and self.cfg.COVER_EXTRACT:
            try:
                self.create_cover(book_id)
            except Exception:
                logging.error('Error extract cover from file '+name)

        if archive==1:
            self.books_in_archives+=1
        logging.debug('Added ok.')

        first_names=self.fb2parser.author_first.getvalue()
        for idx,l in enumerate(self.fb2parser.author_last.getvalue()):
            last_name=l.strip(self._NAME_STRIP)
            # Tolerate an author entry that has a last name but no first name.
            if idx<len(first_names):
                first_name=first_names[idx].strip(self._NAME_STRIP)
            else:
                first_name=''
            author_id=self.opdsdb.addauthor(first_name,last_name)
            self.opdsdb.addbauthor(book_id,author_id)
        for l in self.fb2parser.genre.getvalue():
            self.opdsdb.addbgenre(book_id,self.opdsdb.addgenre(l.lower().strip(self._QUOTE_STRIP)))
        for l in self.fb2parser.series.getattrs('name'):
            self.opdsdb.addbseries(book_id,self.opdsdb.addseries(l.strip()))
        if not self.cfg.SINGLE_COMMIT:
            self.opdsdb.commit()

    def create_cover(self,book_id):
        """
        Save the cover parsed from the current FB2 book and register it.

        The target file name is the book id plus an extension derived from
        the image content type ('.jpg', '.png'), from the cover id, or
        '.img' as a last resort. When the parser reports no content type,
        nothing is written and the cover is registered with an empty file
        name and type.
        """
        ictype=self.fb2parser.cover_image.getattr('content-type')
        coverid=self.fb2parser.cover_image.getattr('id')
        fn=''
        if ictype is None:
            ictype=''
        else:
            ictype=ictype.lower()
            if ictype=='image/jpeg' or ictype=='image/jpg':
                fn=str(book_id)+'.jpg'
            elif ictype=='image/png':
                fn=str(book_id)+'.png'
            else:
                if coverid is not None:
                    (f,e)=os.path.splitext(coverid)
                else:
                    e='.img'
                fn=str(book_id)+e

            fp=os.path.join(sopdscfg.COVER_PATH,fn)
            if len(self.fb2parser.cover_image.cover_data)>0:
                # cover_data is passed to base64.b64decode and the result is written as binary.
                with open(fp,'wb') as img:
                    img.write(base64.b64decode(self.fb2parser.cover_image.cover_data))
        self.opdsdb.addcover(book_id,fn,ictype)


if __name__ == "__main__":
    # Command-line interface of the standalone scanner.
    parser = OptionParser(
        conflict_handler="resolve",
        version="sopds-scan.py. Version "+sopdscfg.VERSION,
        add_help_option=True,
        usage='sopds-scan.py [options]',
        description='sopds-scan.py: Simple OPDS Scanner - programm for scan your e-books directory and store data to MYSQL database.',
    )
    parser.add_option(
        '-v', '--verbose',
        action='store_true',
        dest='verbose',
        default=False,
        help='Enable verbose output',
    )
    parser.add_option(
        '-c', '--config',
        dest='configfile',
        default='',
        help='Config file pargh',
    )
    options, arguments = parser.parse_args()
    VERBOSE = options.verbose
    CFG_FILE = options.configfile

    # Run one complete scan: log the options, scan, then log the statistics.
    scanner = opdsScanner(CFG_FILE, VERBOSE)
    for step in (scanner.log_options, scanner.scan_all, scanner.log_stats):
        step()