Commit c2a1458f authored by mitshel's avatar mitshel
Browse files

Исправлена проблема парсинга невалидного fb2: например, когда в аннотацию...

Исправлена проблема парсинга невалидного fb2: например, когда в аннотацию заталкивали несколько мегабайт данных (собственно, всю книгу),
то весь этот объём пытался передаться в MySQL, и в результате возникала ошибка.
parent 64981fb4
Loading
Loading
Loading
Loading
+8 −9
Original line number Diff line number Diff line
@@ -122,8 +122,7 @@ def processfile(db,fb2,name,full_path,file,archive=0,file_size=0,cat_id=0):
             if len(fb2.book_title.getvalue())>0:
                title=fb2.book_title.getvalue()[0].strip(' \'\"\&()-.#[]\\\`')
             if len(fb2.annotation.getvalue())>0:
                annotation='\n'.join(fb2.annotation.getvalue())
             
                annotation=('\n'.join(fb2.annotation.getvalue()))[:10000]
             
             if VERBOSE:
                if fb2.parse_error!=0:
@@ -170,22 +169,22 @@ def processzip(db,fb2,name,full_path,file):
    rel_file=os.path.relpath(file,cfg.ROOT_LIB)
    if cfg.ZIPRESCAN or db.zipisscanned(rel_file,1)==0:
          cat_id=db.addcattree(rel_file,1)
#       try:
       try:
          z = zipf.ZipFile(file, 'r', allowZip64=True)
          filelist = z.namelist()
          for n in filelist:
#              try:
              try:
                  if VERBOSE:
                     print('Start process ZIPped file: ',file,' file: ',n)
                  file_size=z.getinfo(n).file_size
                  processfile(db,fb2,n,file,z.open(n),1,file_size,cat_id=cat_id)
#              except:
#                  print('Error processing zip archive:',file,' file: ',n)
              except:
                  print('Error processing zip archive:',file,' file: ',n)
          z.close()
          arch_scanned+=1
#       except:
#          print('Error while read ZIP archive. File '+file+' corrupt.')
#          bad_archives+=1
       except:
          print('Error while read ZIP archive. File '+file+' corrupt.')
          bad_archives+=1
    else:
       arch_skipped+=1
       if VERBOSE:
+11 −8
Original line number Diff line number Diff line
@@ -4,8 +4,9 @@
import xml.parsers.expat

class fb2tag:
   def __init__(self,tags):
   def __init__(self,tags,value_size=0):
       self.tags=tags
       self.value_size=value_size
       self.attrs=[]
       self.index=-1
       self.size=len(self.tags)
@@ -37,6 +38,8 @@ class fb2tag:
          if self.tags[self.index]==tag:
             self.index-=1
             if self.process_value:
                if self.value_size!=0:
                   self.current_value=self.current_value[:self.value_size] 
                self.values.append(self.current_value)
                self.current_value=''
             self.process_value=False
@@ -109,14 +112,14 @@ class fb2cover(fb2tag):
class fb2parser:
   def __init__(self, readcover=0):
       self.rc=readcover
       self.author_first=fb2tag(('description','title-info','author','first-name'))
       self.author_last=fb2tag(('description','title-info','author','last-name'))
       self.genre=fb2tag(('description','title-info','genre'))
       self.lang=fb2tag(('description','title-info','lang'))
       self.book_title=fb2tag(('description','title-info','book-title'))
       self.annotation=fb2tag(('description','title-info','annotation','p'))
       self.author_first=fb2tag(('description','title-info','author','first-name'),64)
       self.author_last=fb2tag(('description','title-info','author','last-name'),64)
       self.genre=fb2tag(('description','title-info','genre'),32)
       self.lang=fb2tag(('description','title-info','lang'),16)
       self.book_title=fb2tag(('description','title-info','book-title'),256)
       self.annotation=fb2tag(('description','title-info','annotation','p'),10000)
       if self.rc!=0:
          self.cover_name = fb2tag (('description','coverpage','image'))
          self.cover_name = fb2tag (('description','coverpage','image'),32)
          self.cover_image = fb2cover (('fictionbook','binary'));
       self.stoptag='description'
       self.process_description=True