Fix for an uninitialized variable.
[openlib.git] / fb2_process.py
index 38d8876e863cd2256f7a6865793d22e919de2498..595359a2fb396c54468615bc60a8a981d1a5721a 100755 (executable)
@@ -42,7 +42,7 @@ class MetaData:
     self.sort_title = db.SortName(self.title).replace(' ','_');
     self.sort_author = db.SortAuthorName(self.author).replace(' ','_');
     self.path = ("%s/%s/%s/%s/%s (%d)" % (self.sort_author[0],self.sort_author[0:2],self.sort_author[0:4],self.sort_author[0:32],self.sort_title[0:64],self.book_id))
-    self.dataname = (self.title.decode('utf-8')[0:64]+' '+self.author.decode('utf-8')[0:32]).replace('/','')
+    self.dataname = (self.title.decode('utf-8')[0:64]+' -- '+self.author.decode('utf-8')[0:32]).replace('/','')
 
   def __init__(self,meta_dict,size):
 
@@ -66,9 +66,10 @@ class MetaData:
       author_ids = set()
       try:
         for author in self.authors:
+          print author.encode('utf-8')
           author_ids.add(db.GetOrCreateAuthor(author.encode('utf-8')))
       except:
-        pass    
+        pass
     except:
       self.author='Неизвестный Автор (%s)' % (tag)
       self.authors = []
@@ -117,7 +118,7 @@ class MetaData:
       ser_id=None
     try:
       self.series_idx = meta_dict['series_index']
-      ser_num=meta_dict['series_index']
+      ser_num=meta_dict['series_index'].split(',')[0]
     except:
       ser_num=None
 
@@ -135,7 +136,10 @@ class MetaData:
       self.has_cover=0        
 
     try:
-      self.comments=meta_dict['comments'].encode('utf-8')
+      self.comments=meta_dict['comments']
+      if len(self.comments)>20000:
+        self.comments=self.comments[:20000]
+      self.comments=self.comments.encode('utf-8')
     except:
       self.comments=''  
 
@@ -256,7 +260,7 @@ def CompressAll(limit=100):
       pass  
 
 def CheckFiles(delete = 0):
-  ids = db.ListByFormat('FB2',300000)
+  ids = db.ListByFormat('FB2',1000000)
   cnt = 0
   for id in ids:
     cnt = cnt + 1;
@@ -267,7 +271,7 @@ def CheckFiles(delete = 0):
       if delete==1:
         db.DelBook(id)
         db.Commit()
-  ids = db.ListByFormat('FB2.ZIP',300000)
+  ids = db.ListByFormat('FB2.ZIP',1000000)
   cnt = 0
   for id in ids:
     cnt = cnt + 1;
@@ -279,7 +283,21 @@ def CheckFiles(delete = 0):
         db.DelBook(id)
         db.Commit()
 
+def RemoveDups(limit = 100):
+  if limit<2:
+    return
+  id_to_del=set([]) 
+  recs = db.ListDups(limit);
+  for rec in recs:
+    ids = db.ListByTitleAndAuthor(rec[0],rec[1],rec[2])
+    for id in ids:
+      id_to_del.add(id)
+  for id in id_to_del:
+    print "\r Deleting %s..." % (id)
+    DelBook(id)
+
 def main():
+  print "Processing...\r"
   ProcessDir(db.tmp_files)
   CompressAll(2000)