X-Git-Url: https://git.rvb.name/openlib.git/blobdiff_plain/169adb0ddaf8cc3eedfa15b500e14570bda0cdee..425585bda1b9e764533ca8a91098f66c833df5ae:/fb2_process.py diff --git a/fb2_process.py b/fb2_process.py index 38d8876..595359a 100755 --- a/fb2_process.py +++ b/fb2_process.py @@ -42,7 +42,7 @@ class MetaData: self.sort_title = db.SortName(self.title).replace(' ','_'); self.sort_author = db.SortAuthorName(self.author).replace(' ','_'); self.path = ("%s/%s/%s/%s/%s (%d)" % (self.sort_author[0],self.sort_author[0:2],self.sort_author[0:4],self.sort_author[0:32],self.sort_title[0:64],self.book_id)) - self.dataname = (self.title.decode('utf-8')[0:64]+' '+self.author.decode('utf-8')[0:32]).replace('/','') + self.dataname = (self.title.decode('utf-8')[0:64]+' -- '+self.author.decode('utf-8')[0:32]).replace('/','') def __init__(self,meta_dict,size): @@ -66,9 +66,10 @@ class MetaData: author_ids = set() try: for author in self.authors: + print author.encode('utf-8') author_ids.add(db.GetOrCreateAuthor(author.encode('utf-8'))) except: - pass + pass except: self.author='Неизвестный Автор (%s)' % (tag) self.authors = [] @@ -117,7 +118,7 @@ class MetaData: ser_id=None try: self.series_idx = meta_dict['series_index'] - ser_num=meta_dict['series_index'] + ser_num=meta_dict['series_index'].split(',')[0] except: ser_num=None @@ -135,7 +136,10 @@ class MetaData: self.has_cover=0 try: - self.comments=meta_dict['comments'].encode('utf-8') + self.comments=meta_dict['comments'] + if len(self.comments)>20000: + self.comments=self.comments[:20000] + self.comments=self.comments.encode('utf-8') except: self.comments='' @@ -256,7 +260,7 @@ def CompressAll(limit=100): pass def CheckFiles(delete = 0): - ids = db.ListByFormat('FB2',300000) + ids = db.ListByFormat('FB2',1000000) cnt = 0 for id in ids: cnt = cnt + 1; @@ -267,7 +271,7 @@ def CheckFiles(delete = 0): if delete==1: db.DelBook(id) db.Commit() - ids = db.ListByFormat('FB2.ZIP',300000) + ids = db.ListByFormat('FB2.ZIP',1000000) cnt = 0 for id in ids: cnt = cnt + 1; @@ -279,7 +283,21 @@ def CheckFiles(delete = 0): db.DelBook(id) db.Commit() +def RemoveDups(limit = 100): + if limit<2: + return + id_to_del=set([]) + recs = db.ListDups(limit); + for rec in recs: + ids = db.ListByTitleAndAuthor(rec[0],rec[1],rec[2]) + for id in ids: + id_to_del.add(id) + for id in id_to_del: + print "\r Deleting %s..." % (id) + DelBook(id) + def main(): + print "Processing...\r" ProcessDir(db.tmp_files) CompressAll(2000)