Non-initialized variable fix.
[openlib.git] / fb2_process.py
1 #!/usr/bin/python
2 # -*- coding: utf-8 -*-
3
import os
import re
import shutil
import subprocess
import sys

import db
import fb2_meta
10
# Module-level cache of tag rewrite rules: a list of
# (compiled_regex, replacement_tag) pairs, or None until it has been loaded.
mapping = None

def GetTagsMapping(db):
  """Load the tag rewrite rules from the database into the module cache.

  Reads every (tag_mask, tag_result) row from metadata.tags_mapping, compiles
  tag_mask as a regular expression, encodes tag_result as UTF-8 and stores
  the resulting list of pairs in the module-level ``mapping``.

  Args:
    db: an object providing cursor(); the cursor must provide execute() and
      fetchall().  Note that this parameter shadows the imported db module
      inside this function.

  Raises:
    Whatever the cursor, re.compile() or encode() raise.  In that case the
    previously cached ``mapping`` is left untouched.
  """
  global mapping
  c = db.cursor()
  c.execute("select tag_mask,tag_result from metadata.tags_mapping")
  # Build the compiled list locally and publish it only once every row has
  # been processed.  Otherwise a bad row would leave the raw database rows in
  # the cache, which the lookup code cannot use and would never reload.
  compiled = [(re.compile(tag_mask), tag_result.encode('utf-8'))
              for tag_mask, tag_result in c.fetchall()]
  mapping = compiled
22   
def Matches(tag):
  """Translate one tag through the cached rewrite rules.

  Rules are tried in order; the replacement of the first rule whose regex
  matches at the start of ``tag`` is returned.  When no rule matches, ``tag``
  is returned unchanged.  The module-level ``mapping`` must already hold the
  list of (compiled_regex, replacement) pairs.
  """
  global mapping
  replacements = (rule[1] for rule in mapping if rule[0].match(tag))
  return next(replacements, tag)
29
def NewTags(tags):
  """Return the de-duplicated list of tags after applying the rewrite rules.

  The rules are loaded from db.database through GetTagsMapping() whenever the
  module-level ``mapping`` cache is empty or not yet loaded.  Each tag is
  passed through Matches(); the order of the returned list is unspecified
  because duplicates are removed with a set.
  """
  global mapping
  if not mapping:
    GetTagsMapping(db.database)
  return list(set(Matches(tag) for tag in tags))
38
class MetaData:
  """Book record built from parsed metadata and registered through db.

  Constructing an instance has side effects.  Unless the rewritten tag list
  contains 'trash', the constructor creates the book through db.CreateBook()
  and links it to its authors, languages, publisher, series and tags.

  After construction ``state`` is either "trash" (nothing was stored) or
  "done" (the book was stored and ``book_id``, ``path`` and ``dataname`` are
  set).
  """

  def GetPath(self):
    """Compute the storage directory and data file name of the book.

    Sets:
      sort_title, sort_author: results of db.SortName() / db.SortAuthorName()
        with spaces replaced by underscores.
      path: "<a>/<ab>/<abcd>/<author>/<title> (<book_id>)" where the first
        three components are the 1-, 2- and 4-character prefixes of
        sort_author, <author> is sort_author cut to 32 and <title> is
        sort_title cut to 64.
      dataname: first 64 characters of the title, ' -- ', first 32 characters
        of the author, with every '/' removed.

    title and author are decoded as UTF-8 here; __init__ stores them as
    UTF-8 encoded strings.
    """
    self.sort_title = db.SortName(self.title).replace(' ', '_')
    self.sort_author = db.SortAuthorName(self.author).replace(' ', '_')
    self.path = "%s/%s/%s/%s/%s (%d)" % (
        self.sort_author[0],
        self.sort_author[0:2],
        self.sort_author[0:4],
        self.sort_author[0:32],
        self.sort_title[0:64],
        self.book_id)
    short_title = self.title.decode('utf-8')[0:64]
    short_author = self.author.decode('utf-8')[0:32]
    self.dataname = (short_title + ' -- ' + short_author).replace('/', '')

  def __init__(self, meta_dict, size):
    """Extract the metadata fields and store the book in the database.

    Args:
      meta_dict: mapping of metadata field names to values.  The keys read
        are 'tags', 'authors', 'languages', 'publisher', 'book_title',
        'pubdate', 'isbn', 'series', 'series_index', 'cover' and 'comments'.
        A missing or unusable field falls back to a default instead of
        failing.
      size: size of the book file; stored and passed on to db.SetPath().

    Raises:
      Whatever db.CreateBook(), the db.LinkBookTo*() calls, db.StoreComment(),
      GetPath() or db.SetPath() raise.  Failures while reading individual
      fields are absorbed.
    """
    self.size = size
    # Optional attributes get a value up front so that they exist on every
    # instance, whether or not the corresponding field is present.
    self.publisher = None
    self.series = None
    self.series_idx = None
    self.cover = None

    try:
      tags = NewTags(meta_dict['tags'])
    except Exception:
      tags = ['other']
    if 'trash' in tags:
      # Blacklisted book: nothing is written to the database.
      self.state = "trash"
      return
    self.tags = tags
    # The first tag is only used to label a book without a usable author;
    # the fallback text means "genre unknown".
    tag = tags[0] if tags else 'Жанр неизвестен'

    try:
      self.author = meta_dict['authors'][0].encode('utf-8')
      self.authors = meta_dict['authors']
    except Exception:
      # Placeholder meaning "unknown author (<tag>)".
      self.author = 'Неизвестный Автор (%s)' % tag
      self.authors = []

    # Always a set, including when there are no authors.
    author_ids = set()
    try:
      for author in self.authors:
        encoded_author = author.encode('utf-8')
        print(encoded_author)
        author_ids.add(db.GetOrCreateAuthor(encoded_author))
    except Exception:
      # Best effort: keep the ids collected before the failure.
      pass

    try:
      self.langs = meta_dict['languages']
    except Exception:
      self.langs = ['ru']
    lang_ids = set()
    try:
      # Iterate self.langs rather than meta_dict['languages'] so that the
      # default language is linked to the book as well.
      for lang in self.langs:
        lang_ids.add(db.GetOrCreateLang(lang.encode('utf-8')))
    except Exception:
      # Best effort: keep the ids collected before the failure.
      pass

    try:
      self.publisher = meta_dict['publisher'].encode('utf-8')
      pub_id = db.GetOrCreatePublisher(self.publisher)
    except Exception:
      pub_id = None

    try:
      title = meta_dict['book_title'].encode('utf-8')
    except Exception:
      # Placeholder meaning "title unknown".
      title = 'Название неизвестно'
    self.title = title

    try:
      pub_date = meta_dict['pubdate']
    except Exception:
      pub_date = None
    self.pub_date = pub_date

    try:
      isbn = meta_dict['isbn'].encode('utf-8')
    except Exception:
      isbn = None
    self.isbn = isbn

    try:
      self.series = meta_dict['series'].encode('utf-8')
      # NOTE(review): the other GetOrCreate* calls receive the UTF-8 encoded
      # value, this one receives the raw field -- confirm which form
      # db.GetOrCreateSeries() expects.
      ser_id = db.GetOrCreateSeries(meta_dict['series'])
    except Exception:
      ser_id = None
    try:
      self.series_idx = meta_dict['series_index']
      # Only the part before the first comma is stored as the number.
      ser_num = meta_dict['series_index'].split(',')[0]
    except Exception:
      ser_num = None

    tag_ids = set()
    try:
      for tag in tags:
        tag_ids.add(db.GetOrCreateTag(tag))
    except Exception:
      # Best effort: keep the ids collected before the failure.
      pass

    try:
      self.cover = meta_dict['cover']
      self.has_cover = 1
    except Exception:
      self.has_cover = 0

    try:
      # Comments are capped at 20000 characters before encoding.
      self.comments = meta_dict['comments'][:20000].encode('utf-8')
    except Exception:
      self.comments = ''

    book_id = db.CreateBook(title, pub_date, ser_num, isbn)
    self.book_id = book_id

    db.LinkBookToAuthors(book_id, author_ids)
    db.LinkBookToLangs(book_id, lang_ids)
    if pub_id:
      db.LinkBookToPublishers(book_id, pub_id)
    if ser_id:
      db.LinkBookToSeries(book_id, ser_id)
    db.LinkBookToTags(book_id, tag_ids)
    if self.comments:
      db.StoreComment(book_id, self.comments)

    self.GetPath()
    db.SetPath(self.book_id, self.path, self.dataname, self.size, self.has_cover)
    self.state = "done"
162
def ProcessFile(filename):
  """Import one FB2 file into the library.

  The file's metadata is parsed with fb2_meta.get_metadata() and turned into
  a MetaData record.  Depending on the record's state:

  * "done":  the file is moved to
    db.file_root + book.path + '/' + book.dataname + '.fb2', the cover (if
    any) is written next to it as cover.jpg, and the transaction is
    committed.
  * "trash": the file is deleted.

  Any failure, including a file whose metadata cannot be parsed, moves the
  file to db.failed_files and rolls the transaction back, so one bad file
  does not abort a whole batch.

  Args:
    filename: path of the file to import.
  """
  # Where the file currently lives; updated once it has been moved into the
  # library so that the failure handler can still find it.
  current_path = filename
  try:
    size = os.path.getsize(filename)
    # 'with' closes the stream even when the parser raises.
    with open(filename, 'rb') as stream:
      meta = fb2_meta.get_metadata(stream)

    book = MetaData(meta.__dict__, size)

    if book.state == "done":
      new_dir_path = db.file_root + book.path
      new_file_path = new_dir_path + '/' + book.dataname + '.fb2'

      os.makedirs(new_dir_path, 0o755)
      shutil.move(filename, new_file_path)
      current_path = new_file_path

      if book.has_cover:
        cover_path = new_dir_path + '/cover.jpg'
        print("Book has cover, try to store to " + cover_path)
        # The decoded cover is binary data, so write it in binary mode.
        with open(cover_path, 'wb') as coverfile:
          coverfile.write(book.cover.decode('base64'))

      db.Commit()
      print("Moved to " + new_dir_path)

    elif book.state == "trash":
      print("Tags blacklisted, trashing")
      os.remove(filename)

    else:
      # Not expected; route it through the common failure handling below.
      raise ValueError("unexpected book state %r" % (book.state,))

  except Exception:
    # The directory created above (and a partial cover) may be left behind.
    try:
      shutil.move(current_path, db.failed_files + os.path.basename(filename))
      print("Moved to failed_files")
    finally:
      # Roll back even when the move itself fails, so that the next file
      # does not inherit a half-finished transaction.
      db.Rollback()
208
def ProcessDir(dirname):
  """Run ProcessFile() on every entry of ``dirname`` ending in ".fb2".

  Entries are taken from a single os.listdir() call and are not searched
  recursively.  A "Processing <name>" line is printed before each file.
  """
  for entry in os.listdir(dirname):
    if not entry.endswith(".fb2"):
      continue
    print("Processing " + entry)
    ProcessFile(os.path.join(dirname, entry))
214
def DelBook(id):
  """Delete a book's files, its directory and its database record.

  Does nothing when db.PathByID() returns no path for ``id``.  Otherwise
  every entry of the book directory is removed, db.DelBook() is called, the
  now-empty directory is removed and the transaction is committed.

  Args:
    id: book identifier as accepted by db.PathByID() and db.DelBook().
  """
  rel_path = db.PathByID(id)
  # Test the relative path *before* joining it with the library root: the
  # joined path is always non-empty, and an empty relative path would make
  # the loop below delete the files directly under db.file_root.
  if not rel_path:
    return
  path = os.path.join(db.file_root, rel_path)
  for file in os.listdir(path):
    os.remove(os.path.join(path, file))
  db.DelBook(id)
  os.rmdir(path)
  db.Commit()
223
def CompressBook(id):
  """Zip a book's FB2 file in place and record the new format.

  Runs the external ``zip --move --junk-paths <file>.zip <file>`` command.
  When it exits with status 0 the book's format is changed from 'FB2' to
  'FB2.ZIP' and the transaction is committed.  Does nothing when
  db.PathByID() returns no path for ``id``.

  Args:
    id: book identifier as accepted by db.PathByID() and db.DataByID().
  """
  path = db.PathByID(id)
  if not path:
    return
  datafile = os.path.join(db.file_root, path, db.DataByID(id, 'FB2'))
  archive = datafile + '.zip'
  # File names are derived from book metadata, so they are passed as an
  # argument list without a shell instead of being quoted by hand.
  args = ['zip', '--move', '--junk-paths', archive, datafile]
  # The command was previously encoded as UTF-8; keep that for any argument
  # that is not already a byte string.
  args = [arg if isinstance(arg, bytes) else arg.encode('utf-8') for arg in args]
  print(b' '.join(args))
  try:
    status = subprocess.call(args)
  except OSError as e:
    # zip could not be started at all; treat it like a failed run.
    print("Cannot run zip: %r" % (e,))
    return
  if status == 0:
    db.ChangeBookFormat(id, 'FB2', 'FB2.ZIP')
    db.Commit()
238
def UnCompressBook(id):
  """Unzip a book's FB2.ZIP archive in place and record the new format.

  Runs the external ``unzip <archive> -d <book directory>`` command.  When it
  exits with status 0 the archive is deleted, the book's format is changed
  from 'FB2.ZIP' to 'FB2' and the transaction is committed.  Does nothing
  when db.PathByID() returns no path for ``id``.

  Args:
    id: book identifier as accepted by db.PathByID() and db.DataByID().
  """
  path = db.PathByID(id)
  if not path:
    return
  target_dir = os.path.join(db.file_root, path)
  datafile = os.path.join(db.file_root, path, db.DataByID(id, 'FB2.ZIP'))
  # File names are derived from book metadata, so they are passed as an
  # argument list without a shell instead of being quoted by hand.
  args = ['unzip', datafile, '-d', target_dir]
  # The command was previously encoded as UTF-8; keep that for any argument
  # that is not already a byte string.
  args = [arg if isinstance(arg, bytes) else arg.encode('utf-8') for arg in args]
  try:
    status = subprocess.call(args)
  except OSError as e:
    # unzip could not be started at all; treat it like a failed run.
    print("Cannot run unzip: %r" % (e,))
    return
  if status == 0:
    os.remove(datafile)
    db.ChangeBookFormat(id, 'FB2.ZIP', 'FB2')
    db.Commit()
253
def CompressAll(limit=100):
  """Compress the books returned by db.ListByFormat('FB2', limit).

  Each row's first column is passed to CompressBook().  A failure on one
  book is reported and does not stop the remaining books.

  Args:
    limit: passed through to db.ListByFormat().
  """
  for row in db.ListByFormat('FB2', limit):
    try:
      CompressBook(row[0])
    except Exception as e:
      # Best effort per book, but say what went wrong.  Catching Exception
      # rather than everything keeps Ctrl-C able to stop the batch.
      print("Failed to compress book %r: %r" % (row, e))
261
def CheckFiles(delete=0):
  """Report books whose data file is missing on disk.

  For each of the formats 'FB2' and 'FB2.ZIP', every row returned by
  db.ListByFormat(format, 1000000) is checked: the expected file is
  db.file_root / db.PathByID(id) / db.DataByID(id, format).  A running
  counter is written to stdout and a line is printed for every missing file.

  Args:
    delete: when equal to 1, the database record of each book with a missing
      file is removed with db.DelBook() and committed.
  """
  for book_format in ('FB2', 'FB2.ZIP'):
    cnt = 0
    for row in db.ListByFormat(book_format, 1000000):
      cnt += 1
      sys.stdout.write("\r%s" % cnt)
      book_id = row[0]
      datafile = os.path.join(db.file_root,
                              db.PathByID(book_id),
                              db.DataByID(book_id, book_format))
      if not os.path.exists(datafile):
        print("\r File %s not found" % datafile)
        if delete == 1:
          # Pass the id itself, as every other db call in this loop does,
          # not the whole result row.
          db.DelBook(book_id)
          db.Commit()
285
def RemoveDups(limit = 100):
  """Delete the books reported as duplicates by the database.

  Returns immediately when ``limit`` is below 2.  Otherwise, for every record
  from db.ListDups(limit), the ids returned by
  db.ListByTitleAndAuthor(rec[0], rec[1], rec[2]) are gathered into a set,
  and DelBook() is then called once for each collected id.
  """
  if limit < 2:
    return
  doomed = set()
  for rec in db.ListDups(limit):
    doomed.update(db.ListByTitleAndAuthor(rec[0], rec[1], rec[2]))
  for book_id in doomed:
    print("\r Deleting %s..." % (book_id))
    DelBook(book_id)
298
def main():
  """Import every file waiting in db.tmp_files, then run CompressAll(2000)."""
  print("Processing...\r")
  ProcessDir(db.tmp_files)
  CompressAll(2000)
303
if __name__ == "__main__":
    # Script entry point (Python 2 only).  sys is reloaded so that
    # sys.setdefaultencoding() can be called; the default string encoding is
    # then switched to UTF-8 before any book is processed.
    import sys
    reload(sys)
    sys.setdefaultencoding('utf-8')
    main()
309