--- /dev/null
+#!/bin/sh
+
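+# Cron front-end for the importer: unpack uploaded archives, fix ownership,
+# move settled .fb2 files to the processing area, then run fb2_process.py.
+# A pidfile guards against overlapping runs.
+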
+PIDFILE=/var/run/openlib.pid
+
+if [ -e "$PIDFILE" ] ; then
+ # our pidfile exists, let's make sure the process is still running though
+ PID=`/bin/cat "$PIDFILE"`
+ if /bin/kill -0 "$PID" > /dev/null 2>&1 ; then
+ # indeed it is, i'm outta here!
+ /bin/echo 'Previous instance running...'
+ exit 0
+ fi
+ fi
+
+# create or update the pidfile
+/bin/echo "$$" > $PIDFILE
+
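+# /etc/openlib.conf is also parsed as an INI file by the Python tools (db.py);
+# sourcing it here relies on its key=value lines being valid shell assignments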
+. /etc/openlib.conf
+
+cd "$upload" || exit 1
+# unpack archives that have settled for at least 10 minutes; drop the archive on success
+find "$upload" -type f -name "*.zip" -mmin +10 -exec sh -c 'unzip "$1" && rm "$1"' _ {} \;
+# hand extracted books to the web server user
+find "$upload" -type f -name "*.fb2" ! -user www-data -exec chown www-data:users {} \;
+# move settled books to the processing area
+find "$upload" -type f -name "*.fb2" -user www-data -mmin +10 -exec mv {} "$temp" \;
+
+cd /opt/openlibrary
+/opt/openlibrary/fb2_process.py
+
+/bin/rm -f "$PIDFILE"
+
+exit 0
--- /dev/null
+#!/usr/bin/python
+
+import MySQLdb
+import ConfigParser
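+
+# Thin helpers around the `metadata` MySQL schema. The module-level `database`
+# connection is opened at import time (see the bottom of this file); every
+# helper bails out if that connection is missing.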
+
+def SortName(name):
+ if database:
+ c = database.cursor()
+ c.execute('SELECT metadata.SortStr(%s)', (name,))
+ return c.fetchone()[0]
+ else:
+ print "No connection to DB"
+ exit()
+
+def SortAuthorName(name):
+ if database:
+ c = database.cursor()
+ c.execute('SELECT metadata.SortAuthor(%s)', (name,))
+ return c.fetchone()[0]
+ else:
+ print "No connection to DB"
+ exit()
+
+def GetOrCreateAuthor(name):
+ if database:
+ c = database.cursor()
+ c.execute('SELECT metadata.GetOrCreateAuthor(%s)', (name,))
+ return c.fetchone()[0]
+ else:
+ print "No connection to DB"
+ exit()
+
+def GetOrCreateLang(name):
+ if database:
+ c = database.cursor()
+ c.execute('SELECT metadata.GetOrCreateLang(%s)', (name,))
+ return c.fetchone()[0]
+ else:
+ print "No connection to DB"
+ exit()
+
+def GetOrCreatePublisher(name):
+ if database:
+ c = database.cursor()
+ c.execute('SELECT metadata.GetOrCreatePublisher(%s)', (name,))
+ return c.fetchone()[0]
+ else:
+ print "No connection to DB"
+ exit()
+
+def GetOrCreateSeries(name):
+ if database:
+ c = database.cursor()
+ c.execute('SELECT metadata.GetOrCreateSeries(%s)', (name,))
+ return c.fetchone()[0]
+ else:
+ print "No connection to DB"
+ exit()
+
+def GetOrCreateTag(name):
+ if database:
+ c = database.cursor()
+ c.execute('SELECT metadata.GetOrCreateTag(%s)', (name,))
+ return c.fetchone()[0]
+ else:
+ print "No connection to DB"
+ exit()
+
+def CreateBook(title,pubdate,series_index,isbn):
+ if database:
+ c = database.cursor()
+ c.execute('SELECT metadata.CreateBook(%s,%s,%s,%s)', (title,pubdate,series_index,isbn))
+ return c.fetchone()[0]
+ else:
+ print "No connection to DB"
+ exit()
+
+def LinkBookToAuthors(book_id,author_ids):
+ if database:
+ c = database.cursor()
+ for author_id in author_ids:
+ c.execute('INSERT INTO metadata.books_authors_link(book,author) VALUES (%s,%s)', (book_id,author_id))
+ else:
+ print "No connection to DB"
+ exit()
+
+def LinkBookToLangs(book_id,lang_ids):
+ if database:
+ c = database.cursor()
+ io = 0
+ for lang_id in lang_ids:
+ io = io + 1
+ c.execute('INSERT INTO metadata.books_languages_link(book,lang_code,item_order) VALUES (%s,%s,%s)', (book_id,lang_id,io))
+ else:
+ print "No connection to DB"
+ exit()
+
+def LinkBookToPublishers(book_id,pub_id):
+ if database:
+ c = database.cursor()
+ c.execute('INSERT INTO metadata.books_publishers_link(book,publisher) VALUES (%s,%s)', (book_id,pub_id))
+ else:
+ print "No connection to DB"
+ exit()
+
+def LinkBookToSeries(book_id,ser_id):
+ if database:
+ c = database.cursor()
+ c.execute('INSERT INTO metadata.books_series_link(book,series) VALUES (%s,%s)', (book_id,ser_id))
+ else:
+ print "No connection to DB"
+ exit()
+
+def LinkBookToTags(book_id,tag_ids):
+ if database:
+ c = database.cursor()
+ for tag_id in tag_ids:
+ c.execute('INSERT INTO metadata.books_tags_link(book,tag) VALUES (%s,%s)', (book_id,tag_id))
+ else:
+ print "No connection to DB"
+ exit()
+
+def SetPath(book_id,path,dataname,filesize,cover):
+ if database:
+ c = database.cursor()
+ c.execute('UPDATE metadata.books SET path=%s, has_cover=%s WHERE id=%s', (path,cover,book_id))
+ c.execute('INSERT INTO metadata.data(book,format,uncompressed_size,name) VALUES (%s,%s,%s,%s)', (book_id,'FB2',filesize,dataname))
+ else:
+ print "No connection to DB"
+ exit()
+
+def StoreComment(book_id,comment):
+ if database:
+ c = database.cursor()
+ c.execute('INSERT INTO metadata.comments(book,text) VALUES (%s,%s)', (book_id,comment))
+ else:
+ print "No connection to DB"
+ exit()
+
+def PathByID(book_id):
+ if database:
+ c = database.cursor()
+ c.execute('SELECT path FROM metadata.books WHERE id=%s', (book_id,))
+ return c.fetchone()[0]
+ else:
+ print "No connection to DB"
+ exit()
+
+def DataByID(book_id,format):
+ if database:
+ c = database.cursor()
+ c.execute('SELECT name FROM metadata.data WHERE book=%s and format=%s',(book_id,format))
+ return c.fetchone()[0]+'.'+format.lower()
+ else:
+ print "No connection to DB"
+ exit()
+
+def DelBook(book_id):
+ if database:
+ c = database.cursor()
+ c.execute('DELETE FROM metadata.books WHERE id=%s', (book_id,))
+ else:
+ print "No connection to DB"
+ exit()
+
+def ChangeBookFormat(book_id,old_format,new_format):
+ if database:
+ c = database.cursor()
+ c.execute('UPDATE metadata.data SET format=%s WHERE book=%s and format=%s',(new_format,book_id,old_format))
+ else:
+ print "No connection to DB"
+ exit()
+
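+# processed_archives records each daily archive already downloaded, so the
+# fetch script can skip files it has seen (see TestArchive/MarkArchive).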
+def TestArchive(name):
+ if database:
+ c = database.cursor()
+ c.execute('SELECT count(*) from metadata.processed_archives WHERE filename=%s', (name,))
+ return c.fetchone()[0]
+ else:
+ print "No connection to DB"
+ exit()
+
+def MarkArchive(name):
+ if database:
+ c = database.cursor()
+ c.execute('INSERT INTO metadata.processed_archives(filename) VALUES (%s)', (name,))
+ else:
+ print "No connection to DB"
+ exit()
+
+def ListByFormat(format,limit=100):
+ if database:
+ c = database.cursor()
+ c.execute('SELECT DISTINCT book FROM metadata.data WHERE format=%s ORDER BY book LIMIT 0,%s',(format,limit))
+ return c.fetchall()
+ else:
+ print "No connection to DB"
+ exit()
+
+def Commit():
+ if database:
+ database.commit()
+ else:
+ print "No connection to DB"
+ exit()
+
+def Rollback():
+ if database:
+ database.rollback()
+ else:
+ print "No connection to DB"
+ exit()
+
+
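+# Module initialization: read the shared configuration and open the MySQL
+# connection that every helper above relies on.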
+try:
+
+ cfg = ConfigParser.RawConfigParser(allow_no_value=True)
+ cfg.read('/etc/openlib.conf')
+ dbhost = cfg.get("mysql","host")
+ dbuser = cfg.get("mysql","user")
+ dbpasswd = cfg.get("mysql","passwd")
+ file_root = cfg.get("storage","files")
+ tmp_files = cfg.get("storage","temp")
+ failed_files = cfg.get("storage","failed")
+ upload_files = cfg.get("storage","upload")
+
+except Exception as e:
+
+ print "Error reading configuration file:", e
+ exit()
+
+try:
+
+ database = MySQLdb.connect(host=dbhost,user=dbuser,passwd=dbpasswd,use_unicode=True)
+ database.set_character_set('utf8')
+ c = database.cursor()
+ c.execute('SET NAMES utf8;')
+
+except Exception as e:
+
+ print "Error connecting to database:", e
+ exit()
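+
+# Example usage (a sketch; assumes /etc/openlib.conf is readable and the
+# MySQL server is reachable):
+#
+# import db
+# author_id = db.GetOrCreateAuthor('Leo Tolstoy')
+# db.Commit()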
+
\ No newline at end of file
--- /dev/null
+from __future__ import with_statement
+__license__ = 'GPL v3'
+__copyright__ = '2011, Roman Mukhin <ramses_ru at hotmail.com>, '\
+ '2008, Anatoly Shipitsin <norguhtar at gmail.com>'
+'''Read meta information from fb2 files'''
+
+
+# TODO: clean up and keep only what is needed (sorry for this code :))
+
+# -------------------------------------------
+
+#From calibre http://bazaar.launchpad.net/~kovid/calibre/trunk/view/head:
+# /src/calibre/ebooks/metadata/fb2.py
+#Based on revision 10897
+
+import os
+import datetime
+from functools import partial
+# from base64 import b64decode
+from lxml import etree
+#from calibre.utils.date import parse_date
+#from calibre import guess_type, guess_all_extensions, prints, force_unicode
+#from calibre.ebooks.metadata import MetaInformation, check_isbn
+#from calibre.ebooks.chardet import xml_to_unicode
+
+
+# -------------------------------------------
+
+def force_unicode(text):
+ if not isinstance(text, unicode):
+ uni = unicode(text, encoding='utf-8')
+ else:
+ uni = text
+ return uni
+
+# -------------------------------------------
+# from calibre http://bazaar.launchpad.net/~kovid/calibre/trunk/view/head:
+# /src/calibre/ebooks/chardet/__init__.py
+# Based on rev 10897
+
+import re, codecs
+ENCODING_PATS = [
+ re.compile(r'<\?[^<>]+encoding\s*=\s*[\'"](.*?)[\'"][^<>]*>',
+ re.IGNORECASE),
+ re.compile(r'''<meta\s+?[^<>]*?content\s*=\s*['"][^'"]*?charset=([-_a-z0-9]+)[^'"]*?['"][^<>]*>''',
+ re.IGNORECASE),
+ ]
+
+def strip_encoding_declarations(raw):
+ limit = 50*1024
+ for pat in ENCODING_PATS:
+ prefix = raw[:limit]
+ suffix = raw[limit:]
+ prefix = pat.sub('', prefix)
+ raw = prefix + suffix
+ return raw
+
+def xml_to_unicode(raw, verbose=False, strip_encoding_pats=False,
+ resolve_entities=False, assume_utf8=False):
+ '''
+ Force conversion of byte string to unicode. Tries to look for XML/HTML
+ encoding declaration first, if not found uses the chardet library and
+ prints a warning if detection confidence is < 100%
+ @return: (unicode, encoding used)
+ '''
+ encoding = None
+ if not raw:
+ return u'', encoding
+ if not isinstance(raw, unicode):
+ if raw.startswith(codecs.BOM_UTF8):
+ raw, encoding = raw.decode('utf-8')[1:], 'utf-8'
+ elif raw.startswith(codecs.BOM_UTF16_LE):
+ raw, encoding = raw.decode('utf-16-le')[1:], 'utf-16-le'
+ elif raw.startswith(codecs.BOM_UTF16_BE):
+ raw, encoding = raw.decode('utf-16-be')[1:], 'utf-16-be'
+ if not isinstance(raw, unicode):
+ for pat in ENCODING_PATS:
+ match = pat.search(raw)
+ if match:
+ encoding = match.group(1)
+ break
+ if encoding is None:
+ encoding = 'utf-8'
+
+ try:
+ if encoding.lower().strip() == 'macintosh':
+ encoding = 'mac-roman'
+ if encoding.lower().replace('_', '-').strip() in (
+ 'gb2312', 'chinese', 'csiso58gb231280', 'euc-cn', 'euccn',
+ 'eucgb2312-cn', 'gb2312-1980', 'gb2312-80', 'iso-ir-58'):
+ # Microsoft Word exports to HTML with encoding incorrectly set to
+ # gb2312 instead of gbk. gbk is a superset of gb2312, anyway.
+ encoding = 'gbk'
+ raw = raw.decode(encoding, 'replace')
+ except LookupError:
+ encoding = 'utf-8'
+ raw = raw.decode(encoding, 'replace')
+
+ if strip_encoding_pats:
+ raw = strip_encoding_declarations(raw)
+ #if resolve_entities:
+ # raw = substitute_entites(raw)
+
+ return raw, encoding
+
+
+# -------------------------------------------
+
+NAMESPACES = {
+ 'fb2' : 'http://www.gribuser.ru/xml/fictionbook/2.0',
+ 'xlink' : 'http://www.w3.org/1999/xlink' }
+
+XPath = partial(etree.XPath, namespaces=NAMESPACES)
+tostring = partial(etree.tostring, method='text', encoding=unicode)
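+# the partials above pre-bind the fb2/xlink namespaces, so XPath('...') and
+# tostring(...) can be used below without repeating the namespace map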
+
+def get_metadata(stream):
+ ''' Return fb2 metadata as a L{MetaInformation} object '''
+
+ # ad-hoc attribute container standing in for calibre's MetaInformation
+ mi = type('lambdaobject', (object,), {})()
+
+ root = _get_fbroot(stream)
+ if root is None:
+ return mi
+
+ book_title = _parse_book_title(root)
+ authors = _parse_authors(root)
+
+ # fallback for book_title
+ if book_title:
+ book_title = unicode(book_title)
+ else:
+# book_title = force_unicode(os.path.splitext(
+# os.path.basename(getattr(stream, 'name',
+# _('Unknown'))))[0])
+ book_title = force_unicode(getattr(stream, 'name', 'Unknown'))
+ mi.book_title = book_title
+ mi.authors = authors
+
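+ # every remaining field is optional: a parse failure simply leaves the
+ # corresponding attribute unset on mi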
+ try:
+ _parse_comments(root, mi)
+ except:
+ pass
+ try:
+ _parse_tags(root, mi)
+ except:
+ pass
+ try:
+ _parse_series(root, mi)
+ except:
+ pass
+ try:
+ _parse_isbn(root, mi)
+ except:
+ pass
+ try:
+ _parse_publisher(root, mi)
+ except:
+ pass
+ try:
+ _parse_pubdate(root, mi)
+ except:
+ pass
+# try:
+# _parse_timestamp(root, mi)
+# except:
+# pass
+
+ try:
+ _parse_language(root, mi)
+ except:
+ pass
+
+ try:
+ _parse_cover_data(root,'cover.jpg',mi)
+ except:
+ pass
+ #_parse_uuid(root, mi)
+
+ #if DEBUG:
+ # prints(mi)
+ return mi
+
+def _parse_authors(root):
+ authors = []
+ # pick up authors, but only from one section (<title-info>); otherwise it is not consistent!
+ # Fallback: <src-title-info>
+ for author_sec in ['title-info', 'src-title-info']:
+ for au in XPath('//fb2:%s/fb2:author'%author_sec)(root):
+ author = _parse_author(au)
+ if author:
+ authors.append(author)
+ break
+
+ # if no author so far
+ if not authors:
+ #authors.append(_('Unknown'))
+ authors.append('Unknown')
+
+ return authors
+
+def _parse_author(elm_author):
+ """ Returns a list of display author and sortable author"""
+
+ xp_templ = 'normalize-space(fb2:%s/text())'
+
+ author = XPath(xp_templ % 'first-name')(elm_author)
+ lname = XPath(xp_templ % 'last-name')(elm_author)
+ mname = XPath(xp_templ % 'middle-name')(elm_author)
+
+ if mname:
+ author = (author + ' ' + mname).strip()
+ if lname:
+ author = (author + ' ' + lname).strip()
+
+ # fallback to nickname
+ if not author:
+ nname = XPath(xp_templ % 'nickname')(elm_author)
+ if nname:
+ author = nname
+
+ return author
+
+
+def _parse_book_title(root):
+ # <title-info> has priority (it is actually mandatory in FB2).
+ # The others are fallbacks (the sequence matters and differs from the fb2 spec).
+ xp_ti = '//fb2:title-info/fb2:book-title/text()'
+ xp_pi = '//fb2:publish-info/fb2:book-title/text()'
+ xp_si = '//fb2:src-title-info/fb2:book-title/text()'
+ book_title = XPath('normalize-space(%s|%s|%s)' % (xp_ti, xp_pi, xp_si))(root)
+
+ return book_title
+
+#TODO add from calibre
+def _parse_cover_data(root, imgid, mi):
+ elm_binary = XPath('//fb2:binary[@id="%s"]'%imgid)(root)
+ if elm_binary:
+ mimetype = elm_binary[0].get('content-type', 'image/jpeg')
+ pic_data = elm_binary[0].text
+ mi.cover = pic_data
+
+def _parse_tags(root, mi):
+ # pick up genres, but only from one section (<title-info>); otherwise it is not consistent!
+ # Fallback: <src-title-info>
+ for genre_sec in ['title-info', 'src-title-info']:
+ # -- i18n Translations-- ?
+ tags = XPath('//fb2:%s/fb2:genre/text()' % genre_sec)(root)
+ if tags:
+ mi.tags = list(map(unicode, tags))
+ break
+
+def _parse_series(root, mi):
+ # calibre supports only one series: use the first one
+ # pick up the sequence from one section only, in preferred order,
+ # except <src-title-info>
+
+ #TODO parse all
+ xp_ti = '//fb2:title-info/fb2:sequence[1]'
+ xp_pi = '//fb2:publish-info/fb2:sequence[1]'
+
+ elms_sequence = XPath('%s|%s' % (xp_ti, xp_pi))(root)
+ if elms_sequence:
+ mi.series = elms_sequence[0].get('name', None)
+ if mi.series:
+ mi.series_index = elms_sequence[0].get('number', None)
+
+def _parse_isbn(root, mi):
+ isbn = XPath('normalize-space(//fb2:publish-info/fb2:isbn/text())')(root)
+ if isbn:
+ # some people put several ISBNs in this field although it is not allowed;
+ # stick to the first one in that case
+ if ',' in isbn:
+ isbn = isbn[:isbn.index(',')]
+
+ #TODO add from calibre
+ #if check_isbn(isbn):
+ mi.isbn = isbn
+
+def _parse_comments(root, mi):
+ # pick up the annotation, but only from one section (<title-info>); fallback: <src-title-info>
+ for annotation_sec in ['title-info', 'src-title-info']:
+ elms_annotation = XPath('//fb2:%s/fb2:annotation' % annotation_sec)(root)
+ if elms_annotation:
+ mi.comments = tostring(elms_annotation[0])
+ # TODO: tags i18n, xslt?
+ break
+
+def _parse_publisher(root, mi):
+ publisher = XPath('string(//fb2:publish-info/fb2:publisher/text())')(root)
+ if publisher:
+ mi.publisher = publisher
+
+def _parse_pubdate(root, mi):
+ year = XPath('number(//fb2:publish-info/fb2:year/text())')(root)
+ # number() yields NaN when <year> is absent, and NaN.is_integer() is False
+ if float.is_integer(year):
+ # only the year is available, so use the 1st of January
+ mi.pubdate = datetime.date(int(year), 1, 1)
+
+def _parse_timestamp(root, mi):
+ #<date value="1996-12-03">03.12.1996</date>
+ xp ='//fb2:document-info/fb2:date/@value|'\
+ '//fb2:document-info/fb2:date/text()'
+ docdate = XPath('string(%s)' % xp)(root)
+ if docdate:
+ #TODO add from calibre
+ #mi.timestamp = parse_date(docdate)
+ mi.timestamp = docdate
+
+def _parse_language(root, mi):
+ language = XPath('string(//fb2:title-info/fb2:lang/text())')(root)
+ if language:
+ mi.language = language
+ mi.languages = [ language ]
+
+def _parse_uuid(root, mi):
+ uuid = XPath('normalize-space(//fb2:document-info/fb2:id/text())')(root)
+ if uuid:
+ mi.uuid = uuid
+
+def _get_fbroot(stream):
+ parser = etree.XMLParser(recover=True, no_network=True)
+ raw = stream.read()
+ raw = xml_to_unicode(raw, strip_encoding_pats=True)[0]
+ root = etree.fromstring(raw, parser=parser)
+ return root
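+
+# Example (a sketch): extracting metadata from an .fb2 file on disk
+#
+# with open('book.fb2') as stream:
+# mi = get_metadata(stream)
+# print mi.book_title, mi.authors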
--- /dev/null
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+
+import re
+import db
+import fb2_meta
+import os
+import sys
+import shutil
+import pipes
+
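+# cache of (compiled regex, replacement) pairs loaded lazily from
+# metadata.tags_mapping; used to normalize incoming genre tags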
+mapping = None
+
+def GetTagsMapping(connection):
+ global mapping
+ c = connection.cursor()
+ c.execute("SELECT tag_mask, tag_result FROM metadata.tags_mapping")
+ mapping = c.fetchall()
+ result = []
+ for item in mapping:
+ result.append((re.compile(item[0]), item[1].encode('utf-8')))
+ mapping = result
+
+def Matches(tag):
+ global mapping
+ for item in mapping:
+ if item[0].match(tag):
+ return item[1]
+ return tag
+
+def NewTags(tags):
+ global mapping
+ if not mapping:
+ GetTagsMapping(db.database)
+ new_tags = set()
+ for item in tags:
+ new_tags.add(Matches(item))
+ return list(new_tags)
+
+class MetaData:
+
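+ # books are sharded on disk by prefixes of the sortable author name:
+ # a/au/auth/author(first 32 chars)/title(first 64 chars) (book_id)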
+ def GetPath(self):
+ self.sort_title = db.SortName(self.title).replace(' ','_')
+ self.sort_author = db.SortAuthorName(self.author).replace(' ','_')
+ self.path = ("%s/%s/%s/%s/%s (%d)" % (self.sort_author[0],self.sort_author[0:2],self.sort_author[0:4],self.sort_author[0:32],self.sort_title[0:64],self.book_id))
+ self.dataname = (self.title.decode('utf-8')[0:64]+' '+self.author.decode('utf-8')[0:32]).replace('/','')
+
+ def __init__(self,meta_dict,size):
+
+ self.size = size
+ try:
+ tags=NewTags(meta_dict['tags'])
+ except:
+ tags=['other']
+ if 'trash' in tags:
+ self.state="trash"
+ return
+ self.tags=tags
+ try:
+ tag=tags[0]
+ except:
+ tag='Жанр неизвестен' # Russian: "Genre unknown"
+
+ try:
+ self.author=meta_dict['authors'][0].encode('utf-8')
+ self.authors = meta_dict['authors']
+ author_ids = set()
+ try:
+ for author in self.authors:
+ author_ids.add(db.GetOrCreateAuthor(author.encode('utf-8')))
+ except:
+ pass
+ except:
+ self.author='Неизвестный Автор (%s)' % (tag) # Russian: "Unknown Author"
+ self.authors = []
+ author_ids = []
+
+ try:
+ try:
+ self.langs=meta_dict['languages']
+ except:
+ self.langs=['ru']
+ lang_ids = set()
+ for lang in self.langs:
+ lang_ids.add(db.GetOrCreateLang(lang.encode('utf-8')))
+ except:
+ lang_ids = set()
+
+
+ try:
+ self.publisher = meta_dict['publisher'].encode('utf-8')
+ pub_id=db.GetOrCreatePublisher(self.publisher)
+ except:
+ pub_id=None
+
+ try:
+ title = meta_dict['book_title'].encode('utf-8')
+ except:
+ title='Название неизвестно' # Russian: "Title unknown"
+ self.title=title
+
+ try:
+ pub_date=meta_dict['pubdate']
+ except:
+ pub_date=None
+ self.pub_date=pub_date
+
+ try:
+ isbn=meta_dict['isbn'].encode('utf-8')
+ except:
+ isbn=None
+ self.isbn=isbn
+
+ try:
+ self.series = meta_dict['series'].encode('utf-8')
+ ser_id=db.GetOrCreateSeries(self.series)
+ except:
+ ser_id=None
+ try:
+ self.series_idx = meta_dict['series_index']
+ ser_num=meta_dict['series_index']
+ except:
+ ser_num=None
+
+ tag_ids = set()
+ try:
+ for tag in tags:
+ tag_ids.add(db.GetOrCreateTag(tag))
+ except:
+ pass
+
+ try:
+ self.cover=meta_dict['cover']
+ self.has_cover=1
+ except:
+ self.has_cover=0
+
+ try:
+ self.comments=meta_dict['comments'].encode('utf-8')
+ except:
+ self.comments=''
+
+ book_id = db.CreateBook(title,pub_date,ser_num,isbn)
+ self.book_id = book_id
+
+ db.LinkBookToAuthors(book_id,author_ids)
+ db.LinkBookToLangs(book_id,lang_ids)
+ if pub_id:
+ db.LinkBookToPublishers(book_id,pub_id)
+ if ser_id:
+ db.LinkBookToSeries(book_id,ser_id)
+ db.LinkBookToTags(book_id,tag_ids)
+ if self.comments:
+ db.StoreComment(book_id,self.comments)
+
+ self.GetPath()
+ db.SetPath(self.book_id,self.path,self.dataname,self.size,self.has_cover)
+ self.state="done"
+
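+# Import a single .fb2 file. Outcomes: "done" stores the file under
+# db.file_root, "trash" deletes it (blacklisted tags), anything else is moved
+# to db.failed_files and the transaction is rolled back.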
+def ProcessFile(filename):
+
+ size = os.path.getsize(filename)
+ stream = open(filename)
+ meta = fb2_meta.get_metadata(stream)
+ stream.close()
+
+ try:
+ book = MetaData(meta.__dict__,size)
+
+ if book.state=="done":
+
+ new_dir_path = db.file_root + book.path
+ new_file_path = new_dir_path + '/' + book.dataname + '.fb2'
+
+ os.makedirs(new_dir_path,0755)
+ shutil.move(filename,new_file_path)
+
+ if book.has_cover:
+ cover_path = new_dir_path + '/cover.jpg'
+ print "Book has cover, try to store to "+cover_path
+ coverfile = open(cover_path,'w')
+ coverfile.write(book.cover.decode('base64'))
+ coverfile.close()
+
+ db.Commit()
+ print "Moved to "+new_dir_path
+
+ elif book.state=="trash":
+
+ print "Tags blacklisted, trashing"
+ os.remove(filename)
+
+ else:
+
+ shutil.move(filename,os.path.join(db.failed_files,os.path.basename(filename)))
+ print "Moved to failed_files"
+ db.Rollback()
+
+ except:
+
+ shutil.move(filename,os.path.join(db.failed_files,os.path.basename(filename)))
+ print "Moved to failed_files"
+ db.Rollback()
+
+def ProcessDir(dirname):
+ for file in os.listdir(dirname):
+ if file.endswith(".fb2"):
+ print "Processing "+file
+ ProcessFile(os.path.join(dirname,file))
+
+def DelBook(id):
+ path = os.path.join(db.file_root,db.PathByID(id))
+ if path:
+ for file in os.listdir(path):
+ os.remove(os.path.join(path,file))
+ db.DelBook(id)
+ os.rmdir(path)
+ db.Commit()
+
+def CompressBook(id):
+ path=db.PathByID(id)
+ if path:
+ datafile = os.path.join(db.file_root,path,db.DataByID(id,'FB2'))
+ zippath = datafile + '.zip'
+ # pipes.quote is safer than hand-rolled escaping of ", ` and $
+ command = "zip --move --junk-paths %s %s" % (pipes.quote(zippath),pipes.quote(datafile))
+ command = command.encode('utf-8')
+ print command
+ if os.system(command)==0:
+ db.ChangeBookFormat(id,'FB2','FB2.ZIP')
+ db.Commit()
+
+def UnCompressBook(id):
+ path=db.PathByID(id)
+ if path:
+ datafile = os.path.join(db.file_root,path,db.DataByID(id,'FB2.ZIP'))
+ command = "unzip %s -d %s" % (pipes.quote(datafile),pipes.quote(os.path.join(db.file_root,path)))
+ command = command.encode('utf-8')
+ print command
+ if os.system(command)==0:
+ os.remove(datafile)
+ db.ChangeBookFormat(id,'FB2.ZIP','FB2')
+ db.Commit()
+
+def CompressAll(limit=100):
+ ids = db.ListByFormat('FB2',limit)
+ for id in ids:
+ try:
+ CompressBook(id[0])
+ except:
+ pass
+
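+# CompressBook/UnCompressBook/CompressAll/CheckFiles are maintenance helpers
+# and can also be run from an interactive session, e.g.:
+#
+# import fb2_process
+# fb2_process.CompressAll(500) # zip up to 500 stored FB2 books
+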
+def CheckFiles(delete = 0):
+ ids = db.ListByFormat('FB2',300000)
+ cnt = 0
+ for id in ids:
+ cnt = cnt + 1
+ sys.stdout.write("\r%s"%(cnt))
+ datafile = os.path.join(db.file_root,db.PathByID(id[0]),db.DataByID(id[0],'FB2'))
+ if not os.path.exists(datafile):
+ print "\r File %s not found" % (datafile)
+ if delete==1:
+ db.DelBook(id[0])
+ db.Commit()
+ ids = db.ListByFormat('FB2.ZIP',300000)
+ cnt = 0
+ for id in ids:
+ cnt = cnt + 1
+ sys.stdout.write("\r%s"%(cnt))
+ datafile = os.path.join(db.file_root,db.PathByID(id[0]),db.DataByID(id[0],'FB2.ZIP'))
+ if not os.path.exists(datafile):
+ print "\r File %s not found" % (datafile)
+ if delete==1:
+ db.DelBook(id[0])
+ db.Commit()
+
+def main():
+ ProcessDir(db.tmp_files)
+ CompressAll(2000)
+
+if __name__ == "__main__":
+ import sys
+ reload(sys)
+ sys.setdefaultencoding('utf-8')
+ main()
+
\ No newline at end of file
--- /dev/null
+#!/usr/bin/python
+
+from BeautifulSoup import BeautifulSoup
+import urllib
+import db
+import re
+import os
+
+# daily archive links look like "f.fb2..."
+pattern = re.compile("^f.fb2.*")
+# route everything through the local proxy on port 3128 (presumably one that
+# can also reach the .onion/.i2p mirrors); urllib and wget both honour these
+# environment variables
+os.environ['http_proxy']='http://localhost:3128'
+os.environ['no_proxy']='localhost,127.0.0.1'
+
+
+for host in ['flibusta.net','flibustahezeous3.onion','flibusta.i2p']:
+
+ try:
+ print "Trying %s" % (host)
+ html_page = urllib.urlopen("http://%s/daily/" % (host))
+ html = BeautifulSoup(html_page)
+
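+ # build a wget template with the host baked in; the final %s is passed
+ # through as a literal so it can be filled with the archive name below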
+ os_command = "wget -c -P \"%s\" http://%s/daily/%s" % (db.upload_files,host,'%s')
+ print os_command
+ matched = False
+
+ for link in html.findAll('a'):
+ file = link.get("href")
+ print file
+ if file and pattern.match(file): # href may be missing
+ matched = True
+ if not db.TestArchive(file):
+ print "Processing %s" % file
+ if os.system(os_command % file) == 0:
+ db.MarkArchive(file)
+ db.Commit()
+ except Exception as e:
+ print "Failed on %s: %s" % (host, e)
+ matched = False
+
+ if matched:
+ break
+
+if matched:
+ print "Got from %s" % host
+else:
+ print "Failed to get"