get_flibusta.py

   1 #!/usr/bin/python
   2
   3 from BeautifulSoup import BeautifulSoup
   4 import urllib
   5 import db
   6 import re
   7 import os
   8
   9 pattern = re.compile("^f.fb2.*")
  10 os.environ['http_proxy']='http://192.168.1.100:3128'
  11 os.environ['no_proxy']='localhost,127.0.0.1'
  12 proxies = {'http': 'http://localhost:3128'}
  13
  14
  15 for host in ['flibusta.lib','flibustahezeous3.onion','flibusta.i2p']:
  16
  17   try:
  18     print "Trying %s" % (host)
  19     html_page = urllib.urlopen("http://%s/daily/" % (host))
  20     html = BeautifulSoup(html_page)
  21
  22     os_command = "wget -c -q -P \"%s\" http://%s/daily/%s" % (db.upload_files,host,'%s')
  23     matched = False
  24
  25     for link in html.findAll('a'):
  26       file = link.get("href")
  27       print file
  28       if pattern.match(file):
  29         print "Pattern matched"
  30         matched = True
  31         if not db.TestArchive(file):
  32           print "Processing %s" % file
  33           if os.system(os_command % file) == 0:
  34             db.MarkArchive(file)
  35             db.Commit()
  36   except:
  37     matched = False
  38
  39   if matched:
  40     break
  41
  42 if matched:
  43   print "Got from %s" % host
  44 else:
  45   print "Failed to get"