#!/usr/bin/python
"""Download new daily Flibusta archives into ``db.upload_files``.

Reconstructed from the post-patch side of the gitweb diff for
``get_flibusta.py`` (https://git.rvb.name/openlib.git, refs/heads/master).

Each mirror host is tried in order.  For the first host whose ``/daily/``
index can be fetched and contains at least one link matching ``pattern``,
every matching archive not yet recorded via ``db.TestArchive`` is downloaded
with ``wget`` and recorded with ``db.MarkArchive`` / ``db.Commit``.
"""

from bs4 import BeautifulSoup
import urllib3
import db
import re
import os
import subprocess

pattern = re.compile("^f.fb2.*")

# wget (spawned below) picks the proxy up from the environment.
os.environ['http_proxy'] = 'http://192.168.1.100:3128'
os.environ['no_proxy'] = 'localhost,127.0.0.1'

# NOTE(review): this dict is not used anywhere in the script and still points
# at localhost while http_proxy above does not -- confirm which is intended.
proxies = {'http': 'http://localhost:3128'}

HOSTS = ['flibustahezeous3.onion', 'flibusta.i2p']


def fetch_listing(host):
    """Return the parsed ``/daily/`` index of *host*, or None if unavailable.

    urllib3 does not read ``http_proxy`` from the environment, so the proxy
    is passed explicitly; otherwise the listing request would bypass it.
    Transport errors and non-200 responses yield None so that the caller can
    fall through to the next mirror.
    """
    http = urllib3.ProxyManager(os.environ['http_proxy'])
    try:
        resp = http.request('GET', "http://%s/daily/" % (host))
    except urllib3.exceptions.HTTPError as err:
        print("Error fetching from %s: %s" % (host, err))
        return None
    if resp.status != 200:
        return None
    html_page = resp.data.decode('utf-8')
    return BeautifulSoup(html_page, "html.parser")


def download(host, name):
    """Fetch one archive with wget; return True when wget exits with 0.

    The command is passed as an argument list (no shell), because *name*
    comes from a remote page and must not be interpreted by a shell.
    """
    url = "http://%s/daily/%s" % (host, name)
    try:
        return subprocess.call(
            ["wget", "-c", "-q", "-P", db.upload_files, url]) == 0
    except OSError as err:
        # wget missing or not executable: treat like a failed download.
        print("Cannot run wget: %s" % err)
        return False


def process_host(host):
    """Download all new archives listed by *host*.

    Returns True when at least one link on the index matched ``pattern``
    (whether or not it still needed downloading), False otherwise.
    """
    html = fetch_listing(host)
    if html is None:
        return False

    matched = False
    for link in html.find_all('a'):
        name = link.get("href")
        print(name)
        # Anchors without an href yield None, which cannot be matched.
        if name is None or not pattern.match(name):
            continue
        print("Pattern matched")
        matched = True
        if not db.TestArchive(name):
            print("Processing %s" % name)
            # Only mark the archive as done when the download succeeded.
            if download(host, name):
                db.MarkArchive(name)
                db.Commit()
    return matched


def main():
    """Try each mirror in turn and stop at the first one that matched."""
    for host in HOSTS:
        print("Trying %s" % (host))
        if process_host(host):
            print("Got from %s" % host)
            return
    print("Failed to get")


if __name__ == "__main__":
    main()