X-Git-Url: https://git.rvb.name/openlib.git/blobdiff_plain/de4a5d3664fd9de7c8fd560745668c320c018b45..425585bda1b9e764533ca8a91098f66c833df5ae:/get_flibusta.py diff --git a/get_flibusta.py b/get_flibusta.py index 6409847..fa320b2 100755 --- a/get_flibusta.py +++ b/get_flibusta.py @@ -1,7 +1,7 @@ #!/usr/bin/python -from BeautifulSoup import BeautifulSoup -import urllib +from bs4 import BeautifulSoup +import urllib3 import db import re import os @@ -12,34 +12,38 @@ os.environ['no_proxy']='localhost,127.0.0.1' proxies = {'http': 'http://localhost:3128'} -for host in ['flibusta.lib','flibustahezeous3.onion','flibusta.i2p']: +for host in ['flibustahezeous3.onion','flibusta.i2p']: + matched = False try: - print "Trying %s" % (host) - html_page = urllib.urlopen("http://%s/daily/" % (host)) - html = BeautifulSoup(html_page) - - os_command = "wget -c -q -P \"%s\" http://%s/daily/%s" % (db.upload_files,host,'%s') - matched = False - - for link in html.findAll('a'): - file = link.get("href") - print file - if pattern.match(file): - print "Pattern matched" - matched = True - if not db.TestArchive(file): - print "Processing %s" % file - if os.system(os_command % file) == 0: - db.MarkArchive(file) - db.Commit() + print("Trying %s" % (host)) + http = urllib3.PoolManager() + resp = http.request('GET', "http://%s/daily/" % (host)) + if resp.status == 200: + html_page = resp.data.decode('utf-8') + html = BeautifulSoup(html_page,"html.parser") + + os_command = "wget -c -q -P \"%s\" http://%s/daily/%s" % (db.upload_files,host,'%s') + + for link in html.findAll('a'): + file = link.get("href") + print(file) + if pattern.match(file): + print("Pattern matched") + matched = True + if not db.TestArchive(file): + print("Processing %s" % file) + if os.system(os_command % file) == 0: + db.MarkArchive(file) + db.Commit() except: + raise matched = False if matched: break if matched: - print "Got from %s" % host + print("Got from %s" % host) else: - print "Failed to get" + print("Failed to get")