#!/usr/bin/python
-from BeautifulSoup import BeautifulSoup
-import urllib
+from bs4 import BeautifulSoup
+import urllib3
import db
import re
import os
proxies = {'http': 'http://localhost:3128'}
-for host in ['flibustahezeous3.onion','flibusta.i2p','flibusta.net']:
+# Try each mirror in turn: list its /daily/ index, download every archive
+# whose name matches `pattern` and is not yet recorded in the db, and stop
+# at the first mirror that offered at least one matching link.
+# One connection pool is shared by all mirror attempts.
+http = urllib3.PoolManager()
+for host in ['flibustahezeous3.onion','flibusta.i2p']:
+    matched = False
     try:
-        print "Trying %s" % (host)
-        html_page = urllib.urlopen("http://%s/daily/" % (host))
-        html = BeautifulSoup(html_page)
-
-        os_command = "wget -c -q -P \"%s\" http://%s/daily/%s" % (db.upload_files,host,'%s')
-        print os_command
-        matched = False
-
-        for link in html.findAll('a'):
-            file = link.get("href")
-            print file
-            if pattern.match(file):
-                matched = True
-                if not db.TestArchive(file):
-                    print "Processing %s" % file
-                    if os.system(os_command % file) == 0:
-                        db.MarkArchive(file)
-                        db.Commit()
+        print("Trying %s" % (host))
+        resp = http.request('GET', "http://%s/daily/" % (host))
+        if resp.status == 200:
+            # Pass the raw bytes so BeautifulSoup detects the page encoding
+            # itself; a forced UTF-8 decode raises on any other charset.
+            html = BeautifulSoup(resp.data, "html.parser")
+
+            # href=True skips anchors without an href, for which
+            # link.get("href") returns None and pattern.match() would raise.
+            for link in html.find_all('a', href=True):
+                file = link.get("href")
+                print(file)
+                if pattern.match(file):
+                    print("Pattern matched")
+                    matched = True
+                    if not db.TestArchive(file):
+                        print("Processing %s" % file)
+                        # The href comes from a scraped page, so wget gets an
+                        # argument list instead of a shell command line.
+                        wget_args = [
+                            "wget", "-c", "-q",
+                            "-P", str(db.upload_files),
+                            "http://%s/daily/%s" % (host, file),
+                        ]
+                        # With P_WAIT the return value is wget's exit code.
+                        if os.spawnvp(os.P_WAIT, "wget", wget_args) == 0:
+                            db.MarkArchive(file)
+                            db.Commit()
+        else:
+            print("Unexpected HTTP status %s from %s" % (resp.status, host))
-    except:
+    except Exception as exc:
+        # Report the failure and fall through to the next mirror; re-raising
+        # here would abort the run on the first unreachable host.
+        print("Failed on %s: %s" % (host, exc))
         matched = False
     if matched:
         break
 if matched:
-    print "Got from %s" % host
+    print("Got from %s" % host)
 else:
-    print "Failed to get"
+    print("Failed to get")