# (paste residue, apparently listing metadata, not part of the program) 293 3.2 KB 102
import hashlib
import os
from random import random
import requests
import time
import threading
import traceback
def sha512Digest(data):
    """Return the SHA-512 digest of *data* as a lowercase hex string.

    Args:
        data: Bytes-like object to hash.

    Returns:
        A 128-character string of lowercase hexadecimal digits.
    """
    # hexdigest() yields the same two-hex-digits-per-byte rendering that was
    # previously assembled by hand from digest().
    return hashlib.sha512(data).hexdigest()
class RateLimit:
    """Rate-limited wrapper around an HTTP session with retrying helpers.

    Every request first passes through :meth:`wait`, which spaces calls at
    least ``1 / perSecond`` seconds apart. Callers are serialised through a
    lock, so one instance can be shared between threads.
    """

    def __init__(self, perSecond, session=requests) -> None:
        """Create a limiter.

        Args:
            perSecond: Maximum number of requests per second (must be > 0,
                since :meth:`wait` divides by it).
            session: Object exposing ``get(url, *args, **kwargs)``; defaults
                to the ``requests`` module itself.
        """
        self.perSecond = perSecond
        self.waitUntil = time.time()
        self.lock = threading.Lock()
        self.session = session

    def wait(self):
        """Block until the next request slot is free, then reserve it."""
        # ``with`` guarantees the lock is released even if the sleep is
        # interrupted (e.g. by KeyboardInterrupt); a manual acquire/release
        # pair would leave every other thread blocked forever.
        with self.lock:
            currentTime = time.time()
            if currentTime < self.waitUntil:
                time.sleep(self.waitUntil - currentTime)
                currentTime = time.time()
            self.waitUntil = currentTime + 1.0 / self.perSecond

    def get(self, url, fragile=False, *args, **kwargs):
        """GET *url*, retrying with randomised exponential backoff.

        Args:
            url: Address to fetch.
            fragile: When true, give up after the first failed attempt
                instead of retrying.
            *args, **kwargs: Forwarded to ``self.session.get``.

        Returns:
            The response object when ``response.ok`` is true; ``None`` for a
            404, or for any failure when *fragile* is set.
        """
        backoff = 1
        while True:
            # Status of this attempt, if the server answered at all. Kept
            # separately so the log line below never touches an unbound or
            # stale ``response`` after an exception.
            status = None
            try:
                self.wait()
                response = self.session.get(url, *args, **kwargs)
                if response.ok:
                    return response
                if response.status_code == 404:
                    return None
                if fragile:
                    return None
                status = response.status_code
                print(
                    backoff,
                    "| rejected",
                    status,
                    "trying to fetch",
                    url,
                )
            except Exception as err:
                # KeyboardInterrupt is not an Exception subclass, so it
                # still propagates to the caller untouched.
                if fragile:
                    return None
                print(backoff, "| exception", err, "while fetching", url)

            backoff = backoff * 2
            if status is not None:
                print(backoff, '|', status, '| response code while fetching', url)
            time.sleep(random() * backoff)

    def saveImage(self, url, downloadPath, timeout=None):
        """Download *url* into *downloadPath* and return its SHA-512 hex digest.

        Args:
            url: Address of the image.
            downloadPath: Destination file; missing parent directories are
                created.
            timeout: Forwarded to the underlying request.

        Returns:
            The hex digest of the downloaded bytes, or ``None`` when nothing
            was downloaded.
        """
        data = self.downloadImage(url, timeout=timeout)
        if not data:
            return None

        sha512 = sha512Digest(data)

        # dirname() is '' for a bare file name, and os.makedirs('') raises.
        output_dir = os.path.dirname(downloadPath)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)

        with open(downloadPath, mode='wb') as outp:
            outp.write(data)

        return sha512

    def downloadImage(self, url, storagePath=None, timeout=None, fragile=False):
        """Fetch *url* and return its bytes, or stream it to *storagePath*.

        Args:
            url: Address of the image.
            storagePath: Optional destination file. If it already exists it
                is left untouched.
            timeout: Forwarded to the underlying request.
            fragile: When true, return ``None`` after the first failure
                instead of retrying.

        Returns:
            ``None`` if :meth:`get` produced no response (or a fragile
            attempt failed); the raw body bytes when *storagePath* is not
            given; otherwise *storagePath*.
        """
        backoff = 1
        while True:
            try:
                response = self.get(url, timeout=timeout, stream=True, fragile=fragile)
                if not response:
                    return None

                if not storagePath:
                    # NOTE(review): raw.read() bypasses the content decoding
                    # that iter_content() applies below; confirm that is
                    # intended for servers that compress responses.
                    return response.raw.read()

                if os.path.exists(storagePath):
                    return storagePath

                self._writeStream(response, storagePath)
                return storagePath
            except Exception:
                print('something failed when downloading')
                traceback.print_exc()
                if fragile:
                    return None

            # Back off before retrying rather than hammering in a tight loop.
            backoff *= 2
            time.sleep(random() * backoff)

    @staticmethod
    def _writeStream(response, storagePath):
        """Stream *response* to *storagePath* without exposing partial files."""
        # Write to a sibling file and rename at the end: an interrupted
        # transfer must not leave a truncated file at storagePath, because
        # the existence check in downloadImage would later treat it as a
        # finished download.
        partialPath = storagePath + '.part'
        try:
            with open(partialPath, 'wb') as outp:
                for chunk in response.iter_content(chunk_size=2**16):
                    outp.write(chunk)
            os.replace(partialPath, storagePath)
        except BaseException:
            try:
                os.remove(partialPath)
            except OSError:
                pass
            raise
# by Synthbot