如何解决 Python 通过 Tor 的流式请求连接中断问题
我正在使用python请求库以多处理方式从“ onionurl”下载文件,以从tor服务下载许多文件。
相关代码如下。
但是,下载这些文件时,连接每隔一两分钟就会中断。流式下载失败时不会抛出任何错误,只会打印“关闭文本文件”(closing text file)。这导致无法完整下载这些洋葱服务上托管的文件——每个文件都有数百 GB。
对于解决此问题的任何帮助,将不胜感激。
# Build a requests session that routes all traffic through the local Tor
# SOCKS proxy.  'socks5h' (as opposed to 'socks5') makes the proxy resolve
# hostnames, which is required for .onion addresses.
session = requests.session()
session.proxies = {}
session.proxies['http'] = 'socks5h://localhost:9050'
session.proxies['https'] = 'socks5h://localhost:9050'
#print(onionurlforrequest)
url = onionurl
try:
    # Make sure the destination folder exists before streaming into it.
    if not os.path.isdir(foldername):
        os.makedirs(foldername)
    # Download the body of the response chunk by chunk, not all at once.
    with session.get(url, stream=True, verify=False, timeout=1000000) as response:
        # Total file size advertised by the server (0 if the header is absent).
        file_size = int(response.headers.get("Content-Length", 0))
        print(file_size)
        # Destination path for the downloaded data.
        filename = dataloc
        with open(filename, "wb") as text_file:
            for chunk in response.iter_content(chunk_size=1024):
                # Skip keep-alive chunks, which arrive as empty bytes.
                if chunk:
                    text_file.write(chunk)
        if file_size > 1000000:
            filesizemb = file_size / 1000000
        else:
            filesizemb = 1
        print("closing text file")
        # NOTE: the 'with open(...)' block already closed the file; an extra
        # explicit close() here would be redundant.
except Exception as e:
    # Best-effort: report and continue, matching the surrounding style.
    print(e)
解决方法
解决方法是:接受“连接必然会中断”这一事实,并编写一个新函数,从精确的字节偏移量处恢复下载。其原理在这个问题中已有解释——How to resume file download in Python?
我的代码(警告,混乱):
def onionrequestthreadeddataleakdownloadresume(onionurl, resume_byte_pos):
    """Resume an interrupted download from a Tor hidden service.

    Sends an HTTP ``Range`` header starting at ``resume_byte_pos`` and
    appends the received bytes to the partially downloaded file, so repeated
    calls gradually complete a download whose connection keeps dropping.

    Parameters
    ----------
    onionurl : list
        Two-element sequence: ``[companyname, url]`` where ``url`` is the
        full .onion URL of the file to fetch.
    resume_byte_pos : int
        Byte offset at which to resume; must equal the current size of the
        partial file on disk (the file is opened in append mode).

    Returns
    -------
    None.  All failures are reported via print/logging; nothing is raised.
    """
    print("rerunning")
    companyname = onionurl[0]
    onionurl = onionurl[1]
    # Base directory for all leaks; per-company subfolder below it.
    dataloc = '/media/archangel/Elements/clop/dataleaks/'
    foldername = dataloc
    dataloc = dataloc + companyname + "/"
    try:
        if not os.path.isdir(dataloc):
            os.mkdir(dataloc)
    except Exception as e:
        # Best-effort: folder may already exist or the drive may be missing.
        print(e)
        print("folder not created")
    # Target file path = <base>/<company>/<basename of URL>.
    filename = os.path.basename(onionurl)
    filenamebasename = filename
    dataloc = dataloc + filename
    try:
        # Fresh session per attempt so a wedged Tor circuit is not reused.
        # 'socks5h' makes the proxy resolve .onion hostnames.
        session = requests.session()
        session.proxies = {}
        session.proxies['http'] = 'socks5h://localhost:9050'
        session.proxies['https'] = 'socks5h://localhost:9050'
        print("dataloc")
        print(dataloc)
        print("onionurl")
        print(onionurl)
        url = onionurl
        try:
            print("url")
            print(url)
            if not os.path.isdir(foldername):
                os.makedirs(foldername)
            # Stream the body chunk by chunk rather than reading it at once.
            try:
                # NOTE(review): `Timeout` is not defined in this file —
                # presumably an external watchdog (e.g. interruptingcow-style)
                # that aborts a stalled attempt after `seconds`; confirm the
                # import.  Failure to start it is non-fatal.
                try:
                    seconds = 20
                    timeout = Timeout(seconds)
                    timeout.start()
                except Exception as ex:
                    print(ex)
                # Disable content encoding so Content-Length matches the raw
                # bytes on disk, and request only the missing byte range.
                resume_header = {'Accept-Encoding': None,
                                 'Range': 'bytes=%d-' % resume_byte_pos}
                try:
                    with session.get(url, stream=True, verify=False,
                                     headers=resume_header, timeout=600) as response:
                        # Size of the remaining range reported by the server.
                        file_size = int(response.headers['Content-Length'])
                        if file_size > 1000000:
                            filesizemb = file_size / 1000000
                        else:
                            filesizemb = 1
                        print(file_size)
                        filename = dataloc
                        try:
                            # Append mode: new bytes land after the existing
                            # partial content at resume_byte_pos.
                            with open(filename, "ab") as text_file:
                                for chunk in response.iter_content(chunk_size=1024 * 1024):
                                    # Skip empty keep-alive chunks.
                                    if chunk:
                                        text_file.write(chunk)
                                        # Flush so the on-disk size is accurate
                                        # if the connection dies mid-transfer.
                                        text_file.flush()
                        except Exception as ex:
                            logging.error(f'write failed with error: {ex}')
                            print(ex)
                        print("exited with for file")
                except Exception as ex:
                    logging.error(f'Request failed with error: {ex}')
                    print(ex)
            except Exception as ex:
                logging.error(f'Attempt failed with error: {ex}')
                print(ex)
            print("closing text file")
        except Exception as e:
            print("FAILED DOWNLOAD 2")
            print(e)
    except Exception as e:
        print("FAILED DOWNLOAD 5")
        print(e)
def onionrequestthreadeddataleakdownload2(onionurl):
    """Download a file from a Tor hidden service, resuming until complete.

    Performs an initial full download, then compares the on-disk size with
    the server-reported Content-Length and repeatedly calls
    ``onionrequestthreadeddataleakdownloadresume`` with the current offset
    until the two sizes match.

    Parameters
    ----------
    onionurl : list
        Two-element sequence: ``[companyname, url]`` where ``url`` is the
        full .onion URL of the file to fetch.

    Returns
    -------
    list or None
        ``[dataloc, filenamebasename, url, filesizemb]`` on success —
        local path, file basename, source URL, and size in MB (minimum 1).
        ``None`` if the download could not be started at all.
    """
    companyname = onionurl[0]
    onionurl = onionurl[1]
    # Base directory for all leaks; per-company subfolder below it.
    dataloc = '/media/archangel/Elements/clop/dataleaks/'
    foldername = dataloc
    dataloc = dataloc + companyname + "/"
    try:
        if not os.path.isdir(dataloc):
            os.mkdir(dataloc)
    except Exception as e:
        # Best-effort: folder may already exist or the drive may be missing.
        print(e)
        print("folder not created")
    filename = os.path.basename(onionurl)
    filenamebasename = filename
    dataloc = dataloc + filename
    try:
        # 'socks5h' makes the proxy resolve .onion hostnames.
        session = requests.session()
        session.proxies = {}
        session.proxies['http'] = 'socks5h://localhost:9050'
        session.proxies['https'] = 'socks5h://localhost:9050'
        print("dataloc")
        print(dataloc)
        print("onionurl")
        print(onionurl)
        url = onionurl
        try:
            print("url")
            print(url)
            if not os.path.isdir(foldername):
                os.makedirs(foldername)
            try:
                # NOTE(review): `Timeout` is not defined in this file —
                # presumably an external watchdog that aborts a stalled
                # attempt; confirm the import.  Failure to start it is
                # non-fatal.
                try:
                    seconds = 20
                    timeout = Timeout(seconds)
                    timeout.start()
                except Exception as ex:
                    print(ex)
                # Disable content encoding so Content-Length matches the
                # raw byte count written to disk.
                headersac = {'Accept-Encoding': None}
                # Defaults in case the request below fails before the
                # headers are read (original left these possibly unbound).
                file_size = 0
                filesizemb = 1
                try:
                    with session.get(url, headers=headersac, timeout=600) as response:
                        file_size = int(response.headers['Content-Length'])
                        if file_size > 1000000:
                            filesizemb = file_size / 1000000
                        else:
                            filesizemb = 1
                        print(file_size)
                        filename = dataloc
                        # Initial full download: truncate and write from byte 0.
                        with open(filename, "wb") as text_file:
                            for chunk in response.iter_content(chunk_size=1024 * 1024):
                                # Skip empty keep-alive chunks.
                                if chunk:
                                    text_file.write(chunk)
                                    # Flush so the on-disk size is accurate
                                    # when the connection drops mid-transfer.
                                    text_file.flush()
                except Exception as ex:
                    logging.error(f'request failed with error: {ex}')
                    print(ex)
                print("exited with for file")
                # Resume loop: keep appending from the current offset until
                # the local file reaches the advertised size.
                file_size_offline = Path(filename).stat().st_size
                print("file size offline")
                while file_size_offline != file_size:
                    try:
                        print(file_size_offline)
                        print(file_size)
                        print("file size incomplete")
                        file_size_offline = Path(filename).stat().st_size
                        onionurllist = [companyname, onionurl]
                        onionrequestthreadeddataleakdownloadresume(
                            onionurllist, file_size_offline)
                        file_size_offline = Path(filename).stat().st_size
                    except Exception as ex:
                        print("redownload failed")
                        print(ex)
                print("LOOP FINISHED")
                print(file_size)
                print(file_size_offline)
                print(filename)
                # [local path, basename, source URL, size in MB]
                returnedlist = [dataloc, filenamebasename, url, filesizemb]
                return returnedlist
            except Exception as e:
                print("FAILED DOWNLOAD 2")
                print(e)
        except Exception as ex:
            logging.error(f'Attempt failed with error: {ex}')
            print(ex)
    except Exception as e:
        print("FAILED DOWNLOAD 5")
        print(e)
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。