# https://blog.csdn.net/u012662731/article/details/78537432
import inspect
import os
import sys

import requests
from bs4 import BeautifulSoup
def PrintLineFileFunc(string=''):
    """Print the caller's file name, function name and line number.

    Debug-trace helper. Writes one line of the form
    ``FILE:<file> FUNCTION:<function> LINE:<lineno> <string>`` to standard
    output, describing the location this function was called from.

    Args:
        string: Optional text appended after the location. Any object is
            accepted; it is rendered with ``str()``.
    """
    # The current frame's f_back is the caller's frame. This replaces
    # inspect.stack()[1], which builds a record for every frame on the stack
    # only to use the second one.
    frame = inspect.currentframe().f_back
    try:
        info = inspect.getframeinfo(frame)
        # os.path.basename understands the platform's separators; slicing at
        # the last '/' left the whole path in place on Windows.
        filename = os.path.basename(info.filename)
        print("FILE:{} FUNCTION:{} LINE:{} {}".format(
            filename, info.function, info.lineno, string))
    finally:
        # Release the frame promptly so it does not take part in a
        # reference cycle that keeps the caller's locals alive.
        del frame
class Downloader(object):
    """Collect the links of an index page and download each linked page's text.

    Typical use: call :meth:`get_download_url` once to fill ``names`` and
    ``urls``, then for each pair call :meth:`get_contents` and pass the
    result to :meth:`writer`.

    Attributes are documented in ``__init__``.
    """

    # Seconds to wait on the server before a request is abandoned; without a
    # timeout requests.get() can block forever.
    REQUEST_TIMEOUT = 30
    # Parser given to BeautifulSoup. Naming it keeps the parse independent of
    # which optional parsers happen to be installed and avoids bs4's
    # "no parser was explicitly specified" warning.
    PARSER = 'html.parser'
    # Number of leading links inside the listing that are ignored.
    # NOTE(review): presumably these precede the real chapter list on the
    # site - confirm against the page markup.
    SKIPPED_LINKS = 15

    def __init__(self, server='https://www.biqukan.com',
                 target='https://www.biqukan.com/1_1094/'):
        """Create a downloader.

        Args:
            server: Prefix prepended to every ``href`` found on the index
                page to build the download URL.
            target: URL of the index page that lists the links.
        """
        self.server = server
        self.target = target
        self.names = []  # link texts, filled by get_download_url()
        self.urls = []   # server + href for each link, same order as names
        self.nums = 0    # number of collected links

    def _request(self, url):
        """Fetch *url* and return the ``requests`` response."""
        return requests.get(url, timeout=self.REQUEST_TIMEOUT)

    def _first_div(self, response, css_class):
        """Return the first ``<div>`` with class *css_class* in *response*.

        Raises:
            IndexError: If the page contains no such div. The same exception
                type as the former ``find_all(...)[0]``, but with a message
                that names the URL and HTTP status.
        """
        soup = BeautifulSoup(response.text, self.PARSER)
        found = soup.find('div', class_=css_class)
        if found is None:
            raise IndexError(
                "no <div class=%r> found at %s (HTTP %s)"
                % (css_class, response.url, response.status_code))
        return found

    def get_download_url(self):
        """Read the index page and fill ``names``, ``urls`` and ``nums``.

        Every ``<a>`` inside the first ``div.listmain`` is collected, except
        the first ``SKIPPED_LINKS`` ones. Results of a previous call are
        discarded, so the three attributes always describe one listing.

        Raises:
            IndexError: If the page has no ``div.listmain``.
        """
        PrintLineFileFunc()
        req = self._request(self.target)
        PrintLineFileFunc()
        listing = self._first_div(req, 'listmain')
        # Search the matched tag directly; re-parsing str(tag) into a second
        # soup yields the same anchors at extra cost.
        links = listing.find_all('a')[self.SKIPPED_LINKS:]
        PrintLineFileFunc()
        # Start from empty lists so a repeated call cannot leave names/urls
        # longer than nums.
        self.names = []
        self.urls = []
        self.nums = len(links)
        for link in links:
            # get_text() always returns a plain str; .string is None when the
            # anchor has nested markup, which broke the concatenation below.
            title = link.get_text()
            print('each.string: ' + title)
            self.names.append(title)
            href = link.get('href')
            self.urls.append(self.server + href)
            print('href: ' + href)
        print('self.nums:')
        print(self.nums)

    def get_contents(self, target):
        """Download *target* and return the text of its ``div.showtxt``.

        Args:
            target: URL of the page to fetch.

        Returns:
            The div's text, with every run of eight non-breaking spaces
            (U+00A0) replaced by a blank line.

        Raises:
            IndexError: If the page has no ``div.showtxt``.
        """
        PrintLineFileFunc(target)
        req = self._request(target)
        PrintLineFileFunc('requested')
        body = self._first_div(req, 'showtxt')
        PrintLineFileFunc('dived')
        texts = body.text.replace('\xa0' * 8, '\n\n')
        PrintLineFileFunc('replace')
        return texts

    def writer(self, name, path, text):
        """Append one titled section to the file at *path* (UTF-8).

        Writes *name* on its own line, then *text*, then a blank line.

        Args:
            name: Heading written before the text.
            path: File to append to; created if it does not exist.
            text: The body. A str is written in one call; any other iterable
                of strings is still accepted and written item by item.
        """
        PrintLineFileFunc()
        print('name: ' + name + ' path: ' + path)
        with open(path, 'a', encoding='utf-8') as f:
            f.write(name + '\n')
            if isinstance(text, str):
                # writelines() on a str iterates it character by character
                f.write(text)
            else:
                f.writelines(text)
            f.write('\n\n')
def format_progress(done, total):
    """Return the progress line shown after *done* of *total* downloads.

    The figure is a real percentage (0-100). The previous code printed the
    raw 0..1 fraction followed by a ``%`` sign, and counted the item just
    finished as not done, so it never reached completion.

    Args:
        done: Number of items finished so far.
        total: Total number of items; 0 is reported as 100%.

    Returns:
        The text ending in a carriage return, so the next write overwrites it.
    """
    percent = 100.0 * done / total if total else 100.0
    return "almost downed:%.3f%%" % percent + '\r'


def main():
    """Download every collected link and append its text to ``a.txt``."""
    PrintLineFileFunc('main')
    dl = Downloader()
    dl.get_download_url()
    PrintLineFileFunc()
    # names and urls are filled in lockstep by get_download_url()
    for i, (name, url) in enumerate(zip(dl.names, dl.urls)):
        PrintLineFileFunc(str(i))
        dl.writer(name, 'a.txt', dl.get_contents(url))
        sys.stdout.write(format_progress(i + 1, dl.nums))
        sys.stdout.flush()
    print('download finished')


if __name__ == "__main__":
    main()