BS4的解析次序

    xiaoxiao2022-07-07  187

    htm2 = requests.get(url,headers)     soup = BeautifulSoup(htm2.content,'lxml')         books_lst = []         name = soup.find('div',class_='listmain')     #父div,子dl,孙dt(总标题)和dd(每一章)     if name:         dd_items = name.find('dl')         dt_num = 0         for n in dd_items.children:             ename = str(n.name).strip()             if ename == 'dt':                 dt_num += 1             if ename != 'dd':                 continue             books_info = {}             if dt_num == 2:                 durls = n.find_all('a')[0]                 books_info['name'] = durls.get_text()                 books_info['url'] = 'http://www.biqukan.com' + durls.get('href')                 books_lst.append(books_info)     return books_lst     

    最新回复(0)