Python爬虫实现猫眼电影搜索

    xiaoxiao2024-12-26  50

    """Search maoyan.com for movies matching a keyword and print the results.

    The script asks for a keyword, walks the paginated search results and
    prints the title, category and release date of every movie it finds.
    Collected rows accumulate in the module-level ``movie`` list.
    """
    import re
    import sys
    import time
    from urllib.parse import urlencode

    import requests
    from bs4 import BeautifulSoup  # noqa: F401  (unused here; kept from the original file)

    # Every movie collected so far, as [name, category, release_date] rows.
    movie = []

    # Patterns for the three fields scraped from each result page.
    _NAME_RE = re.compile(
        'class="channel-detail movie-item-title" title="(.*?)">', re.S)
    _CATEGORY_RE = re.compile('class="movie-item-cat">(.*?)</div>')
    _RELEASE_RE = re.compile('class="movie-item-pub">(.*?)</div>')

    _SEPARATOR = '----------------------------------------------------------------------------'


    def search(url, retries=3, timeout=10):
        """Fetch *url* and return the response body as text.

        Args:
            url: Address to request.
            retries: Maximum number of attempts (values below 1 count as 1).
            timeout: Per-request timeout in seconds.

        Returns:
            The decoded response body.

        Raises:
            requests.RequestException: If every attempt fails.
        """
        headers = {'user-agent': 'Mozilla/5.0'}
        attempts = max(1, retries)
        last_error = None
        for attempt in range(attempts):
            try:
                response = requests.get(url, headers=headers, timeout=timeout)
            except requests.RequestException as err:
                # Remember the failure and retry a bounded number of times
                # instead of recursing without limit.
                last_error = err
                if attempt + 1 < attempts:
                    time.sleep(1)
            else:
                return response.text
        raise last_error


    def _at(values, index, default=''):
        """Return ``values[index]``, or *default* when the list is too short."""
        return values[index] if index < len(values) else default


    def parse_page(url, k):
        """Scrape one result page, store its movies and print the new ones.

        Args:
            url: Result-page address to download.
            k: Index into ``movie`` from which printing starts; entries
                ``movie[k:]`` are printed, numbered from ``k + 1``.

        Exits the program (via ``sys.exit``) after printing a summary when
        the page contains no movie titles.
        """
        html = search(url)
        names = _NAME_RE.findall(html)
        categories = _CATEGORY_RE.findall(html)
        release_dates = _RELEASE_RE.findall(html)

        if not names:
            print('爬取结束')
            print('共' + str(len(movie)) + '部')
            sys.exit()

        for index, name in enumerate(names):
            # The three patterns are matched independently, so the lists may
            # differ in length; pad missing fields rather than crash.
            category = _at(categories, index)
            release = _at(release_dates, index) or '暂无'
            movie.append([name, category, release])

        for number, entry in enumerate(movie[k:], start=k + 1):
            print('第' + str(number) + '部:')
            print('电影名称:' + entry[0] + ' 电影类型:' + entry[1]
                  + ' 上映时间:' + entry[2])
            print(_SEPARATOR)


    def main(k, sname, offset):
        """Build the search URL for *sname* at *offset* and process that page.

        Args:
            k: Forwarded to ``parse_page`` as the first index to print.
            sname: Search keyword typed by the user.
            offset: Value of the ``offset`` query parameter.
        """
        # URL-encode the keyword so characters such as '&', '#' or spaces
        # cannot corrupt the query string.
        query = urlencode({'kw': sname, 'type': 0, 'offset': offset})
        parse_page('https://maoyan.com/query?' + query, k)


    if __name__ == '__main__':
        sname = input("请输入电影信息:")
        print('-----------------------------开始爬取---------------------------------')
        time.sleep(1)
        # Up to 20 pages, stepping the offset by 20 per page.
        for page in range(20):
            start = page * 20
            main(start, sname, offset=start)
            time.sleep(1)  # pause between page requests

     

    最新回复(0)