Python requests 解决乱码问题

    xiaoxiao2023-11-02  126

    # -*- coding:utf-8 -*-
    """Fetch a web page with requests and work around mis-decoded (garbled) text."""
    import codecs
    import logging
    import random

    import requests

    logger = logging.getLogger(__name__)

    # U+FFFD is the character a decoder substitutes for bytes it cannot map, so
    # its presence in the decoded text is used as the "wrong encoding" signal.
    REPLACEMENT_CHAR = '\ufffd'

    # Encodings tried, in order, when the declared one yields garbled text.
    FALLBACK_ENCODINGS = ('utf-8', 'gbk')

    USER_AGENTS = (
        "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.1 Safari/537.36",
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.0 Safari/537.36",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.0 Safari/537.36",
        "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2226.0 Safari/537.36",
        "Mozilla/5.0 (Windows NT 6.4; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2225.0 Safari/537.36",
        "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2225.0 Safari/537.36",
        "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2224.3 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/40.0.2214.93 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2062.124 Safari/537.36",
        "Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2049.0 Safari/537.36",
        "Mozilla/5.0 (Windows NT 4.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2049.0 Safari/537.36",
    )


    def _canonical_codec(name):
        """Return the canonical codec name for *name* ('UTF8' -> 'utf-8')."""
        try:
            return codecs.lookup(name).name
        except LookupError:
            # Unknown label: fall back to a plain case-insensitive comparison.
            return name.lower()


    def _decode_text(response):
        """Return the body of *response* decoded with the best available encoding.

        The encoding declared inside the document is tried first.  If the
        result contains replacement characters, each entry of
        FALLBACK_ENCODINGS that differs from the declared encoding is tried in
        turn.  When no candidate decodes cleanly, the declared encoding is
        kept, because the page may legitimately contain U+FFFD.
        """
        declared = requests.utils.get_encodings_from_content(response.text)
        if not declared:
            # Nothing declared in the document: keep the encoding requests chose.
            return response.text

        first_choice = declared[0]
        response.encoding = first_choice
        first_text = response.text
        if REPLACEMENT_CHAR not in first_text:
            return first_text

        tried = _canonical_codec(first_choice)
        for candidate in FALLBACK_ENCODINGS:
            if _canonical_codec(candidate) == tried:
                continue  # Already known to produce garbled output.
            response.encoding = candidate
            candidate_text = response.text
            if REPLACEMENT_CHAR not in candidate_text:
                return candidate_text

        # No fallback was clean; do not leave the response on the last guess.
        response.encoding = first_choice
        return first_text


    class Get_content(object):
        """Download pages with a randomly chosen desktop Chrome User-Agent."""

        def __init__(self):
            # The User-Agent is picked once per instance, not once per request.
            self.headers = {'User-Agent': random.choice(USER_AGENTS)}

        def info(self, url, proxy_ip=None):
            """Fetch *url* and return its decoded text.

            Args:
                url: Address to request.
                proxy_ip: Optional proxy URL (e.g. ``http://10.10.1.10:3128``)
                    used for both the http and https schemes.

            Returns:
                The decoded page text when the status code is 200; ``None``
                for any other status code or when the request fails.
            """
            # A separate local keeps the caller-visible parameter untouched.
            proxies = {
                'http': proxy_ip,
                'https': proxy_ip,
            }
            try:
                # NOTE(review): verify=False disables TLS certificate checks.
                # Kept because callers may depend on it; confirm it is intended.
                response = requests.get(
                    url,
                    headers=self.headers,
                    timeout=(15, 20),
                    proxies=proxies,
                    verify=False,
                )
            except requests.RequestException as exc:
                logger.warning('request to %s failed: %s', url, exc)
                return None

            if response.status_code != 200:
                return None
            return _decode_text(response)


    if __name__ == '__main__':
        g = Get_content()
        a = g.info('https://www.baidu.com/s?wd=ip', 'http://****:808')
        print(a)
        b = g.info('https://www.baidu.com/s?wd=ip')
        print(b)

     

    最新回复(0)