Python 2.7 代码实现: 爬虫主文件:
# -*- coding: utf-8 -*-
"""Minimal Scrapy spider (Python 2.7) that prints the cookies a site sets."""
import sys

import scrapy
from scrapy.http import Request

# Python 2 default-encoding override; not strictly required.
reload(sys)
sys.setdefaultencoding("utf-8")


class DmozSpider(scrapy.Spider):
    """Fetch one page and print the cookies the server sends back.

    Spiders must subclass ``scrapy.Spider``.
    """

    name = "itcast"  # spider name

    # Browser-like User-Agent, sent with the initial request.
    header = {
        'User-Agent': (
            'Mozilla/5.0 (Windows NT 10.0; WOW64) '
            'AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/65.0.3325.181 Safari/537.36'
        ),
    }

    def start_requests(self):
        """Build the initial request (used instead of ``start_urls``).

        Returns:
            A one-element list holding the request for the start page,
            carrying the ``header`` User-Agent and ``cookiejar`` meta 1.
        """
        return [
            Request(
                'https://www.baidu.com',
                headers=self.header,  # actually send the configured User-Agent
                meta={'cookiejar': 1},
                callback=self.parse,
            )
        ]

    def parse(self, response):
        """Print the cookies set by the response, between two marker lines.

        Args:
            response: the downloaded response for the start page.

        Returns:
            None. Nothing is scraped or followed; Scrapy only accepts
            requests, items, dicts or None from a callback.
        """
        # Servers deliver cookies in ``Set-Cookie``; ``Cookie`` is the header
        # a client sends, so it is not where response cookies are found.
        cookies = response.headers.getlist('Set-Cookie')
        # Single-argument parenthesised print: same output on Python 2.
        print(1111111111111111111111)
        print(cookies)
        print(1111111111111111111111)