# Start of script.
import csv
import json
import os
import re
import time
import urllib
import urllib.parse

import requests
from lxml import etree
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
# HTTP request headers copied from a browser session on douban.com.
#
# The original literal listed "User-Agent" twice; in a dict literal the later
# entry silently overwrites the earlier one, so only the Chrome/74 value was
# ever effective.  The dead duplicate is removed and the effective value kept
# in the key's original (first) position.
#
# NOTE(review): the "Cookie" value embeds what look like live session tokens
# (e.g. `dbcl2`, `ck`).  Hard-coding them in source is a credential leak and
# they will expire; consider loading them from the environment or reusing the
# cookies collected from the browser session instead.
header = {
    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36",
    "Accept": "*/*",
    "Accept-Encoding": "gzip, deflate, br",
    "Accept-Language": "en-US,en;q=0.9,zh-CN;q=0.8,zh;q=0.7",
    "Connection": "keep-alive",
    "Cookie": 'll="108293"; bid=ZhBAGA3s9Z4; __utmc=30149280; __utmz=30149280.1558788663.1.1.utmcsr=wx.qq.com|utmccn=(referral)|utmcmd=referral|utmcct=/; _vwo_uuid_v2=DC2C59F1AAFB62E6AC80AE2F5138E05E0|2f9fdfd8614610d60cd2be061072faa7; push_noty_num=0; push_doumail_num=0; dbcl2="186705531:tdt3UnU/3+8"; ck=g3On; _pk_ref.100001.8cb4=["","",1558790349,"https://accounts.douban.com/passport/login"]; __utmv=30149280.18670; _pk_id.100001.8cb4=1ce6ca18acd029a1.1558790349.1.1558792701.1558790349.; ap_v=0,6.0; __utma=30149280.1804237607.1558788663.1558795739.1558798475.3; __utmb=30149280.0.10.1558798475',
    "Host": "www.douban.com",
    "Referer": "https://movie.douban.com/subject/30170448/collections?start=0",
}
# Start a Chrome session through the chromedriver binary at a fixed local path
# and open the Douban login page.
# NOTE(review): `executable_path` is the Selenium 3 / early Selenium 4 way of
# pointing at the driver binary; newer releases expect a `Service` object
# instead -- confirm against the installed Selenium version.
driver = webdriver.Chrome(
    executable_path='/home/lbc/Documents/chromedriver',
)
driver.get('https://accounts.douban.com/passport/login')

# Snapshot the browser's current cookies as a {name: value} mapping.
cookies = driver.get_cookies()
cookie_dict = {cookie['name']: cookie['value'] for cookie in cookies}
from selenium.webdriver import ActionChains

# Click the second tab of the login widget (presumably the password-login tab,
# since the username/password fields are filled in right after -- confirm
# against the live page).
# `ActionChains.click(element)` moves the pointer onto the element before
# clicking, so the separate hover-only chain the original ran first was
# redundant and has been folded into a single hover-and-click chain.
element = driver.find_element(
    By.XPATH,
    '//*[@id="account"]/div[2]/div[2]/div/div[1]/ul[1]/li[2]',
)
ActionChains(driver).move_to_element(element).click(element).perform()

# Fill in the credentials.
# NOTE(review): these literals are masked placeholders; they must be replaced
# with real values for the login to succeed, and real credentials should not
# be committed to source.
driver.find_element(By.NAME, 'username').send_keys('1532624****')
driver.find_element(By.NAME, 'password').send_keys('****')

# Click the link that submits the login form.
element = driver.find_element(
    By.XPATH,
    '//*[@id="account"]/div[2]/div[2]/div/div[2]/div[1]/div[4]/a',
)
ActionChains(driver).move_to_element(element).click(element).perform()
# Open the Douban home page.
# NOTE(review): this navigation is issued without any explicit wait for the
# preceding action to complete; if the previous click triggers an asynchronous
# request, navigating away immediately may race with it.  `WebDriverWait` is
# imported by this file but never used -- consider waiting here.
driver.get('https://www.douban.com/')

# Snapshot the browser's current cookies as a {name: value} mapping.
cookies = driver.get_cookies()
cookie_dict = {cookie['name']: cookie['value'] for cookie in cookies}

# Type the film title into the search box.
driver.find_element(By.ID, 'inp-query').send_keys('何以为家')

# Click the search form's submit input.  A single hover-and-click chain is
# enough: `click(element)` already moves the pointer onto the element, so the
# original's extra hover-only chain was redundant.
element = driver.find_element(
    By.XPATH,
    '//*[@id="db-nav-sns"]/div/div/div[2]/form/fieldset/div[2]/input',
)
ActionChains(driver).move_to_element(element).click(element).perform()
# Load the search-results page for the film title directly by URL.
driver.get('https://www.douban.com/search?source=suggest&q=何以为家')

# Snapshot the browser's current cookies as a {name: value} mapping.
cookies = driver.get_cookies()
cookie_dict = {cookie['name']: cookie['value'] for cookie in cookies}

# Click the title link of a search result (presumably the first hit -- the
# positional XPath is tied to the page layout at the time of writing).
# A single hover-and-click chain is enough: `click(element)` already moves the
# pointer onto the element, so the original's extra hover-only chain was
# redundant.
element = driver.find_element(
    By.XPATH,
    '//*[@id="content"]/div/div[1]/div[3]/div[2]/div[1]/div[2]/div/h3/a',
)
ActionChains(driver).move_to_element(element).click(element).perform()
# Load the "collections" listing of subject 30170448, starting at offset 0.
driver.get('https://movie.douban.com/subject/30170448/collections?start=0')

# Snapshot the browser's current cookies as a {name: value} mapping.
cookies = driver.get_cookies()
cookie_dict = {cookie['name']: cookie['value'] for cookie in cookies}

# Hover over and click the second <div> inside #collections_tab.  A single
# chain is enough: `click(element)` already moves the pointer onto the
# element, so the original's extra hover-only chain was redundant.
element = driver.find_element(By.XPATH, '//*[@id="collections_tab"]/div[2]')
ActionChains(driver).move_to_element(element).click(element).perform()
# Grab the rendered HTML of the current page, then shut the browser down: the
# remaining work is pure parsing, and without quit() the Chrome/chromedriver
# session opened by this script is never released.
data = driver.page_source
driver.quit()

# Parse the HTML with lxml and locate the second <div> inside #collections_tab.
tree = etree.HTML(data)
containers = tree.xpath('//*[@id="collections_tab"]/div[2]')

for container in containers:
    # Every XPath below is relative to the container and matches across all
    # of its <table> children, so each variable is a list of values rather
    # than a single record.  The variable names follow the original author's
    # labels; what each cell actually holds should be confirmed against the
    # live page.
    # The first list was originally named `time`, which shadowed the imported
    # `time` module; it is renamed to `times`.
    times = container.xpath('./table/tbody/tr/td[2]/p/text()')
    star = container.xpath('./table/tbody/tr/td[2]/p/span/@title')
    pic = container.xpath('./table/tbody/tr/td[1]/a/img/@src')
    name = container.xpath('./table/tbody/tr/td[1]/a/img/@alt')
    content = container.xpath('./table/tbody/tr/td[2]/p[2]/text()')
    userurl = container.xpath('./table/tbody/tr/td[1]/a/@href')
    print(times, star, pic, name, content, userurl)
# When reposting, please credit the original source: https://yun.8miu.com/read-134073.html