'''
1、房王:正则
http://gz.ihk.cn/myxf/houselist/?mark=gzxf089
图片,楼盘名,楼盘描述,主力户型,地址,价格
放在一个字典中
es_conlistbox
思路:
1.获取URL规律
第一页:http://gz.ihk.cn/myxf/houselist/p1/?mark=gzxf089
第二页:http://gz.ihk.cn/myxf/houselist/p2/?mark=gzxf089
第三页:http://gz.ihk.cn/myxf/houselist/p3/?mark=gzxf089
2.匹配 html 中 class 为 es_conlistbox 的 div,其中的内容是信息列表
在每个页面中:
完整页面:30项目
3.创建匹配规则
图片:<img class="lazy" data-original="http://img1.appweb.ihk.cn/gd/estate/scan_photo/2016-09-30/7858d194577140dda031291e58a115db(WJ-3600-2016-09-30)" onerror="javascript: this.src = 'http://gz.ihk.cn/newsecond/images/default.jpg'">
'''
import requests,re
from lxml import etree
# from lxml import btree
#用类来写
# class House_ling():
# def __init__(self):
# self.run()
#
# def run(self):
# base_url = 'http://gz.ihk.cn/myxf/houselist/?mark=gzxf089'
# # params = {
# # 'mark': 'gzxf089'
# # }
#
# #获取第一页的内容
# response = requests.get(base_url)
#
#
#
#
# def get_alldata(self,response):
# pass
#用函数写
# Patterns are compiled once at import time instead of once per listing.
# Each pattern has exactly one capture group: the text of the field it names.
_HOUSE_BLOCK_RE = re.compile(
    r'<div class="n_conlistpic">(.*?)<div class="n_conlistrliioc">', re.S)
_PICTURE_RE = re.compile(r'data-original="(.*?)" onerror')
_NAME_RE = re.compile(r'<a><strong>(.*?)</strong></a>')
_DESCRIPTION_BLOCK_RE = re.compile(
    r'<div class="n_conlistrbrief">(.*?)</div>', re.S)
# re.S lets a description span several lines; the search runs on the raw
# HTML rather than on the repr() of a list, so the text is not escaped.
_DESCRIPTION_SPAN_RE = re.compile(r'<span>(.*?)</span>', re.S)
_HOUSE_TYPE_RE = re.compile(
    r'<div class="n_conlistradd"><span>(.*?)</span></div>')
_POSITION_RE = re.compile(r'</em><span>(.*?)</span></div>')
_PRICE_RE = re.compile(r'<li><strong>(\d+)</strong><b>')


def _first_group(pattern, text, default=''):
    """Return group 1 of the first match of *pattern* in *text*, else *default*."""
    match = pattern.search(text)
    return match.group(1) if match else default


def _first_description(house_info):
    """Return the first ``<span>`` text inside any description block, else ''."""
    for block in _DESCRIPTION_BLOCK_RE.findall(house_info):
        match = _DESCRIPTION_SPAN_RE.search(block)
        if match:
            return match.group(1)
    return ''


def houseling(base_url='http://gz.ihk.cn/myxf/houselist/?mark=gzxf089',
              page_str=None):
    """Scrape one listing page and print/return the houses found on it.

    Every listing is the HTML between ``<div class="n_conlistpic">`` and the
    next ``<div class="n_conlistrliioc">``. For each one, the picture URL,
    name, description, main house type, address and price are extracted with
    regular expressions and printed on one line, prefixed with a 1-based
    counter.

    Args:
        base_url: URL of the listing page to download. Only used when
            ``page_str`` is None.
        page_str: Already-downloaded HTML to parse. When given, no network
            request is made.

    Returns:
        A list with one dict per listing, in page order, with the keys
        ``picture``, ``name``, ``description``, ``house_type``, ``position``
        and ``price``. A field that is not found in a listing is stored as an
        empty string instead of aborting the whole run.

    Raises:
        requests.RequestException: if the page has to be downloaded and the
            request fails or times out.
    """
    if page_str is None:
        # A timeout keeps the script from hanging forever on a dead server.
        page_str = requests.get(base_url, timeout=10).text

    houses = []
    for count, house_info in enumerate(_HOUSE_BLOCK_RE.findall(page_str), 1):
        house = {
            'picture': _first_group(_PICTURE_RE, house_info),
            'name': _first_group(_NAME_RE, house_info),
            'description': _first_description(house_info),
            'house_type': _first_group(_HOUSE_TYPE_RE, house_info),
            'position': _first_group(_POSITION_RE, house_info),
            'price': _first_group(_PRICE_RE, house_info),
        }
        print(count, ':', house['picture'], house['name'],
              house['description'], house['house_type'],
              house['position'], house['price'])
        houses.append(house)
    return houses
# Run the scraper only when this file is executed directly as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    houseling()