// 从网站中爬取文字和图片 — crawl text and images from a website
/**
 * Console crawler for the route search results of www.jinmalvyou.com.
 *
 * <p>Starting from a search-result URL it walks every result page, prints the
 * name, introduction and price of each listed route, and downloads each
 * route's thumbnail image to a local directory.
 */
public class TravelSpider {

    /** Scheme and host prepended to the "next page" link's href. */
    private static final String SITE_ROOT = "http://www.jinmalvyou.com";

    /** Path prefix under which downloaded route images are stored. */
    private static final String IMAGE_DIR = "E:\\63\\travelImages\\";

    /**
     * Entry point: crawls the search results for the hard-coded keyword.
     *
     * @param args ignored
     * @throws Exception if a page cannot be fetched or an image cannot be saved
     */
    public static void main(String[] args) throws Exception {
        String url = "http://www.jinmalvyou.com/search/index/view_type/1/keyword/国内";
        fetchTravelData(url);
    }

    /**
     * Fetches the result page at {@code url}, prints every route on it, then
     * follows the {@code a.next} link until no further page is offered.
     *
     * <p>Pages are walked iteratively rather than recursively so that a long
     * result list cannot overflow the call stack, and a page URL that has
     * already been visited ends the crawl instead of looping forever.
     *
     * @param url URL of the first result page
     * @throws Exception if a page cannot be fetched or an image cannot be saved
     */
    private static void fetchTravelData(String url) throws Exception {
        java.util.Set<String> visited = new java.util.HashSet<>();
        String pageUrl = url;
        while (pageUrl != null && visited.add(pageUrl)) {
            Document document = Jsoup.connect(pageUrl).get();
            for (Element item : document.select(".rl-b-li")) {
                printRoute(item);
            }

            // select() yields an empty Elements (never null) when nothing matches.
            Elements nextLinks = document.select("a.next");
            String href = nextLinks.attr("href");
            pageUrl = (nextLinks.isEmpty() || href.isEmpty()) ? null : SITE_ROOT + href;
        }
    }

    /**
     * Prints the name, introduction, price and locally saved image path of a
     * single route list item, followed by a separator line.
     *
     * @param item the {@code .rl-b-li} element describing one route
     * @throws IOException if the route image cannot be downloaded or written
     */
    private static void printRoute(Element item) throws IOException {
        String rname = item.select(".pro-title>a").text();
        System.out.println("路线名称:" + rname);

        // Only the first ".pro-colomn" element is used as the introduction;
        // an item without one yields an empty introduction instead of throwing.
        Elements introduceElements = item.select(".pro-colomn");
        String routeIntroduce = introduceElements.isEmpty() ? "" : introduceElements.get(0).text();
        System.out.println("路线介绍:" + routeIntroduce);

        String price = item.select(".price>strong").text();
        System.out.println("路线价格:" + price);

        // The src value is prefixed with "http:" to form the download URL.
        // An item without an image is reported with an empty path rather than
        // attempting to download the meaningless URL "http:".
        String src = item.select(".pro-img img").attr("src");
        String localPath = src.isEmpty() ? "" : saveImage("http:" + src);
        System.out.println("路线图片:" + localPath);

        System.out.println("-----------------------------------");
    }

    /**
     * Downloads the image at {@code rimageSrc} into the local image directory,
     * naming the file after the last path segment of the URL.
     *
     * <p>The file is only written when the server answers with HTTP 200 and a
     * response body; the target path is returned in every case. The HTTP
     * client, the response and both streams are closed even if the transfer
     * fails part-way.
     *
     * @param rimageSrc absolute URL of the image
     * @return the local path the image is (or would have been) stored at
     * @throws IOException if the request fails or the file cannot be written
     */
    private static String saveImage(String rimageSrc) throws IOException {
        String rimageName = rimageSrc.substring(rimageSrc.lastIndexOf("/") + 1);
        String localPath = IMAGE_DIR + rimageName;

        try (CloseableHttpClient client = HttpClients.createDefault();
             CloseableHttpResponse response = client.execute(new HttpGet(rimageSrc))) {
            if (response.getStatusLine().getStatusCode() == 200) {
                HttpEntity entity = response.getEntity();
                if (entity != null) {
                    // Create the target directory on first use; if this fails the
                    // FileOutputStream below reports the problem as an IOException.
                    java.io.File parent = new java.io.File(localPath).getParentFile();
                    if (parent != null) {
                        parent.mkdirs();
                    }
                    try (InputStream inputStream = entity.getContent();
                         FileOutputStream outputStream = new FileOutputStream(localPath)) {
                        IOUtils.copy(inputStream, outputStream);
                    }
                }
            }
        }
        return localPath;
    }
}