Java 简单爬虫

    xiaoxiao2025-06-07  16

    demo1

    package com.javabase;

    import java.io.BufferedReader;
    import java.io.IOException;
    import java.io.InputStreamReader;
    import java.net.HttpURLConnection;
    import java.net.URL;
    import java.nio.charset.StandardCharsets;
    import java.util.List;

    /**
     * Demo 1: fetches a single web page with {@link HttpURLConnection} and prints
     * the response body to standard output, one line at a time.
     */
    public class WabpageDemo {

        /**
         * Opens an HTTP connection to the target page and, when the server answers
         * with status 200, echoes every line of the body to {@code System.out}.
         * Any other status is reported on {@code System.err} and nothing is printed.
         *
         * @param args unused
         * @throws IOException if the connection cannot be opened or the body cannot be read
         */
        public static void main(String[] args) throws IOException {
            URL url = new URL("https://www.taobao.com");
            // openConnection() returns the generic URLConnection; an http(s) URL yields an HttpURLConnection.
            HttpURLConnection urlCon = (HttpURLConnection) url.openConnection();
            try {
                // getResponseCode() sends the request and returns the HTTP status as an int.
                int rscode = urlCon.getResponseCode();
                if (rscode != HttpURLConnection.HTTP_OK) {
                    System.err.println("Unexpected HTTP status: " + rscode);
                    return;
                }

                // The body is decoded as UTF-8. try-with-resources closes the reader
                // (and the underlying stream) even if reading fails part-way through.
                try (BufferedReader reader = new BufferedReader(
                        new InputStreamReader(urlCon.getInputStream(), StandardCharsets.UTF_8))) {
                    String line;
                    while ((line = reader.readLine()) != null) {
                        System.out.println(line);
                        // Example of filtering for anchor lines that carry a data-cid attribute:
                        // if (line.startsWith("<a") && line.contains("data-cid")) {
                        //     System.out.println(line.substring(line.indexOf("\">") + 2, line.indexOf("</a>")));
                        // }
                    }
                }
            } finally {
                urlCon.disconnect();
            }
        }
    }

    demo2

    需要在 Maven 项目中添加 jsoup 依赖:

    <dependency> <groupId>org.jsoup</groupId> <artifactId>jsoup</artifactId> <version>1.11.3</version> </dependency> package com.javabase; import java.io.IOException; import org.jsoup.Connection; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; public class WabpageDemo2 { public static void main(String[] args) { // TODO Auto-generated method stub try { Document doc=Jsoup.connect("https://www.taobao.com").get(); // Connection con=Jsoup.connect("http://www1.sxcredit.gov.cn/public/infocomquery.do?method=publicIndexQuery"); // con.data("query.enterprisename", "兴"); // Document doc2=con.timeout(100000).post(); Elements es=doc.getElementsByTag("a"); for(Element e:es){ String text=e.text(); System.out.println(text); String url=e.attr("href"); System.out.println(url); } } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } } }

    demo3

    package com.javabase; import java.io.IOException; import org.jsoup.Connection; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; public class WabpageDemo3 { public static void main(String[] args) { // TODO Auto-generated method stub try { Document doc=Jsoup.connect("https://blog.csdn.net/h356363/article/details/90579050").get(); Elements es=doc.getElementsByTag("li"); for(Element e:es){ Element e1=e.child(0); System.out.println(e1.text()); System.out.println(e1.attr("href")); } } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } } }
    最新回复(0)