更多知识请访问 www.itkc8.com
使用线程池的代码(多线程并发抓取)
package com.cowboy.service;

import java.util.concurrent.*;

/**
 * Process-wide thread pool shared by the crawler examples.
 *
 * <p>The pool keeps 50 core threads and queues up to 10000 pending tasks. Because the
 * queue is bounded, the pool only grows beyond the core size (up to 100 threads) once
 * the queue is full; if the pool and the queue are both saturated, the task is executed
 * by the submitting thread ({@link ThreadPoolExecutor.CallerRunsPolicy}).
 *
 * <p>Call {@link #shutdown()} when all work has been submitted; otherwise the idle core
 * threads keep the JVM alive.
 *
 * @author hux
 * @version 1.0 (2019/5/22 15:40)
 */
public class CommonThreadPool {

    private static final ExecutorService EXECUTOR = new ThreadPoolExecutor(
            50,
            100,
            0L,
            TimeUnit.MILLISECONDS,
            new LinkedBlockingQueue<Runnable>(10000),
            new ThreadPoolExecutor.CallerRunsPolicy());

    /**
     * Runs the command on the shared pool without returning a handle to it.
     *
     * @param command the task to run
     */
    public static void execute(Runnable command) {
        EXECUTOR.execute(command);
    }

    /**
     * Submits a task that produces no value. {@code future.get()} returns {@code null}
     * once the task has finished and blocks the caller until then.
     *
     * @param command the task to run
     * @return a future representing completion of the task
     */
    public static Future<?> submit(Runnable command) {
        return EXECUTOR.submit(command);
    }

    /**
     * Submits a task that produces a value; the value is available through
     * {@code future.get()}.
     *
     * @param command the task to run
     * @param <T> the type of the task's result
     * @return a future holding the task's result
     */
    public static <T> Future<T> submit(Callable<T> command) {
        return EXECUTOR.submit(command);
    }

    /** Stops accepting new tasks; tasks that were already submitted still run. */
    public static void shutdown() {
        EXECUTOR.shutdown();
    }
}

package com.cowboy.service;

import com.cowboy.model.Article;
import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Crawls every article-list page of one CSDN blog concurrently, one pool task per page,
 * and prints the elapsed time followed by the collected articles.
 *
 * @author hux
 * @version 1.0 (2019/5/22 15:41)
 */
public class ThreadTest {

    // Alternative blog used during testing: "https://blog.csdn.net/foruok"
    private static final String URL = "https://blog.csdn.net/valada";

    /** Matches the {@code var pageSize = N} assignment in the page's inline script. */
    private static final Pattern PAGE_SIZE =
            Pattern.compile("var\\s+pageSize\\s*=\\s*(\\d{1,9})\\b");

    /** Matches the {@code var listTotal = N} assignment in the page's inline script. */
    private static final Pattern LIST_TOTAL =
            Pattern.compile("var\\s+listTotal\\s*=\\s*(\\d{1,9})\\b");

    public static void main(String[] args) {
        int totalPage = getTotalPage();
        System.out.println("总页数:" + totalPage);

        long l1 = System.currentTimeMillis();
        List<Article> articleList = new ArrayList<>();
        try {
            List<Future<List<Article>>> futureList = new ArrayList<>();
            for (int pageNow = 1; pageNow <= totalPage; pageNow++) {
                final int page = pageNow;
                Callable<List<Article>> task = () -> getArtitcleByPage(page);
                futureList.add(CommonThreadPool.submit(task));
            }

            // The main thread is free to do other work here while the pool fetches pages.
            System.out.println("now waiting sub thread done.");

            // Collect the results; future.get() blocks until the page task has finished.
            for (Future<List<Article>> future : futureList) {
                try {
                    articleList.addAll(future.get());
                } catch (ExecutionException e) {
                    // One failed page must not discard the pages that did succeed.
                    System.err.println("Failed to fetch a page: " + e.getCause());
                } catch (InterruptedException e) {
                    // Restore the flag so callers can see the interruption, then stop waiting.
                    Thread.currentThread().interrupt();
                    break;
                }
            }
            System.out.println(System.currentTimeMillis() - l1);
            System.out.println(articleList.size() + " " + articleList);
        } finally {
            // Always release the pool, otherwise its core threads keep the JVM alive.
            CommonThreadPool.shutdown();
        }

        // Print every article of the blog.
        for (Article article : articleList) {
            System.out.println("文章标题:" + article.getTitle().replaceFirst("原", ""));
            System.out.println("文章绝对路劲地址:" + article.getAddress());
            System.out.println("文章简介:" + article.getDesption());
            System.out.println("发表时间:" + article.getTime());
            System.out.println("阅读数量:" + article.getReadNum());
            System.out.println("评论数量:" + article.getCommentNum());
        }
    }

    /**
     * Loads the blog's front page and derives the number of list pages from the
     * {@code pageSize} and {@code listTotal} variables found in one of its inline scripts.
     *
     * @return the number of list pages, or 1 when the page does not expose the counters
     * @throws UncheckedIOException if the front page cannot be downloaded
     */
    public static int getTotalPage() {
        Connection conn = Jsoup.connect(URL)
                .userAgent("Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:50.0) Gecko/20100101 Firefox/50.0")
                .timeout(8000)
                .method(Connection.Method.GET);
        Document doc;
        try {
            doc = conn.get();
        } catch (IOException e) {
            // Without the document there is nothing to parse; fail with the real cause
            // instead of continuing with a null document.
            throw new UncheckedIOException("Failed to load " + URL, e);
        }

        for (Element script : doc.body().getElementsByTag("script")) {
            int pages = parseTotalPage(script.data());
            if (pages >= 0) {
                return pages;
            }
        }
        return 1;
    }

    /**
     * Extracts the page count from one inline script.
     *
     * @param scriptText the script source
     * @return ceil(listTotal / pageSize), or -1 if the script does not define both
     *     variables or the page size is not positive
     */
    private static int parseTotalPage(String scriptText) {
        Matcher size = PAGE_SIZE.matcher(scriptText);
        Matcher total = LIST_TOTAL.matcher(scriptText);
        if (!size.find() || !total.find()) {
            return -1;
        }
        int pageSize = Integer.parseInt(size.group(1));
        int listTotal = Integer.parseInt(total.group(1));
        if (pageSize <= 0) {
            return -1;
        }
        // Integer ceiling division; both values have at most 9 digits, so no overflow.
        return (listTotal + pageSize - 1) / pageSize;
    }

    /**
     * Downloads one article-list page and converts each {@code article-item-box} entry
     * into an {@link Article}.
     *
     * @param pageNow 1-based page number
     * @return the articles found on that page; entries without a title link are skipped
     * @throws IOException if the page cannot be downloaded
     */
    public static List<Article> getArtitcleByPage(int pageNow) throws IOException {
        Connection conn = Jsoup.connect(URL + "/article/list/" + pageNow)
                .userAgent("Mozilla/5.0 (Windows NT 6.1; rv:47.0) Gecko/20100101 Firefox/47.")
                .timeout(8000)
                .method(Connection.Method.GET);
        Document doc = conn.get();

        List<Article> resultList = new ArrayList<>();
        for (Element item : doc.body().getElementsByClass("article-item-box")) {
            Elements links = item.select("div h4 a");
            if (links.isEmpty()) {
                // No title link: nothing useful to record for this entry.
                continue;
            }
            Element link = links.get(0);

            String time = "";
            String readNum = "";
            String commentNum = "";
            Elements infoBoxes = item.getElementsByClass("info-box");
            if (!infoBoxes.isEmpty()) {
                Element info = infoBoxes.get(0);
                time = info.getElementsByClass("date").text();
                Elements counters = info.getElementsByClass("read-num");
                readNum = numAt(counters, 0);
                commentNum = numAt(counters, 1);
            }

            Article entity = new Article();
            entity.setAddress(link.attr("href"));
            entity.setTitle(link.text());
            entity.setDesption(firstText(item.getElementsByClass("content")));
            entity.setTime(time);
            entity.setReadNum(readNum);
            entity.setCommentNum(commentNum);
            resultList.add(entity);
        }
        return resultList;
    }

    /** Returns the text of the first element, or an empty string if there is none. */
    private static String firstText(Elements elements) {
        return elements.isEmpty() ? "" : elements.get(0).text();
    }

    /** Returns the {@code num} text of the counter at {@code index}, or "" if absent. */
    private static String numAt(Elements counters, int index) {
        return index < counters.size()
                ? counters.get(index).getElementsByClass("num").text()
                : "";
    }
}
结果:
处理时间:2946 毫秒 处理数量:1989 篇
不使用线程的代码(单线程顺序抓取)
package com.cowboy.service;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.concurrent.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import com.cowboy.model.Article;
import org.jsoup.*;
import org.jsoup.nodes.*;
import org.jsoup.select.*;

/**
 * Single-threaded baseline: crawls every article-list page of one CSDN blog
 * sequentially and prints the elapsed time and the collected articles.
 *
 * @author shizongger
 * @date 2017/02/09
 */
public class Main {

    private static final String URL = "https://blog.csdn.net/valada";

    /** Matches the {@code var pageSize = N} assignment in the page's inline script. */
    private static final Pattern PAGE_SIZE =
            Pattern.compile("var\\s+pageSize\\s*=\\s*(\\d{1,9})\\b");

    /** Matches the {@code var listTotal = N} assignment in the page's inline script. */
    private static final Pattern LIST_TOTAL =
            Pattern.compile("var\\s+listTotal\\s*=\\s*(\\d{1,9})\\b");

    public static void main(String[] args) throws IOException {
        Connection conn = Jsoup.connect(URL)
                .userAgent("Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:50.0) Gecko/20100101 Firefox/50.0")
                .timeout(8000)
                .method(Connection.Method.GET);
        Document doc = conn.get();

        // Fall back to a single page when the front page does not expose the counters.
        int totalPage = 1;
        for (Element script : doc.body().getElementsByTag("script")) {
            int pages = parseTotalPage(script.data());
            if (pages >= 0) {
                totalPage = pages;
                break;
            }
        }
        System.out.println("总页数:" + totalPage);

        List<Article> articleList = new ArrayList<>();
        long l1 = System.currentTimeMillis();
        // Pages are fetched one after another; this is the timing being measured.
        for (int pageNow = 1; pageNow <= totalPage; pageNow++) {
            articleList.addAll(getArtitcleByPage(pageNow));
        }
        System.out.println(System.currentTimeMillis() - l1);
        System.out.println(articleList.size() + " " + articleList);
    }

    /**
     * Extracts the page count from one inline script.
     *
     * @param scriptText the script source
     * @return ceil(listTotal / pageSize), or -1 if the script does not define both
     *     variables or the page size is not positive
     */
    private static int parseTotalPage(String scriptText) {
        Matcher size = PAGE_SIZE.matcher(scriptText);
        Matcher total = LIST_TOTAL.matcher(scriptText);
        if (!size.find() || !total.find()) {
            return -1;
        }
        int pageSize = Integer.parseInt(size.group(1));
        int listTotal = Integer.parseInt(total.group(1));
        if (pageSize <= 0) {
            return -1;
        }
        // Integer ceiling division; both values have at most 9 digits, so no overflow.
        return (listTotal + pageSize - 1) / pageSize;
    }

    /**
     * Downloads one article-list page and converts each {@code article-item-box} entry
     * into an {@link Article}.
     *
     * @param pageNow 1-based page number
     * @return the articles found on that page; entries without a title link are skipped
     * @throws IOException if the page cannot be downloaded
     */
    public static List<Article> getArtitcleByPage(int pageNow) throws IOException {
        Connection conn = Jsoup.connect(URL + "/article/list/" + pageNow)
                .userAgent("Mozilla/5.0 (Windows NT 6.1; rv:47.0) Gecko/20100101 Firefox/47.")
                .timeout(8000)
                .method(Connection.Method.GET);
        Document doc = conn.get();

        List<Article> resultList = new ArrayList<>();
        for (Element item : doc.body().getElementsByClass("article-item-box")) {
            Elements links = item.select("div h4 a");
            if (links.isEmpty()) {
                // No title link: nothing useful to record for this entry.
                continue;
            }
            Element link = links.get(0);

            String time = "";
            String readNum = "";
            String commentNum = "";
            Elements infoBoxes = item.getElementsByClass("info-box");
            if (!infoBoxes.isEmpty()) {
                Element info = infoBoxes.get(0);
                time = info.getElementsByClass("date").text();
                Elements counters = info.getElementsByClass("read-num");
                readNum = numAt(counters, 0);
                commentNum = numAt(counters, 1);
            }

            Article entity = new Article();
            entity.setAddress(link.attr("href"));
            entity.setTitle(link.text());
            entity.setDesption(firstText(item.getElementsByClass("content")));
            entity.setTime(time);
            entity.setReadNum(readNum);
            entity.setCommentNum(commentNum);
            resultList.add(entity);
        }
        return resultList;
    }

    /** Returns the text of the first element, or an empty string if there is none. */
    private static String firstText(Elements elements) {
        return elements.isEmpty() ? "" : elements.get(0).text();
    }

    /** Returns the {@code num} text of the counter at {@code index}, or "" if absent. */
    private static String numAt(Elements counters, int index) {
        return index < counters.size()
                ? counters.get(index).getElementsByClass("num").text()
                : "";
    }
}
结果
处理时间:30073 毫秒 处理数量:1989 篇
30073 / 2946 ≈ 10.2,也就是说使用线程池后,抓取速度大约是单线程的 10 倍。