连接池
1、为什么要用连接池
如果每一次请求都要创建HttpClient,会有频繁创建和销毁的问题,可以使用连接池来解决这个问题。 如何创建项目请看:https://blog.csdn.net/weixin_44588495/article/details/90580722
2、创建连接池对象
利用PoolingHttpClientConnectionManager类创建。应当同时设置连接池的最大连接数和每一个主机(路由)的最大连接数。原因是:如果同时爬取百度新闻和淘宝的数据,而只限制了总连接数为100,那么爬取百度的请求可能把这100个连接全部占满,留给淘宝的连接数就为0了;限制每一个主机的最大连接数可以避免这种情况。
// Create the pooling connection manager that the HttpClient instances will share.
PoolingHttpClientConnectionManager cm = new PoolingHttpClientConnectionManager();
// Upper bound on the number of connections in the whole pool.
cm.setMaxTotal(100);
// Upper bound per host (route), so a single site cannot occupy the entire pool.
cm.setDefaultMaxPerRoute(10);
3、创建doGet方法
/**
 * Sends a GET request to http://www.itcast.cn through a client backed by the given connection
 * pool and prints the response body when the status code is 200.
 *
 * <p>I/O failures are reported with {@code printStackTrace()} and not rethrown.
 *
 * @param cm the pooled connection manager that supplies the connection; it is left open so
 *     that later calls can keep using the same pool
 */
public static void doGet(PoolingHttpClientConnectionManager cm) {
    // The client is deliberately not closed: closing it would also shut down the shared
    // connection manager and break every later request that uses the same pool.
    CloseableHttpClient httpClient = HttpClients.custom().setConnectionManager(cm).build();
    HttpGet httpGet = new HttpGet("http://www.itcast.cn");
    CloseableHttpResponse response = null;
    try {
        response = httpClient.execute(httpGet);
        if (response.getStatusLine().getStatusCode() == 200) {
            HttpEntity httpEntity = response.getEntity();
            String content = EntityUtils.toString(httpEntity, "utf-8");
            System.out.println(content);
        }
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        // execute() may have thrown before a response was assigned.
        if (response != null) {
            try {
                // Closing the response frees the connection it holds.
                response.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}
4、完整的代码如下:
package com
.crawler
;
import org
.apache
.http
.HttpEntity
;
import org
.apache
.http
.client
.methods
.CloseableHttpResponse
;
import org
.apache
.http
.client
.methods
.HttpGet
;
import org
.apache
.http
.impl
.client
.CloseableHttpClient
;
import org
.apache
.http
.impl
.client
.HttpClients
;
import org
.apache
.http
.impl
.conn
.PoolingHttpClientConnectionManager
;
import org
.apache
.http
.util
.EntityUtils
;
import java
.io
.IOException
;
/**
 * Demonstrates issuing several GET requests through one shared
 * {@link PoolingHttpClientConnectionManager}.
 */
public class HttpClientPoolTest {

    /**
     * Creates the connection pool, runs two requests against it, and shuts the pool down.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        PoolingHttpClientConnectionManager cm = new PoolingHttpClientConnectionManager();
        // At most 100 connections in total, and at most 10 per host (route).
        cm.setMaxTotal(100);
        cm.setDefaultMaxPerRoute(10);
        try {
            doGet(cm);
            doGet(cm);
        } finally {
            // Release the pooled connections once no more requests will be sent.
            cm.shutdown();
        }
    }

    /**
     * Sends a GET request to http://www.itcast.cn through a client backed by the given
     * connection pool and prints the length of the response body when the status code is 200.
     *
     * <p>I/O failures are reported with {@code printStackTrace()} and not rethrown.
     *
     * @param cm the pooled connection manager that supplies the connection; it is not closed
     *     here so that it can be reused by later calls
     */
    public static void doGet(PoolingHttpClientConnectionManager cm) {
        // The client is deliberately not closed: closing it would also shut down the shared
        // connection manager.
        CloseableHttpClient httpClient = HttpClients.custom().setConnectionManager(cm).build();
        HttpGet httpGet = new HttpGet("http://www.itcast.cn");
        CloseableHttpResponse response = null;
        try {
            response = httpClient.execute(httpGet);
            if (response.getStatusLine().getStatusCode() == 200) {
                HttpEntity httpEntity = response.getEntity();
                String content = EntityUtils.toString(httpEntity, "utf-8");
                System.out.println(content.length());
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // execute() may have thrown before a response was assigned.
            if (response != null) {
                try {
                    // Closing the response frees the connection it holds.
                    response.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }
}
5、设置请求的超时时间
最后将该代码块放置在doGet方法内(创建httpGet之后、执行请求之前),并导入org.apache.http.client.config.RequestConfig
// All timeouts are in milliseconds.
RequestConfig requestConfig = RequestConfig.custom()
        // Maximum time allowed for establishing the connection.
        .setConnectTimeout(1000)
        // Maximum time to wait for a connection from the connection manager.
        .setConnectionRequestTimeout(500)
        // Maximum time to wait for data on the established connection.
        .setSocketTimeout(10 * 1000)
        .build();
// Apply the timeouts to this request.
httpGet.setConfig(requestConfig);