------------本文笔记整理自《Hadoop海量数据处理:技术详解与项目实战》范东来
1. 代码文件 IpParser.java 的编码方式为 UTF-8,而纯真数据库文件 qqwry.dat 的编码为 GBK,因此 getStr() 方法在把读取到的字节转换为字符串时必须指定 GBK 编码,否则会出现乱码。
2. 最后main()中有测试用例,可直接运行。
3. 纯真IP地址数据库下载地址:http://www.cz88.net/
4. 测试结果:
IP: 120.196.145.58 解析结果:广东省梅州市 : 移动；IP: 203.107.6.88 解析结果:浙江省杭州市 : 阿里巴巴阿里云NTP服务器
package com.etl.utls; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.RandomAccessFile; public class IpParser { /* * IP地址解析器 */ //纯真IP数据库文件 private String DbPath = "./cz88/qqwry.dat"; //www.cz88.net private String Country, LocalStr; private long IPN; private int RecordCount, CountryFlag; private long RangE, RangB, OffSet, StartIP, EndIP, FirstStartIP, LastStartIP, EndIPOff; private RandomAccessFile fis; //文件随机读写,按byte[]读/写 private byte[] buff; private long ByteArrayToLong(byte[] b) { long ret = 0; for (int i = 0; i < b.length; i++) { long t = 1L; for (int j = 0; j < i; j++) { t = t * 256L; } ret += ((b[i] < 0 ) ? 256 + b[i] : b[i]) * t; } return ret; } private long ipStrToLong(String ip) { String[] arr = ip.split("\\."); //“正则表达式”:"."在正则中有特殊含义,需转义 long ret = 0; for (int i = 0; i < arr.length; i++) { long l = 1; for (int j = 0; j < i; j++) { l *= 256L; } try { ret += Long.parseLong(arr[arr.length-i-1]) * l; //从低位(右边)起 } catch(Exception e) { ret += 0; } } return ret; } public void seek(String ip) throws Exception { IPN = ipStrToLong(ip); fis = new RandomAccessFile(DbPath, "r"); buff = new byte[4]; fis.seek(0); //seek() 设置文件指针位置 fis.read(buff); //读取buff个字节 FirstStartIP = ByteArrayToLong(buff); fis.read(buff); LastStartIP = ByteArrayToLong(buff); RecordCount = (int)((LastStartIP - FirstStartIP) / 7); if (RecordCount <= 1) { LocalStr = Country = "未知"; throw new Exception(); } RangB = 0; RangE = RecordCount; long RecNo; do { RecNo = (RangB + RangE) / 2; getStartIP(RecNo); if (IPN == StartIP) { RangB = RecNo; break; } if (IPN > StartIP) RangB = RecNo; else RangE = RecNo; } while (RangB < RangE - 1); getStartIP(RangB); getEndIP(); getCountry(IPN); fis.close(); } private String getFlagStr(long OffSet) throws IOException { int flag = 0; do { fis.seek(OffSet); buff = new byte[1]; fis.read(buff); flag = (buff[0] < 0) ? 
256 + buff[0] : buff[0]; if (flag == 1 || flag == 2) { buff = new byte[3]; fis.read(buff); if (flag == 2) { CountryFlag = 2; EndIPOff = OffSet - 4; } OffSet = ByteArrayToLong(buff); } else { break; } } while (true); if (OffSet < 12) { return ""; } else { fis.seek(OffSet); return getStr(); } } private String getStr() throws IOException { long l = fis.length(); ByteArrayOutputStream byteout = new ByteArrayOutputStream(); byte c = fis.readByte(); do { byteout.write(c); c = fis.readByte(); } while (c != 0 && fis.getFilePointer() < l); //qqwry.dat文件默认编码为GBK,但该文件是UTF-8格式的,所以输出会乱码,需要指定为GBK return new String(byteout.toByteArray(),"GBK"); } private void getCountry(long ip) throws IOException { if (CountryFlag == 1 || CountryFlag == 2) { Country = getFlagStr(EndIPOff + 4); if (CountryFlag == 1) { LocalStr = getFlagStr(fis.getFilePointer()); if (IPN >= ipStrToLong("255.255.255.0") && IPN <= ipStrToLong("255.255.255.255")) { LocalStr = getFlagStr(EndIPOff + 21); Country = getFlagStr(EndIPOff + 12); } } else { LocalStr = getFlagStr(EndIPOff + 8); } } else { Country = getFlagStr(EndIPOff + 4); LocalStr = getFlagStr(fis.getFilePointer()); } } private long getEndIP() throws IOException { fis.seek(EndIPOff); buff = new byte[4]; fis.read(buff); EndIP = ByteArrayToLong(buff); buff = new byte[1]; fis.read(buff); CountryFlag = (buff[0] < 0) ? 
255 + buff[0] : buff[0]; return EndIP; } private long getStartIP(long RecNo) throws IOException { OffSet = FirstStartIP + RecNo * 7; fis.seek(OffSet); buff = new byte[4]; fis.read(buff); StartIP = ByteArrayToLong(buff); buff = new byte[3]; fis.read(buff); EndIPOff = ByteArrayToLong(buff); return StartIP; } public String getLocal() { return this.LocalStr; } public String getCountry() { return this.Country; } public void setPath(String path) { this.DbPath = path; } //调用该函数即可获得IP地址所在的实际区域 public String parse(String ipStr) throws Exception { this.seek(ipStr); return this.getCountry() + " " + this.getLocal(); } //测试 public static void main(String[] args) { IpParser ipParser = new IpParser(); try { //203.107.6.88 String strs = ipParser.parse("120.196.145.58"); System.out.println(strs.split(" ")[0] + " : " + strs.split(" ")[1]); //IP: 120.196.145.58 输出结果:广东省梅州市 : 移动 //IP: 203.107.6.88 输出结果:浙江省杭州市 : 阿里巴巴阿里云NTP服务器 } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } } }