具体算法可以参考 http://www.aboutyun.com/thread-18178-1-1.html
本文用K_means算法实现鸢尾花的识别
鸢尾花卉数据集,是一类多重变量分析的数据集。每个数据包含4个属性。可通过花萼长度,花萼宽度,花瓣长度,花瓣宽度4个属性预测鸢尾花卉属于(Setosa,Versicolour,Virginica)三个种类中的哪一类。最后,将K_means算法识别出的类别与数据中给定的类别进行对比,计算正确率。
实现过程
1. 确定K值及初始质心;2. 计算每个样本到各质心的距离,并据此划分数据;3. 使用各簇的均值作为新质心;4. 若新的质心和原质心相等,则算法停止,否则回到第2步。
Java代码
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.text.DecimalFormat;
import java.text.DecimalFormatSymbols;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Locale;
/**
 * K-means clustering (k = 3) of the flower records read from {@code input.txt}.
 *
 * <p>Every input row holds six whitespace-separated numbers. Columns 1-4 are the
 * features used for clustering and column 5 is the reference class label
 * (1, 2 or 3); column 0 is parsed but never used by the algorithm.
 *
 * <p>After each assignment round the number of records whose cluster index matches
 * their reference label is stored in {@link #num} and printed together with the
 * resulting accuracy. Refinement stops once the centroids no longer change.
 */
public class K_means {

    /** Upper bound on refinement rounds; guards against oscillating centroids. */
    private static final int MAX_ITERATIONS = 100;

    /** Number of feature columns used for clustering (columns 1..4 of a record). */
    private static final int FEATURES = 4;

    /** Number of columns expected in every input row. */
    private static final int COLUMNS = 6;

    /** Members of cluster 1 for the current round. */
    private final List<float[]> K1 = new ArrayList<>();

    /** Members of cluster 2 for the current round. */
    private final List<float[]> K2 = new ArrayList<>();

    /** Members of cluster 3 for the current round. */
    private final List<float[]> K3 = new ArrayList<>();

    /** Count of records whose cluster matched their label in the latest round. */
    public static int num;

    /** All records loaded by {@link #initData()}. */
    private static final List<float[]> flowerList = new ArrayList<>();

    /**
     * Loads every non-blank row of {@code input.txt} into {@code flowerList},
     * replacing whatever was loaded before.
     *
     * @throws IllegalStateException if the file cannot be opened or read, or if a
     *         row has fewer than six columns
     * @throws NumberFormatException if a column is not a valid float
     */
    private void initData() {
        // Start from a clean slate so a second call does not duplicate records.
        flowerList.clear();
        // try-with-resources closes the reader even when parsing fails.
        try (BufferedReader br = new BufferedReader(new FileReader("input.txt"))) {
            String str;
            while ((str = br.readLine()) != null) {
                String line = str.trim();
                if (line.isEmpty()) {
                    continue; // tolerate blank / trailing lines
                }
                String[] strArray = line.split("\\s+");
                if (strArray.length < COLUMNS) {
                    throw new IllegalStateException(
                            "Expected " + COLUMNS + " columns but found " + strArray.length
                                    + " in line: " + str);
                }
                float[] flower = new float[COLUMNS];
                for (int c = 0; c < COLUMNS; c++) {
                    flower[c] = Float.parseFloat(strArray[c]);
                }
                flowerList.add(flower);
            }
        } catch (IOException e) {
            // Covers FileNotFoundException as well; fail with the real cause instead
            // of continuing with a null reader or partial data.
            throw new IllegalStateException("Cannot read input.txt", e);
        }
    }

    /**
     * Computes the centroid of a cluster: the per-feature mean of columns 1..4,
     * rounded to three decimal places.
     *
     * @param K the records belonging to the cluster
     * @return a new four-element array with the rounded means; every element is
     *         {@code NaN} when {@code K} is empty
     */
    public float[] kMeans(List<float[]> K) {
        float[] mean1 = new float[FEATURES];
        if (K.isEmpty()) {
            // 0/0 is undefined; make that explicit rather than relying on how the
            // formatter happens to render NaN.
            Arrays.fill(mean1, Float.NaN);
            return mean1;
        }
        for (float[] flower : K) {
            for (int d = 0; d < FEATURES; d++) {
                mean1[d] += flower[d + 1];
            }
        }
        // Locale.ROOT pins '.' as the decimal separator so that the formatted text
        // can always be parsed back by Float.parseFloat, whatever the default locale.
        DecimalFormat df = new DecimalFormat(".000", DecimalFormatSymbols.getInstance(Locale.ROOT));
        for (int d = 0; d < FEATURES; d++) {
            mean1[d] = roundToThreeDecimals(df, mean1[d] / K.size());
        }
        return mean1;
    }

    /** Rounds a value through the given formatter; non-finite values pass through unchanged. */
    private static float roundToThreeDecimals(DecimalFormat df, float value) {
        if (Float.isNaN(value) || Float.isInfinite(value)) {
            return value;
        }
        return Float.parseFloat(df.format(value));
    }

    /** Returns the squared Euclidean distance between a record's features and a centroid. */
    private static double squaredDistance(float[] flower, float[] centroid) {
        double sum = 0.0;
        for (int d = 0; d < FEATURES; d++) {
            double diff = flower[d + 1] - centroid[d];
            sum += diff * diff;
        }
        return sum;
    }

    /** Returns the index of the centroid closest to the record; ties go to the lowest index. */
    private static int nearestCentroid(float[] flower, float[][] centroids) {
        int best = 0;
        double bestDistance = squaredDistance(flower, centroids[0]);
        for (int k = 1; k < centroids.length; k++) {
            double distance = squaredDistance(flower, centroids[k]);
            if (distance < bestDistance) {
                bestDistance = distance;
                best = k;
            }
        }
        return best;
    }

    /**
     * Runs k-means on the loaded records, starting from three fixed centroids.
     *
     * <p>Each round assigns every record to exactly one cluster, updates
     * {@link #num}, prints the match count and the accuracy, and then recomputes
     * the centroids. The loop ends when the centroids are unchanged (or after
     * {@code MAX_ITERATIONS} rounds); otherwise the new centroids are printed.
     */
    public void kDistance() {
        float[][] centroids = {
            {4.9f, 3f, 1.4f, 0.2f},
            {6.4f, 3.2f, 4.5f, 1.5f},
            {5.8f, 2.7f, 5.1f, 1.9f}
        };
        List<List<float[]>> clusters = new ArrayList<>();
        clusters.add(K1);
        clusters.add(K2);
        clusters.add(K3);

        for (int round = 0; round < MAX_ITERATIONS; round++) {
            num = 0;
            // Membership is rebuilt from scratch every round; stale members would
            // otherwise skew the means.
            for (List<float[]> cluster : clusters) {
                cluster.clear();
            }

            for (float[] flower : flowerList) {
                int nearest = nearestCentroid(flower, centroids);
                clusters.get(nearest).add(flower);
                // Cluster k (0-based) corresponds to reference label k + 1.
                if (flower[5] == nearest + 1) {
                    num += 1;
                }
            }

            System.out.println(num);
            double rate = (double) num / (double) flowerList.size();
            System.out.println("正确率为:" + rate);

            float[][] updated = new float[centroids.length][];
            for (int k = 0; k < centroids.length; k++) {
                List<float[]> cluster = clusters.get(k);
                // An empty cluster has no mean; keep its previous centroid.
                updated[k] = cluster.isEmpty() ? centroids[k] : kMeans(cluster);
            }

            // Compare by content: float[].equals would only compare references.
            if (Arrays.deepEquals(centroids, updated)) {
                break;
            }
            centroids = updated;
            for (float[] c : centroids) {
                System.out.println(c[0] + " " + c[1] + " " + c[2] + " " + c[3]);
            }
        }
    }

    /**
     * Loads {@code input.txt} from the working directory and clusters its records.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        K_means kmeans = new K_means();
        kmeans.initData();
        kmeans.kDistance();
    }
}
部分数据 1 5.1 3.5 1.4 0.2 1 2 4.9 3 1.4 0.2 1 3 4.7 3.2 1.3 0.2 1 4 4.6 3.1 1.5 0.2 1 5 5 3.6 1.4 0.3 1 6 5.4 3.9 1.7 0.4 1 7 4.6 3.4 1.4 0.3 1 8 5 3.4 1.5 0.2 1 9 4.4 2.9 1.4 0.2 1 10 4.9 3.1 1.5 0.1 1 11 5.4 3.7 1.5 0.2 1 12 4.8 3.4 1.6 0.2 1 13 4.8 3 1.4 0.1 1 14 4.3 3 1.1 0.1 1 15 5.8 4 1.2 0.2 1 16 5.7 4.4 1.5 0.4 1 17 5.4 3.9 1.3 0.4 1 18 5.1 3.5 1.4 0.3 1 19 5.7 3.8 1.7 0.3 1 20 5.1 3.8 1.5 0.3 1 21 5.4 3.4 1.7 0.2 1 22 5.1 3.7 1.5 0.4 1 23 4.6 3.6 1 0.2 1 24 5.1 3.3 1.7 0.5 1 25 4.8 3.4 1.9 0.2 1 26 5 3 1.6 0.2 1 27 5 3.4 1.6 0.4 1 28 5.2 3.5 1.5 0.2 1 29 5.2 3.4 1.4 0.2 1 30 4.7 3.2 1.6 0.2 1 31 4.8 3.1 1.6 0.2 1 32 5.4 3.4 1.5 0.4 1 33 5.2 4.1 1.5 0.1 1 34 5.5 4.2 1.4 0.2 1 35 4.9 3.1 1.5 0.2 1 36 5 3.2 1.2 0.2 1 37 5.5 3.5 1.3 0.2 1 38 4.9 3.6 1.4 0.1 1 39 4.4 3 1.3 0.2 1 40 5.1 3.4 1.5 0.2 1 41 5 3.5 1.3 0.3 1 42 4.5 2.3 1.3 0.3 1 43 4.4 3.2 1.3 0.2 1 44 5 3.5 1.6 0.6 1 45 5.1 3.8 1.9 0.4 1 46 4.8 3 1.4 0.3 1 47 5.1 3.8 1.6 0.2 1 48 4.6 3.2 1.4 0.2 1 49 5.3 3.7 1.5 0.2 1 50 5 3.3 1.4 0.2 1 51 7 3.2 4.7 1.4 2 52 6.4 3.2 4.5 1.5 2 53 6.9 3.1 4.9 1.5 2 54 5.5 2.3 4 1.3 2 55 6.5 2.8 4.6 1.5 2 56 5.7 2.8 4.5 1.3 2 57 6.3 3.3 4.7 1.6 2 58 4.9 2.4 3.3 1 2 59 6.6 2.9 4.6 1.3 2 60 5.2 2.7 3.9 1.4 2 61 5 2 3.5 1 2 62 5.9 3 4.2 1.5 2 63 6 2.2 4 1 2 64 6.1 2.9 4.7 1.4 2 65 5.6 2.9 3.6 1.3 2 66 6.7 3.1 4.4 1.4 2 67 5.6 3 4.5 1.5 2 68 5.8 2.7 4.1 1 2 69 6.2 2.2 4.5 1.5 2 70 5.6 2.5 3.9 1.1 2 71 5.9 3.2 4.8 1.8 2 72 6.1 2.8 4 1.3 2 73 6.3 2.5 4.9 1.5 2 74 6.1 2.8 4.7 1.2 2 75 6.4 2.9 4.3 1.3 2 76 6.6 3 4.4 1.4 2 77 6.8 2.8 4.8 1.4 2 78 6.7 3 5 1.7 2 79 6 2.9 4.5 1.5 2 80 5.7 2.6 3.5 1 2 81 5.5 2.4 3.8 1.1 2 82 5.5 2.4 3.7 1 2 83 5.8 2.7 3.9 1.2 2 84 6 2.7 5.1 1.6 2 85 5.4 3 4.5 1.5 2 86 6 3.4 4.5 1.6 2 87 6.7 3.1 4.7 1.5 2 88 6.3 2.3 4.4 1.3 2 89 5.6 3 4.1 1.3 2 90 5.5 2.5 4 1.3 2 91 5.5 2.6 4.4 1.2 2 92 6.1 3 4.6 1.4 2 93 5.8 2.6 4 1.2 2 94 5 2.3 3.3 1 2 95 5.6 2.7 4.2 1.3 2 96 5.7 3 4.2 1.2 2 97 5.7 2.9 4.2 1.3 2 98 6.2 2.9 4.3 1.3 2 99 5.1 2.5 3 1.1 2 100 5.7 2.8 4.1 
1.3 2 101 6.3 3.3 6 2.5 3 102 5.8 2.7 5.1 1.9 3 103 7.1 3 5.9 2.1 3 104 6.3 2.9 5.6 1.8 3 105 6.5 3 5.8 2.2 3 106 7.6 3 6.6 2.1 3 107 4.9 2.5 4.5 1.7 3 108 7.3 2.9 6.3 1.8 3 109 6.7 2.5 5.8 1.8 3 110 7.2 3.6 6.1 2.5 3 111 6.5 3.2 5.1 2 3 112 6.4 2.7 5.3 1.9 3 113 6.8 3 5.5 2.1 3 114 5.7 2.5 5 2 3 115 5.8 2.8 5.1 2.4 3 116 6.4 3.2 5.3 2.3 3 117 6.5 3 5.5 1.8 3 118 7.7 3.8 6.7 2.2 3 119 7.7 2.6 6.9 2.3 3 120 6 2.2 5 1.5 3 121 6.9 3.2 5.7 2.3 3 122 5.6 2.8 4.9 2 3 123 7.7 2.8 6.7 2 3 124 6.3 2.7 4.9 1.8 3 125 6.7 3.3 5.7 2.1 3 126 7.2 3.2 6 1.8 3 127 6.2 2.8 4.8 1.8 3 128 6.1 3 4.9 1.8 3 129 6.4 2.8 5.6 2.1 3 130 7.2 3 5.8 1.6 3 131 7.4 2.8 6.1 1.9 3 132 7.9 3.8 6.4 2 3 133 6.4 2.8 5.6 2.2 3 134 6.3 2.8 5.1 1.5 3 135 6.1 2.6 5.6 1.4 3 136 7.7 3 6.1 2.3 3 137 6.3 3.4 5.6 2.4 3 138 6.4 3.1 5.5 1.8 3 139 6 3 4.8 1.8 3 140 6.9 3.1 5.4 2.1 3 141 6.7 3.1 5.6 2.4 3 142 6.9 3.1 5.1 2.3 3 143 5.8 2.7 5.1 1.9 3 144 6.8 3.2 5.9 2.3 3 145 6.7 3.3 5.7 2.5 3 146 6.7 3 5.2 2.3 3 147 6.3 2.5 5 1.9 3 148 6.5 3 5.2 2 3 149 6.2 3.4 5.4 2.3 3 150 5.9 3 5.1 1.8 3