用Java实现Kmeans聚类算法

    xiaoxiao2022-07-03  139

    具体算法可以参考 http://www.aboutyun.com/thread-18178-1-1.html

    本文用K_means算法实现鸢尾花的识别

    鸢尾花卉数据集,是一类多重变量分析的数据集。每个数据包含4个属性,可通过花萼长度、花萼宽度、花瓣长度、花瓣宽度这4个属性预测鸢尾花卉属于(Setosa,Versicolour,Virginica)三个种类中的哪一类。最后,将K_means算法识别出的类别与数据中给定的类别进行对比,计算正确率。

    实现过程

    1. 确定K值及初始质心;2. 计算每个样本到各质心的距离并划分数据;3. 使用每一类的均值作为新质心;4. 若新的质心和原质心相等,算法停止,否则回到第2步。

    Java代码

    import java.io.BufferedReader;
    import java.io.FileNotFoundException;
    import java.io.FileReader;
    import java.io.IOException;
    import java.text.DecimalFormat;
    import java.text.DecimalFormatSymbols;
    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.List;
    import java.util.Locale;

    /**
     * K-means clustering (K = 3) of iris flower records.
     *
     * <p>Each record is a {@code float[6]}: index 0 is the record id, indices 1-4 are the
     * four features used for clustering, and index 5 is the reference class label
     * (1, 2 or 3) that is only used to count how many records were assigned to the
     * cluster with the matching number.
     *
     * <p>Records are read from {@code input.txt} in the working directory, one record per
     * line, fields separated by whitespace.
     */
    public class K_means {

        /** Number of features used for clustering (record indices 1..4). */
        private static final int FEATURE_COUNT = 4;

        /** Number of fields per record: id, four features, label. */
        private static final int RECORD_LENGTH = 6;

        /**
         * Safety cap on the number of iterations. Centroids are rounded to three decimals,
         * so the usual monotonic-convergence argument is not strictly guaranteed.
         */
        private static final int MAX_ITERATIONS = 1000;

        // The three clusters; rebuilt from scratch on every iteration of kDistance().
        private final List<float[]> K1 = new ArrayList<>();
        private final List<float[]> K2 = new ArrayList<>();
        private final List<float[]> K3 = new ArrayList<>();

        /** Number of records whose cluster number matched their label in the last iteration. */
        public static int num;

        /** All loaded records. */
        private static final List<float[]> flowerList = new ArrayList<>();

        /**
         * Loads the records from {@code input.txt}, replacing any previously loaded data.
         *
         * <p>Blank lines are skipped. If the file cannot be opened or read, an error is
         * written to standard error and the records read so far (possibly none) are kept.
         *
         * @throws IllegalArgumentException if a non-blank line has fewer than six fields or
         *         contains a field that is not a valid float
         */
        private void initData() {
            flowerList.clear(); // calling initData() twice must not duplicate the data set
            try (BufferedReader br = new BufferedReader(new FileReader("input.txt"))) {
                String line;
                int lineNumber = 0;
                while ((line = br.readLine()) != null) {
                    lineNumber++;
                    // Trim first: leading whitespace would otherwise yield an empty first token.
                    String trimmed = line.trim();
                    if (trimmed.isEmpty()) {
                        continue;
                    }
                    String[] fields = trimmed.split("\\s+");
                    if (fields.length < RECORD_LENGTH) {
                        throw new IllegalArgumentException("input.txt line " + lineNumber
                                + ": expected " + RECORD_LENGTH + " fields but found " + fields.length);
                    }
                    float[] flower = new float[RECORD_LENGTH];
                    try {
                        for (int i = 0; i < RECORD_LENGTH; i++) {
                            flower[i] = Float.parseFloat(fields[i]);
                        }
                    } catch (NumberFormatException e) {
                        throw new IllegalArgumentException(
                                "input.txt line " + lineNumber + ": invalid number", e);
                    }
                    flowerList.add(flower);
                }
            } catch (FileNotFoundException e) {
                System.err.println("Cannot open input.txt: " + e.getMessage());
            } catch (IOException e) {
                System.err.println("Failed to read input.txt: " + e.getMessage());
            }
        }

        /**
         * Computes the mean of the four features (record indices 1..4) over a cluster,
         * rounded to three decimal places.
         *
         * @param K the records of one cluster
         * @return a new array of four rounded feature means; every component is
         *         {@link Float#NaN} when {@code K} is empty, because the mean is undefined
         */
        public float[] kMeans(List<float[]> K) {
            float[] mean = new float[FEATURE_COUNT];
            if (K.isEmpty()) {
                Arrays.fill(mean, Float.NaN);
                return mean;
            }
            for (float[] flower : K) {
                for (int d = 0; d < FEATURE_COUNT; d++) {
                    mean[d] += flower[d + 1];
                }
            }
            // Fixed ROOT symbols: the formatted text is parsed back by Float.parseFloat, which
            // only accepts '.' as the decimal separator regardless of the default locale.
            DecimalFormat df = new DecimalFormat("0.000", DecimalFormatSymbols.getInstance(Locale.ROOT));
            for (int d = 0; d < FEATURE_COUNT; d++) {
                mean[d] = Float.parseFloat(df.format(mean[d] / K.size()));
            }
            return mean;
        }

        /**
         * Returns the smallest of three values.
         *
         * @param f1 first value
         * @param f2 second value
         * @param f3 third value
         * @return the minimum of the three
         */
        private static float min(float f1, float f2, float f3) {
            return Math.min(f1, Math.min(f2, f3));
        }

        /**
         * Picks the cluster with the smallest distance; ties go to the lowest index so that
         * every record lands in exactly one cluster.
         */
        private static int nearestCluster(float d1, float d2, float d3) {
            float smallest = min(d1, d2, d3);
            if (d1 == smallest) {
                return 0;
            }
            if (d2 == smallest) {
                return 1;
            }
            return 2;
        }

        /** Squared Euclidean distance between a record's four features and a centroid. */
        private static float squaredDistance(float[] flower, float[] centroid) {
            double sum = 0;
            for (int d = 0; d < FEATURE_COUNT; d++) {
                double diff = flower[d + 1] - centroid[d];
                sum += diff * diff;
            }
            return (float) sum;
        }

        /**
         * Runs the clustering loop on the loaded records: assign every record to its nearest
         * centroid, recompute the centroids as cluster means, and stop once the centroids no
         * longer change.
         *
         * <p>After every assignment pass the number of records whose cluster number (1-3)
         * equals their label, and the resulting rate, are printed; {@link #num} holds the
         * count of the last pass. When the centroids changed, the new centroids are printed.
         * The cluster numbering only lines up with the labels because of the fixed initial
         * centroids below.
         *
         * <p>If no records are loaded, {@link #num} is set to 0 and the method returns.
         */
        public void kDistance() {
            num = 0;
            if (flowerList.isEmpty()) {
                System.err.println("No flower records loaded; nothing to cluster.");
                return;
            }

            // Initial centroids.
            float[][] centroids = {
                {4.9f, 3f, 1.4f, 0.2f},
                {6.4f, 3.2f, 4.5f, 1.5f},
                {5.8f, 2.7f, 5.1f, 1.9f}
            };
            List<List<float[]>> clusters = Arrays.asList(K1, K2, K3);

            boolean converged = false;
            for (int iteration = 0; iteration < MAX_ITERATIONS && !converged; iteration++) {
                // Membership is rebuilt each pass; stale members would skew the means.
                for (List<float[]> cluster : clusters) {
                    cluster.clear();
                }
                num = 0;
                for (float[] flower : flowerList) {
                    int nearest = nearestCluster(
                            squaredDistance(flower, centroids[0]),
                            squaredDistance(flower, centroids[1]),
                            squaredDistance(flower, centroids[2]));
                    clusters.get(nearest).add(flower);
                    if (flower[5] == nearest + 1) {
                        num += 1;
                    }
                }
                System.out.println(num); // number of records whose cluster matches their label
                double rate = (double) num / (double) flowerList.size();
                System.out.println("正确率为:" + rate);

                float[][] updated = new float[centroids.length][];
                for (int k = 0; k < centroids.length; k++) {
                    List<float[]> cluster = clusters.get(k);
                    // An empty cluster has no mean; keep its previous centroid.
                    updated[k] = cluster.isEmpty() ? centroids[k] : kMeans(cluster);
                }

                // Compare centroid contents; array equals() would compare references only.
                if (Arrays.deepEquals(centroids, updated)) {
                    converged = true;
                } else {
                    centroids = updated;
                    for (float[] c : centroids) {
                        System.out.println(c[0] + " " + c[1] + " " + c[2] + " " + c[3]);
                    }
                }
            }
            if (!converged) {
                System.err.println("Stopped after " + MAX_ITERATIONS + " iterations without convergence.");
            }
        }

        public static void main(String[] args) {
            K_means kmeans = new K_means();
            kmeans.initData();
            kmeans.kDistance();
        }
    }

    部分数据 1 5.1 3.5 1.4 0.2 1 2 4.9 3 1.4 0.2 1 3 4.7 3.2 1.3 0.2 1 4 4.6 3.1 1.5 0.2 1 5 5 3.6 1.4 0.3 1 6 5.4 3.9 1.7 0.4 1 7 4.6 3.4 1.4 0.3 1 8 5 3.4 1.5 0.2 1 9 4.4 2.9 1.4 0.2 1 10 4.9 3.1 1.5 0.1 1 11 5.4 3.7 1.5 0.2 1 12 4.8 3.4 1.6 0.2 1 13 4.8 3 1.4 0.1 1 14 4.3 3 1.1 0.1 1 15 5.8 4 1.2 0.2 1 16 5.7 4.4 1.5 0.4 1 17 5.4 3.9 1.3 0.4 1 18 5.1 3.5 1.4 0.3 1 19 5.7 3.8 1.7 0.3 1 20 5.1 3.8 1.5 0.3 1 21 5.4 3.4 1.7 0.2 1 22 5.1 3.7 1.5 0.4 1 23 4.6 3.6 1 0.2 1 24 5.1 3.3 1.7 0.5 1 25 4.8 3.4 1.9 0.2 1 26 5 3 1.6 0.2 1 27 5 3.4 1.6 0.4 1 28 5.2 3.5 1.5 0.2 1 29 5.2 3.4 1.4 0.2 1 30 4.7 3.2 1.6 0.2 1 31 4.8 3.1 1.6 0.2 1 32 5.4 3.4 1.5 0.4 1 33 5.2 4.1 1.5 0.1 1 34 5.5 4.2 1.4 0.2 1 35 4.9 3.1 1.5 0.2 1 36 5 3.2 1.2 0.2 1 37 5.5 3.5 1.3 0.2 1 38 4.9 3.6 1.4 0.1 1 39 4.4 3 1.3 0.2 1 40 5.1 3.4 1.5 0.2 1 41 5 3.5 1.3 0.3 1 42 4.5 2.3 1.3 0.3 1 43 4.4 3.2 1.3 0.2 1 44 5 3.5 1.6 0.6 1 45 5.1 3.8 1.9 0.4 1 46 4.8 3 1.4 0.3 1 47 5.1 3.8 1.6 0.2 1 48 4.6 3.2 1.4 0.2 1 49 5.3 3.7 1.5 0.2 1 50 5 3.3 1.4 0.2 1 51 7 3.2 4.7 1.4 2 52 6.4 3.2 4.5 1.5 2 53 6.9 3.1 4.9 1.5 2 54 5.5 2.3 4 1.3 2 55 6.5 2.8 4.6 1.5 2 56 5.7 2.8 4.5 1.3 2 57 6.3 3.3 4.7 1.6 2 58 4.9 2.4 3.3 1 2 59 6.6 2.9 4.6 1.3 2 60 5.2 2.7 3.9 1.4 2 61 5 2 3.5 1 2 62 5.9 3 4.2 1.5 2 63 6 2.2 4 1 2 64 6.1 2.9 4.7 1.4 2 65 5.6 2.9 3.6 1.3 2 66 6.7 3.1 4.4 1.4 2 67 5.6 3 4.5 1.5 2 68 5.8 2.7 4.1 1 2 69 6.2 2.2 4.5 1.5 2 70 5.6 2.5 3.9 1.1 2 71 5.9 3.2 4.8 1.8 2 72 6.1 2.8 4 1.3 2 73 6.3 2.5 4.9 1.5 2 74 6.1 2.8 4.7 1.2 2 75 6.4 2.9 4.3 1.3 2 76 6.6 3 4.4 1.4 2 77 6.8 2.8 4.8 1.4 2 78 6.7 3 5 1.7 2 79 6 2.9 4.5 1.5 2 80 5.7 2.6 3.5 1 2 81 5.5 2.4 3.8 1.1 2 82 5.5 2.4 3.7 1 2 83 5.8 2.7 3.9 1.2 2 84 6 2.7 5.1 1.6 2 85 5.4 3 4.5 1.5 2 86 6 3.4 4.5 1.6 2 87 6.7 3.1 4.7 1.5 2 88 6.3 2.3 4.4 1.3 2 89 5.6 3 4.1 1.3 2 90 5.5 2.5 4 1.3 2 91 5.5 2.6 4.4 1.2 2 92 6.1 3 4.6 1.4 2 93 5.8 2.6 4 1.2 2 94 5 2.3 3.3 1 2 95 5.6 2.7 4.2 1.3 2 96 5.7 3 4.2 1.2 2 97 5.7 2.9 4.2 1.3 2 98 6.2 2.9 4.3 1.3 2 99 5.1 2.5 3 1.1 2 100 5.7 2.8 
4.1 1.3 2 101 6.3 3.3 6 2.5 3 102 5.8 2.7 5.1 1.9 3 103 7.1 3 5.9 2.1 3 104 6.3 2.9 5.6 1.8 3 105 6.5 3 5.8 2.2 3 106 7.6 3 6.6 2.1 3 107 4.9 2.5 4.5 1.7 3 108 7.3 2.9 6.3 1.8 3 109 6.7 2.5 5.8 1.8 3 110 7.2 3.6 6.1 2.5 3 111 6.5 3.2 5.1 2 3 112 6.4 2.7 5.3 1.9 3 113 6.8 3 5.5 2.1 3 114 5.7 2.5 5 2 3 115 5.8 2.8 5.1 2.4 3 116 6.4 3.2 5.3 2.3 3 117 6.5 3 5.5 1.8 3 118 7.7 3.8 6.7 2.2 3 119 7.7 2.6 6.9 2.3 3 120 6 2.2 5 1.5 3 121 6.9 3.2 5.7 2.3 3 122 5.6 2.8 4.9 2 3 123 7.7 2.8 6.7 2 3 124 6.3 2.7 4.9 1.8 3 125 6.7 3.3 5.7 2.1 3 126 7.2 3.2 6 1.8 3 127 6.2 2.8 4.8 1.8 3 128 6.1 3 4.9 1.8 3 129 6.4 2.8 5.6 2.1 3 130 7.2 3 5.8 1.6 3 131 7.4 2.8 6.1 1.9 3 132 7.9 3.8 6.4 2 3 133 6.4 2.8 5.6 2.2 3 134 6.3 2.8 5.1 1.5 3 135 6.1 2.6 5.6 1.4 3 136 7.7 3 6.1 2.3 3 137 6.3 3.4 5.6 2.4 3 138 6.4 3.1 5.5 1.8 3 139 6 3 4.8 1.8 3 140 6.9 3.1 5.4 2.1 3 141 6.7 3.1 5.6 2.4 3 142 6.9 3.1 5.1 2.3 3 143 5.8 2.7 5.1 1.9 3 144 6.8 3.2 5.9 2.3 3 145 6.7 3.3 5.7 2.5 3 146 6.7 3 5.2 2.3 3 147 6.3 2.5 5 1.9 3 148 6.5 3 5.2 2 3 149 6.2 3.4 5.4 2.3 3 150 5.9 3 5.1 1.8 3

    最新回复(0)