"""
Created on Wed May 22 10:43:50 2019
@author: 激光雷达
"""
import operator

from numpy import *
'''Part 1 '''
def creatDataSet():
    """Build the four-point toy data set used to demonstrate the classifier.

    Returns:
        A tuple ``(group, labels)`` where ``group`` is a 4x2 numpy array of
        sample coordinates and ``labels`` is the list of class names
        (``'A'`` or ``'B'``) for the corresponding rows.
    """
    # Each entry pairs a 2-D point with its class name.
    samples = [
        ([1.0, 1.1], 'A'),
        ([1.0, 1.0], 'A'),
        ([0, 0], 'B'),
        ([0, 0.1], 'B'),
    ]
    group = array([point for point, _ in samples])
    labels = [name for _, name in samples]
    return group, labels
def classfiy0(iinX, dataSet, labels, k):
    """Classify one sample by majority vote among its k nearest neighbours.

    Args:
        iinX: the sample to classify (one value per feature).
        dataSet: 2-D numpy array of training samples, one row per sample.
        labels: sequence of class labels, indexed like the rows of dataSet.
        k: number of nearest neighbours that take part in the vote.

    Returns:
        The label with the most votes among the k nearest rows.
    """
    # Euclidean distance from the query to every training row.
    rowCount = dataSet.shape[0]
    offsets = tile(iinX, (rowCount, 1)) - dataSet
    distances = (offsets ** 2).sum(axis=1) ** 0.5
    nearestFirst = distances.argsort()

    # Tally the labels of the k closest rows.
    votes = {}
    for rank in range(k):
        neighbourLabel = labels[nearestFirst[rank]]
        votes[neighbourLabel] = votes.get(neighbourLabel, 0) + 1

    # Stable descending sort: on a tie the label that was seen first wins.
    ranking = sorted(votes.items(), key=lambda pair: pair[1], reverse=True)
    return ranking[0][0]
# Sanity check on the toy data: classify the origin using its 3 nearest
# neighbours and print the predicted label.
group, labels = creatDataSet()
print(classfiy0([0, 0], group, labels, 3))
''' Part 2 '''
def file2matrix(filename):
    """Parse a tab-separated sample file into a feature matrix and labels.

    Each non-blank line must hold at least three numeric feature columns,
    with an integer class label in the last column.

    Args:
        filename: path of the text file to read.

    Returns:
        A tuple ``(returnMat, classLabelVector)``: an (n, 3) float array with
        the first three columns of every record, and a list with the integer
        label of every record.

    Raises:
        ValueError: if a record has non-numeric features or a non-integer
            label, or fewer than three feature columns.
    """
    # The context manager guarantees the file is closed, even on errors.
    with open(filename) as fr:
        records = [line.strip() for line in fr]
    # Blank lines (e.g. a trailing newline at end of file) carry no record.
    records = [record for record in records if record]

    returnMat = zeros((len(records), 3))
    classLabelVector = []
    for index, record in enumerate(records):
        fields = record.split('\t')
        returnMat[index, :] = [float(value) for value in fields[0:3]]
        classLabelVector.append(int(fields[-1]))
    return returnMat, classLabelVector
# Load the dating data set once at module level; the plots and the
# normalisation demo below reuse these two names.
datingDataMat, datingDataLabels = file2matrix('datingTestSet2.txt')
print(datingDataMat)
# Only the first 20 labels are shown to keep the output short.
print(datingDataLabels[:20])
import matplotlib
import matplotlib.pyplot as plt

# Figure 1: plain scatter of feature column 1 against feature column 2.
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.scatter(datingDataMat[:, 1], datingDataMat[:, 2])
ax.set(
    xlabel="Percentage of time spent playing video games",
    ylabel="Ice cream kilograms consumed per week",
)
plt.show()
# Figure 2: same axes as figure 1, but marker size and colour value are both
# 15 times the class label so the classes can be told apart.
plt2 = matplotlib.pyplot
fig2 = plt2.figure()
ax2 = fig2.add_subplot(1, 1, 1)
ax2.scatter(
    datingDataMat[:, 1],
    datingDataMat[:, 2],
    s=15.0 * array(datingDataLabels),
    c=15.0 * array(datingDataLabels),
)
ax2.set(
    xlabel="Percentage of time spent playing video games",
    ylabel="Ice cream kilograms consumed per week",
)
plt2.show()
# Figure 3: feature column 0 against feature column 1, one labelled scatter
# per class so that the legend actually has entries to show.
plt3 = matplotlib.pyplot
fig3 = plt3.figure()
ax3 = fig3.add_subplot(111)

labels3 = array(datingDataLabels)
for cls3 in unique(labels3):
    mask3 = labels3 == cls3
    ax3.scatter(
        datingDataMat[mask3, 0],
        datingDataMat[mask3, 1],
        # Marker size and colour value stay 15 * label, as in a single
        # combined scatter call.
        s=15.0 * cls3,
        c=15.0 * labels3[mask3],
        # Shared limits keep every class on the same colour scale.
        vmin=15.0 * labels3.min(),
        vmax=15.0 * labels3.max(),
        label="Class %d" % cls3,
    )

ax3.set_xlabel("Frequent Flight Miles Obtained Annually")
ax3.set_ylabel("Percentage of time spent playing video games")
ax3.legend()
plt3.show()
# Figure 4: one scatter series per class label (1, 2, 3) with a fixed marker
# size/colour per class and an explicit legend.
plt4 = matplotlib.pyplot
plt4.rcParams['font.sans-serif'] = ['Simhei']
plt4.rcParams['axes.unicode_minus'] = False

datingDataMat4, datingLabels4 = file2matrix('datingTestSet2.txt')

plt4.figure()
axes4 = plt4.subplot(111)

# Bucket feature columns 0 (x) and 1 (y) by class label; rows whose label is
# not 1, 2 or 3 are ignored.
type1_x, type1_y = [], []
type2_x, type2_y = [], []
type3_x, type3_y = [], []
buckets4 = {
    1: (type1_x, type1_y),
    2: (type2_x, type2_y),
    3: (type3_x, type3_y),
}
for row4, label4 in zip(datingDataMat4, datingLabels4):
    if label4 in buckets4:
        xs4, ys4 = buckets4[label4]
        xs4.append(row4[0])
        ys4.append(row4[1])

type1 = axes4.scatter(type1_x, type1_y, s=20, c='r')
type2 = axes4.scatter(type2_x, type2_y, s=40, c='b')
type3 = axes4.scatter(type3_x, type3_y, s=60, c='k')

plt4.legend(
    (type1, type2, type3),
    ('Dislike', 'Charming general', 'Glamour'),
)
plt4.show()
''' Part 3 '''
def autoNorm(dataSet):
    """Rescale every feature column of dataSet to the range [0, 1].

    Each value becomes ``(value - column_min) / (column_max - column_min)``.
    A column whose values are all equal has range 0; it is mapped to 0
    instead of being divided by zero.

    Args:
        dataSet: 2-D numpy array, one row per sample, one column per feature.

    Returns:
        A tuple ``(normDataSet, ranges, minValues)``: the rescaled array, the
        per-column range (max - min, reported unmodified) and the per-column
        minimum.
    """
    minValues = dataSet.min(0)
    maxValues = dataSet.max(0)
    ranges = maxValues - minValues
    # Divide constant columns by 1 so they normalise to 0 rather than NaN.
    safeRanges = where(ranges == 0, 1, ranges)
    # Broadcasting applies the per-column vectors to every row.
    normDataSet = (dataSet - minValues) / safeRanges
    return normDataSet, ranges, minValues
# Normalise the loaded data set and print each result, with a blank line
# before the first item and after every item.
normDataSet, ranges, minValues = autoNorm(datingDataMat)
print()
for shown in (normDataSet, ranges, minValues):
    print(shown)
    print()
''' Part 4 '''
def datingClassTest(hoRatio=0.10, filename='datingTestSet2.txt', k=3):
    """Measure the classifier's error rate on a hold-out slice of the data.

    The first ``hoRatio`` fraction of the normalised samples is used as the
    test set; the remaining rows are the training set. Every prediction and
    the final error rate are printed.

    Args:
        hoRatio: fraction of the samples held out for testing.
        filename: tab-separated data file passed to file2matrix.
        k: number of neighbours passed to classfiy0.
    """
    datingDataMat, datingDataLabels = file2matrix(filename)
    normMat, _, _ = autoNorm(datingDataMat)
    sampleCount = normMat.shape[0]
    numTestVecs = int(sampleCount * hoRatio)
    if numTestVecs <= 0:
        # Without test vectors the error rate would be a division by zero.
        print("Not enough samples to hold out a test set")
        return

    # The training slice is the same for every test vector; build it once.
    trainMat = normMat[numTestVecs:sampleCount, :]
    trainLabels = datingDataLabels[numTestVecs:sampleCount]

    errorCount = 0
    for i in range(numTestVecs):
        classfierResults = classfiy0(normMat[i, :], trainMat, trainLabels, k)
        print("The classfier came back with: %d, the real is : %d"
              % (classfierResults, datingDataLabels[i]))
        if classfierResults != datingDataLabels[i]:
            errorCount += 1
    print("The total error rate is : %f" % (errorCount / float(numTestVecs)))
# Run the hold-out evaluation when the script is executed.
datingClassTest()
''' Part 5 '''
def classfiyPerson():
    """Interactively classify one person from three values typed by the user.

    Prompts for the three feature values, normalises them with the minimum
    and range of the data in 'datingTestSet2.txt', classifies the result
    with the 3 nearest neighbours and prints the matching description.
    """
    resultList = ['Not at all', 'Small doses', 'Large doses']

    # Prompts are asked in this order; the feature vector below is ordered
    # differently (miles, games, ice cream).
    gameShare = float(input("Percecntage of time spent on video games ?"))
    flierMiles = float(input("Frequent flier miles earned per year ?"))
    iceCreamAmount = float(input("Liters icecream consumed per year ?"))

    trainingMat, trainingLabels = file2matrix('datingTestSet2.txt')
    normMat, ranges, minValues = autoNorm(trainingMat)

    # Scale the query with the same min/range used for the training data.
    query = array([flierMiles, gameShare, iceCreamAmount])
    scaledQuery = (query - minValues) / ranges
    predicted = classfiy0(scaledQuery, normMat, trainingLabels, 3)

    # Class labels start at 1, list indices at 0.
    print("You will probably like this person: ", resultList[predicted - 1])
# Start the interactive classifier when the script is executed.
classfiyPerson()
# Please credit the original source when reposting: https://yun.8miu.com/read-107266.html