# -*- coding:utf-8 -*-
__author__ = 'yangxin_ryan'
from numpy import *
from votesmart import votesmart
class Apriori(object):
def load_data_set(self):
    """Return a small, hard-coded sample data set.

    Returns:
        A freshly built list of four transactions, each transaction being
        a list of integer item ids.
    """
    transactions = [
        [1, 3, 4],
        [2, 3, 5],
        [1, 2, 3, 5],
        [2, 5],
    ]
    return transactions
def create_c1(self, data_set):
    """Build C1: every distinct item of ``data_set`` as a one-element frozenset.

    Args:
        data_set: An iterable of transactions, each an iterable of hashable,
            mutually comparable items.

    Returns:
        A list of ``frozenset`` objects, one per distinct item, ordered by
        ascending item value. An empty ``data_set`` yields ``[]``.
    """
    # A set gives O(1) duplicate detection instead of rescanning a list for
    # every item; the items must be hashable anyway to live in a frozenset.
    unique_items = set()
    for transaction in data_set:
        unique_items.update(transaction)
    # Return a real list rather than a bare map(): on Python 3 map() is a
    # one-shot iterator, which would be exhausted after a single pass by any
    # caller that loops over the candidates more than once.
    return [frozenset([item]) for item in sorted(unique_items)]
def scan_d(self, d, ck, min_support):
# 计算候选数据集 CK 在数据集 D 中的支持度,并返回支持度大于最小支持度(minSupport)的数据
ss_cnt = {}
for tid in d:
for can in ck:
if can.issubset(tid):
if not ss_cnt.has_key(can):
ss_cnt[can] = 1
else:
ss_cnt[can]