案例:
import pandas as pd
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules

# Raise the pandas display limit so wide frames print all their columns.
pd.options.display.max_columns = 70

# Thresholds for the analysis, named so they can be tuned in one place.
MIN_SUPPORT = 0.025   # minimum support for an itemset to count as frequent
MIN_LIFT = 1.25       # minimum lift for a rule to be generated
STRONG_LIFT = 4       # rules with lift above this are printed separately

# --- Movie genre association analysis ---

# Load the movie table; the script uses its 'movieId', 'title' and 'genres'
# columns, where 'genres' is a '|'-separated string.
movies = pd.read_csv('movies.csv')

# One-hot encode the genres: one indicator column per distinct genre.
# The indicators are cast to bool because mlxtend's apriori warns about
# non-boolean input frames; the resulting itemsets are the same as with 0/1.
genre_flags = movies['genres'].str.get_dummies('|').astype(bool)
movies_ohe = movies.drop('genres', axis=1).join(genre_flags)

# Drop the identifier columns so only the genre indicators reach apriori.
movies_ohe = movies_ohe.drop(['movieId', 'title'], axis=1)

# Mine frequent genre combinations.
frequent_itemsets_movies = apriori(
    movies_ohe,
    use_colnames=True,
    min_support=MIN_SUPPORT,
)
print(frequent_itemsets_movies)

# Derive association rules from the frequent itemsets, filtered by lift.
rules_movies = association_rules(
    frequent_itemsets_movies,
    metric='lift',
    min_threshold=MIN_LIFT,
)
print(rules_movies)

# Show the strongest rules first.
print(
    rules_movies[rules_movies.lift > STRONG_LIFT].sort_values(
        by=['lift'], ascending=False
    )
)