DM-exp-2/code/5-6_cal_fpgrowth.py
fly6516 6e9c2a5f91 feat(code): 添加 Apriori 和 FP-Growth 算法实现
- 新增 Apriori算法挖掘关联规则的实现
- 新增 FP-Growth算法挖掘频繁项集的实现
- 添加相应的数据预处理和结果保存代码
- 优化代码结构,提高可读性和可维护性
2025-03-12 16:31:10 +08:00

42 lines
1.7 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#-*- coding: utf-8 -*-
# Mine frequent itemsets from menu-order data with the FP-Growth algorithm
from __future__ import print_function

import math

import pandas as pd

from fpgrowth import find_frequent_itemsets  # FP-Growth frequent-itemset miner
# Source workbook of raw orders, and the workbook the mined itemsets go to.
inputfile = '../data/menu_orders.xls'
outputfile = '../tmp/fpgrowth_rules.xlsx'  # results file; kept in .xlsx format


def ct(x):
    """Return a Series of 1s indexed by the non-null entries of one row."""
    return pd.Series(1, index=x[pd.notnull(x)])


# header=None: no header row is assumed, so the first sheet row is data too.
# NOTE(review): presumably each sheet row is one order -- confirm with the data.
data = pd.read_excel(inputfile, header=None)

print(u'\n转换原始数据至0-1矩阵...')
# Build one indicator Series per row. The DataFrame constructor aligns them on
# their labels, and positions a row does not contain are then filled with 0.
data = pd.DataFrame([ct(order) for order in data.values]).fillna(0)
print(u'\n转换完毕。')
# Recover one transaction (a list of item labels) per row of the 0-1 matrix by
# keeping the column labels whose cell equals 1.
item_labels = data.columns
transactions = [list(item_labels[present]) for present in (data.values == 1)]

min_support = 0.2  # minimum support, as a fraction of all transactions
# Convert the fraction to an absolute count, rounding UP. Truncating with
# int() would admit itemsets below the requested fraction (0.2 * 9 = 1.8
# would become 1 instead of 2). The small tolerance stops floating-point noise
# (e.g. 3.0000000000000004) from pushing an exact product to the next integer.
min_support_count = int(math.ceil(min_support * len(transactions) - 1e-9))

# Mine the frequent itemsets with FP-Growth and materialise each one as a list.
# NOTE(review): assumes find_frequent_itemsets yields iterables of items and
# keeps itemsets whose count is >= min_support_count -- confirm against the
# fpgrowth module.
frequent_itemsets = [
    list(itemset)
    for itemset in find_frequent_itemsets(transactions, min_support_count)
]
# Save the frequent itemsets to Excel, one itemset per row.
# Each itemset is rendered as a comma-separated string. Items are formatted
# explicitly because str.join raises TypeError on non-string items (for
# example numeric codes read from the workbook). The u'' literals keep
# non-ASCII item names safe under the Python 2 style this file still supports.
result_data = [
    {'Frequent Itemsets': u', '.join(u'{}'.format(item) for item in itemset)}
    for itemset in frequent_itemsets
]
# Naming the column explicitly keeps the header in the output even when no
# itemset reached the support threshold; an empty list alone would produce a
# DataFrame with no columns at all.
result = pd.DataFrame(result_data, columns=['Frequent Itemsets'])
result.to_excel(outputfile, engine='openpyxl')  # write using the openpyxl engine
print(u'\nFP-Growth算法运行完毕结果已保存至:', outputfile)