2025-03-12 08:31:10 +00:00
|
|
|
|
#-*- coding: utf-8 -*-
|
|
|
|
|
# 使用FP-Growth算法挖掘菜品订单关联规则
|
|
|
|
|
from __future__ import print_function
|
|
|
|
|
import pandas as pd
|
|
|
|
|
from fpgrowth import find_frequent_itemsets # 导入FP-Growth函数
|
2025-03-14 03:40:29 +00:00
|
|
|
|
import time # 导入time模块用于计时
|
2025-03-12 08:31:10 +00:00
|
|
|
|
|
2025-03-14 03:40:29 +00:00
|
|
|
|
# Source CSV with the raw records; it is read without a header row, so the
# very first line of the file is treated as data.
inputfile = '../data/iris.csv'

# Destination workbook for the mined itemsets (kept in .xlsx format).
outputfile = '../tmp/fpgrowth_rules.xlsx'

data = pd.read_csv(filepath_or_buffer=inputfile, header=None)
|
2025-03-12 08:31:10 +00:00
|
|
|
|
|
|
|
|
|
print(u'\n转换原始数据至0-1矩阵...')


def ct(x):
    """Return a Series of 1s whose index is the non-null values of row *x*."""
    present = x[pd.notnull(x)]
    return pd.Series(1, index=present)


# Stack the per-row indicator Series into a single frame. A label that is
# missing from a row comes out as NaN there and is replaced by 0.
indicator_rows = [ct(raw_row) for raw_row in data.values]
data = pd.DataFrame(indicator_rows).fillna(0)

print(u'\n转换完毕。')

del indicator_rows  # drop the intermediate list to save memory
|
|
|
|
|
|
|
|
|
|
# Turn the 0-1 matrix back into a list of transactions: for every row, keep
# the labels of the columns whose cell equals 1.
transactions = [
    list(row.loc[row == 1].index)
    for _, row in data.iterrows()
]

min_support = 0.2  # minimum support, as a fraction of all transactions

# Absolute support count handed to the miner; int() truncates toward zero.
min_support_count = int(len(transactions) * min_support)
|
|
|
|
|
|
|
|
|
|
# Mine the frequent itemsets with FP-Growth and time the run.
start_time = time.time()  # wall-clock start

# Consume the result INSIDE the timed region and normalise every itemset to a
# plain list. NOTE(review): find_frequent_itemsets looks like it may return a
# lazy iterator (the result had to be converted to a list afterwards) --
# confirm against the fpgrowth module. If it is lazy, the mining only happens
# while the iterator is consumed, so stopping the clock right after the call
# would time almost nothing. Materialising here is correct either way.
frequent_itemsets = [
    list(itemset)
    for itemset in find_frequent_itemsets(transactions, min_support_count)
]

end_time = time.time()  # wall-clock end, taken once every itemset exists

print(f'\nFP-Growth算法运行时间: {end_time - start_time} 秒')
|
|
|
|
|
|
|
|
|
|
# Save the mined itemsets: one spreadsheet row per frequent itemset, with the
# items of that itemset joined into a single comma-separated string.
# Items are passed through str() first because str.join raises TypeError on
# non-string items (e.g. numeric cells read from the CSV).
result_data = [
    {'Frequent Itemsets': ', '.join(map(str, itemset))}
    for itemset in frequent_itemsets
]

# Naming the column explicitly keeps the 'Frequent Itemsets' header in the
# output even when no itemset reached the support threshold.
result = pd.DataFrame(result_data, columns=['Frequent Itemsets'])

# Write the workbook with the openpyxl engine (row index included, as before).
result.to_excel(outputfile, engine='openpyxl')

print(u'\nFP-Growth算法运行完毕,结果已保存至:', outputfile)
|