DM-exp-2/code/fpgrowth.py
fly6516 6e9c2a5f91 feat(code): 添加 Apriori 和 FP-Growth 算法实现
- 新增 Apriori算法挖掘关联规则的实现
- 新增 FP-Growth算法挖掘频繁项集的实现
- 添加相应的数据预处理和结果保存代码
- 优化代码结构,提高可读性和可维护性
2025-03-12 16:31:10 +08:00

104 lines
3.2 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# -*- coding: utf-8 -*-
from __future__ import print_function
import pandas as pd
from collections import defaultdict
class FPNode:
    """A single node of an FP-tree.

    Attributes set by the constructor:
        item:     the item stored at this node (``None`` by default).
        count:    the counter carried by this node (``0`` by default).
        parent:   the node this one hangs under (``None`` by default).
        children: dict of child nodes, created empty for every instance.
        next:     link to another node, initially ``None``.
    """

    def __init__(self, item=None, count=0, parent=None):
        # Structural links first ...
        self.parent = parent
        self.children = {}
        self.next = None
        # ... then the payload carried by the node.
        self.item, self.count = item, count
def _unique_items(transaction):
    """Return the items of ``transaction`` in first-seen order, without repeats."""
    seen = set()
    unique = []
    for item in transaction:
        if item not in seen:
            seen.add(item)
            unique.append(item)
    return unique


def build_fp_tree(data, min_support):
    """Build an FP-tree and its header table from ``data``.

    Args:
        data: iterable of transactions, each an iterable of hashable items.
            One-shot iterables (e.g. generators) are accepted.
        min_support: minimum number of transactions an item must occur in
            to be kept (an absolute count, compared with ``>=``).

    Returns:
        ``(root, header_table)`` where ``root`` is the item-less root
        ``FPNode`` and ``header_table`` maps each frequent item to a
        two-element list ``[support, first_node]``. Returns ``(None, None)``
        when no item reaches ``min_support``.
    """
    # The data is scanned twice (count, then insert); a one-shot iterator
    # would be exhausted after the first scan, so materialise it once.
    if not isinstance(data, (list, tuple)):
        data = list(data)

    # Pass 1: support = number of transactions containing the item, so a
    # repeated item inside one transaction must only be counted once.
    support = defaultdict(int)
    for transaction in data:
        for item in _unique_items(transaction):
            support[item] += 1

    # Drop items below the minimum support and initialise the header table
    # entries as [support, head of the node-link chain].
    header_table = {}
    for item, count in support.items():
        if count >= min_support:
            header_table[item] = [count, None]
    if not header_table:
        return None, None

    # One global order for all transactions. Sorting each transaction by
    # support alone lets ties fall back to the transaction's own item order,
    # which puts the same pair of items on different branches in different
    # orders and splits their co-occurrence count.
    ordered = sorted(header_table, key=lambda k: header_table[k][0], reverse=True)
    rank = {item: position for position, item in enumerate(ordered)}

    # Pass 2: insert every transaction, restricted to frequent items.
    root = FPNode()
    for transaction in data:
        frequent = [item for item in _unique_items(transaction) if item in header_table]
        if frequent:
            frequent.sort(key=rank.__getitem__)
            update_fp_tree(frequent, root, header_table)
    return root, header_table
def update_fp_tree(items, node, header_table):
    """Insert the ordered item list ``items`` as a path below ``node``.

    For each item in turn, the matching child's ``count`` is incremented
    if it exists; otherwise a new ``FPNode`` with count 1 is created,
    attached to the current node and registered through
    ``update_header_table``. The walk then continues from that child.

    Args:
        items: items of one transaction, already in insertion order.
            An empty list is a no-op.
        node: the node to start inserting under (normally the tree root).
        header_table: header table passed on to ``update_header_table``
            whenever a new node is created.
    """
    # Iterative walk: avoids the per-level list slice and the recursion
    # depth limit that a recursive descent hits on long transactions.
    for item in items:
        child = node.children.get(item)
        if child is None:
            child = FPNode(item=item, count=1, parent=node)
            node.children[item] = child
            update_header_table(header_table, item, child)
        else:
            child.count += 1
        node = child
def update_header_table(header_table, item, target_node):
    """Append ``target_node`` to the node chain kept for ``item``.

    ``header_table[item][1]`` holds the first node of the chain. If it is
    still ``None`` the new node becomes the first one; otherwise the chain
    is followed through ``.next`` to its last node and ``target_node`` is
    linked after it.
    """
    entry = header_table[item]
    tail = entry[1]
    if tail is None:
        # First node seen for this item: it becomes the chain head.
        entry[1] = target_node
        return
    # Walk to the end of the existing chain, then hook the new node on.
    successor = tail.next
    while successor:
        tail = successor
        successor = tail.next
    tail.next = target_node
def mine_fp_tree(header_table, prefix, min_support, frequent_itemsets):
    """Recursively collect the itemsets reachable from ``header_table``.

    Items are visited in ascending order of the support stored in their
    header entry. For each item, a copy of ``prefix`` extended with that
    item is appended to ``frequent_itemsets``; the item's prefix paths are
    then turned into a conditional tree with ``build_fp_tree`` and, when
    that yields a non-empty header table, mined in turn with the extended
    prefix.

    Args:
        header_table: mapping item -> ``[support, first_node]``.
        prefix: set of items already fixed on this branch of the search.
        min_support: threshold forwarded to ``build_fp_tree``.
        frequent_itemsets: output list, extended in place.
    """
    by_support = sorted(header_table.items(), key=lambda entry: entry[1][0])
    for item, _entry in by_support:
        extended = prefix.copy()
        extended.add(item)
        frequent_itemsets.append(extended)

        pattern_base = find_prefix_paths(item, header_table)
        _tree, sub_headers = build_fp_tree(pattern_base, min_support)
        if not sub_headers:
            # Nothing frequent remains in the conditional tree.
            continue
        mine_fp_tree(sub_headers, extended, min_support, frequent_itemsets)
def find_prefix_paths(base_item, header_table):
    """Collect the conditional pattern base of ``base_item``.

    Follows the node chain stored at ``header_table[base_item][1]`` and,
    for every node on it, gathers the items on the way up to the root
    via ``ascend_tree``.

    Args:
        base_item: the item whose prefix paths are wanted.
        header_table: mapping item -> ``[support, first_node]``.

    Returns:
        A list of prefix paths (lists of items, nearest ancestor first).
        Each non-empty path appears ``node.count`` times, once per
        transaction that ran through the node, so that a consumer which
        counts every list once sees the correct conditional support.
        The repeated entries are the same list object and must not be
        mutated in place.
    """
    paths = []
    node = header_table[base_item][1]
    while node is not None:
        path = []
        ascend_tree(node, path)
        if path:
            # The node stands for node.count transactions, not one.
            # Emitting the path a single time would undercount every
            # conditional pattern and lose genuinely frequent itemsets.
            paths.extend([path] * node.count)
        node = node.next
    return paths
def ascend_tree(node, path):
    """Append the items of ``node``'s ancestors to ``path``, nearest first.

    The walk stops before an ancestor whose ``item`` is ``None`` (the
    item-less root) or when there is no parent. The item of ``node``
    itself is not included.

    Args:
        node: node to start from.
        path: list that receives the ancestor items; modified in place.
    """
    # Compare against None explicitly: a truthiness test would mistake
    # falsy items such as 0 or '' for the root and cut the path short.
    while node.parent is not None and node.parent.item is not None:
        path.append(node.parent.item)
        node = node.parent
def find_frequent_itemsets(data, min_support):
    """Entry point: mine frequent itemsets from ``data`` with FP-Growth.

    Builds the tree with ``build_fp_tree`` and, if one was produced, mines
    it with ``mine_fp_tree`` starting from an empty prefix.

    Args:
        data: the transactions, passed straight to ``build_fp_tree``.
        min_support: support threshold, passed to both helpers.

    Returns:
        A list of sets, one per itemset found; empty when no tree could
        be built.
    """
    tree, headers = build_fp_tree(data, min_support)
    found = []
    if tree:
        mine_fp_tree(headers, set(), min_support, found)
    return found