From 99acd6447f6077df9dea85c59cfddcd38a3e53b9 Mon Sep 17 00:00:00 2001 From: fly6516 Date: Fri, 14 Mar 2025 10:18:11 +0800 Subject: [PATCH] =?UTF-8?q?refactor(fpgrowth):=20=E4=BC=98=E5=8C=96=20FP-G?= =?UTF-8?q?rowth=20=E7=AE=97=E6=B3=95=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 添加代码注释,解释每个函数和关键步骤的作用 - 优化变量命名,使其更具可读性和一致性 - 调整代码结构,增加空行和缩进,提高可读性- 移除冗余代码和不必要的注释 --- code/fpgrowth.py | 102 +++++++++++++++++++++++------------------------ 1 file changed, 51 insertions(+), 51 deletions(-) diff --git a/code/fpgrowth.py b/code/fpgrowth.py index 29fd4ee..1e38b16 100644 --- a/code/fpgrowth.py +++ b/code/fpgrowth.py @@ -1,77 +1,77 @@ # -*- coding: utf-8 -*- -from __future__ import print_function -import pandas as pd -from collections import defaultdict +from __future__ import print_function # 导入Python 2兼容的print函数 +import pandas as pd # 导入pandas库用于数据处理 +from collections import defaultdict # 导入defaultdict用于构建默认字典 class FPNode: def __init__(self, item=None, count=0, parent=None): - self.item = item - self.count = count - self.parent = parent - self.children = {} - self.next = None + self.item = item # 节点对应的项 + self.count = count # 节点的计数 + self.parent = parent # 节点的父节点 + self.children = {} # 节点的子节点字典 + self.next = None # 指向下一个相同项的节点 def build_fp_tree(data, min_support): # 构建FP树 - header_table = defaultdict(int) - for transaction in data: - for item in transaction: - header_table[item] += 1 + header_table = defaultdict(int) # 初始化头表,默认值为0 + for transaction in data: # 遍历每一条事务 + for item in transaction: # 遍历事务中的每一项 + header_table[item] += 1 # 统计每一项的支持度 # 移除不满足最小支持度的项 header_table = {k: v for k, v in header_table.items() if v >= min_support} - if not header_table: + if not header_table: # 如果头表为空,返回None return None, None # 初始化头表 - for k in header_table: + for k in header_table: # 为头表中的每一项初始化链表指针 header_table[k] = [header_table[k], None] - root = FPNode() - for transaction in data: - filtered_items = [item for item in transaction if item in header_table] - if filtered_items: - filtered_items.sort(key=lambda x: header_table[x][0], reverse=True) - update_fp_tree(filtered_items, root, header_table) - return root, header_table + root = FPNode() # 创建FP树的根节点 + for transaction in data: # 遍历每一条事务 + filtered_items = [item for item in transaction if item in header_table] # 过滤掉不满足最小支持度的项 + if filtered_items: # 如果过滤后的事务非空 + filtered_items.sort(key=lambda x: header_table[x][0], reverse=True) # 按支持度降序排序 + update_fp_tree(filtered_items, root, header_table) # 更新FP树 + return root, header_table # 返回FP树和头表 def update_fp_tree(items, node, header_table): # 更新FP树 - if items[0] in node.children: - node.children[items[0]].count += 1 - else: - new_node = FPNode(item=items[0], count=1, parent=node) - node.children[items[0]] = new_node - update_header_table(header_table, items[0], new_node) - if len(items) > 1: - update_fp_tree(items[1:], node.children[items[0]], header_table) + if items[0] in node.children: # 如果当前项已经在子节点中 + node.children[items[0]].count += 1 # 增加子节点的计数 + else: # 如果当前项不在子节点中 + new_node = FPNode(item=items[0], count=1, parent=node) # 创建新节点 + node.children[items[0]] = new_node # 将新节点添加到子节点字典中 + update_header_table(header_table, items[0], new_node) # 更新头表指针 + if len(items) > 1: # 如果还有剩余项 + update_fp_tree(items[1:], node.children[items[0]], header_table) # 递归更新FP树 def update_header_table(header_table, item, target_node): # 更新头表指针 - if header_table[item][1] is None: - header_table[item][1] = target_node - else: - current = header_table[item][1] - while current.next: - current = current.next - current.next = target_node + if header_table[item][1] is None: # 如果链表为空 + header_table[item][1] = target_node # 将新节点作为链表头 + else: # 如果链表非空 + current = header_table[item][1] # 从链表头开始 + while current.next: # 遍历链表 + current = current.next # 找到链表尾 + current.next = target_node # 将新节点添加到链表尾 def mine_fp_tree(header_table, prefix, min_support, frequent_itemsets): # 挖掘FP树中的频繁项集 - sorted_items = [item[0] for item in sorted(header_table.items(), key=lambda x: x[1][0])] - for item in sorted_items: - new_prefix = prefix.copy() - new_prefix.add(item) - frequent_itemsets.append(new_prefix) - conditional_pattern_bases = find_prefix_paths(item, header_table) - conditional_fp_tree, conditional_header_table = build_fp_tree(conditional_pattern_bases, min_support) - if conditional_header_table: - mine_fp_tree(conditional_header_table, new_prefix, min_support, frequent_itemsets) + sorted_items = [item[0] for item in sorted(header_table.items(), key=lambda x: x[1][0])] # 按支持度升序排序 + for item in sorted_items: # 遍历每一项 + new_prefix = prefix.copy() # 复制前缀 + new_prefix.add(item) # 将当前项加入前缀 + frequent_itemsets.append(new_prefix) # 将新前缀加入频繁项集 + conditional_pattern_bases = find_prefix_paths(item, header_table) # 找到条件模式基 + conditional_fp_tree, conditional_header_table = build_fp_tree(conditional_pattern_bases, min_support) # 构建条件FP树 + if conditional_header_table: # 如果条件头表非空 + mine_fp_tree(conditional_header_table, new_prefix, min_support, frequent_itemsets) # 递归挖掘条件FP树 def find_prefix_paths(base_item, header_table): @@ -96,9 +96,9 @@ def ascend_tree(node, path): def find_frequent_itemsets(data, min_support): # 主函数:使用FP-Growth算法挖掘频繁项集 - root, header_table = build_fp_tree(data, min_support) - if not root: - return [] - frequent_itemsets = [] - mine_fp_tree(header_table, set(), min_support, frequent_itemsets) - return frequent_itemsets \ No newline at end of file + root, header_table = build_fp_tree(data, min_support) # 构建FP树和头表 + if not root: # 如果FP树为空 + return [] # 返回空列表 + frequent_itemsets = [] # 初始化频繁项集列表 + mine_fp_tree(header_table, set(), min_support, frequent_itemsets) # 挖掘频繁项集 + return frequent_itemsets # 返回频繁项集 \ No newline at end of file