From 6e9c2a5f918aedac42c4cb138f43a99412e51ebf Mon Sep 17 00:00:00 2001 From: fly6516 Date: Wed, 12 Mar 2025 16:31:10 +0800 Subject: [PATCH] =?UTF-8?q?feat(code):=20=E6=B7=BB=E5=8A=A0=20Apriori=20?= =?UTF-8?q?=E5=92=8C=20FP-Growth=20=E7=AE=97=E6=B3=95=E5=AE=9E=E7=8E=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 新增 Apriori算法挖掘关联规则的实现 - 新增 FP-Growth算法挖掘频繁项集的实现 - 添加相应的数据预处理和结果保存代码 - 优化代码结构,提高可读性和可维护性 --- code/5-6_cal_apriori.py | 24 +++++++++ code/5-6_cal_fpgrowth.py | 41 +++++++++++++++ code/apriori.py | 59 ++++++++++++++++++++++ code/fpgrowth.py | 104 +++++++++++++++++++++++++++++++++++++++ data/menu_orders.xls | Bin 0 -> 25600 bytes tmp/apriori_rules.xls | Bin 0 -> 5632 bytes tmp/apriori_rules.xlsx | Bin 0 -> 5159 bytes tmp/fpgrowth_rules.xlsx | Bin 0 -> 5061 bytes 8 files changed, 228 insertions(+) create mode 100644 code/5-6_cal_apriori.py create mode 100644 code/5-6_cal_fpgrowth.py create mode 100644 code/apriori.py create mode 100644 code/fpgrowth.py create mode 100644 data/menu_orders.xls create mode 100644 tmp/apriori_rules.xls create mode 100644 tmp/apriori_rules.xlsx create mode 100644 tmp/fpgrowth_rules.xlsx diff --git a/code/5-6_cal_apriori.py b/code/5-6_cal_apriori.py new file mode 100644 index 0000000..9c01607 --- /dev/null +++ b/code/5-6_cal_apriori.py @@ -0,0 +1,24 @@ +#-*- coding: utf-8 -*- +#使用Apriori算法挖掘菜品订单关联规则 +from __future__ import print_function +import pandas as pd +from apriori import * #导入自行编写的apriori函数 + +inputfile = '../data/menu_orders.xls' +outputfile = '../tmp/apriori_rules.xlsx' #结果文件,保留 .xlsx 格式 +data = pd.read_excel(inputfile, header = None) + +print(u'\n转换原始数据至0-1矩阵...') +ct = lambda x : pd.Series(1, index = x[pd.notnull(x)]) #转换0-1矩阵的过渡函数 +b = map(ct, data.iloc[:,:].values) #用map方式执行 +data = pd.DataFrame(list(b)).fillna(0) #实现矩阵转换,空值用0填充 +print(u'\n转换完毕。') +del b #删除中间变量b,节省内存 + +support = 0.2 #最小支持度 +confidence = 0.5 #最小置信度 +ms = '---' #连接符,默认'--',用来区分不同元素,如A--B。需要保证原始表格中不含有该字符 + 
+# 提醒用户需要安装 openpyxl 库以支持 .xlsx 格式 +# 如果未安装,可以通过以下命令安装:pip install openpyxl +find_rule(data, support, confidence, ms).to_excel(outputfile, engine='openpyxl') #保存结果,指定 engine='openpyxl' \ No newline at end of file diff --git a/code/5-6_cal_fpgrowth.py b/code/5-6_cal_fpgrowth.py new file mode 100644 index 0000000..d7bb260 --- /dev/null +++ b/code/5-6_cal_fpgrowth.py @@ -0,0 +1,41 @@ +#-*- coding: utf-8 -*- +# 使用FP-Growth算法挖掘菜品订单关联规则 +from __future__ import print_function +import pandas as pd +from fpgrowth import find_frequent_itemsets # 导入FP-Growth函数 + +inputfile = '../data/menu_orders.xls' +outputfile = '../tmp/fpgrowth_rules.xlsx' # 结果文件,保留 .xlsx 格式 +data = pd.read_excel(inputfile, header=None) + +print(u'\n转换原始数据至0-1矩阵...') +ct = lambda x: pd.Series(1, index=x[pd.notnull(x)]) # 转换0-1矩阵的过渡函数 +b = map(ct, data.iloc[:, :].values) # 用map方式执行 +data = pd.DataFrame(list(b)).fillna(0) # 实现矩阵转换,空值用0填充 +print(u'\n转换完毕。') +del b # 删除中间变量b,节省内存 + +# 将数据转换为事务列表 +transactions = [] +for _, row in data.iterrows(): + transactions.append(list(row[row == 1].index)) + +min_support = 0.2 # 最小支持度 +min_support_count = int(min_support * len(transactions)) # 转换为绝对支持度 + +# 使用FP-Growth算法挖掘频繁项集 +frequent_itemsets = find_frequent_itemsets(transactions, min_support_count) + +# 确保 frequent_itemsets 是一个列表,其中每个元素是一个列表 +frequent_itemsets = [list(itemset) for itemset in frequent_itemsets] + +# 将结果保存为DataFrame +# 修改:将频繁项集转换为DataFrame时,确保每一行对应一个频繁项集的所有元素 +result_data = [] +for itemset in frequent_itemsets: + result_data.append({'Frequent Itemsets': ', '.join(itemset)}) # 将每个频繁项集转换为字符串 + +result = pd.DataFrame(result_data) +result.to_excel(outputfile, engine='openpyxl') # 保存结果,指定 engine='openpyxl' + +print(u'\nFP-Growth算法运行完毕,结果已保存至:', outputfile) diff --git a/code/apriori.py b/code/apriori.py new file mode 100644 index 0000000..619a426 --- /dev/null +++ b/code/apriori.py @@ -0,0 +1,59 @@ +# -*- coding: utf-8 -*- +from __future__ import print_function +import pandas as pd + + +# 
# Custom join step: builds the candidate k-itemsets C_k from the frequent
# (k-1)-itemsets L_{k-1}.
def connect_string(x, ms):
    """Join frequent (k-1)-itemsets into candidate k-itemsets.

    Two itemsets are joined when, after sorting their items, they agree on
    everything except the last item.

    Args:
        x: iterable of itemset labels; each label is the items of one itemset
            joined with ``ms``. ``len(x[0])`` is used as the itemset size for
            every label.
        ms: separator string used inside the labels.

    Returns:
        A list of candidates, each a sorted list of item names. Empty when
        ``x`` is empty.
    """
    itemsets = [sorted(label.split(ms)) for label in x]
    if not itemsets:
        return []
    size = len(itemsets[0])
    candidates = []
    for i, left in enumerate(itemsets):
        # Start at i + 1: an itemset can never be joined with itself.
        for right in itemsets[i + 1:]:
            if left[:size - 1] == right[:size - 1] and left[size - 1] != right[size - 1]:
                candidates.append(left[:size - 1] + sorted([right[size - 1], left[size - 1]]))
    return candidates


# Association-rule search (Apriori).
def find_rule(d, support, confidence, ms=u'--'):
    """Mine association rules from a 0-1 transaction matrix.

    Args:
        d: pandas DataFrame, one row per transaction and one column per item;
            a cell is 1 when the item is in the transaction, 0 otherwise.
        support: minimum support; itemsets are kept when their support is
            strictly greater than this value.
        confidence: minimum confidence; rules are kept when their confidence
            is strictly greater than this value.
        ms: separator used to build itemset/rule labels. Item names must not
            contain it, because labels are split on it again.

    Returns:
        DataFrame indexed by rule label (``A<ms>B`` means A --> B; the last
        item is the consequent) with columns ``support`` and ``confidence``,
        sorted by confidence then support, descending. The frame is empty
        (but has both columns) when no rule qualifies.
    """
    # rule label -> (support, confidence). Collected here and turned into a
    # frame once at the end, so that rules end up as rows and
    # support/confidence as columns.
    rules = {}

    support_series = 1.0 * d.sum() / len(d)  # support of every single item
    column = list(support_series[support_series > support].index)  # first support filter
    k = 0

    while len(column) > 1:
        k = k + 1
        print(u'\n正在进行第%s次搜索...' % k)
        column = connect_string(column, ms)
        print(u'数目:%s...' % len(column))
        if not column:
            # Nothing could be joined: no larger itemsets exist.
            break

        labels = [ms.join(i) for i in column]
        # A row supports an itemset when the product of its 0/1 cells is 1.
        sf = lambda i: d[i].prod(axis=1, numeric_only=True)

        # Building the joined matrix is the most time- and memory-hungry step;
        # for large data sets it is a candidate for parallelisation.
        d_2 = pd.DataFrame(list(map(sf, column)), index=labels).T

        support_series_2 = 1.0 * d_2[labels].sum() / len(d)  # support after the join
        column = list(support_series_2[support_series_2 > support].index)  # next support filter
        support_series = pd.concat([support_series, support_series_2])

        # Enumerate every possible rule of each frequent itemset: for {A,B,C}
        # try A+B-->C, B+C-->A and C+A-->B by rotating each item to the end.
        column2 = []
        for label in column:
            items = label.split(ms)
            for j in range(len(items)):
                column2.append(items[:j] + items[j + 1:] + items[j:j + 1])

        for rule in column2:
            itemset_support = support_series[ms.join(sorted(rule))]
            # confidence = support(whole itemset) / support(antecedent)
            rule_confidence = itemset_support / support_series[ms.join(rule[:-1])]
            if rule_confidence > confidence:  # confidence filter
                rules[ms.join(rule)] = (itemset_support, rule_confidence)

    result = pd.DataFrame(
        list(rules.values()),
        index=list(rules.keys()),
        columns=['support', 'confidence'],
        dtype=float,
    )
    result = result.sort_values(['confidence', 'support'], ascending=False)
    print(u'\n结果为:')
    print(result)

    return result


# ---------------------------------------------------------------------------
# Patch metadata (kept verbatim as a comment):
# diff --git a/code/fpgrowth.py b/code/fpgrowth.py
# new file mode 100644
# index 0000000..29fd4ee
# --- /dev/null
# +++ b/code/fpgrowth.py
# @@ -0,0 +1,104 @@
# ---------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# NOTE: fpgrowth.py starts with `from __future__ import print_function`; it is
# only valid as the first statement of a file, so it is recorded here as a
# comment. Nothing below calls print.
import pandas as pd
from collections import defaultdict


class FPNode:
    """One node of an FP-tree."""

    def __init__(self, item=None, count=0, parent=None):
        self.item = item        # item stored in this node; None for the root
        self.count = count      # number of transactions passing through this node
        self.parent = parent    # parent FPNode; None for the root
        self.children = {}      # item -> child FPNode
        self.next = None        # next node holding the same item (header-table chain)


def build_fp_tree(data, min_support):
    """Build an FP-tree from a collection of transactions.

    Args:
        data: iterable of transactions, each an iterable of hashable items.
            Repeated items inside one transaction are counted once.
        min_support: minimum absolute count an item needs to be kept.

    Returns:
        ``(root, header_table)`` where ``header_table`` maps each frequent
        item to ``[count, first_node]``; ``(None, None)`` when no item reaches
        ``min_support``.
    """
    # Materialise once (data is walked twice) and drop duplicates inside a
    # transaction while keeping the original item order.
    transactions = [list(dict.fromkeys(transaction)) for transaction in data]

    counts = defaultdict(int)
    for transaction in transactions:
        for item in transaction:
            counts[item] += 1

    # Drop the items that do not reach the minimum support and initialise the
    # header table as item -> [count, head of node chain].
    header_table = {k: [v, None] for k, v in counts.items() if v >= min_support}
    if not header_table:
        return None, None

    # One global order for the whole tree (descending count, ties broken the
    # same way for every transaction). Per-transaction tie-breaking would put
    # equal-count items on different branches and split their joint support.
    rank = {
        item: position
        for position, item in enumerate(
            sorted(header_table, key=lambda k: -header_table[k][0])
        )
    }

    root = FPNode()
    for transaction in transactions:
        filtered_items = [item for item in transaction if item in header_table]
        if filtered_items:
            filtered_items.sort(key=rank.__getitem__)
            update_fp_tree(filtered_items, root, header_table)
    return root, header_table


def update_fp_tree(items, node, header_table):
    """Insert one ordered transaction into the tree below ``node``.

    Existing nodes on the path get their count incremented; missing nodes are
    created and linked into the header table. An empty ``items`` is a no-op.
    """
    for item in items:
        child = node.children.get(item)
        if child is None:
            child = FPNode(item=item, count=1, parent=node)
            node.children[item] = child
            update_header_table(header_table, item, child)
        else:
            child.count += 1
        node = child


def update_header_table(header_table, item, target_node):
    """Append ``target_node`` to the end of the node chain of ``item``."""
    if header_table[item][1] is None:
        header_table[item][1] = target_node
    else:
        current = header_table[item][1]
        while current.next:
            current = current.next
        current.next = target_node


def mine_fp_tree(header_table, prefix, min_support, frequent_itemsets):
    """Recursively mine the frequent itemsets of an FP-tree.

    Args:
        header_table: header table returned by ``build_fp_tree``.
        prefix: set of items already fixed on this recursion path.
        min_support: minimum absolute support count.
        frequent_itemsets: output list; every frequent itemset found is
            appended to it as a ``set``.
    """
    # Visit items from the least to the most frequent.
    sorted_items = [item for item, _ in sorted(header_table.items(), key=lambda x: x[1][0])]
    for item in sorted_items:
        new_prefix = prefix.copy()
        new_prefix.add(item)
        frequent_itemsets.append(new_prefix)
        conditional_pattern_bases = find_prefix_paths(item, header_table)
        _, conditional_header_table = build_fp_tree(conditional_pattern_bases, min_support)
        if conditional_header_table:
            mine_fp_tree(conditional_header_table, new_prefix, min_support, frequent_itemsets)


def find_prefix_paths(base_item, header_table):
    """Return the conditional pattern base of ``base_item``.

    Each prefix path is repeated ``node.count`` times, so the list can be fed
    straight back into ``build_fp_tree`` as plain transactions without losing
    the support counts.
    """
    paths = []
    node = header_table[base_item][1]
    while node:
        path = []
        ascend_tree(node, path)
        if path:
            paths.extend(list(path) for _ in range(node.count))
        node = node.next
    return paths


def ascend_tree(node, path):
    """Append to ``path`` the items of all ancestors of ``node`` (root excluded)."""
    # Compare against None: the root is the node whose item is None, and
    # falsy items such as 0 or '' are legitimate items.
    while node.parent is not None and node.parent.item is not None:
        path.append(node.parent.item)
        node = node.parent


def find_frequent_itemsets(data, min_support):
    """Entry point: mine frequent itemsets with FP-Growth.

    Args:
        data: iterable of transactions, each an iterable of hashable items.
        min_support: minimum absolute support count.

    Returns:
        A list of frequent itemsets, each a ``set`` of items; empty when no
        item reaches ``min_support``.
    """
    root, header_table = build_fp_tree(data, min_support)
    if root is None:
        return []
    frequent_itemsets = []
    mine_fp_tree(header_table, set(), min_support, frequent_itemsets)
    return frequent_itemsets


# ---------------------------------------------------------------------------
# Patch metadata (kept verbatim as a comment); the binary payload follows:
# diff --git a/data/menu_orders.xls b/data/menu_orders.xls
# new file mode 100644
# index 0000000000000000000000000000000000000000..0d056e229ccf45444378bfe5907e39e9116080af
# GIT binary patch
# literal 25600
# ---------------------------------------------------------------------------
zcmeHQ2UHYEx2_pxU_eQVh=4F80YS3DzldN~L_`Ev7Zp&VVpz-pK>-z638EOlEQ+o< zU_!;bC`LpCL`B8bMc44Ji+SF?JcY!)lrO{b%?IgbO>jm4xkqCf-4ru2dXaVL}}KJ zI=83J`zgL+>g+|@K^;Qgc^b9zpsrYC6x)a5TS1-8sk0WGS>za1r<$TT6Fu?(YVb%F zx;j}x*sS!B5ySpt`f>$0G>-j%quaAsKrco!kR>!aM*;l-w26ry+oj8Zk90i+ z^qvany%^~%;w{pXMQ<;QxQXaA{ZX!)0vvS({2Gk>Y|>Fgm+CJT^kb9Vr09gGGw>e) zeFTZp{cgfBAY9U(5c2|a3wsL-Cr`K-(~Y_`;dF+%(STMK5*QgVmd;>8)WB86l^vYu z^&w}%u>ciKq&Z|zC^mP}mADWR$_MnWd`f&M-?J&(q>qGP-p!oK_=NIJI1a=X{y|lG zy{>$ErLKIb8s5a7{2(n1l#swTRh6OvfVa0qXb)mXoM}QuykfZ$f=o?A=*tc;;aCbA zY3n)0rs;Z&tqI2z>}^OdrHm!T!Uu~RiCIt%4Fj?WV+A0586IgQit#aw3p_puV1^VQ zC48jg^gz-75*0L0$KlJwuZWJQO#J_s=`2FCGh1j+uE-8JJn`VW^9pveU74V-?Ko|L8lYR1ab@kimgZqZi!=*Yk(nlR6+z!lE*)C~b{qFS&XV9QrPmR423dB0T^P?1(ufc1LoD&Umb zDhW8ITU9}=wH2^aS|tIehgMag2Ps(|B!KRtleRC&gaX6 zShWO1;N3h(mzIFIxJrY_PJ-3|BJmQr=E$oK+l;36+xY8Vmn>0wI0!;)B`7}+kWxBHmfT&U3iCTMm&r+aC**YJEjuusKn>x*M8AT*lJ)`lR(JuM&}pz0a?2Ufz!V6|=aeq0>aI zKpJ3EhsbH@H2EbH#)>7aKtGT&Z2~V?W;}L<3LNqDD%T3MJ2sC#siECPG|_mJpSRqh zmSo}$WH#UqsOBq zAOse}&E3mzv(o~bG!N3TB_L&f&4YB70)Y&(O_~S!S4%)3mJV!+{RB63EwD-RAh0d{ zXOJ?#=0PCtNCSZkvrU=@fk5FugW&AboK0}^+X9<34+4SKe+DV@YaYZ@3IsCDHfbKj zvn3$-kfAx7U?8FeHfbIN!UF#cQs&n@2ww^WGR!t<9;9bWKrk@hoJ}w|&;pw@4+6pA ze+DV@YaYZ*3IsCDHlc&$uscC~Q-iQv;1Q}7;gfEVd}hZl=0m7_C2t@k#1S4`j)_16 zLjLlcs630bC2E|=sM>}`0T-|Fb_2@*D-2Gc99?EPE(~(a8Rg)HuSpI!GV1HId(3j$ zG0Smfkkf@x4(=11@)@W;s0=;dvHwG_~_cv3J!^A0; z!zuGSUHK=ooQ}+LnEcIxQ4ZbT%KScF+siD+fLRWczgaWNq5E5zUrkL7vz$)Ma+v(h zj!_QX-^%<1&(|@_F=Up*Cqy8x?r$!xf~qKHIecb0O#bG?D2MKEF0L87n#PZ8|Clk$ zVe&UOMmcnUb8+2uD}$*$9Acit6|CeL!?Xz+G#xB1SD-=`&QcA$I|)fSY;|G*J582a z%-Xdpd3I9FTor8rn_vlUMyY!#Z1wsDO{9E{mBZE~mawCw>I(1EUNZG{1RSb9mZI;< zVzAYS7Tl}}3CXK~Mj)b<1Q9(}7ExD>DD+YVhyw3DWq&jwc99^WZ^|OJ6C(=!Q30ZG zz_BSEttE))g^n_T#R5q zI1jbLYmE6djdU*Q) z#+YT3Xmxn72Tn6E&NEEzW~tH%5(J7~^78i&1MV#6XNtYe|kLRi-YL%VBF12QUJwB&I$(ctqdfJ!4k#X3A*M z5#&kA>AGO#jItm!MLflebir_$DrG^`MVJ%mBFu?&5#~@AN7UtM+SP39u%(gq(JOhl zxTY*cow)4oAWu>*@*q)Tn|fap> zm>{x?w~Q_Zpo^qjG<~2h;(kH@K?24ph5qA`qynB{A_c|bs(?a$M+_aN8n`MKTZ%s9 
znxqPxmNJ~G0yQzGv>&;lAF;E#`LE=;OI6~sjKPnvgg$aGd@k~%e2B4&M^1Od$;tdY zT;3l$01lSWeSq$d*pX8Hhy;m03UhQHV8NbIXdz)4PWL^8@*^4K>rwI}rQ{1Bf-h-Y`5QczDB*#WNG*(U&!Z zyiRbY+9ikgF9{yrE@bhn#CY`O3n7eS74`c;f`>N?Sv*@Y9(|btZ7d~rm zsqqn!@zdAy49I$#VBsB37+&87uMBUam0{&yz{Vrt-#Hc!5+P?tu*p|qjPK%sDX6Co z;Xs}MDH76nND%^9Ch|x$Jj%s~=}&~N%>lU$x}+gw`7l96l@Fhqr;2eDQp zn~|3fwa>yNNsQr%NPGAV={t4GjL0c7LWAZ`i<~~zVb0{ZJ=xd%FF5J+ZyfYBxxSlk z{vjWp)yu_KGOlc0Q1aV4Oa0ROhjs>kY4p1j)TfZIA7@|X+PKm4Y5ZwZwWAl-=I`{~ z|Gtl%WrNYcu?3IZhgIzznYPH#pl5i2_S(Nro+-9*Uz#*HZGB+D{O8l6{SJ9$WW}3i zZNKx`gMF?0tS(94lC-YR9R9$dAuZeEX=&fedX=IBokw~7S^sIR<&&%!Z&Hed=`J#kk1sx4RcU>5dL~PRYDE@q$y^rEgMCC*MvOe&>RL)hx$?tiF!h5=fm*VvM;_ zjZs6*uq&z>r+6vb-!$&>`o1syQ{AfH`TFa3t6q50*ZW#b-7K%OiHl29x9_y#h0K|@ zWZV3jqoKtCx3~4Y?rc;rsK{w`JMCpv!-@m<^@+G$s&mJ;e~sIC%fy}8KZjPDuHD$_ zaSy-p5Bh^8ZL&K+pM@Pn^hqNsls&Rp@<5QQgWp-AT z`-D?A{l_1=w(r#6q;I}q?<%OkHlEIamgf&EUS z!s!g8Wl`oJGw zlsDQw>EB@2MT70Zto-csgxvEGf6s(CE{+D8+QKF{quQ@V-%PI=pm(O@XK!9@XTi#d z`DyO=4{exyI4|Y?;L@MtGWV8dxD7CTb2-Ila7JpM*q`EUSB;xzGg(Fb$gRKjSGidY zH52rC^h?Ct%O;cf#~^*SH;4xF?H&mb%S1gJYcU=YW^-ZuCrP7k?^g~o+giL9u(R5dW?yww)zy^ zarVuQxBa8~ebNuPw9i`La;hwB!Km!%m&?O@?Rt7Vqw?yvA*4R<_>{d3GfzL*#MykC zH*$Ua@Pw1E2b_8uHHy2+#Bjmvi5n)4zF_zLSEKxpeIH}zsQZ5NH2OI1=2Dx9aQ*I2 z)rUV%?C|LJ+jgs-oYq{h_wFAa-8CJ1v#f5{sh)f`FF5f;wXsY1jGMfv$Nl$@EY05^ z5TH4CW}0mm`zJLMyY)LZ>VZlU3A{Ks;@zdT!*21e-XFNC?S%2Kb7D!VdtcolVaJ1P zKllcJbX=IxbER%?PXpU~i#s|E?y-bpYS-yf#O1uduRN|8$=|>6`^P_CPH3noA86gU zG5^7r`e)z2<;?fqdB*L6m)_9e3u_)FzH4;f^i!hO_rL3t_m_R_cS-QBJo@HB!JW1# zm-cOa`?XeXaZ5ap9b>q+{dVg6dS-J-K-Dm(0DB>{E`G+Po-x=d{o@y?4^G zs736zlc&|zFH$Wo_PLufX@b`r_whOg?cYB6ntS=E>BZ^(ZCyh(`e@E6F_?V#zCqHE z@eWq!ljpj1A7*kh_(AuS48Q4LZ0-csY`Ez&I<2(8G;GE5;N|8SJ3P)mXjggP?DCR| zTFZXv4XjR=?8DnnskIzwq&s1dYD8g&J6B^eQ?k~irt!D`lz#o3=C9WaV=Fo@*ltm` z#OJQh!lYgLm(ycz43CNbmycPn%9+eLVdhn#7cDwe4)utbx7_yd$qD(EUGnUl_xw2c zvz1QA>DAgV%B}Kyu=U~cw0HNaLvr3vJ}%gtH2YT1lgsNiR>g(A`S9iR!WzN+Zx3fVo=vD2 zr`|p0`2DUsPR%?qTl?6QTI-ZMy+Q+uH~m&SpZz-aL2~bK>rEj)f46aO^Elu^snen* 
zd6x%Ok8*yxNyp?b6}RMW3#;_~cMN_$Z~Z9O+>#riT1E+NdTv;%7MI`aVD7aiFWow1 za^6+UDZPHBE;e~*%>JTz8rN!v6pi?Fq}Y-l_b4UBWu5=liTvRqTFd#R(bIYB7p07; z3JJgMbfBE*oVClC7W8Za!S z&kePLqg#jnDlkev{M_)>y`zf;><+y3+YeK!$5o`R?6hTc`NonQZ{sDdg&X?4ny&FVagY}E6LMlIZ^YB*Rk`@z)?Iw`Yr$EZZq zkF`)6T$%HWb^D%ACRVT3)qVMwo?fpCjiJ~mRH zWt_tqM^=}|WT+S4E=!FIPMoyDHfQXI)m!&$nPGf;bkd=Wt;^?XtO?v$o*g~6?W)6Z z*L4D{EJs@y`}a={PToDUym-=fY)v@}I9V#7H{W0a(Pu?QnEbd6#;rh_~!U=lypk1sP4=f2*))9(QN?zK(`xDjjC@JbUlu%mv3DzNkIA z*Ft^PPr;Ago&2!)$G^WVe!80TZQizk>0MXnj+j(&%D-Vs(TYW@cfQZQ#6E8~_w{#( zwaMSIHE(BaDuTP65!}$NW$t>ybqaMKq}{li)-6Ld&O7O)G%lXan>w}pq)v91+GV%) z-c?C?)?KrQx8?9xuQm7C^Bk6jpIH}sYR!hy_7hwlI>+obu^HOMGSJ4sd2PIwwQo-Tt>rq=3$r??P#h$_WSVO)2mXsHd`;3J+{c(?Z>$q za`NQGN6;`~(^>3p@NS5rg)Feq4u)lIzz}Hw*_EB7wiDQsw$78SD7#XURO|#xNV%m4 z&HS=Rvdb0;(mf_5Y7rxgA?pMZj52trO4p_tmk1cb#kUt=|G_0=q4ihrZC{cHyO%uL zZUPF(FF^Z+MngVi7JeM5BlU4)8@3)6QwQ?6!aEQv;Q6*D?h0Wi!-h?mHg&BFX95p> zdF1P^cXx)45AY79=oncf@QDh@5tN*RJr2PSqDV5uokE?v z0CyC1JomPyUE7FJUU=KmuD1|#3K3$W@Q*`q|Kv&?<=&!S?X&s3E+k*Z22f3W^u%ZY zbp(SJz{tiIkA-#;?hAQrdC+}i!;CBq+b&Ck=|GN#wvgq+{iG}nJ6e{8 zfe%?4Hc*y^kLl^OZ&cqC=tVq)ITqxw9l{)&>S19HsoD_v7v+!>wMI^CrKAk~LxO@e zBogpxu7pzsYSKAXASc4aUr9ZPG|}pzWh5vVf{>t~g(US7oWL zMei2253MPw$CIkZld6X{mT;;|X{JtbB4#Cg;MWH(jzIL?82))sXZ-C-da>J)B5FMQ z0H^3khmo5u<+W_mxk-EA5&9t^Z(xSihrk>m(Y%h3MAb2N%feurN{k0QdbmkisD0zCQ*MKDeF+3F846JHYrr79>$TK)Aoxrs@M(;jak*qfD$%dryV5 z1v+A3Tmi#aC$?8M9(+y3C@rA0fYJg=3n(q1w1CnAN((40ptOL}0!j-gEugdj zSO7Q4xPQidI6jxhS4i;rKJKM)pN}tY;A>8}zsJ2d?&I;rAAFeuw%+h93ZeeqI_~#H zFDv1mAL9ZBknr!a;NKy^4?*w^eEd^9xL3#jGk_1jW{}JwSwOOcWCf`UBrLaqUt36B zA=yE)hlGD@4F4<_mVbkUzslpkZ-GGgMhL$W;*#3^a+=Gfd(}|KjQQ7 zOh^fJCIf^&!gEPUL%%ZouHuTV#Fe}DsEhesNh14-*;Xj$;(45f;jR{c;}w?ou7N|xj(td zo!f5W?4>!A2+%4t;e9!Q)*9TSoT$Ff0QKdvUaM`KGNFk6M-@oKIgza`_`CB~o90sU zJ&71DHvdZwJmT{kM$iltiUa+ruSIJthpeX@R+4)-L!=8oSXN2ndE+yU|6!KbjAslv zZhR$@nDDmXc6!5-6^sQ&+8mp2x-N1LSIy^vA$^#I8?-4OJfX!-Z3|YH2C{?SfWl)| z*Hn(r$ZB452hM>Lx~@Mk>!}F;x^?i70^X=kkzfBd&Woqj 
z_r|$QDwC?>al`pqV}CNneRv4vZCtY+#<(B$6VR^Rw4+cM?R$&!;C{A;v%hQi$jESM zv>nI~7q<`e7mE2p3pa9aZ*R8J=6Rs!`ED5HY(>sfFI;xQ^sY8n!PVv}dbNsUc9aX0 z4U@hG&sAj8F)8?m&y2dmr_(lN2YElHo9t8^4k?bezrVP_eZU=<#~I)2Bp#rK_#SV@acWXxu;4YLByc zOw*{`?HiVZoQ`);%{(~M_-`@3Y5cNR8?+bZ*Y6+psC<&xM=Z%D1NJRHd` zWoZ0qS|in%gAs%|9hFdo*18C-4H0A)j3CT${{0ATbrIScBFHWnL720vm2!yJMTj>< z;4W&2Ak5kNP=rKXgoH()0qKd=LF*`(gD{6_k96Bq09ie{G2K|IN9)W&;ouPix2K|INI~$Z|SA*ZGSQ1yX0gw@^km?gr zRDq}hQ3aw3L>2f>DxeFZe*uJD+jODQl}MKw{jc+mkJ^a7&kFp4VSYxD!cQo&EnyF5 z{_|-zTD)pyw3a?anaih^J%4iFtNvdo+h5sx2|JOqJa&G{JygdZCm;J)9-&5Ip#S?S VOfyw%%Kg`lf2y5@*nd&~9{`~>H#`6U literal 0 HcmV?d00001 diff --git a/tmp/apriori_rules.xlsx b/tmp/apriori_rules.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..5616c71bb6ae108211ca332132faf5342cb424e4 GIT binary patch literal 5159 zcmZ`-1yq!4*Bu%L5QY$>k(BOkq|*yhL&MPBsemJ))KDrZ4H5&=-7O5EbV&#ZNROZ( z->B>U_sai$^RD-u^{zGh%z5_O=RD`xx*8amWB>pF7r^$&#pJ2~!;T2lTQ%w?M%}jV zHo9K!9^SlG9v(aat}a^fTKL_3#P_e&cb42rSLLncs#Lm?zb3l5FbqYG0xvF#BIp|e zTdgR4ZH5Qes(pkb^|h}!k`e%Ym6Td{(zQp$)*K2LWy3O0Zu8pafRa0f;&D1TUfwqK zyO5l9_8k)saPHNcG<1Ij6yN#e#Ls^I0(=s0THi`v)Nol9v7&Z`uKP#6j;!JET~wYU zXaE4{-}&0OdqI9CTnN+B>ER=YydHiZyju%l$SbEP%92yCCW!{81c2|Kc)Z-_1!}XUjQR@->!l-)2 z_!XDjroj?ckj899_Q^`bj;+w3Mj)i$kHi6MHcv?fZ#WN%uR-bht-(s&?yZhpQFA3{ zbI@93oO(cTP{^w^M4Se5R(Cnb#FP9%Qo?A!12JSp7`>8b`jwen8zrlU1hy$;YoqRw zk-Mm|UbWM!8VTMq?v{1YudSx-36&OC6uD>fd+*5mYhHs6w^q`5|77NsX(kgfHUN;x z1^|$vmKi>LViE_e{!>L0`{I4BYSiJ9}locy5jeXgxA-SdOP|p)V_l5Ytugk z8O6B-+QOiZJ5)r7N74xQleC&Mq%gU5zKB zDz=uP=3EYP$w&0#Ao*ydhVraU(Y41HE`68t%Q6tejs&ehxEmj} z5yRs#_u>t0ljx87-MyVo_Ob@f0~3>~VG%>2DOk;F$owgCTmeE!i7cZrzYt{aeopN0 zypND8)$00DfLQ&=QN`AS&Pn>an4sjs0cyBQ9d zz&NnZl(YZRRD4W-#drW=`;ksmPi0Jh*|^WHBP-igrE|f~Z3hMA_1z#6^14mADF%R% zhu`$ONsKh}d*6IhsOD%}mgG$jjlPJ-fPVD%VNN?v-boWu_R31cx&;vJozT<`fy>uF zTkTGoG)|2CioxNxoDi93H3Y8dhPI>HDhE}}al7UFR^xodc zDNdfzQW*1S<-$Aq&IGoKVsn!u^^%~mR|r8Mk+4D*0mQ)ta93N(p57p>$_#hb1`HFctf- 
zj&2K5bt%KCeWM&ErGkE7U1SOaX<*c}==Rh#%8XFQmP0x?b9{P5H;7X~#`=y3l~>*E z@OsZusHF)>|3rZHWG(2@xLi0{#&U3P6y`fN4Z8(OG8aYtcI{DwK>xp^M?y~cUPVvV)p@_nRrc>$M$p%rTQ2VZ_=ip^!H*oLw zO8)s7As%)cL$A@n0eyJ#y;<>`&b=!Kne3zeZ*$C0c)Bq+)9pl!@EUpM#>?l!haA0l z+qPg9yb=8=i0CttYz1n!9lbO4)R|3q6MLf+cw+~i>m$R3xRA2gsvX4_RBZ6l_*|dT zUdJk?ep>y`Lz`}dK+;|+iMHPVHLdV>q?z9aQz4(;3&ln%g?0?r=Bz~tvFw+yCg9Xx ziS9iYJADCEYvOa5O`$IhUAJZ@CcbZVJ*dh*kV>N+h(69^{E{2_649d=6&P&s(mu6c zSBT8jrJR{dxxun==7@`I)z*^xHO>?G1N7s+9$NB)`!P5Z^FlOZm9{;$UYJ^}Pb94` z6hZ3T;P?FRxZeFsyxZ|5seRe-GNiieAuZw|__(UwNxGI?8}Y-qVlamQqiO>a_TEt6(ApG`4$1B_fcwFFU zW0Ey3v;7ZS8JU$qcm2M-@b9ubzVaASA=;GOvN$zUtbBxATL0#jEUBuQ8Q2Fx^HD-) zN~OWRO(C$M`z0w6n-py{W$6APdW3BeP1wtC_-)4oP4~#hDrCeTD{(!E)Ha`FAt8HK zMS#Uf;kABnknH3iJM)bz+!d-XVEch$STUwcTF^^zR-fgha-J3c>B5@+XbV+4tEEOy zkXsfv+x3F^d+AyVY2}mrSF3b2QQjFrjonJ+*GAKH7XmBd9t#r780McyxfBZ?b%j2A z76vM_Zf}O`T(D$H)&?JiGh|#&FBBk`RtZAhmzfmWa)M+va6;&lux(<&r$QD9WY!f$ z)fN5o`~-nvMkfThc48}0O6gRBAD)+s#{u2tKgey$F1#mnKoSbkz_85uU)77`Z)gLh z7JnQNR(1Erf82lGf%fMH;?^j%s)h~#9N_@~l)pC+e|IluZwCm($D8-p*RNe9%fQt; zkB|5VC&7v<*_e6plXv@6jJ69n9=hdql>^@Nt&oq+mYG8gUI+H-!A z@L{`~)cNdA2*tq5t9^z0M6@-KY@+2s5wbZnuPie6kF>oB`Xn2-H~m6L6ua@X2(aY` zLL8;x`?0X4bG6k@-swo7o#IZ^eC$>HoW6`aX0|SC)#ZXBQJ$)CQud|H9bs7|@)NI6 z5!u1D7hhcYqUhotfh?f3CqqIZxQ9F>?x%badYbz>6yJ?3XRJ;w`N+$61Dqxvel|Wl zmQd4wvy7F@4h!nuY!NEd6jN0-d=EO-?#<5Lo?vE>VWvy*_%6JtoFA^?01u){6M{oU z#;0B@xRw;VayaA%v5B(OdbTA)sD|OM1?vidSEYeZL?mc6VK;>ne>tbBl?%vrNj~{fxKNG8%PI;XbV= zd0j_a+u<@8%tOS(+wg3*%3*uLiQ;HNTEQl6e1lGLiVAK=mD_|3Hl5?n)8KBN%-iLt z)iw1qH$_s=UsswL8>gzS@SOXlju>)Fu;&@BIF@Y2*KQj*2j*>R*KKP$2UaRknD~?` z8JhT*J*DHsG>Zi~k)_+zv*3x|#bwqIiKStm^{f|j!_=xlpVwt{$mCWOPeRk9V5ZaM zIF3tqJ-&yy!ueBvUy%uT#VGl?;sXGLzvbuc6X*i*{yDiU87#zS@DX2;_u`~z={_h? 
zC=i)cPn~xvT#9N>w~S!1DB&-&?PwRQCr#SBwS=sG4LzdCX_B;+Zp`0EpxxpTXD^N7 z*w5URh0Im$Hd^=KOnDQkI+t7DO7qJhY}+iqlUQI|-`c_{&zXgGyca(Q8(}8RwD;#-`s^{bw?PK_j%|Xv z4v9SUs24nfH`z->oQJ?s1f0k$ip-s(2h>}!3Z)d?t&sIw-!i41 zhUV=}gHS(GELE$4!jN89*#&Pp{K55c*4z^d>J=ZZJvHb?KIOY!EQv-Tidc(TvmY5t z=2FyOA<)=bi)*U5b)vL}wbhv@KVX`5B*=yauG%zijbi<^|r>f*@SIA{gsb0Avis&0GEFhA~jU`jUSh;6SRdfA!bYC0df z--n6mK@H~{6diBtz>T)WN(>`r4ux4ImUl9RUL^!pY`3DTDmMmEn@p6Gj{;job1OR3 zEHKNU&sCeaPFmRqkKG#FqpDe)jmXX!36el}qfVbL|ENC}#NZPYdHj;_?MRJ{Vv&?! zp6#$;s!DVTqt|c}C18@mYp^Jh^r<{?yT^N6gKV3eA>1Jw8H`jGypM?_Q(!XvTwrU= zBUltWVk8NbQ1_rWNjHWh2F%KmfjMRCqf*d`%!u|ulgqa!Z5M=pD!M^s_gN=O$+uCe zMu{3=;RSK==H>bM%u3R5L+2-ugOflkG)sHY`O9ak6r=GQW47{N^}=j=Pmyu6oeT~j zjqB`eZa9(9Yd;$Ag+iBpzFyNg1K|q5TjzV0*`9eu9KsrM+_^%D<7czPT%S%@2Dy9O zrW-W9FV#+gu-#_wT8KZ8>CI zv^RXH1KB;)LWzEt?$4dW+TGpxXYs}*PH3Z!WJ-?*r}#ACiW z*VE5`t0Z=@@^sct<+M@p$+7X9Q<2vK&u?*O=kh$v6dM^_dnG2uz#8z5U83#@2HGyWU)ZBK13pV5;Bb{ua`|DgGs@I+*mWCwv4?;Ut>? z%98iF=tVr`<7fYx?c2c0t$(35XZlF`;HB`EgL8Y?RsJzG0IvLZz{S zarJa}?ZzTAA{C(2s0Geo{64&b72Vx@AZ|Vu`hgx0Z?m5&sz^rsR8hZ0;b&hbK`{@x zU7lFZy-s{+uUY=59BX$U_k@MF>aVK_Q#JDYkzc(pS;`u0FyBeoOB)0#lF>hv`w*6F z6p5S7PJfKOoT1rp_69RyFq3zPeG z2%|qr!(~x(92zgrOMz{}#_p^i7N9Hx>X0L#z5X`ftPKCj6!k@eh0$ z_4oh9mADz;<{bKWfckKJ++P9y>r{F(%FS`&?7i|3FPqgUP?T z_)Xx=KKKt%>DK>c{&!cr3BFn9|A6yQJr%{<|ET(#ac-8>8k<$15~l|LI3~& literal 0 HcmV?d00001 diff --git a/tmp/fpgrowth_rules.xlsx b/tmp/fpgrowth_rules.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..1f99d785e1f941f04d151b595846eb72ef9a08e6 GIT binary patch literal 5061 zcmZ`-1ys}h_Z}rMKo}t~Kvblg(H+vMQqlsGX21ZEkZ^Q2(nzR;qq{oa+x)zm=gc67*#W zv(R#ZIl;LfJ2`QAIM}O4sS~vGklede*_?NLyCh>SU8>j?|1HMR{#IYmAn^3GAb`H+ zS>t0WH;ew>Qs$E;brT*+fsh%hc~z_vq15!{86~AY$-R4 z+)u@(?c9b$JnT9jj_Sc)0!21I+w!uUBpV(^8C5sZ7u1}U1}rL{V`}}CFX4@vmtW9% z4qyNPpnvCU0ds-=OgKNbO{0T{Fvt|z-{ckt#J-w>oiF0os5VejA?BYhgl^V^dM?Jczn?AH%Yt$IL~9bcf2Zft)aMxblVd zcH1_|-SsJwmo1%1l{*F8} z`nPw|BPMT1n~J^^A-v`33a(B;eF5qjJY6@c4+tL^Je<+`UnqJ6RF^ 
zv#$IO;H8y)58>*8(QqeP?4|n+ne7=z_APlb_?5XfA1tR)v+rUH3L_tCj(zi4+)A@f z2Syrdj@dn(ABzg@F8Sa=)V!}5-0?27yXb@KmJJj0`8&J3%^TLT3M*Tlq!d+a(qp#( z`cCeX??8s=q;_VmAsgn}-*yRav1MyZqd6m~V>as&ljU4#TPSqov8$cyYgARob{_9;VNY!J~Ao1jGz=-En3VxeBPv-pp(k( zV$c$qPfask?_y-gw<1`ogkFxm@+{(nL;<{<4*v`vvHyXauP}nrY5bU*tWSZFOtr{g zD8SJ*>506QK>Plo6?D?}7>0{D+3EX$TtHegzR4}}D zWg6UofZG(1G;YDxqvD((VxtjnPWb*`s2CcuBL~?ZH^Lp~=|mpg0pD^lyj4V?Zr8gM?L5cV0Q` zW1nZ5bSSrN1PnYmuYDdg0kP+Z`ldKesj^DU^jd0@j~_QT^$~s(Lf1B{FqmKR%}qWa z&VJoZ$u&<8piokGqt?3sFO18PS)ul#lYmwcUoM~F{N1`EiJZ|IAqF3-wy|)-1qDaL z&L8i1XQ%kNSS<8h277z7k@2k4B3Z55=hl*$`#axf?jn$>k2o1_#HjjL$lR?x3-8}! z>%`x%G-Sjd&>n*dg^^~;(l~BDJXT4ZT0_>c)QTIfZX$DBB{_yq#ZA{M2MHi@H7=@O zsuP;4n1of1%HBI^+#Dbjvl5S^tA3hIC-4Jh?7qs7&+{-@t`;oYjO9?D@ljMbGbOeT zIQAe0)^WPo@~vupjf=TPhB27XcW)0fz_NhWFXcNy(*a=}E5%TWq=>IP$BQ5hlTk)e z@~~1uoZFNxE4#b#wofCbzH|6HAhh``Cn)>nLfa%DE^^b_jEC=LnFs>smbC{Pz|Bl% zsvVw=8Jx_Qb0*^wm6Q?+hq*78ZdL@t(>!b2<%=)%CvTqeEs8kJi7sH7d?w?N%X{AD z6BgzNDl%`bM{1rjri)d2?fc(KJDZ%#L(MM{dXEEo_hL>M0Nn?{qH zmlTwhbkFh3WiLC7mGv!VKNiaYf^LLMAj%Ge%e?Z6W*8Ag1M_2 zK=F?kyF{h!ol&25!do!@K0uyKZ)R?wPt`LV0D$WE0rC{)Vh6W|LS5n9zdnAQB0dRo zj=4M}(!O2GB8C14RW@nc`oh^@6`i7aA3+{$?bh*3#C(e>4VXC~5I13eC!Jc3=}X;7 z0wW+!N`1b6nhn(8r2^HJ?O3JT&DLSZp(K(MAXj7)!{DI5E+F9MF!!3M$bdaGs7@&M z-WkGeqCM@_!8pI#nS}JD!Eo1r7gl;Y@siK#v{;ZRe;qt9YsQdx$G+VRR{T$mR)-swB?Z=FbN=r@lsblgis~a2G`MMAqLUs~` zUTZ;?Rth_XM_lUVgY^QQ4>-rN{hHMFS6}wA&GWI^vbR4eK!{&EAGLE=0H+Cu1nF(I z7X>AkJ_aRI*;eKw1Sjn(mYBS!Vx2-ydH`aGgsFXh^NO!8;UB+v>0`%DHFv){bKSn( z+yw3EI*NHgc;wZ`&B(5}xgn2;TnL7Bd}f1hSOLiR1l9>z})n{KV=y4cI+ zM#pbn?p7q zKV^jJwDCpSyoor?Hz*>a()1D>xk8*&v%EAF><;wfYq|CQMdopn`12%lBn9zI%iPM7 zL5;>dh;E~wP!e=HSkVC;2Eo zonD4D_?>ADU|USLL$fUrYkFcYDRNXIqm3`$ptrHrK3V zh0a>j9E@gtcDk^!ooJ9ey+R+FTJSTaU3?X^d30Yz=F#d?exh{ZWG3V?gSqNE?iU}K(Kfqg1#w4 z6bezZG4h zxE0olR`LzBs?nkbn7Tmi;oMw5?-_Bbj+nfJ(nwOMsoL94Oy1(@Qn?U<+R*jfmz}W| zokyt1>DF7;Al1vvOim;*f3`a;6*Ko)PG zy%yLjcz<|&E9RA=yc5S!Y@S-Udje82bG|w6FyF$w(qnPs^67T`2MI*8Mry&0#RRpUoQ?GxGC+N!~ZVANPY}fU??_Ns6B` 
z9*LaSu)))+q|U?@$5JG-GOOBhAal>uWM7j>dI+)DD>>BT$U``n4*Tff;YU4A{T<{R zEXpTDWfP2=NxBB0dNZDbS-@3HK^YQo3i7{KR@A;j(*{tYtLJ$%oYeBo(u$iu%uDS4lo;uu zHB&#dXZt1~PmGpAe)1DJQ{-{(@+}_%jZK*siOFZ#GWuSwk>@u+3gW(I6J4R)* zN3Tv`Sh2R`_R`VT@|7aff@Bdi=mMv(em`3p%E266p^mPm+RvPzaO0mUDv2NdsiJPv z{4Z{{5Md|jEv^Xm?N$Orr*ZD*EOVGEEPAe7CA*9$Q8l+4^$mW;SX5(y{a(~cLid>* zIenn?gkQXV5MCyOo010UWkE*BgDMx?<>z(YBy1j-nmEVcQ_m`YgRh*>VLDhYese#W z{`-p8yk%B8brBXsu9d`hN zW~#7#*E8%li^6+lV|tB5RuPsvGt7tuXQJ=A!kQ-xdG34IU8V2o51(c+pBsXo5$Bxm zdQBqpDbE5Ls)jaQ>wT_^sD;)lTk{1xTTa&Z-Ub#DKG zekc5EieCj@or8aXE9i{|UHZRU_A2;lpZ@_%qh~4_{6D(>YMiUB^d}A@+J5_w#?n&7 VLAyEt06zLHiJsujKV2`t{{bXx*W>^I literal 0 HcmV?d00001