From dc883eaf72f47f1d265f7059bf04843c289d2be6 Mon Sep 17 00:00:00 2001 From: fly6516 Date: Sun, 20 Apr 2025 02:55:27 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E5=88=9B=E5=BB=BA=E4=BA=9A=E9=A9=AC?= =?UTF-8?q?=E9=80=8A=E5=92=8C=E8=B0=B7=E6=AD=8C=E6=9D=83=E9=87=8D=E7=9A=84?= =?UTF-8?q?=E5=B9=BF=E6=92=AD=E5=8F=98=E9=87=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 新增广播变量 amazon_weights_broadcast 和 google_weights_broadcast - 使用 collectAsMap() 方法将 RDD 转换为字典并进行广播 - 这些广播变量可以用于后续的分布式计算中,提高效率 --- 5-1.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/5-1.py b/5-1.py index 3b50023..ac727c0 100644 --- a/5-1.py +++ b/5-1.py @@ -95,6 +95,10 @@ def tfidf(tokens, idfs): amazon_weights_rdd = amazon_rec_to_token.map(lambda x: (x[0], tfidf(x[1], idfs_full_broadcast.value))) google_weights_rdd = google_rec_to_token.map(lambda x: (x[0], tfidf(x[1], idfs_full_broadcast.value))) +# 创建广播变量 +amazon_weights_broadcast = sc.broadcast(amazon_weights_rdd.collectAsMap()) +google_weights_broadcast = sc.broadcast(google_weights_rdd.collectAsMap()) + # 计算权重范数 def norm(weights):