diff --git a/5-1.py b/5-1.py
index 3b50023..ac727c0 100644
--- a/5-1.py
+++ b/5-1.py
@@ -95,6 +95,10 @@ def tfidf(tokens, idfs):
 amazon_weights_rdd = amazon_rec_to_token.map(lambda x: (x[0], tfidf(x[1], idfs_full_broadcast.value)))
 google_weights_rdd = google_rec_to_token.map(lambda x: (x[0], tfidf(x[1], idfs_full_broadcast.value)))
 
+# 创建广播变量
+amazon_weights_broadcast = sc.broadcast(amazon_weights_rdd.collectAsMap())
+google_weights_broadcast = sc.broadcast(google_weights_rdd.collectAsMap())
+
 # 计算权重范数
 def norm(weights):