feat: 创建亚马逊和谷歌权重的广播变量
- 新增广播变量 amazon_weights_broadcast 和 google_weights_broadcast
- 使用 collectAsMap() 方法将 RDD 转换为字典并进行广播
- 这些广播变量可以用于后续的分布式计算中，提高效率
This commit is contained in:
parent
8bccc2cad7
commit
dc883eaf72
4
5-1.py
4
5-1.py
@ -95,6 +95,10 @@ def tfidf(tokens, idfs):
|
||||
amazon_weights_rdd = amazon_rec_to_token.map(lambda x: (x[0], tfidf(x[1], idfs_full_broadcast.value)))
|
||||
google_weights_rdd = google_rec_to_token.map(lambda x: (x[0], tfidf(x[1], idfs_full_broadcast.value)))
|
||||
|
||||
# 创建广播变量
|
||||
amazon_weights_broadcast = sc.broadcast(amazon_weights_rdd.collectAsMap())
|
||||
google_weights_broadcast = sc.broadcast(google_weights_rdd.collectAsMap())
|
||||
|
||||
|
||||
# 计算权重范数
|
||||
def norm(weights):
|
||||
|
Loading…
Reference in New Issue
Block a user