diff --git a/5-1.py b/5-1.py index 5b90235..4131890 100644 --- a/5-1.py +++ b/5-1.py @@ -131,9 +131,8 @@ def fast_cosine_similarity(record): tokens = record[1] # 使用 .get() 方法来安全地访问字典中的元素,避免 KeyError - s = sum([amazon_weights_broadcast.value[amazon_id].get(token, 0) * google_weights_broadcast.value[google_url].get( - token, 0) - for token in tokens]) + s = sum([amazon_weights_broadcast.value[amazon_id].get(token, 0) * google_weights_broadcast.value[google_url].get(token, 0) + for token in tokens if token in amazon_weights_broadcast.value[amazon_id] and token in google_weights_broadcast.value[google_url]]) # 使用广播变量计算余弦相似度 value = s / (amazon_norms_broadcast.value[amazon_id] * google_norms_broadcast.value[google_url])