From 036a740505fcd98ee192d9b383fe09f5d5dae240 Mon Sep 17 00:00:00 2001 From: fly6516 Date: Sun, 20 Apr 2025 03:01:42 +0800 Subject: [PATCH] =?UTF-8?q?perf(5-1.py):=E4=BC=98=E5=8C=96=20token=20?= =?UTF-8?q?=E6=9D=83=E9=87=8D=E8=AE=A1=E7=AE=97=E9=80=BB=E8=BE=91-=20?= =?UTF-8?q?=E4=BF=AE=E6=94=B9=E4=BA=86=E8=AE=A1=E7=AE=97=20token=20?= =?UTF-8?q?=E6=9D=83=E9=87=8D=E4=B9=98=E7=A7=AF=E7=9A=84=E6=96=B9=E5=BC=8F?= =?UTF-8?q?=EF=BC=8C=E4=BB=85=E5=9C=A8=20token=E5=90=8C=E6=97=B6=E5=AD=98?= =?UTF-8?q?=E5=9C=A8=E4=BA=8E=20Amazon=20=E5=92=8C=20Google=20=E6=95=B0?= =?UTF-8?q?=E6=8D=AE=E4=B8=AD=E6=97=B6=E8=BF=9B=E8=A1=8C=E8=AE=A1=E7=AE=97?= =?UTF-8?q?=20-=20=E8=BF=99=E6=A0=B7=E5=8F=AF=E4=BB=A5=E5=87=8F=E5=B0=91?= =?UTF-8?q?=E4=B8=8D=E5=BF=85=E8=A6=81=E7=9A=84=E8=AE=A1=E7=AE=97=EF=BC=8C?= =?UTF-8?q?=E6=8F=90=E9=AB=98=E4=BB=A3=E7=A0=81=E6=89=A7=E8=A1=8C=E6=95=88?= =?UTF-8?q?=E7=8E=87?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- 5-1.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/5-1.py b/5-1.py index 5b90235..4131890 100644 --- a/5-1.py +++ b/5-1.py @@ -131,9 +131,8 @@ def fast_cosine_similarity(record): tokens = record[1] # 使用 .get() 方法来安全地访问字典中的元素,避免 KeyError - s = sum([amazon_weights_broadcast.value[amazon_id].get(token, 0) * google_weights_broadcast.value[google_url].get( - token, 0) - for token in tokens]) + s = sum([amazon_weights_broadcast.value[amazon_id].get(token, 0) * google_weights_broadcast.value[google_url].get(token, 0) + for token in tokens if token in amazon_weights_broadcast.value[amazon_id] and token in google_weights_broadcast.value[google_url]]) # 使用广播变量计算余弦相似度 value = s / (amazon_norms_broadcast.value[amazon_id] * google_norms_broadcast.value[google_url])