diff --git a/3-1.py b/3-1.py index 6424816..48a8157 100644 --- a/3-1.py +++ b/3-1.py @@ -42,8 +42,8 @@ if __name__ == "__main__": stopwords = load_stopwords(sc) # 加载数据 - google = sc.textFile("hdfs://master:9000/user/root/GoogleProducts.csv") - amazon = sc.textFile("hdfs://master:9000/user/root/AmazonProducts.csv") + google = sc.textFile("hdfs://master:9000/user/root/Google.csv") + amazon = sc.textFile("hdfs://master:9000/user/root/Amazon.csv") # 提取内容 google_rdd = google.map(lambda line: extract_info(line, "google")) \