From ee0754ff2f0330eeb997eb84e389dafc9fe3ed3f Mon Sep 17 00:00:00 2001
From: fly6516
Date: Mon, 14 Apr 2025 04:06:06 +0800
Subject: [PATCH] =?UTF-8?q?refactor(2-9.py):=20=E6=9B=B4=E6=96=B0=E6=97=A5?=
 =?UTF-8?q?=E5=BF=97=E6=96=87=E4=BB=B6=E8=B7=AF=E5=BE=84=E4=B8=BA=20HDFS?=
 =?UTF-8?q?=20=E8=B7=AF=E5=BE=84?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- 将本地日志文件路径替换为 HDFS 路径,以便在分布式环境中处理大数据
- 此修改允许 Spark 从 HDFS 中读取日志数据,提高了数据处理的可扩展性和容错性
---
 2-9.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/2-9.py b/2-9.py
index 454ca96..7b1cd95 100644
--- a/2-9.py
+++ b/2-9.py
@@ -49,7 +49,7 @@ def main():
         .getOrCreate()

     # 读取日志文件
-    logFile = 'apache.access.log.PROJECT' # 替换为您的日志文件路径
+    logFile = 'hdfs://master:9000/user/root/apache.access.log.PROJECT'
     rdd = spark.sparkContext.textFile(logFile)

     # 解析日志行