fix: error.txt

fly6516 2025-04-22 07:47:29 +00:00
parent de024388a1
commit 3b507342c3

error.txt

@@ -1,72 +1,34 @@
/usr/bin/python3 /root/PycharmProjects/als_movie/collab_filter.py
-25/04/22 06:53:16 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
+25/04/22 07:41:40 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
Training: 3563128, validation: 1189844, test: 1188989
-[(21708, 165, 3.5), (21708, 168, 1.0), (21708, 208, 2.0)]
-[(21708, 110, 4.5), (21708, 1641, 1.5), (21708, 1682, 4.5)]
-[(21708, 95, 3.5), (21708, 153, 1.5), (21708, 161, 4.0)]
+[(1, 316, 5.0), (1, 329, 5.0), (1, 356, 5.0)]
+[(1, 185, 5.0), (3, 6377, 4.0), (3, 6539, 5.0)]
+[(1, 122, 5.0), (1, 231, 5.0), (1, 292, 5.0)]
1189844
-25/04/22 06:53:44 ERROR TaskSetManager: Task 0 in stage 17.0 failed 4 times; aborting job
+[Stage 142:====================================================>(120 + 2) / 122]25/04/22 07:43:44 ERROR TaskSchedulerImpl: Lost executor 0 on 100.64.0.10: Command exited with code 137
+[Stage 142:==================================================>(120 + -40) / 122]25/04/22 07:43:44 ERROR TransportResponseHandler: Still have 1 requests outstanding when connection from /100.64.0.10:59762 is closed
+[Stage 142:=========>(120 + -120) / 122][Stage 142:> (0 + 120) / 122]25/04/22 07:45:02 ERROR TaskSchedulerImpl: Lost executor 4 on 100.64.0.10: worker lost
+[Stage 142:=========>(120 + -120) / 122][Stage 142:> (0 + 80) / 122]25/04/22 07:45:02 ERROR TaskSchedulerImpl: Lost executor 3 on 100.64.0.10: Remote RPC client disassociated. Likely due to containers exceeding thresholds, or network issues. Check driver logs for WARN messages.
+[Stage 131:> (0 + 40) / 120][Stage 142:=========>(120 + -120) / 122]25/04/22 07:45:29 ERROR TaskSetManager: Task 19 in stage 131.1 failed 4 times; aborting job
Traceback (most recent call last):
-File "/root/PycharmProjects/als_movie/collab_filter.py", line 54, in <module>
-lambda_=regularizationParameter)
-File "/usr/local/bin/python3.6/lib/python3.6/site-packages/pyspark/mllib/recommendation.py", line 274, in train
-lambda_, blocks, nonnegative, seed)
-File "/usr/local/bin/python3.6/lib/python3.6/site-packages/pyspark/mllib/common.py", line 130, in callMLlibFunc
-return callJavaFunc(sc, api, *args)
-File "/usr/local/bin/python3.6/lib/python3.6/site-packages/pyspark/mllib/common.py", line 123, in callJavaFunc
-return _java2py(sc, func(*args))
+File "/root/PycharmProjects/als_movie/collab_filter.py", line 56, in <module>
+error = computeError(predictedRatingsRDD, validationRDD)
+File "/root/PycharmProjects/als_movie/collab_filter.py", line 15, in computeError
+totalError = squaredErrorsRDD.reduce(lambda a, b: a + b)
+File "/usr/local/bin/python3.6/lib/python3.6/site-packages/pyspark/rdd.py", line 844, in reduce
+vals = self.mapPartitions(func).collect()
+File "/usr/local/bin/python3.6/lib/python3.6/site-packages/pyspark/rdd.py", line 816, in collect
+sock_info = self.ctx._jvm.PythonRDD.collectAndServe(self._jrdd.rdd())
File "/usr/local/bin/python3.6/lib/python3.6/site-packages/py4j/java_gateway.py", line 1257, in __call__
answer, self.gateway_client, self.target_id, self.name)
File "/usr/local/bin/python3.6/lib/python3.6/site-packages/py4j/protocol.py", line 328, in get_return_value
format(target_id, ".", name), value)
-py4j.protocol.Py4JJavaError: An error occurred while calling o135.trainALSModel.
-: org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 17.0 failed 4 times, most recent failure: Lost task 0.3 in stage 17.0 (TID 17, 100.64.0.11, executor 2): org.apache.spark.api.python.PythonException: Traceback (most recent call last):
+py4j.protocol.Py4JJavaError: An error occurred while calling z:org.apache.spark.api.python.PythonRDD.collectAndServe.
+: org.apache.spark.SparkException: Job aborted due to stage failure: Task 19 in stage 131.1 failed 4 times, most recent failure: Lost task 19.3 in stage 131.1 (TID 4082, 100.64.0.12, executor 1): java.lang.ArrayIndexOutOfBoundsException
File "/opt/module/spark-2.4.8-bin-hadoop2.7/python/lib/pyspark.zip/pyspark/worker.py", line 364, in main
func, profiler, deserializer, serializer = read_command(pickleSer, infile)
File "/opt/module/spark-2.4.8-bin-hadoop2.7/python/lib/pyspark.zip/pyspark/worker.py", line 69, in read_command
command = serializer._read_with_length(file)
File "/opt/module/spark-2.4.8-bin-hadoop2.7/python/lib/pyspark.zip/pyspark/serializers.py", line 173, in _read_with_length
return self.loads(obj)
File "/opt/module/spark-2.4.8-bin-hadoop2.7/python/lib/pyspark.zip/pyspark/serializers.py", line 587, in loads
return pickle.loads(obj, encoding=encoding)
File "/opt/module/spark-2.4.8-bin-hadoop2.7/python/lib/pyspark.zip/pyspark/mllib/__init__.py", line 28, in <module>
import numpy
ModuleNotFoundError: No module named 'numpy'
at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.handlePythonException(PythonRunner.scala:456)
at org.apache.spark.api.python.PythonRunner$$anon$1.read(PythonRunner.scala:592)
at org.apache.spark.api.python.PythonRunner$$anon$1.read(PythonRunner.scala:575)
at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.hasNext(PythonRunner.scala:410)
at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37)
at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:440)
at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:390)
at scala.collection.Iterator$class.foreach(Iterator.scala:891)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1334)
at scala.collection.generic.Growable$class.$plus$plus$eq(Growable.scala:59)
at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:104)
at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:48)
at scala.collection.TraversableOnce$class.to(TraversableOnce.scala:310)
at scala.collection.AbstractIterator.to(Iterator.scala:1334)
at scala.collection.TraversableOnce$class.toBuffer(TraversableOnce.scala:302)
at scala.collection.AbstractIterator.toBuffer(Iterator.scala:1334)
at scala.collection.TraversableOnce$class.toArray(TraversableOnce.scala:289)
at scala.collection.AbstractIterator.toArray(Iterator.scala:1334)
at org.apache.spark.rdd.RDD$$anonfun$take$1$$anonfun$31.apply(RDD.scala:1409)
at org.apache.spark.rdd.RDD$$anonfun$take$1$$anonfun$31.apply(RDD.scala:1409)
at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:2107)
at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:2107)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:123)
at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:411)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:417)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Driver stacktrace:
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1925)
@@ -87,20 +49,14 @@ Driver stacktrace:
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2067)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2088)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2107)
-at org.apache.spark.rdd.RDD$$anonfun$take$1.apply(RDD.scala:1409)
+at org.apache.spark.SparkContext.runJob(SparkContext.scala:2132)
+at org.apache.spark.rdd.RDD$$anonfun$collect$1.apply(RDD.scala:990)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:385)
-at org.apache.spark.rdd.RDD.take(RDD.scala:1382)
-at org.apache.spark.rdd.RDD$$anonfun$isEmpty$1.apply$mcZ$sp(RDD.scala:1517)
-at org.apache.spark.rdd.RDD$$anonfun$isEmpty$1.apply(RDD.scala:1517)
-at org.apache.spark.rdd.RDD$$anonfun$isEmpty$1.apply(RDD.scala:1517)
-at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
-at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
-at org.apache.spark.rdd.RDD.withScope(RDD.scala:385)
-at org.apache.spark.rdd.RDD.isEmpty(RDD.scala:1516)
-at org.apache.spark.mllib.recommendation.ALS.run(ALS.scala:240)
-at org.apache.spark.mllib.api.python.PythonMLLibAPI.trainALSModel(PythonMLLibAPI.scala:488)
+at org.apache.spark.rdd.RDD.collect(RDD.scala:989)
+at org.apache.spark.api.python.PythonRDD$.collectAndServe(PythonRDD.scala:166)
+at org.apache.spark.api.python.PythonRDD.collectAndServe(PythonRDD.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
@@ -112,49 +68,7 @@ Driver stacktrace:
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:238)
at java.lang.Thread.run(Thread.java:748)
-Caused by: org.apache.spark.api.python.PythonException: Traceback (most recent call last):
+Caused by: java.lang.ArrayIndexOutOfBoundsException
File "/opt/module/spark-2.4.8-bin-hadoop2.7/python/lib/pyspark.zip/pyspark/worker.py", line 364, in main
func, profiler, deserializer, serializer = read_command(pickleSer, infile)
File "/opt/module/spark-2.4.8-bin-hadoop2.7/python/lib/pyspark.zip/pyspark/worker.py", line 69, in read_command
command = serializer._read_with_length(file)
File "/opt/module/spark-2.4.8-bin-hadoop2.7/python/lib/pyspark.zip/pyspark/serializers.py", line 173, in _read_with_length
return self.loads(obj)
File "/opt/module/spark-2.4.8-bin-hadoop2.7/python/lib/pyspark.zip/pyspark/serializers.py", line 587, in loads
return pickle.loads(obj, encoding=encoding)
File "/opt/module/spark-2.4.8-bin-hadoop2.7/python/lib/pyspark.zip/pyspark/mllib/__init__.py", line 28, in <module>
import numpy
ModuleNotFoundError: No module named 'numpy'
at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.handlePythonException(PythonRunner.scala:456)
at org.apache.spark.api.python.PythonRunner$$anon$1.read(PythonRunner.scala:592)
at org.apache.spark.api.python.PythonRunner$$anon$1.read(PythonRunner.scala:575)
at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.hasNext(PythonRunner.scala:410)
at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37)
at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:440)
at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:390)
at scala.collection.Iterator$class.foreach(Iterator.scala:891)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1334)
at scala.collection.generic.Growable$class.$plus$plus$eq(Growable.scala:59)
at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:104)
at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:48)
at scala.collection.TraversableOnce$class.to(TraversableOnce.scala:310)
at scala.collection.AbstractIterator.to(Iterator.scala:1334)
at scala.collection.TraversableOnce$class.toBuffer(TraversableOnce.scala:302)
at scala.collection.AbstractIterator.toBuffer(Iterator.scala:1334)
at scala.collection.TraversableOnce$class.toArray(TraversableOnce.scala:289)
at scala.collection.AbstractIterator.toArray(Iterator.scala:1334)
at org.apache.spark.rdd.RDD$$anonfun$take$1$$anonfun$31.apply(RDD.scala:1409)
at org.apache.spark.rdd.RDD$$anonfun$take$1$$anonfun$31.apply(RDD.scala:1409)
at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:2107)
at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:2107)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:123)
at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:411)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:417)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
... 1 more
Process finished with exit code 1