Autograd example: custom.py (examples/dllib/autograd/custom.py)
Client-mode command (--deploy-mode client)
${SPARK_HOME}/bin/spark-submit \
--master ${RUNTIME_SPARK_MASTER} \
--deploy-mode client \
--conf spark.driver.host=172.16.0.200 \
--conf spark.driver.port=54321 \
--conf spark.kubernetes.authenticate.driver.serviceAccountName=${RUNTIME_K8S_SERVICE_ACCOUNT} \
--name analytics-zoo-autoestimator \
--conf spark.kubernetes.container.image=${RUNTIME_K8S_SPARK_IMAGE} \
--conf spark.executor.instances=${RUNTIME_EXECUTOR_INSTANCES} \
--conf spark.kubernetes.driver.volumes.persistentVolumeClaim.${RUNTIME_PERSISTENT_VOLUME_CLAIM}.options.claimName=${RUNTIME_PERSISTENT_VOLUME_CLAIM} \
--conf spark.kubernetes.driver.volumes.persistentVolumeClaim.${RUNTIME_PERSISTENT_VOLUME_CLAIM}.mount.path=/tmp \
--conf spark.kubernetes.executor.volumes.persistentVolumeClaim.${RUNTIME_PERSISTENT_VOLUME_CLAIM}.options.claimName=${RUNTIME_PERSISTENT_VOLUME_CLAIM} \
--conf spark.kubernetes.executor.volumes.persistentVolumeClaim.${RUNTIME_PERSISTENT_VOLUME_CLAIM}.mount.path=/tmp \
--conf spark.kubernetes.driver.label.az=true \
--conf spark.kubernetes.executor.label.az=true \
--conf spark.kubernetes.node.selector.spark=true \
--executor-cores ${RUNTIME_EXECUTOR_CORES} \
--executor-memory ${RUNTIME_EXECUTOR_MEMORY} \
--total-executor-cores ${RUNTIME_TOTAL_EXECUTOR_CORES} \
--driver-cores ${RUNTIME_DRIVER_CORES} \
--driver-memory ${RUNTIME_DRIVER_MEMORY} \
--properties-file ${BIGDL_HOME}/conf/spark-bigdl.conf \
--py-files local://${BIGDL_HOME}/python/bigdl-friesian-spark_${SPARK_VERSION}-${BIGDL_VERSION}-python-api.zip,local://${BIGDL_HOME}/python/bigdl-serving-spark_${SPARK_VERSION}-${BIGDL_VERSION}-python-api.zip,local://${BIGDL_HOME}/python/bigdl-dllib-spark_${SPARK_VERSION}-${BIGDL_VERSION}-python-api.zip,local://${BIGDL_HOME}/python/bigdl-orca-spark_${SPARK_VERSION}-${BIGDL_VERSION}-python-api.zip,local:///opt/bigdl-0.14.0-SNAPSHOT/examples/dllib/autograd/custom.py \
--conf spark.driver.extraJavaOptions=-Dderby.stream.error.file=/tmp \
--conf spark.sql.catalogImplementation='in-memory' \
--conf spark.driver.extraClassPath=local://${BIGDL_HOME}/jars/bigdl-orca-spark_${SPARK_VERSION}-${BIGDL_VERSION}-jar-with-dependencies.jar:local://${BIGDL_HOME}/jars/bigdl-dllib-spark_${SPARK_VERSION}-${BIGDL_VERSION}-jar-with-dependencies.jar:local://${BIGDL_HOME}/jars/bigdl-friesian-spark_${SPARK_VERSION}-${BIGDL_VERSION}-jar-with-dependencies.jar \
--conf spark.executor.extraClassPath=local://${BIGDL_HOME}/jars/bigdl-orca-spark_${SPARK_VERSION}-${BIGDL_VERSION}-jar-with-dependencies.jar:local://${BIGDL_HOME}/jars/bigdl-dllib-spark_${SPARK_VERSION}-${BIGDL_VERSION}-jar-with-dependencies.jar:local://${BIGDL_HOME}/jars/bigdl-friesian-spark_${SPARK_VERSION}-${BIGDL_VERSION}-jar-with-dependencies.jar \
local:///opt/bigdl-0.14.0-SNAPSHOT/examples/dllib/autograd/custom.py
Cluster-mode command (--deploy-mode cluster)
${SPARK_HOME}/bin/spark-submit \
--master ${RUNTIME_SPARK_MASTER} \
--deploy-mode cluster \
--conf spark.kubernetes.authenticate.driver.serviceAccountName=${RUNTIME_K8S_SERVICE_ACCOUNT} \
--name analytics-zoo-autoestimator \
--conf spark.kubernetes.container.image=${RUNTIME_K8S_SPARK_IMAGE} \
--conf spark.executor.instances=${RUNTIME_EXECUTOR_INSTANCES} \
--conf spark.kubernetes.driver.volumes.persistentVolumeClaim.${RUNTIME_PERSISTENT_VOLUME_CLAIM}.options.claimName=${RUNTIME_PERSISTENT_VOLUME_CLAIM} \
--conf spark.kubernetes.driver.volumes.persistentVolumeClaim.${RUNTIME_PERSISTENT_VOLUME_CLAIM}.mount.path=/tmp \
--conf spark.kubernetes.executor.volumes.persistentVolumeClaim.${RUNTIME_PERSISTENT_VOLUME_CLAIM}.options.claimName=${RUNTIME_PERSISTENT_VOLUME_CLAIM} \
--conf spark.kubernetes.executor.volumes.persistentVolumeClaim.${RUNTIME_PERSISTENT_VOLUME_CLAIM}.mount.path=/tmp \
--conf spark.kubernetes.driver.label.az=true \
--conf spark.kubernetes.executor.label.az=true \
--conf spark.kubernetes.node.selector.spark=true \
--executor-cores ${RUNTIME_EXECUTOR_CORES} \
--executor-memory ${RUNTIME_EXECUTOR_MEMORY} \
--total-executor-cores ${RUNTIME_TOTAL_EXECUTOR_CORES} \
--driver-cores ${RUNTIME_DRIVER_CORES} \
--driver-memory ${RUNTIME_DRIVER_MEMORY} \
--properties-file ${BIGDL_HOME}/conf/spark-bigdl.conf \
--py-files local://${BIGDL_HOME}/python/bigdl-friesian-spark_${SPARK_VERSION}-${BIGDL_VERSION}-python-api.zip,local://${BIGDL_HOME}/python/bigdl-serving-spark_${SPARK_VERSION}-${BIGDL_VERSION}-python-api.zip,local://${BIGDL_HOME}/python/bigdl-dllib-spark_${SPARK_VERSION}-${BIGDL_VERSION}-python-api.zip,local://${BIGDL_HOME}/python/bigdl-orca-spark_${SPARK_VERSION}-${BIGDL_VERSION}-python-api.zip,local:///opt/bigdl-0.14.0-SNAPSHOT/examples/dllib/autograd/custom.py \
--conf spark.driver.extraJavaOptions=-Dderby.stream.error.file=/tmp \
--conf spark.sql.catalogImplementation='in-memory' \
--conf spark.driver.extraClassPath=local://${BIGDL_HOME}/jars/bigdl-orca-spark_${SPARK_VERSION}-${BIGDL_VERSION}-jar-with-dependencies.jar:local://${BIGDL_HOME}/jars/bigdl-dllib-spark_${SPARK_VERSION}-${BIGDL_VERSION}-jar-with-dependencies.jar:local://${BIGDL_HOME}/jars/bigdl-friesian-spark_${SPARK_VERSION}-${BIGDL_VERSION}-jar-with-dependencies.jar \
--conf spark.executor.extraClassPath=local://${BIGDL_HOME}/jars/bigdl-orca-spark_${SPARK_VERSION}-${BIGDL_VERSION}-jar-with-dependencies.jar:local://${BIGDL_HOME}/jars/bigdl-dllib-spark_${SPARK_VERSION}-${BIGDL_VERSION}-jar-with-dependencies.jar:local://${BIGDL_HOME}/jars/bigdl-friesian-spark_${SPARK_VERSION}-${BIGDL_VERSION}-jar-with-dependencies.jar \
local:///opt/bigdl-0.14.0-SNAPSHOT/examples/dllib/autograd/custom.py
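Exception (java.lang.IllegalArgumentException from Sum.getPositiveDimension during the CustomLoss forward pass; driver log output follows)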
cls.getname: com.intel.analytics.bigdl.dllib.utils.python.api.Sample
BigDLBasePickler registering: bigdl.dllib.utils.common Sample
cls.getname: com.intel.analytics.bigdl.dllib.utils.python.api.EvaluatedResult
BigDLBasePickler registering: bigdl.dllib.utils.common EvaluatedResult
cls.getname: com.intel.analytics.bigdl.dllib.utils.python.api.JTensor
BigDLBasePickler registering: bigdl.dllib.utils.common JTensor
cls.getname: com.intel.analytics.bigdl.dllib.utils.python.api.JActivity
BigDLBasePickler registering: bigdl.dllib.utils.common JActivity
creating: createZooKerasSequential
creating: createZooKerasDense
creating: createDefault
creating: createSGD
creating: createZooKerasVariable
creating: createZooKerasVariable
creating: createZooKerasCustomLoss
2021-10-19 05:51:19 INFO LocalOptimizer$:69 - Clone 1 model...
2021-10-19 05:51:20 INFO LocalOptimizer$:69 - Clone 2 model...
2021-10-19 05:51:20 INFO LocalOptimizer$:69 - Clone 3 model...
2021-10-19 05:51:20 INFO LocalOptimizer$:69 - Clone 4 model...
2021-10-19 05:51:20 INFO LocalOptimizer$:69 - Clone 5 model...
2021-10-19 05:51:20 INFO LocalOptimizer$:69 - Clone 6 model...
2021-10-19 05:51:20 INFO LocalOptimizer$:69 - Clone 7 model...
2021-10-19 05:51:20 INFO LocalOptimizer$:69 - Clone 8 model...
2021-10-19 05:51:20 INFO LocalOptimizer$:69 - Clone 9 model...
2021-10-19 05:51:20 INFO LocalOptimizer$:69 - Clone 10 model...
2021-10-19 05:51:20 INFO LocalOptimizer$:69 - Clone 11 model...
2021-10-19 05:51:20 INFO LocalOptimizer$:69 - Clone 12 model...
2021-10-19 05:51:20 INFO LocalOptimizer$:69 - Clone 13 model...
2021-10-19 05:51:20 INFO LocalOptimizer$:69 - Clone 14 model...
2021-10-19 05:51:20 INFO LocalOptimizer$:69 - Clone 15 model...
2021-10-19 05:51:20 INFO LocalOptimizer$:69 - Clone 16 model...
2021-10-19 05:51:20 INFO LocalOptimizer$:119 - model thread pool size is 1
2021-10-19 05:51:20 ERROR ThreadPool$:136 - Error: Layer info: Model[ce577d6d]/KerasLayerWrapper[Mean76c60542_wrapper]
java.lang.IllegalArgumentException: requirement failed: dimension exceeds input dimensionsdimension 1, input dimension 0
at scala.Predef$.require(Predef.scala:281)
at com.intel.analytics.bigdl.dllib.nn.Sum.getPositiveDimension(Sum.scala:64)
at com.intel.analytics.bigdl.dllib.nn.Sum.updateOutput(Sum.scala:75)
at com.intel.analytics.bigdl.dllib.nn.Sum.updateOutput(Sum.scala:44)
at com.intel.analytics.bigdl.dllib.nn.keras.KerasLayer.updateOutput(KerasLayer.scala:274)
at com.intel.analytics.bigdl.dllib.nn.abstractnn.AbstractModule.forward(AbstractModule.scala:283)
at com.intel.analytics.bigdl.dllib.nn.StaticGraph.updateOutput(StaticGraph.scala:62)
at com.intel.analytics.bigdl.dllib.nn.keras.KerasLayer.updateOutput(KerasLayer.scala:274)
at com.intel.analytics.bigdl.dllib.nn.abstractnn.AbstractModule.forward(AbstractModule.scala:283)
at com.intel.analytics.bigdl.dllib.keras.autograd.CustomLoss.updateOutput(CustomLoss.scala:104)
at com.intel.analytics.bigdl.dllib.keras.autograd.CustomLoss.updateOutput(CustomLoss.scala:66)
at com.intel.analytics.bigdl.dllib.nn.abstractnn.AbstractCriterion.forward(AbstractCriterion.scala:73)
at com.intel.analytics.bigdl.dllib.optim.LocalOptimizer.$anonfun$optimize$8(LocalOptimizer.scala:149)
at scala.runtime.java8.JFunction0$mcD$sp.apply(JFunction0$mcD$sp.java:23)
at com.intel.analytics.bigdl.dllib.utils.ThreadPool.$anonfun$invokeAndWait$2(ThreadPool.scala:133)
at scala.concurrent.Future$.$anonfun$apply$1(Future.scala:659)
at scala.util.Success.$anonfun$map$1(Try.scala:255)
at scala.util.Success.map(Try.scala:213)
at scala.concurrent.Future.$anonfun$map$1(Future.scala:292)
at scala.concurrent.impl.Promise.liftedTree1$1(Promise.scala:33)
at scala.concurrent.impl.Promise.$anonfun$transform$1(Promise.scala:33)
at scala.concurrent.impl.CallbackRunnable.run(Promise.scala:64)
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
at com.intel.analytics.bigdl.dllib.nn.abstractnn.AbstractModule.forward(AbstractModule.scala:289)
at com.intel.analytics.bigdl.dllib.nn.StaticGraph.updateOutput(StaticGraph.scala:62)
at com.intel.analytics.bigdl.dllib.nn.keras.KerasLayer.updateOutput(KerasLayer.scala:274)
at com.intel.analytics.bigdl.dllib.nn.abstractnn.AbstractModule.forward(AbstractModule.scala:283)
at com.intel.analytics.bigdl.dllib.keras.autograd.CustomLoss.updateOutput(CustomLoss.scala:104)
at com.intel.analytics.bigdl.dllib.keras.autograd.CustomLoss.updateOutput(CustomLoss.scala:66)
at com.intel.analytics.bigdl.dllib.nn.abstractnn.AbstractCriterion.forward(AbstractCriterion.scala:73)
at com.intel.analytics.bigdl.dllib.optim.LocalOptimizer.$anonfun$optimize$8(LocalOptimizer.scala:149)
at scala.runtime.java8.JFunction0$mcD$sp.apply(JFunction0$mcD$sp.java:23)
at com.intel.analytics.bigdl.dllib.utils.ThreadPool.$anonfun$invokeAndWait$2(ThreadPool.scala:133)
at scala.concurrent.Future$.$anonfun$apply$1(Future.scala:659)
at scala.util.Success.$anonfun$map$1(Try.scala:255)
at scala.util.Success.map(Try.scala:213)
at scala.concurrent.Future.$anonfun$map$1(Future.scala:292)
at scala.concurrent.impl.Promise.liftedTree1$1(Promise.scala:33)
at scala.concurrent.impl.Promise.$anonfun$transform$1(Promise.scala:33)
at scala.concurrent.impl.CallbackRunnable.run(Promise.scala:64)
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
2021-10-19 05:51:20 ERROR ThreadPool$:136 - Error: Layer info: Model[ce577d6d]/KerasLayerWrapper[Mean76c60542_wrapper]
java.lang.IllegalArgumentException: requirement failed: dimension exceeds input dimensionsdimension 1, input dimension 0
at scala.Predef$.require(Predef.scala:281)
at com.intel.analytics.bigdl.dllib.nn.Sum.getPositiveDimension(Sum.scala:64)
at com.intel.analytics.bigdl.dllib.nn.Sum.updateOutput(Sum.scala:75)
at com.intel.analytics.bigdl.dllib.nn.Sum.updateOutput(Sum.scala:44)
at com.intel.analytics.bigdl.dllib.nn.keras.KerasLayer.updateOutput(KerasLayer.scala:274)
at com.intel.analytics.bigdl.dllib.nn.abstractnn.AbstractModule.forward(AbstractModule.scala:283)
at com.intel.analytics.bigdl.dllib.nn.StaticGraph.updateOutput(StaticGraph.scala:62)
at com.intel.analytics.bigdl.dllib.nn.keras.KerasLayer.updateOutput(KerasLayer.scala:274)
at com.intel.analytics.bigdl.dllib.nn.abstractnn.AbstractModule.forward(AbstractModule.scala:283)
at com.intel.analytics.bigdl.dllib.keras.autograd.CustomLoss.updateOutput(CustomLoss.scala:104)
at com.intel.analytics.bigdl.dllib.keras.autograd.CustomLoss.updateOutput(CustomLoss.scala:66)
at com.intel.analytics.bigdl.dllib.nn.abstractnn.AbstractCriterion.forward(AbstractCriterion.scala:73)
at com.intel.analytics.bigdl.dllib.optim.LocalOptimizer.$anonfun$optimize$8(LocalOptimizer.scala:149)
at scala.runtime.java8.JFunction0$mcD$sp.apply(JFunction0$mcD$sp.java:23)
at com.intel.analytics.bigdl.dllib.utils.ThreadPool.$anonfun$invokeAndWait$2(ThreadPool.scala:133)
at scala.concurrent.Future$.$anonfun$apply$1(Future.scala:659)
at scala.util.Success.$anonfun$map$1(Try.scala:255)
at scala.util.Success.map(Try.scala:213)
at scala.concurrent.Future.$anonfun$map$1(Future.scala:292)
at scala.concurrent.impl.Promise.liftedTree1$1(Promise.scala:33)
at scala.concurrent.impl.Promise.$anonfun$transform$1(Promise.scala:33)
at scala.concurrent.impl.CallbackRunnable.run(Promise.scala:64)
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
at com.intel.analytics.bigdl.dllib.nn.abstractnn.AbstractModule.forward(AbstractModule.scala:289)
at com.intel.analytics.bigdl.dllib.nn.StaticGraph.updateOutput(StaticGraph.scala:62)
at com.intel.analytics.bigdl.dllib.nn.keras.KerasLayer.updateOutput(KerasLayer.scala:274)
at com.intel.analytics.bigdl.dllib.nn.abstractnn.AbstractModule.forward(AbstractModule.scala:283)
at com.intel.analytics.bigdl.dllib.keras.autograd.CustomLoss.updateOutput(CustomLoss.scala:104)
at com.intel.analytics.bigdl.dllib.keras.autograd.CustomLoss.updateOutput(CustomLoss.scala:66)
at com.intel.analytics.bigdl.dllib.nn.abstractnn.AbstractCriterion.forward(AbstractCriterion.scala:73)
at com.intel.analytics.bigdl.dllib.optim.LocalOptimizer.$anonfun$optimize$8(LocalOptimizer.scala:149)
at scala.runtime.java8.JFunction0$mcD$sp.apply(JFunction0$mcD$sp.java:23)
at com.intel.analytics.bigdl.dllib.utils.ThreadPool.$anonfun$invokeAndWait$2(ThreadPool.scala:133)
at scala.concurrent.Future$.$anonfun$apply$1(Future.scala:659)
at scala.util.Success.$anonfun$map$1(Try.scala:255)
at scala.util.Success.map(Try.scala:213)
at scala.concurrent.Future.$anonfun$map$1(Future.scala:292)
at scala.concurrent.impl.Promise.liftedTree1$1(Promise.scala:33)
at scala.concurrent.impl.Promise.$anonfun$transform$1(Promise.scala:33)
at scala.concurrent.impl.CallbackRunnable.run(Promise.scala:64)
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
autograd custom.py
client command
cluster command
exception