Skip to content

dllib on k8s: IllegalArgumentException: requirement failed: dimension exceeds input dimensionsdimension 1, input dimension 0 #31

@glorysdj

Description

@glorysdj

autograd custom.py

client command

${SPARK_HOME}/bin/spark-submit \
  --master ${RUNTIME_SPARK_MASTER} \
  --deploy-mode client \
  --conf spark.driver.host=172.16.0.200 \
  --conf spark.driver.port=54321 \
  --conf spark.kubernetes.authenticate.driver.serviceAccountName=${RUNTIME_K8S_SERVICE_ACCOUNT} \
  --name analytics-zoo-autoestimator \
  --conf spark.kubernetes.container.image=${RUNTIME_K8S_SPARK_IMAGE} \
  --conf spark.executor.instances=${RUNTIME_EXECUTOR_INSTANCES} \
  --conf spark.kubernetes.driver.volumes.persistentVolumeClaim.${RUNTIME_PERSISTENT_VOLUME_CLAIM}.options.claimName=${RUNTIME_PERSISTENT_VOLUME_CLAIM} \
  --conf spark.kubernetes.driver.volumes.persistentVolumeClaim.${RUNTIME_PERSISTENT_VOLUME_CLAIM}.mount.path=/tmp \
  --conf spark.kubernetes.executor.volumes.persistentVolumeClaim.${RUNTIME_PERSISTENT_VOLUME_CLAIM}.options.claimName=${RUNTIME_PERSISTENT_VOLUME_CLAIM} \
  --conf spark.kubernetes.executor.volumes.persistentVolumeClaim.${RUNTIME_PERSISTENT_VOLUME_CLAIM}.mount.path=/tmp \
  --conf spark.kubernetes.driver.label.az=true \
  --conf spark.kubernetes.executor.label.az=true \
  --conf spark.kubernetes.node.selector.spark=true \
  --executor-cores ${RUNTIME_EXECUTOR_CORES} \
  --executor-memory ${RUNTIME_EXECUTOR_MEMORY} \
  --total-executor-cores ${RUNTIME_TOTAL_EXECUTOR_CORES} \
  --driver-cores ${RUNTIME_DRIVER_CORES} \
  --driver-memory ${RUNTIME_DRIVER_MEMORY} \
  --properties-file ${BIGDL_HOME}/conf/spark-bigdl.conf \
  --py-files local://${BIGDL_HOME}/python/bigdl-friesian-spark_${SPARK_VERSION}-${BIGDL_VERSION}-python-api.zip,local://${BIGDL_HOME}/python/bigdl-serving-spark_${SPARK_VERSION}-${BIGDL_VERSION}-python-api.zip,local://${BIGDL_HOME}/python/bigdl-dllib-spark_${SPARK_VERSION}-${BIGDL_VERSION}-python-api.zip,local://${BIGDL_HOME}/python/bigdl-orca-spark_${SPARK_VERSION}-${BIGDL_VERSION}-python-api.zip,local:///opt/bigdl-0.14.0-SNAPSHOT/examples/dllib/autograd/custom.py \
  --conf spark.driver.extraJavaOptions=-Dderby.stream.error.file=/tmp \
  --conf spark.sql.catalogImplementation='in-memory' \
  --conf spark.driver.extraClassPath=local://${BIGDL_HOME}/jars/bigdl-orca-spark_${SPARK_VERSION}-${BIGDL_VERSION}-jar-with-dependencies.jar:local://${BIGDL_HOME}/jars/bigdl-dllib-spark_${SPARK_VERSION}-${BIGDL_VERSION}-jar-with-dependencies.jar:local://${BIGDL_HOME}/jars/bigdl-friesian-spark_${SPARK_VERSION}-${BIGDL_VERSION}-jar-with-dependencies.jar \
  --conf spark.executor.extraClassPath=local://${BIGDL_HOME}/jars/bigdl-orca-spark_${SPARK_VERSION}-${BIGDL_VERSION}-jar-with-dependencies.jar:local://${BIGDL_HOME}/jars/bigdl-dllib-spark_${SPARK_VERSION}-${BIGDL_VERSION}-jar-with-dependencies.jar:local://${BIGDL_HOME}/jars/bigdl-friesian-spark_${SPARK_VERSION}-${BIGDL_VERSION}-jar-with-dependencies.jar \
  local:///opt/bigdl-0.14.0-SNAPSHOT/examples/dllib/autograd/custom.py

cluster command

${SPARK_HOME}/bin/spark-submit \
  --master ${RUNTIME_SPARK_MASTER} \
  --deploy-mode cluster \
  --conf spark.kubernetes.authenticate.driver.serviceAccountName=${RUNTIME_K8S_SERVICE_ACCOUNT} \
  --name analytics-zoo-autoestimator \
  --conf spark.kubernetes.container.image=${RUNTIME_K8S_SPARK_IMAGE} \
  --conf spark.executor.instances=${RUNTIME_EXECUTOR_INSTANCES} \
  --conf spark.kubernetes.driver.volumes.persistentVolumeClaim.${RUNTIME_PERSISTENT_VOLUME_CLAIM}.options.claimName=${RUNTIME_PERSISTENT_VOLUME_CLAIM} \
  --conf spark.kubernetes.driver.volumes.persistentVolumeClaim.${RUNTIME_PERSISTENT_VOLUME_CLAIM}.mount.path=/tmp \
  --conf spark.kubernetes.executor.volumes.persistentVolumeClaim.${RUNTIME_PERSISTENT_VOLUME_CLAIM}.options.claimName=${RUNTIME_PERSISTENT_VOLUME_CLAIM} \
  --conf spark.kubernetes.executor.volumes.persistentVolumeClaim.${RUNTIME_PERSISTENT_VOLUME_CLAIM}.mount.path=/tmp \
  --conf spark.kubernetes.driver.label.az=true \
  --conf spark.kubernetes.executor.label.az=true \
  --conf spark.kubernetes.node.selector.spark=true \
  --executor-cores ${RUNTIME_EXECUTOR_CORES} \
  --executor-memory ${RUNTIME_EXECUTOR_MEMORY} \
  --total-executor-cores ${RUNTIME_TOTAL_EXECUTOR_CORES} \
  --driver-cores ${RUNTIME_DRIVER_CORES} \
  --driver-memory ${RUNTIME_DRIVER_MEMORY} \
  --properties-file ${BIGDL_HOME}/conf/spark-bigdl.conf \
  --py-files local://${BIGDL_HOME}/python/bigdl-friesian-spark_${SPARK_VERSION}-${BIGDL_VERSION}-python-api.zip,local://${BIGDL_HOME}/python/bigdl-serving-spark_${SPARK_VERSION}-${BIGDL_VERSION}-python-api.zip,local://${BIGDL_HOME}/python/bigdl-dllib-spark_${SPARK_VERSION}-${BIGDL_VERSION}-python-api.zip,local://${BIGDL_HOME}/python/bigdl-orca-spark_${SPARK_VERSION}-${BIGDL_VERSION}-python-api.zip,local:///opt/bigdl-0.14.0-SNAPSHOT/examples/dllib/autograd/custom.py \
  --conf spark.driver.extraJavaOptions=-Dderby.stream.error.file=/tmp \
  --conf spark.sql.catalogImplementation='in-memory' \
  --conf spark.driver.extraClassPath=local://${BIGDL_HOME}/jars/bigdl-orca-spark_${SPARK_VERSION}-${BIGDL_VERSION}-jar-with-dependencies.jar:local://${BIGDL_HOME}/jars/bigdl-dllib-spark_${SPARK_VERSION}-${BIGDL_VERSION}-jar-with-dependencies.jar:local://${BIGDL_HOME}/jars/bigdl-friesian-spark_${SPARK_VERSION}-${BIGDL_VERSION}-jar-with-dependencies.jar \
  --conf spark.executor.extraClassPath=local://${BIGDL_HOME}/jars/bigdl-orca-spark_${SPARK_VERSION}-${BIGDL_VERSION}-jar-with-dependencies.jar:local://${BIGDL_HOME}/jars/bigdl-dllib-spark_${SPARK_VERSION}-${BIGDL_VERSION}-jar-with-dependencies.jar:local://${BIGDL_HOME}/jars/bigdl-friesian-spark_${SPARK_VERSION}-${BIGDL_VERSION}-jar-with-dependencies.jar \
  local:///opt/bigdl-0.14.0-SNAPSHOT/examples/dllib/autograd/custom.py

exception

cls.getname: com.intel.analytics.bigdl.dllib.utils.python.api.Sample
BigDLBasePickler registering: bigdl.dllib.utils.common  Sample
cls.getname: com.intel.analytics.bigdl.dllib.utils.python.api.EvaluatedResult
BigDLBasePickler registering: bigdl.dllib.utils.common  EvaluatedResult
cls.getname: com.intel.analytics.bigdl.dllib.utils.python.api.JTensor
BigDLBasePickler registering: bigdl.dllib.utils.common  JTensor
cls.getname: com.intel.analytics.bigdl.dllib.utils.python.api.JActivity
BigDLBasePickler registering: bigdl.dllib.utils.common  JActivity
creating: createZooKerasSequential
creating: createZooKerasDense
creating: createDefault
creating: createSGD
creating: createZooKerasVariable
creating: createZooKerasVariable
creating: createZooKerasCustomLoss
2021-10-19 05:51:19 INFO  LocalOptimizer$:69 - Clone 1 model...
2021-10-19 05:51:20 INFO  LocalOptimizer$:69 - Clone 2 model...
2021-10-19 05:51:20 INFO  LocalOptimizer$:69 - Clone 3 model...
2021-10-19 05:51:20 INFO  LocalOptimizer$:69 - Clone 4 model...
2021-10-19 05:51:20 INFO  LocalOptimizer$:69 - Clone 5 model...
2021-10-19 05:51:20 INFO  LocalOptimizer$:69 - Clone 6 model...
2021-10-19 05:51:20 INFO  LocalOptimizer$:69 - Clone 7 model...
2021-10-19 05:51:20 INFO  LocalOptimizer$:69 - Clone 8 model...
2021-10-19 05:51:20 INFO  LocalOptimizer$:69 - Clone 9 model...
2021-10-19 05:51:20 INFO  LocalOptimizer$:69 - Clone 10 model...
2021-10-19 05:51:20 INFO  LocalOptimizer$:69 - Clone 11 model...
2021-10-19 05:51:20 INFO  LocalOptimizer$:69 - Clone 12 model...
2021-10-19 05:51:20 INFO  LocalOptimizer$:69 - Clone 13 model...
2021-10-19 05:51:20 INFO  LocalOptimizer$:69 - Clone 14 model...
2021-10-19 05:51:20 INFO  LocalOptimizer$:69 - Clone 15 model...
2021-10-19 05:51:20 INFO  LocalOptimizer$:69 - Clone 16 model...
2021-10-19 05:51:20 INFO  LocalOptimizer$:119 - model thread pool size is 1
2021-10-19 05:51:20 ERROR ThreadPool$:136 - Error: Layer info: Model[ce577d6d]/KerasLayerWrapper[Mean76c60542_wrapper]
java.lang.IllegalArgumentException: requirement failed: dimension exceeds input dimensionsdimension 1, input dimension 0
        at scala.Predef$.require(Predef.scala:281)
        at com.intel.analytics.bigdl.dllib.nn.Sum.getPositiveDimension(Sum.scala:64)
        at com.intel.analytics.bigdl.dllib.nn.Sum.updateOutput(Sum.scala:75)
        at com.intel.analytics.bigdl.dllib.nn.Sum.updateOutput(Sum.scala:44)
        at com.intel.analytics.bigdl.dllib.nn.keras.KerasLayer.updateOutput(KerasLayer.scala:274)
        at com.intel.analytics.bigdl.dllib.nn.abstractnn.AbstractModule.forward(AbstractModule.scala:283)
        at com.intel.analytics.bigdl.dllib.nn.StaticGraph.updateOutput(StaticGraph.scala:62)
        at com.intel.analytics.bigdl.dllib.nn.keras.KerasLayer.updateOutput(KerasLayer.scala:274)
        at com.intel.analytics.bigdl.dllib.nn.abstractnn.AbstractModule.forward(AbstractModule.scala:283)
        at com.intel.analytics.bigdl.dllib.keras.autograd.CustomLoss.updateOutput(CustomLoss.scala:104)
        at com.intel.analytics.bigdl.dllib.keras.autograd.CustomLoss.updateOutput(CustomLoss.scala:66)
        at com.intel.analytics.bigdl.dllib.nn.abstractnn.AbstractCriterion.forward(AbstractCriterion.scala:73)
        at com.intel.analytics.bigdl.dllib.optim.LocalOptimizer.$anonfun$optimize$8(LocalOptimizer.scala:149)
        at scala.runtime.java8.JFunction0$mcD$sp.apply(JFunction0$mcD$sp.java:23)
        at com.intel.analytics.bigdl.dllib.utils.ThreadPool.$anonfun$invokeAndWait$2(ThreadPool.scala:133)
        at scala.concurrent.Future$.$anonfun$apply$1(Future.scala:659)
        at scala.util.Success.$anonfun$map$1(Try.scala:255)
        at scala.util.Success.map(Try.scala:213)
        at scala.concurrent.Future.$anonfun$map$1(Future.scala:292)
        at scala.concurrent.impl.Promise.liftedTree1$1(Promise.scala:33)
        at scala.concurrent.impl.Promise.$anonfun$transform$1(Promise.scala:33)
        at scala.concurrent.impl.CallbackRunnable.run(Promise.scala:64)
        at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
        at java.util.concurrent.FutureTask.run(FutureTask.java:266)
        at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
        at java.lang.Thread.run(Thread.java:748)

        at com.intel.analytics.bigdl.dllib.nn.abstractnn.AbstractModule.forward(AbstractModule.scala:289)
        at com.intel.analytics.bigdl.dllib.nn.StaticGraph.updateOutput(StaticGraph.scala:62)
        at com.intel.analytics.bigdl.dllib.nn.keras.KerasLayer.updateOutput(KerasLayer.scala:274)
        at com.intel.analytics.bigdl.dllib.nn.abstractnn.AbstractModule.forward(AbstractModule.scala:283)
        at com.intel.analytics.bigdl.dllib.keras.autograd.CustomLoss.updateOutput(CustomLoss.scala:104)
        at com.intel.analytics.bigdl.dllib.keras.autograd.CustomLoss.updateOutput(CustomLoss.scala:66)
        at com.intel.analytics.bigdl.dllib.nn.abstractnn.AbstractCriterion.forward(AbstractCriterion.scala:73)
        at com.intel.analytics.bigdl.dllib.optim.LocalOptimizer.$anonfun$optimize$8(LocalOptimizer.scala:149)
        at scala.runtime.java8.JFunction0$mcD$sp.apply(JFunction0$mcD$sp.java:23)
        at com.intel.analytics.bigdl.dllib.utils.ThreadPool.$anonfun$invokeAndWait$2(ThreadPool.scala:133)
        at scala.concurrent.Future$.$anonfun$apply$1(Future.scala:659)
        at scala.util.Success.$anonfun$map$1(Try.scala:255)
        at scala.util.Success.map(Try.scala:213)
        at scala.concurrent.Future.$anonfun$map$1(Future.scala:292)
        at scala.concurrent.impl.Promise.liftedTree1$1(Promise.scala:33)
        at scala.concurrent.impl.Promise.$anonfun$transform$1(Promise.scala:33)
        at scala.concurrent.impl.CallbackRunnable.run(Promise.scala:64)
        at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
        at java.util.concurrent.FutureTask.run(FutureTask.java:266)
        at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
        at java.lang.Thread.run(Thread.java:748)

2021-10-19 05:51:20 ERROR ThreadPool$:136 - Error: Layer info: Model[ce577d6d]/KerasLayerWrapper[Mean76c60542_wrapper]
java.lang.IllegalArgumentException: requirement failed: dimension exceeds input dimensionsdimension 1, input dimension 0
        at scala.Predef$.require(Predef.scala:281)
        at com.intel.analytics.bigdl.dllib.nn.Sum.getPositiveDimension(Sum.scala:64)
        at com.intel.analytics.bigdl.dllib.nn.Sum.updateOutput(Sum.scala:75)
        at com.intel.analytics.bigdl.dllib.nn.Sum.updateOutput(Sum.scala:44)
        at com.intel.analytics.bigdl.dllib.nn.keras.KerasLayer.updateOutput(KerasLayer.scala:274)
        at com.intel.analytics.bigdl.dllib.nn.abstractnn.AbstractModule.forward(AbstractModule.scala:283)
        at com.intel.analytics.bigdl.dllib.nn.StaticGraph.updateOutput(StaticGraph.scala:62)
        at com.intel.analytics.bigdl.dllib.nn.keras.KerasLayer.updateOutput(KerasLayer.scala:274)
        at com.intel.analytics.bigdl.dllib.nn.abstractnn.AbstractModule.forward(AbstractModule.scala:283)
        at com.intel.analytics.bigdl.dllib.keras.autograd.CustomLoss.updateOutput(CustomLoss.scala:104)
        at com.intel.analytics.bigdl.dllib.keras.autograd.CustomLoss.updateOutput(CustomLoss.scala:66)
        at com.intel.analytics.bigdl.dllib.nn.abstractnn.AbstractCriterion.forward(AbstractCriterion.scala:73)
        at com.intel.analytics.bigdl.dllib.optim.LocalOptimizer.$anonfun$optimize$8(LocalOptimizer.scala:149)
        at scala.runtime.java8.JFunction0$mcD$sp.apply(JFunction0$mcD$sp.java:23)
        at com.intel.analytics.bigdl.dllib.utils.ThreadPool.$anonfun$invokeAndWait$2(ThreadPool.scala:133)
        at scala.concurrent.Future$.$anonfun$apply$1(Future.scala:659)
        at scala.util.Success.$anonfun$map$1(Try.scala:255)
        at scala.util.Success.map(Try.scala:213)
        at scala.concurrent.Future.$anonfun$map$1(Future.scala:292)
        at scala.concurrent.impl.Promise.liftedTree1$1(Promise.scala:33)
        at scala.concurrent.impl.Promise.$anonfun$transform$1(Promise.scala:33)
        at scala.concurrent.impl.CallbackRunnable.run(Promise.scala:64)
        at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
        at java.util.concurrent.FutureTask.run(FutureTask.java:266)
        at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
        at java.lang.Thread.run(Thread.java:748)

        at com.intel.analytics.bigdl.dllib.nn.abstractnn.AbstractModule.forward(AbstractModule.scala:289)
        at com.intel.analytics.bigdl.dllib.nn.StaticGraph.updateOutput(StaticGraph.scala:62)
        at com.intel.analytics.bigdl.dllib.nn.keras.KerasLayer.updateOutput(KerasLayer.scala:274)
        at com.intel.analytics.bigdl.dllib.nn.abstractnn.AbstractModule.forward(AbstractModule.scala:283)
        at com.intel.analytics.bigdl.dllib.keras.autograd.CustomLoss.updateOutput(CustomLoss.scala:104)
        at com.intel.analytics.bigdl.dllib.keras.autograd.CustomLoss.updateOutput(CustomLoss.scala:66)
        at com.intel.analytics.bigdl.dllib.nn.abstractnn.AbstractCriterion.forward(AbstractCriterion.scala:73)
        at com.intel.analytics.bigdl.dllib.optim.LocalOptimizer.$anonfun$optimize$8(LocalOptimizer.scala:149)
        at scala.runtime.java8.JFunction0$mcD$sp.apply(JFunction0$mcD$sp.java:23)
        at com.intel.analytics.bigdl.dllib.utils.ThreadPool.$anonfun$invokeAndWait$2(ThreadPool.scala:133)
        at scala.concurrent.Future$.$anonfun$apply$1(Future.scala:659)
        at scala.util.Success.$anonfun$map$1(Try.scala:255)
        at scala.util.Success.map(Try.scala:213)
        at scala.concurrent.Future.$anonfun$map$1(Future.scala:292)
        at scala.concurrent.impl.Promise.liftedTree1$1(Promise.scala:33)
        at scala.concurrent.impl.Promise.$anonfun$transform$1(Promise.scala:33)
        at scala.concurrent.impl.CallbackRunnable.run(Promise.scala:64)
        at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
        at java.util.concurrent.FutureTask.run(FutureTask.java:266)
        at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
        at java.lang.Thread.run(Thread.java:748)

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions