From d954d3401efe769884e50344abfbcae037d65737 Mon Sep 17 00:00:00 2001 From: Mark O'Connor Date: Tue, 17 Feb 2026 12:24:50 +0000 Subject: [PATCH 1/2] Update Gemma 3 optimized logs and MODELS entry --- MODELS.md | 2 +- .../gemma-3-4b-it/t3000/optimized/demo.log | 133 +++++++++--------- .../gemma-3-4b-it/t3000/optimized/eval.log | 120 ++++++++-------- 3 files changed, 127 insertions(+), 128 deletions(-) diff --git a/MODELS.md b/MODELS.md index 40a4afa..d2e86b8 100644 --- a/MODELS.md +++ b/MODELS.md @@ -55,7 +55,7 @@ Note: Keep the table columns padded with spaces and right-justify numeric cells | Qwen/Qwen3-0.6B | t3000 | optimized | 98% | 100% | 59ms | 61.9 | 40960 | | google/gemma-3-4b-it | n150 | optimized | 92% | 100% | 70ms | 14.5 | 40960 | | google/gemma-3-4b-it | n300 | optimized | 94% | 100% | 68ms | 18.5 | 40960 | -| google/gemma-3-4b-it | t3000 | optimized | 91% | 100% | 78ms | 19.4 | 40960 | +| google/gemma-3-4b-it | t3000 | optimized | 91% | 100% | 75ms | 19.7 | 40960 | | microsoft/Phi-3-mini-128k-instruct | n150 | optimized | 94% | 99% | 69ms | 15.9 | 12288 | | microsoft/Phi-3-mini-128k-instruct | n300 | optimized | 91% | 100% | 94ms | 18.3 | 12288 | | microsoft/Phi-3-mini-128k-instruct | t3000 | optimized | 92% | 99% | 105ms | 23.6 | 12288 | diff --git a/models/google/gemma-3-4b-it/t3000/optimized/demo.log b/models/google/gemma-3-4b-it/t3000/optimized/demo.log index aea8268..31e89f2 100644 --- a/models/google/gemma-3-4b-it/t3000/optimized/demo.log +++ b/models/google/gemma-3-4b-it/t3000/optimized/demo.log @@ -1,63 +1,64 @@ -CMD: env HF_HOME=/proj_sw/user_dev/moconnor/hf-cache TT_VISIBLE_DEVICES=0,1,2,3 TT_MESH_GRAPH_DESC_PATH=/proj_sw/user_dev/moconnor/tt-metal/tt_metal/fabric/mesh_graph_descriptors/t3k_mesh_graph_descriptor.textproto TT_METAL_CACHE=/tmp/tt-metal-cache TT_METAL_INSPECTOR_LOG_PATH=/tmp/tt-metal-inspector TT_METAL_INSPECTOR_INITIALIZATION_IS_IMPORTANT=0 python demo.py models/google/gemma-3-4b-it/t3000/optimized/model.py --max_seq_len 40960 -2026-02-12 10:14:06.641 | DEBUG | ttnn::77 - Initial ttnn.CONFIG: +CMD: env HF_HOME=/proj_sw/user_dev/moconnor/hf-cache TT_VISIBLE_DEVICES=0,1,2,3 TT_MESH_GRAPH_DESC_PATH=/proj_sw/user_dev/moconnor/tt-metal/tests/tt_metal/tt_fabric/custom_mesh_descriptors/t3k_1x8_mesh_graph_descriptor.textproto TT_METAL_CACHE=/tmp/tt-metal-cache TT_METAL_INSPECTOR_LOG_PATH=/tmp/tt-metal-inspector TT_METAL_INSPECTOR_INITIALIZATION_IS_IMPORTANT=0 PYTHONUNBUFFERED=1 python demo.py models/google/gemma-3-4b-it/t3000/optimized/model.py --max_seq_len 40960 +2026-02-17 12:14:09.137 | DEBUG | ttnn::77 - Initial ttnn.CONFIG: Config{cache_path=/home/moconnor/.cache/ttnn,model_cache_path=/home/moconnor/.cache/ttnn/models,tmp_dir=/tmp/ttnn,enable_model_cache=false,enable_fast_runtime_mode=true,throw_exception_on_fallback=false,enable_logging=false,enable_graph_report=false,enable_detailed_buffer_report=false,enable_detailed_tensor_report=false,enable_comparison_mode=false,comparison_mode_should_raise_exception=false,comparison_mode_pcc=0.9999,root_report_path=generated/ttnn/reports,report_name=std::nullopt,std::nullopt} Loading tokenizer: google/gemma-3-4b-it Opening TT device... -2026-02-12 10:14:08.152 | info | UMD | Established firmware bundle version: 18.12.1 (topology_discovery.cpp:368) -2026-02-12 10:14:08.184 | info | Device | Opening user mode device driver (tt_cluster.cpp:223) -2026-02-12 10:14:08.193 | info | UMD | Established firmware bundle version: 18.12.1 (topology_discovery.cpp:368) -2026-02-12 10:14:08.268 | info | UMD | Established firmware bundle version: 18.12.1 (topology_discovery.cpp:368) -2026-02-12 10:14:08.330 | info | UMD | Harvesting masks for chip 3 tensix: 0x201 dram: 0x0 eth: 0x0 pcie: 0x0 l2cpu: 0x0 (cluster.cpp:339) -2026-02-12 10:14:08.386 | info | UMD | Harvesting masks for chip 2 tensix: 0x208 dram: 0x0 eth: 0x0 pcie: 0x0 l2cpu: 0x0 (cluster.cpp:339) -2026-02-12 10:14:08.397 | info | UMD | Harvesting masks for chip 1 tensix: 0x210 dram: 0x0 eth: 0x0 pcie: 0x0 l2cpu: 0x0 (cluster.cpp:339) -2026-02-12 10:14:08.407 | info | UMD | Harvesting masks for chip 0 tensix: 0x41 dram: 0x0 eth: 0x0 pcie: 0x0 l2cpu: 0x0 (cluster.cpp:339) -2026-02-12 10:14:08.417 | info | UMD | Harvesting masks for chip 7 tensix: 0x280 dram: 0x0 eth: 0x0 pcie: 0x0 l2cpu: 0x0 (cluster.cpp:339) -2026-02-12 10:14:08.431 | info | UMD | Harvesting masks for chip 6 tensix: 0x208 dram: 0x0 eth: 0x0 pcie: 0x0 l2cpu: 0x0 (cluster.cpp:339) -2026-02-12 10:14:08.444 | info | UMD | Harvesting masks for chip 5 tensix: 0x300 dram: 0x0 eth: 0x0 pcie: 0x0 l2cpu: 0x0 (cluster.cpp:339) -2026-02-12 10:14:08.458 | info | UMD | Harvesting masks for chip 4 tensix: 0x42 dram: 0x0 eth: 0x0 pcie: 0x0 l2cpu: 0x0 (cluster.cpp:339) -2026-02-12 10:14:08.472 | info | UMD | Opening local chip ids/PCIe ids: {0, 1, 2, 3}/[0, 1, 2, 3] and remote chip ids {4, 5, 6, 7} (cluster.cpp:186) -2026-02-12 10:14:08.472 | info | UMD | IOMMU: disabled (cluster.cpp:161) -2026-02-12 10:14:08.472 | info | UMD | KMD version: 2.4.1 (cluster.cpp:164) -2026-02-12 10:14:08.481 | info | UMD | Starting devices in cluster (cluster.cpp:965) -2026-02-12 10:14:08.481 | info | UMD | Mapped hugepage 0x340000000 to NOC address 0x800000000 (silicon_sysmem_manager.cpp:207) -2026-02-12 10:14:08.482 | info | UMD | Mapped hugepage 0x300000000 to NOC address 0x840000000 (silicon_sysmem_manager.cpp:207) -2026-02-12 10:14:08.483 | info | UMD | Mapped hugepage 0x440000000 to NOC address 0x800000000 (silicon_sysmem_manager.cpp:207) -2026-02-12 10:14:08.484 | info | UMD | Mapped hugepage 0x400000000 to NOC address 0x840000000 (silicon_sysmem_manager.cpp:207) -2026-02-12 10:14:08.485 | info | UMD | Mapped hugepage 0x4240000000 to NOC address 0x800000000 (silicon_sysmem_manager.cpp:207) -2026-02-12 10:14:08.485 | info | UMD | Mapped hugepage 0x4200000000 to NOC address 0x840000000 (silicon_sysmem_manager.cpp:207) -2026-02-12 10:14:08.486 | info | UMD | Mapped hugepage 0x4340000000 to NOC address 0x800000000 (silicon_sysmem_manager.cpp:207) -2026-02-12 10:14:08.487 | info | UMD | Mapped hugepage 0x4300000000 to NOC address 0x840000000 (silicon_sysmem_manager.cpp:207) -2026-02-12 10:14:08.546 | info | Distributed | Using custom mesh graph descriptor: /proj_sw/user_dev/moconnor/tt-metal/tt_metal/fabric/mesh_graph_descriptors/t3k_mesh_graph_descriptor.textproto (metal_context.cpp:822) -2026-02-12 10:14:08.547 | info | Fabric | TopologyMapper mapping start (mesh=0): n_log=8, n_phys=8, log_deg_hist={2:4, 3:4}, phys_deg_hist={2:4, 3:4} (topology_mapper_utils.cpp:171) -2026-02-12 10:14:08.552 | DEBUG | ttnn.device:__init__:150 - Using default dispatch core type for this system: DispatchCoreType.ETH -2026-02-12 10:14:08.552 | DEBUG | ttnn.device:__init__:152 - Using default dispatch core axis for this system: DispatchCoreAxis.ROW -2026-02-12 10:14:08.556 | info | BuildKernels | Skipping deleting built cache (build.cpp:110) -2026-02-12 10:14:08.559 | info | BuildKernels | Skipping deleting built cache (build.cpp:110) -2026-02-12 10:14:08.559 | info | BuildKernels | Skipping deleting built cache (build.cpp:110) -2026-02-12 10:14:08.559 | info | BuildKernels | Skipping deleting built cache (build.cpp:110) -2026-02-12 10:14:08.559 | info | BuildKernels | Skipping deleting built cache (build.cpp:110) -2026-02-12 10:14:08.560 | info | BuildKernels | Skipping deleting built cache (build.cpp:110) -2026-02-12 10:14:08.560 | info | BuildKernels | Skipping deleting built cache (build.cpp:110) -2026-02-12 10:14:08.560 | info | BuildKernels | Skipping deleting built cache (build.cpp:110) -2026-02-12 10:14:08.874 | warning | Metal | Got num_routing_planes: 1, which is less than current value: 255, ignoring the override (metal_context.cpp:719) -2026-02-12 10:14:08.874 | info | Metal | Dispatch on FabricConfig::FABRIC_2D with 1 Command Queues +2026-02-17 12:14:10.710 | info | UMD | Established firmware bundle version: 19.4.0 (topology_discovery.cpp:368) +2026-02-17 12:14:10.741 | info | Device | Opening user mode device driver (tt_cluster.cpp:223) +2026-02-17 12:14:10.750 | info | UMD | Established firmware bundle version: 19.4.0 (topology_discovery.cpp:368) +2026-02-17 12:14:10.824 | info | UMD | Established firmware bundle version: 19.4.0 (topology_discovery.cpp:368) +2026-02-17 12:14:10.886 | info | UMD | Harvesting masks for chip 3 tensix: 0x201 dram: 0x0 eth: 0x0 pcie: 0x0 l2cpu: 0x0 (cluster.cpp:339) +2026-02-17 12:14:10.944 | info | UMD | Harvesting masks for chip 2 tensix: 0x280 dram: 0x0 eth: 0x0 pcie: 0x0 l2cpu: 0x0 (cluster.cpp:339) +2026-02-17 12:14:10.955 | info | UMD | Harvesting masks for chip 1 tensix: 0x201 dram: 0x0 eth: 0x0 pcie: 0x0 l2cpu: 0x0 (cluster.cpp:339) +2026-02-17 12:14:10.965 | info | UMD | Harvesting masks for chip 0 tensix: 0x220 dram: 0x0 eth: 0x0 pcie: 0x0 l2cpu: 0x0 (cluster.cpp:339) +2026-02-17 12:14:10.976 | info | UMD | Harvesting masks for chip 7 tensix: 0x210 dram: 0x0 eth: 0x0 pcie: 0x0 l2cpu: 0x0 (cluster.cpp:339) +2026-02-17 12:14:10.990 | info | UMD | Harvesting masks for chip 6 tensix: 0x201 dram: 0x0 eth: 0x0 pcie: 0x0 l2cpu: 0x0 (cluster.cpp:339) +2026-02-17 12:14:11.002 | info | UMD | Harvesting masks for chip 5 tensix: 0x201 dram: 0x0 eth: 0x0 pcie: 0x0 l2cpu: 0x0 (cluster.cpp:339) +2026-02-17 12:14:11.016 | info | UMD | Harvesting masks for chip 4 tensix: 0x240 dram: 0x0 eth: 0x0 pcie: 0x0 l2cpu: 0x0 (cluster.cpp:339) +2026-02-17 12:14:11.030 | info | UMD | Opening local chip ids/PCIe ids: {0, 1, 2, 3}/[0, 1, 3, 2] and remote chip ids {4, 5, 6, 7} (cluster.cpp:186) +2026-02-17 12:14:11.030 | info | UMD | IOMMU: disabled (cluster.cpp:161) +2026-02-17 12:14:11.030 | info | UMD | KMD version: 2.4.1 (cluster.cpp:164) +2026-02-17 12:14:11.038 | info | UMD | Starting devices in cluster (cluster.cpp:965) +2026-02-17 12:14:11.038 | info | UMD | Mapped hugepage 0x280000000 to NOC address 0x800000000 (silicon_sysmem_manager.cpp:207) +2026-02-17 12:14:11.039 | info | UMD | Mapped hugepage 0x240000000 to NOC address 0x840000000 (silicon_sysmem_manager.cpp:207) +2026-02-17 12:14:11.040 | info | UMD | Mapped hugepage 0x300000000 to NOC address 0x800000000 (silicon_sysmem_manager.cpp:207) +2026-02-17 12:14:11.041 | info | UMD | Mapped hugepage 0x2c0000000 to NOC address 0x840000000 (silicon_sysmem_manager.cpp:207) +2026-02-17 12:14:11.042 | info | UMD | Mapped hugepage 0x41c0000000 to NOC address 0x800000000 (silicon_sysmem_manager.cpp:207) +2026-02-17 12:14:11.042 | info | UMD | Mapped hugepage 0x4180000000 to NOC address 0x840000000 (silicon_sysmem_manager.cpp:207) +2026-02-17 12:14:11.043 | info | UMD | Mapped hugepage 0x4240000000 to NOC address 0x800000000 (silicon_sysmem_manager.cpp:207) +2026-02-17 12:14:11.044 | info | UMD | Mapped hugepage 0x4200000000 to NOC address 0x840000000 (silicon_sysmem_manager.cpp:207) +2026-02-17 12:14:11.105 | info | Distributed | Using custom mesh graph descriptor: /proj_sw/user_dev/moconnor/tt-metal/tests/tt_metal/tt_fabric/custom_mesh_descriptors/t3k_1x8_mesh_graph_descriptor.textproto (metal_context.cpp:822) +2026-02-17 12:14:11.106 | info | Fabric | TopologyMapper mapping start (mesh=0): n_log=8, n_phys=8, log_deg_hist={1:2, 2:6}, phys_deg_hist={2:4, 3:4} (topology_mapper_utils.cpp:171) +2026-02-17 12:14:11.106 | info | Fabric | Fast-path path-graph mapping succeeded for mesh 0 (topology_mapper_utils.cpp:401) +2026-02-17 12:14:11.111 | DEBUG | ttnn.device:__init__:150 - Using default dispatch core type for this system: DispatchCoreType.ETH +2026-02-17 12:14:11.111 | DEBUG | ttnn.device:__init__:152 - Using default dispatch core axis for this system: DispatchCoreAxis.ROW +2026-02-17 12:14:11.114 | info | BuildKernels | Skipping deleting built cache (build.cpp:110) +2026-02-17 12:14:11.117 | info | BuildKernels | Skipping deleting built cache (build.cpp:110) +2026-02-17 12:14:11.118 | info | BuildKernels | Skipping deleting built cache (build.cpp:110) +2026-02-17 12:14:11.118 | info | BuildKernels | Skipping deleting built cache (build.cpp:110) +2026-02-17 12:14:11.118 | info | BuildKernels | Skipping deleting built cache (build.cpp:110) +2026-02-17 12:14:11.118 | info | BuildKernels | Skipping deleting built cache (build.cpp:110) +2026-02-17 12:14:11.119 | info | BuildKernels | Skipping deleting built cache (build.cpp:110) +2026-02-17 12:14:11.119 | info | BuildKernels | Skipping deleting built cache (build.cpp:110) +2026-02-17 12:14:11.436 | warning | Metal | Got num_routing_planes: 1, which is less than current value: 255, ignoring the override (metal_context.cpp:719) +2026-02-17 12:14:11.436 | info | Metal | Dispatch on FabricConfig::FABRIC_2D with 1 Command Queues (device_manager.cpp:328) -2026-02-12 10:14:08.889 | info | Metal | Initializing Fabric (device_manager.cpp:404) -2026-02-12 10:14:09.061 | info | Metal | Fabric initialized on Device 0 (device.cpp:386) -2026-02-12 10:14:09.061 | info | Metal | Fabric initialized on Device 1 (device.cpp:386) -2026-02-12 10:14:09.062 | info | Metal | Fabric initialized on Device 2 (device.cpp:386) -2026-02-12 10:14:09.063 | info | Metal | Fabric initialized on Device 3 (device.cpp:386) -2026-02-12 10:14:09.065 | info | Metal | Fabric initialized on Device 4 (device.cpp:386) -2026-02-12 10:14:09.068 | info | Metal | Fabric initialized on Device 5 (device.cpp:386) -2026-02-12 10:14:09.074 | info | Metal | Fabric initialized on Device 6 (device.cpp:386) -2026-02-12 10:14:09.081 | info | Metal | Fabric initialized on Device 7 (device.cpp:386) -2026-02-12 10:14:09.081 | info | Metal | Fabric Initialized with config FabricConfig::FABRIC_2D (device_manager.cpp:409) -2026-02-12 10:14:09.156 | info | Metal | Command Queue initialized on Device 6 (device_manager.cpp:500) -2026-02-12 10:14:09.158 | info | Metal | Command Queue initialized on Device 4 (device_manager.cpp:500) -2026-02-12 10:14:09.158 | info | Metal | Command Queue initialized on Device 7 (device_manager.cpp:500) -2026-02-12 10:14:09.161 | info | Metal | Command Queue initialized on Device 5 (device_manager.cpp:500) +2026-02-17 12:14:11.450 | info | Metal | Initializing Fabric (device_manager.cpp:404) +2026-02-17 12:14:17.362 | info | Metal | Fabric initialized on Device 0 (device.cpp:386) +2026-02-17 12:14:17.798 | info | Metal | Fabric initialized on Device 1 (device.cpp:386) +2026-02-17 12:14:17.798 | info | Metal | Fabric initialized on Device 2 (device.cpp:386) +2026-02-17 12:14:17.841 | info | Metal | Fabric initialized on Device 3 (device.cpp:386) +2026-02-17 12:14:17.926 | info | Metal | Fabric initialized on Device 4 (device.cpp:386) +2026-02-17 12:14:22.139 | info | Metal | Fabric initialized on Device 5 (device.cpp:386) +2026-02-17 12:14:22.146 | info | Metal | Fabric initialized on Device 6 (device.cpp:386) +2026-02-17 12:14:22.148 | info | Metal | Fabric initialized on Device 7 (device.cpp:386) +2026-02-17 12:14:22.148 | info | Metal | Fabric Initialized with config FabricConfig::FABRIC_2D (device_manager.cpp:409) +2026-02-17 12:14:27.395 | info | Metal | Command Queue initialized on Device 6 (device_manager.cpp:500) +2026-02-17 12:14:27.395 | info | Metal | Command Queue initialized on Device 4 (device_manager.cpp:500) +2026-02-17 12:14:27.395 | info | Metal | Command Queue initialized on Device 7 (device_manager.cpp:500) +2026-02-17 12:14:27.397 | info | Metal | Command Queue initialized on Device 5 (device_manager.cpp:500) Loading HuggingFace reference model on CPU: google/gemma-3-4b-it - Loading checkpoint shards: 0%| | 0/2 [00:00:77 - Initial ttnn.CONFIG: +CMD: env HF_HOME=/proj_sw/user_dev/moconnor/hf-cache TT_VISIBLE_DEVICES=0,1,2,3 TT_MESH_GRAPH_DESC_PATH=/proj_sw/user_dev/moconnor/tt-metal/tests/tt_metal/tt_fabric/custom_mesh_descriptors/t3k_1x8_mesh_graph_descriptor.textproto TT_METAL_CACHE=/tmp/tt-metal-cache TT_METAL_INSPECTOR_LOG_PATH=/tmp/tt-metal-inspector TT_METAL_INSPECTOR_INITIALIZATION_IS_IMPORTANT=0 PYTHONUNBUFFERED=1 python eval.py models/google/gemma-3-4b-it/t3000/optimized/model.py --model google/gemma-3-4b-it --prompt_file prompts/bringup_eval_long.txt --max_new_tokens 100 --max_seq_len 40960 +2026-02-17 12:17:50.074 | DEBUG | ttnn::77 - Initial ttnn.CONFIG: Config{cache_path=/home/moconnor/.cache/ttnn,model_cache_path=/home/moconnor/.cache/ttnn/models,tmp_dir=/tmp/ttnn,enable_model_cache=false,enable_fast_runtime_mode=true,throw_exception_on_fallback=false,enable_logging=false,enable_graph_report=false,enable_detailed_buffer_report=false,enable_detailed_tensor_report=false,enable_comparison_mode=false,comparison_mode_should_raise_exception=false,comparison_mode_pcc=0.9999,root_report_path=generated/ttnn/reports,report_name=std::nullopt,std::nullopt} Loading model module: /localdev/moconnor/ttnn_models/models/google/gemma-3-4b-it/t3000/optimized/model.py Loading HuggingFace tokenizer... Loading HuggingFace reference model on CPU... - Loading checkpoint shards: 0%| | 0/2 [00:00 Date: Tue, 17 Feb 2026 15:40:45 +0000 Subject: [PATCH 2/2] Add Gemma 3 4B optimized demo and eval logs --- MODELS.md | 2 +- .../gemma-3-4b-it/t3000/optimized/demo.log | 119 +++++++++--------- .../gemma-3-4b-it/t3000/optimized/eval.log | 115 +++++++++-------- 3 files changed, 117 insertions(+), 119 deletions(-) diff --git a/MODELS.md b/MODELS.md index d2e86b8..3f9b45c 100644 --- a/MODELS.md +++ b/MODELS.md @@ -55,7 +55,7 @@ Note: Keep the table columns padded with spaces and right-justify numeric cells | Qwen/Qwen3-0.6B | t3000 | optimized | 98% | 100% | 59ms | 61.9 | 40960 | | google/gemma-3-4b-it | n150 | optimized | 92% | 100% | 70ms | 14.5 | 40960 | | google/gemma-3-4b-it | n300 | optimized | 94% | 100% | 68ms | 18.5 | 40960 | -| google/gemma-3-4b-it | t3000 | optimized | 91% | 100% | 75ms | 19.7 | 40960 | +| google/gemma-3-4b-it | t3000 | optimized | 91% | 100% | 71ms | 20.6 | 40960 | | microsoft/Phi-3-mini-128k-instruct | n150 | optimized | 94% | 99% | 69ms | 15.9 | 12288 | | microsoft/Phi-3-mini-128k-instruct | n300 | optimized | 91% | 100% | 94ms | 18.3 | 12288 | | microsoft/Phi-3-mini-128k-instruct | t3000 | optimized | 92% | 99% | 105ms | 23.6 | 12288 | diff --git a/models/google/gemma-3-4b-it/t3000/optimized/demo.log b/models/google/gemma-3-4b-it/t3000/optimized/demo.log index 31e89f2..fabc35e 100644 --- a/models/google/gemma-3-4b-it/t3000/optimized/demo.log +++ b/models/google/gemma-3-4b-it/t3000/optimized/demo.log @@ -1,64 +1,63 @@ -CMD: env HF_HOME=/proj_sw/user_dev/moconnor/hf-cache TT_VISIBLE_DEVICES=0,1,2,3 TT_MESH_GRAPH_DESC_PATH=/proj_sw/user_dev/moconnor/tt-metal/tests/tt_metal/tt_fabric/custom_mesh_descriptors/t3k_1x8_mesh_graph_descriptor.textproto TT_METAL_CACHE=/tmp/tt-metal-cache TT_METAL_INSPECTOR_LOG_PATH=/tmp/tt-metal-inspector TT_METAL_INSPECTOR_INITIALIZATION_IS_IMPORTANT=0 PYTHONUNBUFFERED=1 python demo.py models/google/gemma-3-4b-it/t3000/optimized/model.py --max_seq_len 40960 -2026-02-17 12:14:09.137 | DEBUG | ttnn::77 - Initial ttnn.CONFIG: +CMD: env HF_HOME=/proj_sw/user_dev/moconnor/hf-cache TT_VISIBLE_DEVICES=0,1,2,3 TT_MESH_GRAPH_DESC_PATH=/proj_sw/user_dev/moconnor/tt-metal/tt_metal/fabric/mesh_graph_descriptors/t3k_mesh_graph_descriptor.textproto TT_METAL_CACHE=/tmp/tt-metal-cache TT_METAL_INSPECTOR_LOG_PATH=/tmp/tt-metal-inspector TT_METAL_INSPECTOR_INITIALIZATION_IS_IMPORTANT=0 PYTHONUNBUFFERED=1 python demo.py models/google/gemma-3-4b-it/t3000/optimized/model.py --max_seq_len 40960 +2026-02-17 15:30:28.203 | DEBUG | ttnn::77 - Initial ttnn.CONFIG: Config{cache_path=/home/moconnor/.cache/ttnn,model_cache_path=/home/moconnor/.cache/ttnn/models,tmp_dir=/tmp/ttnn,enable_model_cache=false,enable_fast_runtime_mode=true,throw_exception_on_fallback=false,enable_logging=false,enable_graph_report=false,enable_detailed_buffer_report=false,enable_detailed_tensor_report=false,enable_comparison_mode=false,comparison_mode_should_raise_exception=false,comparison_mode_pcc=0.9999,root_report_path=generated/ttnn/reports,report_name=std::nullopt,std::nullopt} Loading tokenizer: google/gemma-3-4b-it Opening TT device... -2026-02-17 12:14:10.710 | info | UMD | Established firmware bundle version: 19.4.0 (topology_discovery.cpp:368) -2026-02-17 12:14:10.741 | info | Device | Opening user mode device driver (tt_cluster.cpp:223) -2026-02-17 12:14:10.750 | info | UMD | Established firmware bundle version: 19.4.0 (topology_discovery.cpp:368) -2026-02-17 12:14:10.824 | info | UMD | Established firmware bundle version: 19.4.0 (topology_discovery.cpp:368) -2026-02-17 12:14:10.886 | info | UMD | Harvesting masks for chip 3 tensix: 0x201 dram: 0x0 eth: 0x0 pcie: 0x0 l2cpu: 0x0 (cluster.cpp:339) -2026-02-17 12:14:10.944 | info | UMD | Harvesting masks for chip 2 tensix: 0x280 dram: 0x0 eth: 0x0 pcie: 0x0 l2cpu: 0x0 (cluster.cpp:339) -2026-02-17 12:14:10.955 | info | UMD | Harvesting masks for chip 1 tensix: 0x201 dram: 0x0 eth: 0x0 pcie: 0x0 l2cpu: 0x0 (cluster.cpp:339) -2026-02-17 12:14:10.965 | info | UMD | Harvesting masks for chip 0 tensix: 0x220 dram: 0x0 eth: 0x0 pcie: 0x0 l2cpu: 0x0 (cluster.cpp:339) -2026-02-17 12:14:10.976 | info | UMD | Harvesting masks for chip 7 tensix: 0x210 dram: 0x0 eth: 0x0 pcie: 0x0 l2cpu: 0x0 (cluster.cpp:339) -2026-02-17 12:14:10.990 | info | UMD | Harvesting masks for chip 6 tensix: 0x201 dram: 0x0 eth: 0x0 pcie: 0x0 l2cpu: 0x0 (cluster.cpp:339) -2026-02-17 12:14:11.002 | info | UMD | Harvesting masks for chip 5 tensix: 0x201 dram: 0x0 eth: 0x0 pcie: 0x0 l2cpu: 0x0 (cluster.cpp:339) -2026-02-17 12:14:11.016 | info | UMD | Harvesting masks for chip 4 tensix: 0x240 dram: 0x0 eth: 0x0 pcie: 0x0 l2cpu: 0x0 (cluster.cpp:339) -2026-02-17 12:14:11.030 | info | UMD | Opening local chip ids/PCIe ids: {0, 1, 2, 3}/[0, 1, 3, 2] and remote chip ids {4, 5, 6, 7} (cluster.cpp:186) -2026-02-17 12:14:11.030 | info | UMD | IOMMU: disabled (cluster.cpp:161) -2026-02-17 12:14:11.030 | info | UMD | KMD version: 2.4.1 (cluster.cpp:164) -2026-02-17 12:14:11.038 | info | UMD | Starting devices in cluster (cluster.cpp:965) -2026-02-17 12:14:11.038 | info | UMD | Mapped hugepage 0x280000000 to NOC address 0x800000000 (silicon_sysmem_manager.cpp:207) -2026-02-17 12:14:11.039 | info | UMD | Mapped hugepage 0x240000000 to NOC address 0x840000000 (silicon_sysmem_manager.cpp:207) -2026-02-17 12:14:11.040 | info | UMD | Mapped hugepage 0x300000000 to NOC address 0x800000000 (silicon_sysmem_manager.cpp:207) -2026-02-17 12:14:11.041 | info | UMD | Mapped hugepage 0x2c0000000 to NOC address 0x840000000 (silicon_sysmem_manager.cpp:207) -2026-02-17 12:14:11.042 | info | UMD | Mapped hugepage 0x41c0000000 to NOC address 0x800000000 (silicon_sysmem_manager.cpp:207) -2026-02-17 12:14:11.042 | info | UMD | Mapped hugepage 0x4180000000 to NOC address 0x840000000 (silicon_sysmem_manager.cpp:207) -2026-02-17 12:14:11.043 | info | UMD | Mapped hugepage 0x4240000000 to NOC address 0x800000000 (silicon_sysmem_manager.cpp:207) -2026-02-17 12:14:11.044 | info | UMD | Mapped hugepage 0x4200000000 to NOC address 0x840000000 (silicon_sysmem_manager.cpp:207) -2026-02-17 12:14:11.105 | info | Distributed | Using custom mesh graph descriptor: /proj_sw/user_dev/moconnor/tt-metal/tests/tt_metal/tt_fabric/custom_mesh_descriptors/t3k_1x8_mesh_graph_descriptor.textproto (metal_context.cpp:822) -2026-02-17 12:14:11.106 | info | Fabric | TopologyMapper mapping start (mesh=0): n_log=8, n_phys=8, log_deg_hist={1:2, 2:6}, phys_deg_hist={2:4, 3:4} (topology_mapper_utils.cpp:171) -2026-02-17 12:14:11.106 | info | Fabric | Fast-path path-graph mapping succeeded for mesh 0 (topology_mapper_utils.cpp:401) -2026-02-17 12:14:11.111 | DEBUG | ttnn.device:__init__:150 - Using default dispatch core type for this system: DispatchCoreType.ETH -2026-02-17 12:14:11.111 | DEBUG | ttnn.device:__init__:152 - Using default dispatch core axis for this system: DispatchCoreAxis.ROW -2026-02-17 12:14:11.114 | info | BuildKernels | Skipping deleting built cache (build.cpp:110) -2026-02-17 12:14:11.117 | info | BuildKernels | Skipping deleting built cache (build.cpp:110) -2026-02-17 12:14:11.118 | info | BuildKernels | Skipping deleting built cache (build.cpp:110) -2026-02-17 12:14:11.118 | info | BuildKernels | Skipping deleting built cache (build.cpp:110) -2026-02-17 12:14:11.118 | info | BuildKernels | Skipping deleting built cache (build.cpp:110) -2026-02-17 12:14:11.118 | info | BuildKernels | Skipping deleting built cache (build.cpp:110) -2026-02-17 12:14:11.119 | info | BuildKernels | Skipping deleting built cache (build.cpp:110) -2026-02-17 12:14:11.119 | info | BuildKernels | Skipping deleting built cache (build.cpp:110) -2026-02-17 12:14:11.436 | warning | Metal | Got num_routing_planes: 1, which is less than current value: 255, ignoring the override (metal_context.cpp:719) -2026-02-17 12:14:11.436 | info | Metal | Dispatch on FabricConfig::FABRIC_2D with 1 Command Queues +2026-02-17 15:30:29.742 | info | UMD | Established firmware bundle version: 18.12.1 (topology_discovery.cpp:368) +2026-02-17 15:30:29.774 | info | Device | Opening user mode device driver (tt_cluster.cpp:223) +2026-02-17 15:30:29.784 | info | UMD | Established firmware bundle version: 18.12.1 (topology_discovery.cpp:368) +2026-02-17 15:30:29.860 | info | UMD | Established firmware bundle version: 18.12.1 (topology_discovery.cpp:368) +2026-02-17 15:30:29.921 | info | UMD | Harvesting masks for chip 3 tensix: 0x202 dram: 0x0 eth: 0x0 pcie: 0x0 l2cpu: 0x0 (cluster.cpp:339) +2026-02-17 15:30:29.984 | info | UMD | Harvesting masks for chip 2 tensix: 0xc dram: 0x0 eth: 0x0 pcie: 0x0 l2cpu: 0x0 (cluster.cpp:339) +2026-02-17 15:30:29.995 | info | UMD | Harvesting masks for chip 1 tensix: 0x240 dram: 0x0 eth: 0x0 pcie: 0x0 l2cpu: 0x0 (cluster.cpp:339) +2026-02-17 15:30:30.009 | info | UMD | Harvesting masks for chip 0 tensix: 0x201 dram: 0x0 eth: 0x0 pcie: 0x0 l2cpu: 0x0 (cluster.cpp:339) +2026-02-17 15:30:30.022 | info | UMD | Harvesting masks for chip 7 tensix: 0x220 dram: 0x0 eth: 0x0 pcie: 0x0 l2cpu: 0x0 (cluster.cpp:339) +2026-02-17 15:30:30.036 | info | UMD | Harvesting masks for chip 6 tensix: 0x30 dram: 0x0 eth: 0x0 pcie: 0x0 l2cpu: 0x0 (cluster.cpp:339) +2026-02-17 15:30:30.050 | info | UMD | Harvesting masks for chip 5 tensix: 0x280 dram: 0x0 eth: 0x0 pcie: 0x0 l2cpu: 0x0 (cluster.cpp:339) +2026-02-17 15:30:30.063 | info | UMD | Harvesting masks for chip 4 tensix: 0x300 dram: 0x0 eth: 0x0 pcie: 0x0 l2cpu: 0x0 (cluster.cpp:339) +2026-02-17 15:30:30.078 | info | UMD | Opening local chip ids/PCIe ids: {0, 1, 2, 3}/[0, 3, 1, 2] and remote chip ids {4, 5, 6, 7} (cluster.cpp:186) +2026-02-17 15:30:30.078 | info | UMD | IOMMU: disabled (cluster.cpp:161) +2026-02-17 15:30:30.078 | info | UMD | KMD version: 2.4.1 (cluster.cpp:164) +2026-02-17 15:30:30.087 | info | UMD | Starting devices in cluster (cluster.cpp:965) +2026-02-17 15:30:30.088 | info | UMD | Mapped hugepage 0x200000000 to NOC address 0x800000000 (silicon_sysmem_manager.cpp:207) +2026-02-17 15:30:30.089 | info | UMD | Mapped hugepage 0x140000000 to NOC address 0x840000000 (silicon_sysmem_manager.cpp:207) +2026-02-17 15:30:30.090 | info | UMD | Mapped hugepage 0x41c0000000 to NOC address 0x800000000 (silicon_sysmem_manager.cpp:207) +2026-02-17 15:30:30.091 | info | UMD | Mapped hugepage 0x4180000000 to NOC address 0x840000000 (silicon_sysmem_manager.cpp:207) +2026-02-17 15:30:30.092 | info | UMD | Mapped hugepage 0x300000000 to NOC address 0x800000000 (silicon_sysmem_manager.cpp:207) +2026-02-17 15:30:30.092 | info | UMD | Mapped hugepage 0x2c0000000 to NOC address 0x840000000 (silicon_sysmem_manager.cpp:207) +2026-02-17 15:30:30.094 | info | UMD | Mapped hugepage 0x42c0000000 to NOC address 0x800000000 (silicon_sysmem_manager.cpp:207) +2026-02-17 15:30:30.094 | info | UMD | Mapped hugepage 0x4280000000 to NOC address 0x840000000 (silicon_sysmem_manager.cpp:207) +2026-02-17 15:30:30.149 | info | Distributed | Using custom mesh graph descriptor: /proj_sw/user_dev/moconnor/tt-metal/tt_metal/fabric/mesh_graph_descriptors/t3k_mesh_graph_descriptor.textproto (metal_context.cpp:822) +2026-02-17 15:30:30.151 | info | Fabric | TopologyMapper mapping start (mesh=0): n_log=8, n_phys=8, log_deg_hist={2:4, 3:4}, phys_deg_hist={2:4, 3:4} (topology_mapper_utils.cpp:171) +2026-02-17 15:30:30.157 | DEBUG | ttnn.device:__init__:150 - Using default dispatch core type for this system: DispatchCoreType.ETH +2026-02-17 15:30:30.157 | DEBUG | ttnn.device:__init__:152 - Using default dispatch core axis for this system: DispatchCoreAxis.ROW +2026-02-17 15:30:30.162 | info | BuildKernels | Skipping deleting built cache (build.cpp:110) +2026-02-17 15:30:30.166 | info | BuildKernels | Skipping deleting built cache (build.cpp:110) +2026-02-17 15:30:30.167 | info | BuildKernels | Skipping deleting built cache (build.cpp:110) +2026-02-17 15:30:30.167 | info | BuildKernels | Skipping deleting built cache (build.cpp:110) +2026-02-17 15:30:30.168 | info | BuildKernels | Skipping deleting built cache (build.cpp:110) +2026-02-17 15:30:30.168 | info | BuildKernels | Skipping deleting built cache (build.cpp:110) +2026-02-17 15:30:30.169 | info | BuildKernels | Skipping deleting built cache (build.cpp:110) +2026-02-17 15:30:30.169 | info | BuildKernels | Skipping deleting built cache (build.cpp:110) +2026-02-17 15:30:30.494 | warning | Metal | Got num_routing_planes: 1, which is less than current value: 255, ignoring the override (metal_context.cpp:719) +2026-02-17 15:30:30.494 | info | Metal | Dispatch on FabricConfig::FABRIC_1D with 1 Command Queues (device_manager.cpp:328) -2026-02-17 12:14:11.450 | info | Metal | Initializing Fabric (device_manager.cpp:404) -2026-02-17 12:14:17.362 | info | Metal | Fabric initialized on Device 0 (device.cpp:386) -2026-02-17 12:14:17.798 | info | Metal | Fabric initialized on Device 1 (device.cpp:386) -2026-02-17 12:14:17.798 | info | Metal | Fabric initialized on Device 2 (device.cpp:386) -2026-02-17 12:14:17.841 | info | Metal | Fabric initialized on Device 3 (device.cpp:386) -2026-02-17 12:14:17.926 | info | Metal | Fabric initialized on Device 4 (device.cpp:386) -2026-02-17 12:14:22.139 | info | Metal | Fabric initialized on Device 5 (device.cpp:386) -2026-02-17 12:14:22.146 | info | Metal | Fabric initialized on Device 6 (device.cpp:386) -2026-02-17 12:14:22.148 | info | Metal | Fabric initialized on Device 7 (device.cpp:386) -2026-02-17 12:14:22.148 | info | Metal | Fabric Initialized with config FabricConfig::FABRIC_2D (device_manager.cpp:409) -2026-02-17 12:14:27.395 | info | Metal | Command Queue initialized on Device 6 (device_manager.cpp:500) -2026-02-17 12:14:27.395 | info | Metal | Command Queue initialized on Device 4 (device_manager.cpp:500) -2026-02-17 12:14:27.395 | info | Metal | Command Queue initialized on Device 7 (device_manager.cpp:500) -2026-02-17 12:14:27.397 | info | Metal | Command Queue initialized on Device 5 (device_manager.cpp:500) +2026-02-17 15:30:30.509 | info | Metal | Initializing Fabric (device_manager.cpp:404) +2026-02-17 15:30:30.638 | info | Metal | Fabric initialized on Device 0 (device.cpp:386) +2026-02-17 15:30:30.638 | info | Metal | Fabric initialized on Device 1 (device.cpp:386) +2026-02-17 15:30:30.677 | info | Metal | Fabric initialized on Device 2 (device.cpp:386) +2026-02-17 15:30:30.678 | info | Metal | Fabric initialized on Device 3 (device.cpp:386) +2026-02-17 15:30:30.681 | info | Metal | Fabric initialized on Device 4 (device.cpp:386) +2026-02-17 15:30:30.686 | info | Metal | Fabric initialized on Device 5 (device.cpp:386) +2026-02-17 15:30:30.689 | info | Metal | Fabric initialized on Device 6 (device.cpp:386) +2026-02-17 15:30:30.694 | info | Metal | Fabric initialized on Device 7 (device.cpp:386) +2026-02-17 15:30:30.694 | info | Metal | Fabric Initialized with config FabricConfig::FABRIC_1D (device_manager.cpp:409) +2026-02-17 15:30:30.787 | info | Metal | Command Queue initialized on Device 5 (device_manager.cpp:500) +2026-02-17 15:30:30.789 | info | Metal | Command Queue initialized on Device 4 (device_manager.cpp:500) +2026-02-17 15:30:30.790 | info | Metal | Command Queue initialized on Device 7 (device_manager.cpp:500) +2026-02-17 15:30:30.790 | info | Metal | Command Queue initialized on Device 6 (device_manager.cpp:500) Loading HuggingFace reference model on CPU: google/gemma-3-4b-it - Loading checkpoint shards: 0%| | 0/2 [00:00:77 - Initial ttnn.CONFIG: +CMD: env HF_HOME=/proj_sw/user_dev/moconnor/hf-cache TT_VISIBLE_DEVICES=0,1,2,3 TT_MESH_GRAPH_DESC_PATH=/proj_sw/user_dev/moconnor/tt-metal/tt_metal/fabric/mesh_graph_descriptors/t3k_mesh_graph_descriptor.textproto TT_METAL_CACHE=/tmp/tt-metal-cache TT_METAL_INSPECTOR_LOG_PATH=/tmp/tt-metal-inspector TT_METAL_INSPECTOR_INITIALIZATION_IS_IMPORTANT=0 PYTHONUNBUFFERED=1 python eval.py models/google/gemma-3-4b-it/t3000/optimized/model.py --model google/gemma-3-4b-it --prompt_file prompts/bringup_eval_long.txt --max_new_tokens 100 --max_seq_len 40960 +2026-02-17 15:33:25.635 | DEBUG | ttnn::77 - Initial ttnn.CONFIG: Config{cache_path=/home/moconnor/.cache/ttnn,model_cache_path=/home/moconnor/.cache/ttnn/models,tmp_dir=/tmp/ttnn,enable_model_cache=false,enable_fast_runtime_mode=true,throw_exception_on_fallback=false,enable_logging=false,enable_graph_report=false,enable_detailed_buffer_report=false,enable_detailed_tensor_report=false,enable_comparison_mode=false,comparison_mode_should_raise_exception=false,comparison_mode_pcc=0.9999,root_report_path=generated/ttnn/reports,report_name=std::nullopt,std::nullopt} Loading model module: /localdev/moconnor/ttnn_models/models/google/gemma-3-4b-it/t3000/optimized/model.py Loading HuggingFace tokenizer... Loading HuggingFace reference model on CPU... - Loading checkpoint shards: 0%| | 0/2 [00:00