From e0d79a06a3c97a59e99f50fac0c76f17b97856f5 Mon Sep 17 00:00:00 2001 From: Mark O'Connor Date: Tue, 17 Feb 2026 11:44:21 +0000 Subject: [PATCH 1/4] Update Gemma 3 4B functional model logs --- MODELS.md | 2 +- .../gemma-3-4b-it/t3000/functional/demo.log | 128 +++++++++--------- .../gemma-3-4b-it/t3000/functional/eval.log | 117 ++++++++-------- 3 files changed, 126 insertions(+), 121 deletions(-) diff --git a/MODELS.md b/MODELS.md index 40a4afa..fe4992c 100644 --- a/MODELS.md +++ b/MODELS.md @@ -28,7 +28,7 @@ Note: Keep the table columns padded with spaces and right-justify numeric cells | Qwen/Qwen3-0.6B | t3000 | functional | 98% | 100% | 229ms | 6.2 | 40960 | | google/gemma-3-4b-it | n150 | functional | 92% | 100% | 98ms | 13.9 | 40960 | | google/gemma-3-4b-it | n300 | functional | 94% | 100% | 535ms | 3.2 | 40960 | -| google/gemma-3-4b-it | t3000 | functional | 92% | 100% | 330ms | 4.7 | 40960 | +| google/gemma-3-4b-it | t3000 | functional | 92% | 100% | 333ms | 4.9 | 40960 | | microsoft/Phi-3-mini-128k-instruct | n150 | functional | 92% | 99% | 80ms | 13.7 | 12288 | | microsoft/Phi-3-mini-128k-instruct | n300 | functional | 90% | 100% | 193ms | 6.7 | 12288 | | microsoft/Phi-3-mini-128k-instruct | t3000 | functional | 90% | 100% | 184ms | 6.8 | 12288 | diff --git a/models/google/gemma-3-4b-it/t3000/functional/demo.log b/models/google/gemma-3-4b-it/t3000/functional/demo.log index 73046a5..5860d1b 100644 --- a/models/google/gemma-3-4b-it/t3000/functional/demo.log +++ b/models/google/gemma-3-4b-it/t3000/functional/demo.log @@ -1,83 +1,87 @@ -CMD: env HF_HOME=/proj_sw/user_dev/moconnor/hf-cache TT_VISIBLE_DEVICES=0,1,2,3 TT_MESH_GRAPH_DESC_PATH=/proj_sw/user_dev/moconnor/tt-metal/tt_metal/fabric/mesh_graph_descriptors/t3k_mesh_graph_descriptor.textproto TT_METAL_CACHE=/tmp/tt-metal-cache TT_METAL_INSPECTOR_LOG_PATH=/tmp/tt-metal-inspector TT_METAL_INSPECTOR_INITIALIZATION_IS_IMPORTANT=0 python demo.py models/google/gemma-3-4b-it/t3000/functional/model.py --max_seq_len 40960 -2026-02-09 17:36:13.316 | DEBUG | ttnn::77 - Initial ttnn.CONFIG: +CMD: env HF_HOME=/proj_sw/user_dev/moconnor/hf-cache TT_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 TT_MESH_GRAPH_DESC_PATH=/proj_sw/user_dev/moconnor/tt-metal/tt_metal/fabric/mesh_graph_descriptors/t3k_mesh_graph_descriptor.textproto TT_METAL_CACHE=/tmp/tt-metal-cache TT_METAL_INSPECTOR_LOG_PATH=/tmp/tt-metal-inspector TT_METAL_INSPECTOR_INITIALIZATION_IS_IMPORTANT=0 python demo.py models/google/gemma-3-4b-it/t3000/functional/model.py --max_seq_len 40960 +2026-02-17 11:33:32.305 | DEBUG | ttnn::77 - Initial ttnn.CONFIG: Config{cache_path=/home/moconnor/.cache/ttnn,model_cache_path=/home/moconnor/.cache/ttnn/models,tmp_dir=/tmp/ttnn,enable_model_cache=false,enable_fast_runtime_mode=true,throw_exception_on_fallback=false,enable_logging=false,enable_graph_report=false,enable_detailed_buffer_report=false,enable_detailed_tensor_report=false,enable_comparison_mode=false,comparison_mode_should_raise_exception=false,comparison_mode_pcc=0.9999,root_report_path=generated/ttnn/reports,report_name=std::nullopt,std::nullopt} -2026-02-09 17:36:14.875 | info | UMD | Established firmware bundle version: 18.12.1 (topology_discovery.cpp:368) -2026-02-09 17:36:14.908 | info | Device | Opening user mode device driver (tt_cluster.cpp:223) -2026-02-09 17:36:14.918 | info | UMD | Established firmware bundle version: 18.12.1 (topology_discovery.cpp:368) -2026-02-09 17:36:14.990 | info | UMD | Established firmware bundle version: 18.12.1 (topology_discovery.cpp:368) -2026-02-09 17:36:15.053 | info | UMD | Harvesting masks for chip 3 tensix: 0x202 dram: 0x0 eth: 0x0 pcie: 0x0 l2cpu: 0x0 (cluster.cpp:339) -2026-02-09 17:36:15.111 | info | UMD | Harvesting masks for chip 2 tensix: 0x201 dram: 0x0 eth: 0x0 pcie: 0x0 l2cpu: 0x0 (cluster.cpp:339) -2026-02-09 17:36:15.121 | info | UMD | Harvesting masks for chip 1 tensix: 0x220 dram: 0x0 eth: 0x0 pcie: 0x0 l2cpu: 0x0 (cluster.cpp:339) -2026-02-09 17:36:15.132 | info | UMD | Harvesting masks for chip 0 tensix: 0x240 dram: 0x0 eth: 0x0 pcie: 0x0 l2cpu: 0x0 (cluster.cpp:339) -2026-02-09 17:36:15.142 | info | UMD | Harvesting masks for chip 7 tensix: 0x280 dram: 0x0 eth: 0x0 pcie: 0x0 l2cpu: 0x0 (cluster.cpp:339) -2026-02-09 17:36:15.155 | info | UMD | Harvesting masks for chip 6 tensix: 0x210 dram: 0x0 eth: 0x0 pcie: 0x0 l2cpu: 0x0 (cluster.cpp:339) -2026-02-09 17:36:15.169 | info | UMD | Harvesting masks for chip 5 tensix: 0x210 dram: 0x0 eth: 0x0 pcie: 0x0 l2cpu: 0x0 (cluster.cpp:339) -2026-02-09 17:36:15.182 | info | UMD | Harvesting masks for chip 4 tensix: 0x5 dram: 0x0 eth: 0x0 pcie: 0x0 l2cpu: 0x0 (cluster.cpp:339) -2026-02-09 17:36:15.195 | info | UMD | Opening local chip ids/PCIe ids: {0, 1, 2, 3}/[0, 3, 1, 2] and remote chip ids {4, 5, 6, 7} (cluster.cpp:186) -2026-02-09 17:36:15.195 | info | UMD | IOMMU: disabled (cluster.cpp:161) -2026-02-09 17:36:15.195 | info | UMD | KMD version: 2.4.1 (cluster.cpp:164) -2026-02-09 17:36:15.205 | info | UMD | Starting devices in cluster (cluster.cpp:965) -2026-02-09 17:36:15.206 | info | UMD | Mapped hugepage 0x240000000 to NOC address 0x800000000 (silicon_sysmem_manager.cpp:207) -2026-02-09 17:36:15.206 | info | UMD | Mapped hugepage 0x200000000 to NOC address 0x840000000 (silicon_sysmem_manager.cpp:207) -2026-02-09 17:36:15.207 | info | UMD | Mapped hugepage 0x41c0000000 to NOC address 0x800000000 (silicon_sysmem_manager.cpp:207) -2026-02-09 17:36:15.208 | info | UMD | Mapped hugepage 0x4180000000 to NOC address 0x840000000 (silicon_sysmem_manager.cpp:207) -2026-02-09 17:36:15.209 | info | UMD | Mapped hugepage 0x300000000 to NOC address 0x800000000 (silicon_sysmem_manager.cpp:207) -2026-02-09 17:36:15.210 | info | UMD | Mapped hugepage 0x2c0000000 to NOC address 0x840000000 (silicon_sysmem_manager.cpp:207) -2026-02-09 17:36:15.211 | info | UMD | Mapped hugepage 0x42c0000000 to NOC address 0x800000000 (silicon_sysmem_manager.cpp:207) -2026-02-09 17:36:15.211 | info | UMD | Mapped hugepage 0x4280000000 to NOC address 0x840000000 (silicon_sysmem_manager.cpp:207) -2026-02-09 17:36:15.262 | info | Distributed | Using custom mesh graph descriptor: /proj_sw/user_dev/moconnor/tt-metal/tt_metal/fabric/mesh_graph_descriptors/t3k_mesh_graph_descriptor.textproto (metal_context.cpp:822) -2026-02-09 17:36:15.263 | info | Fabric | TopologyMapper mapping start (mesh=0): n_log=8, n_phys=8, log_deg_hist={2:4, 3:4}, phys_deg_hist={2:4, 3:4} (topology_mapper_utils.cpp:171) -2026-02-09 17:36:15.268 | DEBUG | ttnn.device:__init__:150 - Using default dispatch core type for this system: DispatchCoreType.ETH -2026-02-09 17:36:15.268 | DEBUG | ttnn.device:__init__:152 - Using default dispatch core axis for this system: DispatchCoreAxis.ROW -2026-02-09 17:36:15.272 | info | BuildKernels | Skipping deleting built cache (build.cpp:110) -2026-02-09 17:36:15.275 | info | BuildKernels | Skipping deleting built cache (build.cpp:110) -2026-02-09 17:36:15.275 | info | BuildKernels | Skipping deleting built cache (build.cpp:110) -2026-02-09 17:36:15.276 | info | BuildKernels | Skipping deleting built cache (build.cpp:110) -2026-02-09 17:36:15.276 | info | BuildKernels | Skipping deleting built cache (build.cpp:110) -2026-02-09 17:36:15.277 | info | BuildKernels | Skipping deleting built cache (build.cpp:110) -2026-02-09 17:36:15.277 | info | BuildKernels | Skipping deleting built cache (build.cpp:110) -2026-02-09 17:36:15.277 | info | BuildKernels | Skipping deleting built cache (build.cpp:110) -2026-02-09 17:36:15.593 | warning | Metal | Got num_routing_planes: 1, which is less than current value: 255, ignoring the override (metal_context.cpp:719) -2026-02-09 17:36:15.593 | info | Metal | Dispatch on FabricConfig::FABRIC_2D with 1 Command Queues +2026-02-17 11:33:33.829 | info | UMD | Established firmware bundle version: 18.12.1 (topology_discovery.cpp:368) +2026-02-17 11:33:33.858 | info | Device | Opening user mode device driver (tt_cluster.cpp:223) +2026-02-17 11:33:33.869 | info | UMD | Established firmware bundle version: 18.12.1 (topology_discovery.cpp:368) +2026-02-17 11:33:33.945 | info | UMD | Established firmware bundle version: 18.12.1 (topology_discovery.cpp:368) +2026-02-17 11:33:34.007 | info | UMD | Harvesting masks for chip 3 tensix: 0x202 dram: 0x0 eth: 0x0 pcie: 0x0 l2cpu: 0x0 (cluster.cpp:339) +2026-02-17 11:33:34.067 | info | UMD | Harvesting masks for chip 2 tensix: 0xc dram: 0x0 eth: 0x0 pcie: 0x0 l2cpu: 0x0 (cluster.cpp:339) +2026-02-17 11:33:34.077 | info | UMD | Harvesting masks for chip 1 tensix: 0x240 dram: 0x0 eth: 0x0 pcie: 0x0 l2cpu: 0x0 (cluster.cpp:339) +2026-02-17 11:33:34.087 | info | UMD | Harvesting masks for chip 0 tensix: 0x201 dram: 0x0 eth: 0x0 pcie: 0x0 l2cpu: 0x0 (cluster.cpp:339) +2026-02-17 11:33:34.098 | info | UMD | Harvesting masks for chip 7 tensix: 0x220 dram: 0x0 eth: 0x0 pcie: 0x0 l2cpu: 0x0 (cluster.cpp:339) +2026-02-17 11:33:34.112 | info | UMD | Harvesting masks for chip 6 tensix: 0x30 dram: 0x0 eth: 0x0 pcie: 0x0 l2cpu: 0x0 (cluster.cpp:339) +2026-02-17 11:33:34.126 | info | UMD | Harvesting masks for chip 5 tensix: 0x280 dram: 0x0 eth: 0x0 pcie: 0x0 l2cpu: 0x0 (cluster.cpp:339) +2026-02-17 11:33:34.139 | info | UMD | Harvesting masks for chip 4 tensix: 0x300 dram: 0x0 eth: 0x0 pcie: 0x0 l2cpu: 0x0 (cluster.cpp:339) +2026-02-17 11:33:34.153 | info | UMD | Opening local chip ids/PCIe ids: {0, 1, 2, 3}/[0, 3, 1, 2] and remote chip ids {4, 5, 6, 7} (cluster.cpp:186) +2026-02-17 11:33:34.153 | info | UMD | IOMMU: disabled (cluster.cpp:161) +2026-02-17 11:33:34.153 | info | UMD | KMD version: 2.4.1 (cluster.cpp:164) +2026-02-17 11:33:34.163 | info | UMD | Starting devices in cluster (cluster.cpp:965) +2026-02-17 11:33:34.163 | info | UMD | Mapped hugepage 0x200000000 to NOC address 0x800000000 (silicon_sysmem_manager.cpp:207) +2026-02-17 11:33:34.164 | info | UMD | Mapped hugepage 0x140000000 to NOC address 0x840000000 (silicon_sysmem_manager.cpp:207) +2026-02-17 11:33:34.165 | info | UMD | Mapped hugepage 0x41c0000000 to NOC address 0x800000000 (silicon_sysmem_manager.cpp:207) +2026-02-17 11:33:34.166 | info | UMD | Mapped hugepage 0x4180000000 to NOC address 0x840000000 (silicon_sysmem_manager.cpp:207) +2026-02-17 11:33:34.167 | info | UMD | Mapped hugepage 0x300000000 to NOC address 0x800000000 (silicon_sysmem_manager.cpp:207) +2026-02-17 11:33:34.168 | info | UMD | Mapped hugepage 0x2c0000000 to NOC address 0x840000000 (silicon_sysmem_manager.cpp:207) +2026-02-17 11:33:34.169 | info | UMD | Mapped hugepage 0x42c0000000 to NOC address 0x800000000 (silicon_sysmem_manager.cpp:207) +2026-02-17 11:33:34.169 | info | UMD | Mapped hugepage 0x4280000000 to NOC address 0x840000000 (silicon_sysmem_manager.cpp:207) +2026-02-17 11:33:34.224 | info | Distributed | Using custom mesh graph descriptor: /proj_sw/user_dev/moconnor/tt-metal/tt_metal/fabric/mesh_graph_descriptors/t3k_mesh_graph_descriptor.textproto (metal_context.cpp:822) +2026-02-17 11:33:34.225 | info | Fabric | TopologyMapper mapping start (mesh=0): n_log=8, n_phys=8, log_deg_hist={2:4, 3:4}, phys_deg_hist={2:4, 3:4} (topology_mapper_utils.cpp:171) +2026-02-17 11:33:34.231 | DEBUG | ttnn.device:__init__:150 - Using default dispatch core type for this system: DispatchCoreType.ETH +2026-02-17 11:33:34.231 | DEBUG | ttnn.device:__init__:152 - Using default dispatch core axis for this system: DispatchCoreAxis.ROW +2026-02-17 11:33:34.238 | info | BuildKernels | Skipping deleting built cache (build.cpp:110) +2026-02-17 11:33:34.242 | info | BuildKernels | Skipping deleting built cache (build.cpp:110) +2026-02-17 11:33:34.242 | info | BuildKernels | Skipping deleting built cache (build.cpp:110) +2026-02-17 11:33:34.242 | info | BuildKernels | Skipping deleting built cache (build.cpp:110) +2026-02-17 11:33:34.242 | info | BuildKernels | Skipping deleting built cache (build.cpp:110) +2026-02-17 11:33:34.243 | info | BuildKernels | Skipping deleting built cache (build.cpp:110) +2026-02-17 11:33:34.243 | info | BuildKernels | Skipping deleting built cache (build.cpp:110) +2026-02-17 11:33:34.243 | info | BuildKernels | Skipping deleting built cache (build.cpp:110) +2026-02-17 11:33:34.568 | warning | Metal | Got num_routing_planes: 1, which is less than current value: 255, ignoring the override (metal_context.cpp:719) +2026-02-17 11:33:34.568 | info | Metal | Dispatch on FabricConfig::FABRIC_1D with 1 Command Queues (device_manager.cpp:328) -2026-02-09 17:36:15.608 | info | Metal | Initializing Fabric (device_manager.cpp:404) -2026-02-09 17:36:15.780 | info | Metal | Fabric initialized on Device 0 (device.cpp:386) -2026-02-09 17:36:15.780 | info | Metal | Fabric initialized on Device 1 (device.cpp:386) -2026-02-09 17:36:15.814 | info | Metal | Fabric initialized on Device 2 (device.cpp:386) -2026-02-09 17:36:15.814 | info | Metal | Fabric initialized on Device 3 (device.cpp:386) -2026-02-09 17:36:15.818 | info | Metal | Fabric initialized on Device 4 (device.cpp:386) -2026-02-09 17:36:15.823 | info | Metal | Fabric initialized on Device 5 (device.cpp:386) -2026-02-09 17:36:15.826 | info | Metal | Fabric initialized on Device 6 (device.cpp:386) -2026-02-09 17:36:15.833 | info | Metal | Fabric initialized on Device 7 (device.cpp:386) -2026-02-09 17:36:15.833 | info | Metal | Fabric Initialized with config FabricConfig::FABRIC_2D (device_manager.cpp:409) -2026-02-09 17:36:15.954 | info | Metal | Command Queue initialized on Device 7 (device_manager.cpp:500) -2026-02-09 17:36:15.956 | info | Metal | Command Queue initialized on Device 4 (device_manager.cpp:500) -2026-02-09 17:36:15.956 | info | Metal | Command Queue initialized on Device 6 (device_manager.cpp:500) -2026-02-09 17:36:15.956 | info | Metal | Command Queue initialized on Device 5 (device_manager.cpp:500) +2026-02-17 11:33:34.582 | info | Metal | Initializing Fabric (device_manager.cpp:404) +2026-02-17 11:33:34.762 | info | Metal | Fabric initialized on Device 0 (device.cpp:386) +2026-02-17 11:33:34.763 | info | Metal | Fabric initialized on Device 1 (device.cpp:386) +2026-02-17 11:33:34.763 | info | Metal | Fabric initialized on Device 2 (device.cpp:386) +2026-02-17 11:33:34.764 | info | Metal | Fabric initialized on Device 3 (device.cpp:386) +2026-02-17 11:33:34.766 | info | Metal | Fabric initialized on Device 4 (device.cpp:386) +2026-02-17 11:33:34.772 | info | Metal | Fabric initialized on Device 5 (device.cpp:386) +2026-02-17 11:33:34.775 | info | Metal | Fabric initialized on Device 6 (device.cpp:386) +2026-02-17 11:33:34.781 | info | Metal | Fabric initialized on Device 7 (device.cpp:386) +2026-02-17 11:33:34.781 | info | Metal | Fabric Initialized with config FabricConfig::FABRIC_1D (device_manager.cpp:409) +2026-02-17 11:33:34.873 | info | Metal | Command Queue initialized on Device 6 (device_manager.cpp:500) +2026-02-17 11:33:34.874 | info | Metal | Command Queue initialized on Device 4 (device_manager.cpp:500) +2026-02-17 11:33:34.875 | info | Metal | Command Queue initialized on Device 5 (device_manager.cpp:500) +2026-02-17 11:33:34.875 | info | Metal | Command Queue initialized on Device 7 (device_manager.cpp:500) Loading tokenizer: google/gemma-3-4b-it Opening TT device... Loading HuggingFace reference model on CPU: google/gemma-3-4b-it - Loading checkpoint shards: 0%| | 0/2 [00:00:77 - Initial ttnn.CONFIG: +CMD: env HF_HOME=/proj_sw/user_dev/moconnor/hf-cache TT_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 TT_MESH_GRAPH_DESC_PATH=/proj_sw/user_dev/moconnor/tt-metal/tt_metal/fabric/mesh_graph_descriptors/t3k_mesh_graph_descriptor.textproto TT_METAL_CACHE=/tmp/tt-metal-cache TT_METAL_INSPECTOR_LOG_PATH=/tmp/tt-metal-inspector TT_METAL_INSPECTOR_INITIALIZATION_IS_IMPORTANT=0 python eval.py models/google/gemma-3-4b-it/t3000/functional/model.py --model google/gemma-3-4b-it --prompt_file prompts/bringup_eval_long.txt --max_new_tokens 100 --max_seq_len 40960 +2026-02-17 11:36:57.866 | DEBUG | ttnn::77 - Initial ttnn.CONFIG: Config{cache_path=/home/moconnor/.cache/ttnn,model_cache_path=/home/moconnor/.cache/ttnn/models,tmp_dir=/tmp/ttnn,enable_model_cache=false,enable_fast_runtime_mode=true,throw_exception_on_fallback=false,enable_logging=false,enable_graph_report=false,enable_detailed_buffer_report=false,enable_detailed_tensor_report=false,enable_comparison_mode=false,comparison_mode_should_raise_exception=false,comparison_mode_pcc=0.9999,root_report_path=generated/ttnn/reports,report_name=std::nullopt,std::nullopt} Loading model module: /localdev/moconnor/ttnn_models/models/google/gemma-3-4b-it/t3000/functional/model.py Loading HuggingFace tokenizer... Loading HuggingFace reference model on CPU... - Loading checkpoint shards: 0%| | 0/2 [00:00 Date: Tue, 17 Feb 2026 12:15:56 +0100 Subject: [PATCH 2/4] Add strict JSON checker for run_tests --- tasks/run_tests.yaml | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/tasks/run_tests.yaml b/tasks/run_tests.yaml index 7b2eb37..d280c08 100644 --- a/tasks/run_tests.yaml +++ b/tasks/run_tests.yaml @@ -15,6 +15,26 @@ prompt: | Deliverables: - Updated MODELS.md with TTFT, t/s/u, top-1 and top-5 columns filled in. - demo.log and eval.log files in the model's own directory start with the command run and then containing the output of the demo/eval runs respectively. +check: | + Output EXACTLY one line of JSON, and nothing else. + - First character MUST be "{" + - Last character MUST be "}" + - No code fences, no markdown, no extra text + - 'reason' must be a single line (no newlines) + + Choose exactly one output (copy the format exactly): + {"success": true, "reason": "PASS"} + {"success": false, "reason": "FAIL: "} + + Do NOT run any tests or TT/TTNN workloads (no demo/eval, no tt-smi) and do NOT download models. + Use only repository evidence (files and diffs) to verify completion against the task intent. + + Pass criteria: + 1. Target model directory from the task details has both demo.log and eval.log. + 2. demo.log starts with the demo command and includes TTFT + decode t/s/u output. + 3. eval.log starts with the eval command and includes Top-1 + Top-5 accuracy output. + 4. MODELS.md has the target row updated with Top-1, Top-5, TTFT, t/s/u, and Seq len values from the run artifacts. + 5. Demo output text is coherent enough to not be obviously corrupted; if it is clearly corrupted, MODELS.md uses 'bad' for Top-1 and Top-5. set_up: | Make sure you are in /localdev/moconnor/ttnn_models If there are any unsaved changes, stash them and be sure to mention it in your output. From 62145561d20f9830ecfa8080a7a290f68c06a682 Mon Sep 17 00:00:00 2001 From: Mark O'Connor Date: Tue, 17 Feb 2026 14:39:13 +0100 Subject: [PATCH 3/4] Clarify 1x8 mesh port requirement in run_tests --- tasks/run_tests.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tasks/run_tests.yaml b/tasks/run_tests.yaml index d280c08..ec44620 100644 --- a/tasks/run_tests.yaml +++ b/tasks/run_tests.yaml @@ -3,6 +3,9 @@ prompt: | Our task today is to run the demo (for TTFT and t/s/u) and long eval test (for top-1 and top-5, use --max_new_tokens 100 --prompt_file prompts/bringup_eval_long.txt) for a model and update the appropriate part of the table in MODELS.md. You should also check the output of the demo looks sensible and is not clearly garbage or corrupted. If it is bad, replace the top-1 and top-5 scores with 'bad'. + Important for 1x8 mesh port issues: + - If the issue title/body includes "1x8 mesh port", you must update the target model's model.py to set `MESH_SHAPE = (1, 8)` and adjust any mesh-axis/sharding assumptions accordingly *before* running demo/eval. Do not run metrics on the old 2x4 mesh. + Notes: - Always produce demo.log and eval.log, even if the command fails (capture stdout/stderr, then continue). - If you see `cannot map elf file into memory: No space left on device`, rerun with `TT_METAL_CACHE=/tmp/tt-metal-cache` and `TT_METAL_RUNTIME_ROOT=/proj_sw/user_dev/moconnor/tt-runtime-root`. @@ -35,6 +38,7 @@ check: | 3. eval.log starts with the eval command and includes Top-1 + Top-5 accuracy output. 4. MODELS.md has the target row updated with Top-1, Top-5, TTFT, t/s/u, and Seq len values from the run artifacts. 5. Demo output text is coherent enough to not be obviously corrupted; if it is clearly corrupted, MODELS.md uses 'bad' for Top-1 and Top-5. + 6. If the issue indicates a 1x8 mesh port (title/body contains "1x8 mesh port"), the target model.py sets `MESH_SHAPE = (1, 8)` (not 2x4). set_up: | Make sure you are in /localdev/moconnor/ttnn_models If there are any unsaved changes, stash them and be sure to mention it in your output. From 79eb59451cdcdc0a154074874492b575b4958bea Mon Sep 17 00:00:00 2001 From: Mark O'Connor Date: Tue, 17 Feb 2026 15:39:15 +0000 Subject: [PATCH 4/4] Update MODELS and refresh gemma functional logs --- MODELS.md | 2 +- .../gemma-3-4b-it/t3000/functional/demo.log | 122 +++++++++--------- .../gemma-3-4b-it/t3000/functional/eval.log | 120 ++++++++--------- 3 files changed, 124 insertions(+), 120 deletions(-) diff --git a/MODELS.md b/MODELS.md index fe4992c..3e40b9d 100644 --- a/MODELS.md +++ b/MODELS.md @@ -28,7 +28,7 @@ Note: Keep the table columns padded with spaces and right-justify numeric cells | Qwen/Qwen3-0.6B | t3000 | functional | 98% | 100% | 229ms | 6.2 | 40960 | | google/gemma-3-4b-it | n150 | functional | 92% | 100% | 98ms | 13.9 | 40960 | | google/gemma-3-4b-it | n300 | functional | 94% | 100% | 535ms | 3.2 | 40960 | -| google/gemma-3-4b-it | t3000 | functional | 92% | 100% | 333ms | 4.9 | 40960 | +| google/gemma-3-4b-it | t3000 | functional | 92% | 100% | 311ms | 4.9 | 40960 | | microsoft/Phi-3-mini-128k-instruct | n150 | functional | 92% | 99% | 80ms | 13.7 | 12288 | | microsoft/Phi-3-mini-128k-instruct | n300 | functional | 90% | 100% | 193ms | 6.7 | 12288 | | microsoft/Phi-3-mini-128k-instruct | t3000 | functional | 90% | 100% | 184ms | 6.8 | 12288 | diff --git a/models/google/gemma-3-4b-it/t3000/functional/demo.log b/models/google/gemma-3-4b-it/t3000/functional/demo.log index 5860d1b..9fa6db2 100644 --- a/models/google/gemma-3-4b-it/t3000/functional/demo.log +++ b/models/google/gemma-3-4b-it/t3000/functional/demo.log @@ -1,63 +1,65 @@ -CMD: env HF_HOME=/proj_sw/user_dev/moconnor/hf-cache TT_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 TT_MESH_GRAPH_DESC_PATH=/proj_sw/user_dev/moconnor/tt-metal/tt_metal/fabric/mesh_graph_descriptors/t3k_mesh_graph_descriptor.textproto TT_METAL_CACHE=/tmp/tt-metal-cache TT_METAL_INSPECTOR_LOG_PATH=/tmp/tt-metal-inspector TT_METAL_INSPECTOR_INITIALIZATION_IS_IMPORTANT=0 python demo.py models/google/gemma-3-4b-it/t3000/functional/model.py --max_seq_len 40960 -2026-02-17 11:33:32.305 | DEBUG | ttnn::77 - Initial ttnn.CONFIG: +python demo.py models/google/gemma-3-4b-it/t3000/functional/model.py +2026-02-17 15:29:41.537 | DEBUG | ttnn::77 - Initial ttnn.CONFIG: Config{cache_path=/home/moconnor/.cache/ttnn,model_cache_path=/home/moconnor/.cache/ttnn/models,tmp_dir=/tmp/ttnn,enable_model_cache=false,enable_fast_runtime_mode=true,throw_exception_on_fallback=false,enable_logging=false,enable_graph_report=false,enable_detailed_buffer_report=false,enable_detailed_tensor_report=false,enable_comparison_mode=false,comparison_mode_should_raise_exception=false,comparison_mode_pcc=0.9999,root_report_path=generated/ttnn/reports,report_name=std::nullopt,std::nullopt} -2026-02-17 11:33:33.829 | info | UMD | Established firmware bundle version: 18.12.1 (topology_discovery.cpp:368) -2026-02-17 11:33:33.858 | info | Device | Opening user mode device driver (tt_cluster.cpp:223) -2026-02-17 11:33:33.869 | info | UMD | Established firmware bundle version: 18.12.1 (topology_discovery.cpp:368) -2026-02-17 11:33:33.945 | info | UMD | Established firmware bundle version: 18.12.1 (topology_discovery.cpp:368) -2026-02-17 11:33:34.007 | info | UMD | Harvesting masks for chip 3 tensix: 0x202 dram: 0x0 eth: 0x0 pcie: 0x0 l2cpu: 0x0 (cluster.cpp:339) -2026-02-17 11:33:34.067 | info | UMD | Harvesting masks for chip 2 tensix: 0xc dram: 0x0 eth: 0x0 pcie: 0x0 l2cpu: 0x0 (cluster.cpp:339) -2026-02-17 11:33:34.077 | info | UMD | Harvesting masks for chip 1 tensix: 0x240 dram: 0x0 eth: 0x0 pcie: 0x0 l2cpu: 0x0 (cluster.cpp:339) -2026-02-17 11:33:34.087 | info | UMD | Harvesting masks for chip 0 tensix: 0x201 dram: 0x0 eth: 0x0 pcie: 0x0 l2cpu: 0x0 (cluster.cpp:339) -2026-02-17 11:33:34.098 | info | UMD | Harvesting masks for chip 7 tensix: 0x220 dram: 0x0 eth: 0x0 pcie: 0x0 l2cpu: 0x0 (cluster.cpp:339) -2026-02-17 11:33:34.112 | info | UMD | Harvesting masks for chip 6 tensix: 0x30 dram: 0x0 eth: 0x0 pcie: 0x0 l2cpu: 0x0 (cluster.cpp:339) -2026-02-17 11:33:34.126 | info | UMD | Harvesting masks for chip 5 tensix: 0x280 dram: 0x0 eth: 0x0 pcie: 0x0 l2cpu: 0x0 (cluster.cpp:339) -2026-02-17 11:33:34.139 | info | UMD | Harvesting masks for chip 4 tensix: 0x300 dram: 0x0 eth: 0x0 pcie: 0x0 l2cpu: 0x0 (cluster.cpp:339) -2026-02-17 11:33:34.153 | info | UMD | Opening local chip ids/PCIe ids: {0, 1, 2, 3}/[0, 3, 1, 2] and remote chip ids {4, 5, 6, 7} (cluster.cpp:186) -2026-02-17 11:33:34.153 | info | UMD | IOMMU: disabled (cluster.cpp:161) -2026-02-17 11:33:34.153 | info | UMD | KMD version: 2.4.1 (cluster.cpp:164) -2026-02-17 11:33:34.163 | info | UMD | Starting devices in cluster (cluster.cpp:965) -2026-02-17 11:33:34.163 | info | UMD | Mapped hugepage 0x200000000 to NOC address 0x800000000 (silicon_sysmem_manager.cpp:207) -2026-02-17 11:33:34.164 | info | UMD | Mapped hugepage 0x140000000 to NOC address 0x840000000 (silicon_sysmem_manager.cpp:207) -2026-02-17 11:33:34.165 | info | UMD | Mapped hugepage 0x41c0000000 to NOC address 0x800000000 (silicon_sysmem_manager.cpp:207) -2026-02-17 11:33:34.166 | info | UMD | Mapped hugepage 0x4180000000 to NOC address 0x840000000 (silicon_sysmem_manager.cpp:207) -2026-02-17 11:33:34.167 | info | UMD | Mapped hugepage 0x300000000 to NOC address 0x800000000 (silicon_sysmem_manager.cpp:207) -2026-02-17 11:33:34.168 | info | UMD | Mapped hugepage 0x2c0000000 to NOC address 0x840000000 (silicon_sysmem_manager.cpp:207) -2026-02-17 11:33:34.169 | info | UMD | Mapped hugepage 0x42c0000000 to NOC address 0x800000000 (silicon_sysmem_manager.cpp:207) -2026-02-17 11:33:34.169 | info | UMD | Mapped hugepage 0x4280000000 to NOC address 0x840000000 (silicon_sysmem_manager.cpp:207) -2026-02-17 11:33:34.224 | info | Distributed | Using custom mesh graph descriptor: /proj_sw/user_dev/moconnor/tt-metal/tt_metal/fabric/mesh_graph_descriptors/t3k_mesh_graph_descriptor.textproto (metal_context.cpp:822) -2026-02-17 11:33:34.225 | info | Fabric | TopologyMapper mapping start (mesh=0): n_log=8, n_phys=8, log_deg_hist={2:4, 3:4}, phys_deg_hist={2:4, 3:4} (topology_mapper_utils.cpp:171) -2026-02-17 11:33:34.231 | DEBUG | ttnn.device:__init__:150 - Using default dispatch core type for this system: DispatchCoreType.ETH -2026-02-17 11:33:34.231 | DEBUG | ttnn.device:__init__:152 - Using default dispatch core axis for this system: DispatchCoreAxis.ROW -2026-02-17 11:33:34.238 | info | BuildKernels | Skipping deleting built cache (build.cpp:110) -2026-02-17 11:33:34.242 | info | BuildKernels | Skipping deleting built cache (build.cpp:110) -2026-02-17 11:33:34.242 | info | BuildKernels | Skipping deleting built cache (build.cpp:110) -2026-02-17 11:33:34.242 | info | BuildKernels | Skipping deleting built cache (build.cpp:110) -2026-02-17 11:33:34.242 | info | BuildKernels | Skipping deleting built cache (build.cpp:110) -2026-02-17 11:33:34.243 | info | BuildKernels | Skipping deleting built cache (build.cpp:110) -2026-02-17 11:33:34.243 | info | BuildKernels | Skipping deleting built cache (build.cpp:110) -2026-02-17 11:33:34.243 | info | BuildKernels | Skipping deleting built cache (build.cpp:110) -2026-02-17 11:33:34.568 | warning | Metal | Got num_routing_planes: 1, which is less than current value: 255, ignoring the override (metal_context.cpp:719) -2026-02-17 11:33:34.568 | info | Metal | Dispatch on FabricConfig::FABRIC_1D with 1 Command Queues - (device_manager.cpp:328) -2026-02-17 11:33:34.582 | info | Metal | Initializing Fabric (device_manager.cpp:404) -2026-02-17 11:33:34.762 | info | Metal | Fabric initialized on Device 0 (device.cpp:386) -2026-02-17 11:33:34.763 | info | Metal | Fabric initialized on Device 1 (device.cpp:386) -2026-02-17 11:33:34.763 | info | Metal | Fabric initialized on Device 2 (device.cpp:386) -2026-02-17 11:33:34.764 | info | Metal | Fabric initialized on Device 3 (device.cpp:386) -2026-02-17 11:33:34.766 | info | Metal | Fabric initialized on Device 4 (device.cpp:386) -2026-02-17 11:33:34.772 | info | Metal | Fabric initialized on Device 5 (device.cpp:386) -2026-02-17 11:33:34.775 | info | Metal | Fabric initialized on Device 6 (device.cpp:386) -2026-02-17 11:33:34.781 | info | Metal | Fabric initialized on Device 7 (device.cpp:386) -2026-02-17 11:33:34.781 | info | Metal | Fabric Initialized with config FabricConfig::FABRIC_1D (device_manager.cpp:409) -2026-02-17 11:33:34.873 | info | Metal | Command Queue initialized on Device 6 (device_manager.cpp:500) -2026-02-17 11:33:34.874 | info | Metal | Command Queue initialized on Device 4 (device_manager.cpp:500) -2026-02-17 11:33:34.875 | info | Metal | Command Queue initialized on Device 5 (device_manager.cpp:500) -2026-02-17 11:33:34.875 | info | Metal | Command Queue initialized on Device 7 (device_manager.cpp:500) Loading tokenizer: google/gemma-3-4b-it Opening TT device... +2026-02-17 15:29:43.205 | info | UMD | Established firmware bundle version: 19.4.0 (topology_discovery.cpp:368) +2026-02-17 15:29:43.237 | info | Device | Opening user mode device driver (tt_cluster.cpp:223) +2026-02-17 15:29:43.247 | info | UMD | Established firmware bundle version: 19.4.0 (topology_discovery.cpp:368) +2026-02-17 15:29:43.322 | info | UMD | Established firmware bundle version: 19.4.0 (topology_discovery.cpp:368) +2026-02-17 15:29:43.384 | info | UMD | Harvesting masks for chip 3 tensix: 0x201 dram: 0x0 eth: 0x0 pcie: 0x0 l2cpu: 0x0 (cluster.cpp:339) +2026-02-17 15:29:43.444 | info | UMD | Harvesting masks for chip 2 tensix: 0x280 dram: 0x0 eth: 0x0 pcie: 0x0 l2cpu: 0x0 (cluster.cpp:339) +2026-02-17 15:29:43.455 | info | UMD | Harvesting masks for chip 1 tensix: 0x201 dram: 0x0 eth: 0x0 pcie: 0x0 l2cpu: 0x0 (cluster.cpp:339) +2026-02-17 15:29:43.465 | info | UMD | Harvesting masks for chip 0 tensix: 0x220 dram: 0x0 eth: 0x0 pcie: 0x0 l2cpu: 0x0 (cluster.cpp:339) +2026-02-17 15:29:43.476 | info | UMD | Harvesting masks for chip 7 tensix: 0x210 dram: 0x0 eth: 0x0 pcie: 0x0 l2cpu: 0x0 (cluster.cpp:339) +2026-02-17 15:29:43.489 | info | UMD | Harvesting masks for chip 6 tensix: 0x201 dram: 0x0 eth: 0x0 pcie: 0x0 l2cpu: 0x0 (cluster.cpp:339) +2026-02-17 15:29:43.503 | info | UMD | Harvesting masks for chip 5 tensix: 0x201 dram: 0x0 eth: 0x0 pcie: 0x0 l2cpu: 0x0 (cluster.cpp:339) +2026-02-17 15:29:43.517 | info | UMD | Harvesting masks for chip 4 tensix: 0x240 dram: 0x0 eth: 0x0 pcie: 0x0 l2cpu: 0x0 (cluster.cpp:339) +2026-02-17 15:29:43.531 | info | UMD | Opening local chip ids/PCIe ids: {0, 1, 2, 3}/[0, 1, 3, 2] and remote chip ids {4, 5, 6, 7} (cluster.cpp:186) +2026-02-17 15:29:43.531 | info | UMD | IOMMU: disabled (cluster.cpp:161) +2026-02-17 15:29:43.531 | info | UMD | KMD version: 2.4.1 (cluster.cpp:164) +2026-02-17 15:29:43.539 | info | UMD | Starting devices in cluster (cluster.cpp:965) +2026-02-17 15:29:43.540 | info | UMD | Mapped hugepage 0x280000000 to NOC address 0x800000000 (silicon_sysmem_manager.cpp:207) +2026-02-17 15:29:43.541 | info | UMD | Mapped hugepage 0x240000000 to NOC address 0x840000000 (silicon_sysmem_manager.cpp:207) +2026-02-17 15:29:43.542 | info | UMD | Mapped hugepage 0x300000000 to NOC address 0x800000000 (silicon_sysmem_manager.cpp:207) +2026-02-17 15:29:43.542 | info | UMD | Mapped hugepage 0x2c0000000 to NOC address 0x840000000 (silicon_sysmem_manager.cpp:207) +2026-02-17 15:29:43.543 | info | UMD | Mapped hugepage 0x41c0000000 to NOC address 0x800000000 (silicon_sysmem_manager.cpp:207) +2026-02-17 15:29:43.544 | info | UMD | Mapped hugepage 0x4180000000 to NOC address 0x840000000 (silicon_sysmem_manager.cpp:207) +2026-02-17 15:29:43.545 | info | UMD | Mapped hugepage 0x4240000000 to NOC address 0x800000000 (silicon_sysmem_manager.cpp:207) +2026-02-17 15:29:43.545 | info | UMD | Mapped hugepage 0x4200000000 to NOC address 0x840000000 (silicon_sysmem_manager.cpp:207) +2026-02-17 15:29:43.602 | info | Distributed | Using auto discovery to generate mesh graph. (metal_context.cpp:827) +2026-02-17 15:29:43.602 | info | Distributed | Constructing control plane using auto-discovery (no mesh graph descriptor). (metal_context.cpp:804) +2026-02-17 15:29:43.603 | info | Fabric | TopologyMapper mapping start (mesh=0): n_log=8, n_phys=8, log_deg_hist={2:4, 3:4}, phys_deg_hist={2:4, 3:4} (topology_mapper_utils.cpp:171) +2026-02-17 15:29:43.603 | info | Fabric | TopologyMapper mapping start (mesh=0): n_log=8, n_phys=8, log_deg_hist={2:4, 3:4}, phys_deg_hist={2:4, 3:4} (topology_mapper_utils.cpp:171) +2026-02-17 15:29:43.608 | DEBUG | ttnn.device:__init__:150 - Using default dispatch core type for this system: DispatchCoreType.ETH +2026-02-17 15:29:43.608 | DEBUG | ttnn.device:__init__:152 - Using default dispatch core axis for this system: DispatchCoreAxis.ROW +2026-02-17 15:29:43.614 | info | BuildKernels | Skipping deleting built cache (build.cpp:110) +2026-02-17 15:29:43.617 | info | BuildKernels | Skipping deleting built cache (build.cpp:110) +2026-02-17 15:29:43.618 | info | BuildKernels | Skipping deleting built cache (build.cpp:110) +2026-02-17 15:29:43.618 | info | BuildKernels | Skipping deleting built cache (build.cpp:110) +2026-02-17 15:29:43.619 | info | BuildKernels | Skipping deleting built cache (build.cpp:110) +2026-02-17 15:29:43.619 | info | BuildKernels | Skipping deleting built cache (build.cpp:110) +2026-02-17 15:29:43.620 | info | BuildKernels | Skipping deleting built cache (build.cpp:110) +2026-02-17 15:29:43.620 | info | BuildKernels | Skipping deleting built cache (build.cpp:110) +2026-02-17 15:29:43.970 | warning | Metal | Got num_routing_planes: 1, which is less than current value: 255, ignoring the override (metal_context.cpp:719) +2026-02-17 15:29:43.970 | info | Metal | Dispatch on FabricConfig::FABRIC_1D with 1 Command Queues + (device_manager.cpp:328) +2026-02-17 15:29:43.984 | info | Metal | Initializing Fabric (device_manager.cpp:404) +2026-02-17 15:29:44.165 | info | Metal | Fabric initialized on Device 0 (device.cpp:386) +2026-02-17 15:29:44.234 | info | Metal | Fabric initialized on Device 1 (device.cpp:386) +2026-02-17 15:29:44.234 | info | Metal | Fabric initialized on Device 2 (device.cpp:386) +2026-02-17 15:29:44.235 | info | Metal | Fabric initialized on Device 3 (device.cpp:386) +2026-02-17 15:29:44.237 | info | Metal | Fabric initialized on Device 4 (device.cpp:386) +2026-02-17 15:29:44.240 | info | Metal | Fabric initialized on Device 5 (device.cpp:386) +2026-02-17 15:29:44.246 | info | Metal | Fabric initialized on Device 6 (device.cpp:386) +2026-02-17 15:29:44.252 | info | Metal | Fabric initialized on Device 7 (device.cpp:386) +2026-02-17 15:29:44.252 | info | Metal | Fabric Initialized with config FabricConfig::FABRIC_1D (device_manager.cpp:409) +2026-02-17 15:29:44.426 | info | Metal | Command Queue initialized on Device 4 (device_manager.cpp:500) +2026-02-17 15:29:44.426 | info | Metal | Command Queue initialized on Device 6 (device_manager.cpp:500) +2026-02-17 15:29:44.426 | info | Metal | Command Queue initialized on Device 5 (device_manager.cpp:500) +2026-02-17 15:29:44.429 | info | Metal | Command Queue initialized on Device 7 (device_manager.cpp:500) Loading HuggingFace reference model on CPU: google/gemma-3-4b-it - Loading checkpoint shards: 0%| | 0/2 [00:00:77 - Initial ttnn.CONFIG: +python eval.py models/google/gemma-3-4b-it/t3000/functional/model.py --model google/gemma-3-4b-it --prompt_file prompts/bringup_eval_long.txt --max_new_tokens 100 --max_seq_len 40960 +2026-02-17 15:32:34.181 | DEBUG | ttnn::77 - Initial ttnn.CONFIG: Config{cache_path=/home/moconnor/.cache/ttnn,model_cache_path=/home/moconnor/.cache/ttnn/models,tmp_dir=/tmp/ttnn,enable_model_cache=false,enable_fast_runtime_mode=true,throw_exception_on_fallback=false,enable_logging=false,enable_graph_report=false,enable_detailed_buffer_report=false,enable_detailed_tensor_report=false,enable_comparison_mode=false,comparison_mode_should_raise_exception=false,comparison_mode_pcc=0.9999,root_report_path=generated/ttnn/reports,report_name=std::nullopt,std::nullopt} Loading model module: /localdev/moconnor/ttnn_models/models/google/gemma-3-4b-it/t3000/functional/model.py Loading HuggingFace tokenizer... Loading HuggingFace reference model on CPU... - Loading checkpoint shards: 0%| | 0/2 [00:00