diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..6aa1064 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +/target/ +**/*.rs.bk +Cargo.lock diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..603fb33 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,1589 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "addr2line" +version = "0.25.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b5d307320b3181d6d7954e663bd7c774a838b8220fe0593c86d9fb09f498b4b" +dependencies = [ + "gimli", +] + +[[package]] +name = "adler2" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" + +[[package]] +name = "aho-corasick" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +dependencies = [ + "memchr", +] + +[[package]] +name = "anes" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" + +[[package]] +name = "anstream" +version = "0.6.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ae563653d1938f79b1ab1b5e668c87c76a9930414574a6583a7b7e11a8e6192" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "862ed96ca487e809f1c8e5a8447f6ee2cf102f846893800b20cebdf541fc6bbd" + +[[package]] +name = "anstyle-parse" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e231f6134f61b71076a3eab506c379d4f36122f2af15a9ff04415ea4c3339e2" +dependencies = [ + "windows-sys 0.60.2", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3e0633414522a32ffaac8ac6cc8f748e090c5717661fddeea04219e2344f5f2a" +dependencies = [ + "anstyle", + "once_cell_polyfill", + "windows-sys 0.60.2", +] + +[[package]] +name = "atty" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" +dependencies = [ + "hermit-abi", + "libc", + "winapi", +] + +[[package]] +name = "autocfg" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + +[[package]] +name = "backtrace" +version = "0.3.76" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb531853791a215d7c62a30daf0dde835f381ab5de4589cfe7c649d2cbe92bd6" +dependencies = [ + "addr2line", + "cfg-if", + "libc", + "miniz_oxide", + "object", + "rustc-demangle", + "windows-link", +] + +[[package]] +name = "bindgen" +version = "0.66.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2b84e06fc203107bfbad243f4aba2af864eb7db3b1cf46ea0a023b0b433d2a7" +dependencies = [ + "bitflags 2.9.4", + "cexpr", + "clang-sys", + "lazy_static", + "lazycell", + "log", + "peeking_take_while", + "prettyplease", + "proc-macro2", + "quote", + "regex", + "rustc-hash", + "shlex", + "syn", + "which", +] + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "bitflags" +version = "2.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2261d10cca569e4643e526d8dc2e62e433cc8aba21ab764233731f8d369bf394" + +[[package]] +name = "bumpalo" +version = "3.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" + +[[package]] +name = "bytes" +version = "1.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a" + +[[package]] +name = "cast" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" + +[[package]] +name = "cc" +version = "1.2.39" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1354349954c6fc9cb0deab020f27f783cf0b604e8bb754dc4658ecf0d29c35f" +dependencies = [ + "find-msvc-tools", + "shlex", +] + +[[package]] +name = "cexpr" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766" +dependencies = [ + "nom 7.1.3", +] + +[[package]] +name = "cfg-if" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2fd1289c04a9ea8cb22300a459a72a385d7c73d3259e2ed7dcb2af674838cfa9" + +[[package]] +name = "ciborium" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e" +dependencies = [ + "ciborium-io", + "ciborium-ll", + "serde", +] + +[[package]] +name = "ciborium-io" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757" + +[[package]] +name = "ciborium-ll" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9" +dependencies = [ + "ciborium-io", + "half", +] + +[[package]] +name = "clang-sys" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4" +dependencies = [ + "glob", + "libc", + "libloading", +] + +[[package]] +name = "clap" +version = "3.2.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ea181bf566f71cb9a5d17a59e1871af638180a18fb0035c92ae62b705207123" +dependencies = [ + "bitflags 1.3.2", + "clap_lex 0.2.4", + "indexmap", + "textwrap", +] + +[[package]] +name = "clap" +version = "4.5.48" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2134bb3ea021b78629caa971416385309e0131b351b25e01dc16fb54e1b5fae" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.5.48" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2ba64afa3c0a6df7fa517765e31314e983f51dda798ffba27b988194fb65dc9" +dependencies = [ + "anstream", + "anstyle", + "clap_lex 0.7.5", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.5.47" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbfd7eae0b0f1a6e63d4b13c9c478de77c2eb546fba158ad50b4203dc24b9f9c" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "clap_lex" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2850f2f5a82cbf437dd5af4d49848fbdfc27c157c3d010345776f952765261c5" +dependencies = [ + "os_str_bytes", +] + +[[package]] +name = "clap_lex" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b94f61472cee1439c0b966b47e3aca9ae07e45d070759512cd390ea2bebc6675" + +[[package]] +name = "cmake" +version = "0.1.54" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7caa3f9de89ddbe2c607f4101924c5abec803763ae9534e4f4d7d8f84aa81f0" +dependencies = [ + "cc", +] + +[[package]] +name = "colorchoice" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" + +[[package]] +name = "criterion" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7c76e09c1aae2bc52b3d2f29e13c6572553b30c4aa1b8a49fd70de6412654cb" +dependencies = [ + "anes", + "atty", + "cast", + "ciborium", + "clap 3.2.25", + "criterion-plot", + "itertools", + "lazy_static", + "num-traits", + "oorandom", + "plotters", + "rayon", + "regex", + "serde", + "serde_derive", + "serde_json", + "tinytemplate", + "walkdir", +] + +[[package]] +name = "criterion-plot" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" +dependencies = [ + "cast", + "itertools", +] + +[[package]] +name = "crossbeam" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1137cd7e7fc0fb5d3c5a8678be38ec56e819125d8d7907411fe24ccb943faca8" +dependencies = [ + "crossbeam-channel", + "crossbeam-deque", + "crossbeam-epoch", + "crossbeam-queue", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-channel" +version = "0.5.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "82b8f8f868b36967f9606790d1903570de9ceaf870a7bf9fbbd3016d636a2cb2" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-deque" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-queue" +version = "0.3.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f58bbc28f91df819d0aa2a2c00cd19754769c2fad90579b3592b1c9ba7a3115" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" + +[[package]] +name = "crunchy" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" + +[[package]] +name = "either" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" + +[[package]] +name = "env_filter" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "186e05a59d4c50738528153b83b0b0194d3a29507dfec16eccd4b342903397d0" +dependencies = [ + "log", + "regex", +] + +[[package]] +name = "env_logger" +version = "0.11.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13c863f0904021b108aa8b2f55046443e6b1ebde8fd4a15c399893aae4fa069f" +dependencies = [ + "anstream", + "anstyle", + "env_filter", + "jiff", + "log", +] + +[[package]] +name = "errno" +version = "0.3.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" +dependencies = [ + "libc", + "windows-sys 0.61.1", +] + +[[package]] +name = "fastrand" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" + +[[package]] +name = "find-msvc-tools" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ced73b1dacfc750a6db6c0a0c3a3853c8b41997e2e2c563dc90804ae6867959" + +[[package]] +name = "gethostname" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc257fdb4038301ce4b9cd1b3b51704509692bb3ff716a410cbd07925d9dae55" +dependencies = [ + "rustix 1.1.2", + "windows-targets 0.52.6", +] + +[[package]] +name = "getrandom" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4" +dependencies = [ + "cfg-if", + "libc", + "r-efi", + "wasi 0.14.7+wasi-0.2.4", +] + +[[package]] +name = "gimli" +version = "0.32.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e629b9b98ef3dd8afe6ca2bd0f89306cec16d43d907889945bc5d6687f2f13c7" + +[[package]] +name = "glob" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" + +[[package]] +name = "half" +version = "2.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "459196ed295495a68f7d7fe1d84f6c4b7ff0e21fe3017b2f283c6fac3ad803c9" +dependencies = [ + "cfg-if", + "crunchy", +] + +[[package]] +name = "hashbrown" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "hermit-abi" +version = "0.1.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" +dependencies = [ + "libc", +] + +[[package]] +name = "histogram" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95aebe0dec9a429e3207e5e34d97f2a7d1064d5ee6d8ed13ce0a26456de000ae" +dependencies = [ + "thiserror", +] + +[[package]] +name = "home" +version = "0.5.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589533453244b0995c858700322199b2becb13b627df2851f64a2775d024abcf" +dependencies = [ + "windows-sys 0.59.0", +] + +[[package]] +name = "indexmap" +version = "1.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" +dependencies = [ + "autocfg", + "hashbrown", +] + +[[package]] +name = "io-uring" +version = "0.7.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "046fa2d4d00aea763528b4950358d0ead425372445dc8ff86312b3c69ff7727b" +dependencies = [ + "bitflags 2.9.4", + "cfg-if", + "libc", +] + +[[package]] +name = "is_terminal_polyfill" +version = "1.70.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" + +[[package]] +name = "itertools" +version = "0.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" +dependencies = [ + "either", +] + +[[package]] +name = "itoa" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" + +[[package]] +name = "jiff" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be1f93b8b1eb69c77f24bbb0afdf66f54b632ee39af40ca21c4365a1d7347e49" +dependencies = [ + "jiff-static", + "log", + "portable-atomic", + "portable-atomic-util", + "serde", +] + +[[package]] +name = "jiff-static" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03343451ff899767262ec32146f6d559dd759fdadf42ff0e227c7c48f72594b4" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "js-sys" +version = "0.3.81" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec48937a97411dcb524a265206ccd4c90bb711fca92b2792c407f268825b9305" +dependencies = [ + "once_cell", + "wasm-bindgen", +] + +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] +name = "lazycell" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" + +[[package]] +name = "libc" +version = "0.2.176" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "58f929b4d672ea937a23a1ab494143d968337a5f47e56d0815df1e0890ddf174" + +[[package]] +name = "libloading" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55" +dependencies = [ + "cfg-if", + "windows-link", +] + +[[package]] +name = "linux-raw-sys" +version = "0.4.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab" + +[[package]] +name = "linux-raw-sys" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039" + +[[package]] +name = "lock_api" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96936507f153605bddfcda068dd804796c84324ed2510809e5b2a624c81da765" +dependencies = [ + "autocfg", + "scopeguard", +] + +[[package]] +name = "log" +version = "0.4.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34080505efa8e45a4b816c349525ebe327ceaa8559756f0356cba97ef3bf7432" + +[[package]] +name = "memchr" +version = "2.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" + +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + +[[package]] +name = "miniz_oxide" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" +dependencies = [ + "adler2", +] + +[[package]] +name = "mio" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78bed444cc8a2160f01cbcf811ef18cac863ad68ae8ca62092e8db51d51c761c" +dependencies = [ + "libc", + "wasi 0.11.1+wasi-snapshot-preview1", + "windows-sys 0.59.0", +] + +[[package]] +name = "nccl-profiler" +version = "0.2.0" +dependencies = [ + "bindgen", + "clap 4.5.48", + "cmake", + "criterion", + "crossbeam", + "env_logger", + "gethostname", + "histogram", + "libc", + "libloading", + "log", + "nom 8.0.0", + "rand", + "serde_json", + "static_assertions", + "tempfile", + "tokio", +] + +[[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + +[[package]] +name = "nom" +version = "8.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df9761775871bdef83bee530e60050f7e54b1105350d6884eb0fb4f46c2f9405" +dependencies = [ + "memchr", +] + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + +[[package]] +name = "object" +version = "0.37.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff76201f031d8863c38aa7f905eca4f53abbfa15f609db4277d44cd8938f33fe" +dependencies = [ + "memchr", +] + +[[package]] +name = "once_cell" +version = "1.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" + +[[package]] +name = "once_cell_polyfill" +version = "1.70.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4895175b425cb1f87721b59f0f286c2092bd4af812243672510e1ac53e2e0ad" + +[[package]] +name = "oorandom" +version = "11.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e" + +[[package]] +name = "os_str_bytes" +version = "6.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2355d85b9a3786f481747ced0e0ff2ba35213a1f9bd406ed906554d7af805a1" + +[[package]] +name = "parking_lot" +version = "0.12.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70d58bf43669b5795d1576d0641cfb6fbb2057bf629506267a92807158584a13" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc838d2a56b5b1a6c25f55575dfc605fabb63bb2365f6c2353ef9159aa69e4a5" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-targets 0.52.6", +] + +[[package]] +name = "peeking_take_while" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099" + +[[package]] +name = "pin-project-lite" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" + +[[package]] +name = "plotters" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5aeb6f403d7a4911efb1e33402027fc44f29b5bf6def3effcc22d7bb75f2b747" +dependencies = [ + "num-traits", + "plotters-backend", + "plotters-svg", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "plotters-backend" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df42e13c12958a16b3f7f4386b9ab1f3e7933914ecea48da7139435263a4172a" + +[[package]] +name = "plotters-svg" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51bae2ac328883f7acdfea3d66a7c35751187f870bc81f94563733a154d7a670" +dependencies = [ + "plotters-backend", +] + +[[package]] +name = "portable-atomic" +version = "1.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f84267b20a16ea918e43c6a88433c2d54fa145c92a811b5b047ccbe153674483" + +[[package]] +name = "portable-atomic-util" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d8a2f0d8d040d7848a709caf78912debcc3f33ee4b3cac47d73d1e1069e83507" +dependencies = [ + "portable-atomic", +] + +[[package]] +name = "ppv-lite86" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" +dependencies = [ + "zerocopy", +] + +[[package]] +name = "prettyplease" +version = "0.2.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" +dependencies = [ + "proc-macro2", + "syn", +] + +[[package]] +name = "proc-macro2" +version = "1.0.101" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89ae43fd86e4158d6db51ad8e2b80f313af9cc74f5c0e03ccb87de09998732de" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce25767e7b499d1b604768e7cde645d14cc8584231ea6b295e9c9eb22c02e1d1" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "r-efi" +version = "5.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" + +[[package]] +name = "rand" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" +dependencies = [ + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38" +dependencies = [ + "getrandom", +] + +[[package]] +name = "rayon" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + +[[package]] +name = "redox_syscall" +version = "0.5.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5407465600fb0548f1442edf71dd20683c6ed326200ace4b1ef0763521bb3b77" +dependencies = [ + "bitflags 2.9.4", +] + +[[package]] +name = "regex" +version = "1.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b5288124840bee7b386bc413c487869b360b2b4ec421ea56425128692f2a82c" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "833eb9ce86d40ef33cb1306d8accf7bc8ec2bfea4355cbdebb3df68b40925cad" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "caf4aa5b0f434c91fe5c7f1ecb6a5ece2130b02ad2a590589dda5146df959001" + +[[package]] +name = "rustc-demangle" +version = "0.1.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56f7d92ca342cea22a06f2121d944b4fd82af56988c270852495420f961d4ace" + +[[package]] +name = "rustc-hash" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" + +[[package]] +name = "rustix" +version = "0.38.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154" +dependencies = [ + "bitflags 2.9.4", + "errno", + "libc", + "linux-raw-sys 0.4.15", + "windows-sys 0.59.0", +] + +[[package]] +name = "rustix" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd15f8a2c5551a84d56efdc1cd049089e409ac19a3072d5037a17fd70719ff3e" +dependencies = [ + "bitflags 2.9.4", + "errno", + "libc", + "linux-raw-sys 0.11.0", + "windows-sys 0.61.1", +] + +[[package]] +name = "rustversion" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" + +[[package]] +name = "ryu" +version = "1.0.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" + +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "serde" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", + "serde_derive", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.145" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "402a6f66d8c709116cf22f558eab210f5a50187f702eb4d7e5ef38d9a7f1c79c" +dependencies = [ + "itoa", + "memchr", + "ryu", + "serde", + "serde_core", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "signal-hook-registry" +version = "1.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2a4719bff48cee6b39d12c020eeb490953ad2443b7055bd0b21fca26bd8c28b" +dependencies = [ + "libc", +] + +[[package]] +name = "slab" +version = "0.4.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a2ae44ef20feb57a68b23d846850f861394c2e02dc425a50098ae8c90267589" + +[[package]] +name = "smallvec" +version = "1.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" + +[[package]] +name = "socket2" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "233504af464074f9d066d7b5416c5f9b894a5862a6506e306f7b816cdd6f1807" +dependencies = [ + "libc", + "windows-sys 0.59.0", +] + +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + +[[package]] +name = "syn" +version = "2.0.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ede7c438028d4436d71104916910f5bb611972c5cfd7f89b8300a8186e6fada6" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "tempfile" +version = "3.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d31c77bdf42a745371d260a26ca7163f1e0924b64afa0b688e61b5a9fa02f16" +dependencies = [ + "fastrand", + "getrandom", + "once_cell", + "rustix 1.1.2", + "windows-sys 0.61.1", +] + +[[package]] +name = "textwrap" +version = "0.16.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c13547615a44dc9c452a8a534638acdf07120d4b6847c8178705da06306a3057" + +[[package]] +name = "thiserror" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tinytemplate" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" +dependencies = [ + "serde", + "serde_json", +] + +[[package]] +name = "tokio" +version = "1.47.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89e49afdadebb872d3145a5638b59eb0691ea23e46ca484037cfab3b76b95038" +dependencies = [ + "backtrace", + "bytes", + "io-uring", + "libc", + "mio", + "parking_lot", + "pin-project-lite", + "signal-hook-registry", + "slab", + "socket2", + "tokio-macros", + "windows-sys 0.59.0", +] + +[[package]] +name = "tokio-macros" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "unicode-ident" +version = "1.0.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f63a545481291138910575129486daeaf8ac54aee4387fe7906919f7830c7d9d" + +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + +[[package]] +name = "walkdir" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" +dependencies = [ + "same-file", + "winapi-util", +] + +[[package]] +name = "wasi" +version = "0.11.1+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" + +[[package]] +name = "wasi" +version = "0.14.7+wasi-0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "883478de20367e224c0090af9cf5f9fa85bed63a95c1abf3afc5c083ebc06e8c" +dependencies = [ + "wasip2", +] + +[[package]] +name = "wasip2" +version = "1.0.1+wasi-0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0562428422c63773dad2c345a1882263bbf4d65cf3f42e90921f787ef5ad58e7" +dependencies = [ + "wit-bindgen", +] + +[[package]] +name = "wasm-bindgen" +version = "0.2.104" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1da10c01ae9f1ae40cbfac0bac3b1e724b320abfcf52229f80b547c0d250e2d" +dependencies = [ + "cfg-if", + "once_cell", + "rustversion", + "wasm-bindgen-macro", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.104" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "671c9a5a66f49d8a47345ab942e2cb93c7d1d0339065d4f8139c486121b43b19" +dependencies = [ + "bumpalo", + "log", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.104" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ca60477e4c59f5f2986c50191cd972e3a50d8a95603bc9434501cf156a9a119" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.104" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f07d2f20d4da7b26400c9f4a0511e6e0345b040694e8a75bd41d578fa4421d7" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.104" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bad67dc8b2a1a6e5448428adec4c3e84c43e561d8c9ee8a9e5aabeb193ec41d1" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "web-sys" +version = "0.3.81" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9367c417a924a74cae129e6a2ae3b47fabb1f8995595ab474029da749a8be120" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "which" +version = "4.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87ba24419a2078cd2b0f2ede2691b6c66d8e47836da3b6db8265ebad47afbfc7" +dependencies = [ + "either", + "home", + "once_cell", + "rustix 0.38.44", +] + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-util" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" +dependencies = [ + "windows-sys 0.61.1", +] + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows-link" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "45e46c0661abb7180e7b9c281db115305d49ca1709ab8242adf09666d2173c65" + +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-sys" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" +dependencies = [ + "windows-targets 0.53.4", +] + +[[package]] +name = "windows-sys" +version = "0.61.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f109e41dd4a3c848907eb83d5a42ea98b3769495597450cf6d153507b166f0f" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm 0.52.6", + "windows_aarch64_msvc 0.52.6", + "windows_i686_gnu 0.52.6", + "windows_i686_gnullvm 0.52.6", + "windows_i686_msvc 0.52.6", + "windows_x86_64_gnu 0.52.6", + "windows_x86_64_gnullvm 0.52.6", + "windows_x86_64_msvc 0.52.6", +] + +[[package]] +name = "windows-targets" +version = "0.53.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d42b7b7f66d2a06854650af09cfdf8713e427a439c97ad65a6375318033ac4b" +dependencies = [ + "windows-link", + "windows_aarch64_gnullvm 0.53.0", + "windows_aarch64_msvc 0.53.0", + "windows_i686_gnu 0.53.0", + "windows_i686_gnullvm 0.53.0", + "windows_i686_msvc 0.53.0", + "windows_x86_64_gnu 0.53.0", + "windows_x86_64_gnullvm 0.53.0", + "windows_x86_64_msvc 0.53.0", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86b8d5f90ddd19cb4a147a5fa63ca848db3df085e25fee3cc10b39b6eebae764" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7651a1f62a11b8cbd5e0d42526e55f2c99886c77e007179efff86c2b137e66c" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnu" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1dc67659d35f387f5f6c479dc4e28f1d4bb90ddd1a5d3da2e5d97b42d6272c3" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ce6ccbdedbf6d6354471319e781c0dfef054c81fbc7cf83f338a4296c0cae11" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_i686_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "581fee95406bb13382d2f65cd4a908ca7b1e4c2f1917f143ba16efe98a589b5d" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e55b5ac9ea33f2fc1716d1742db15574fd6fc8dadc51caab1c16a3d3b4190ba" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0a6e035dd0599267ce1ee132e51c27dd29437f63325753051e71dd9e42406c57" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486" + +[[package]] +name = "wit-bindgen" +version = "0.46.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59" + +[[package]] +name = "zerocopy" +version = "0.8.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0894878a5fa3edfd6da3f88c4805f4c8558e2b996227a3d864f47fe11e38282c" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.8.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88d2b8d9c68ad2b9e4340d7832716a4d21a22a1154777ad56ea55c51a9cf3831" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] diff --git a/Cargo.toml b/Cargo.toml index e6d0e1c..32cac14 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,7 +6,7 @@ edition = "2021" [profile.release] debug = 1 opt-level = 3 -lto = true +lto = false [lib] name = "nccl_profiler" @@ -31,6 +31,7 @@ criterion = { version = "0.4", features = ["html_reports"] } clap = { version = "4.5.37", features = ["derive"] } crossbeam = "0.8.4" env_logger = "0.11.5" +gethostname = "1.0.2" histogram = "0.11.2" libc = "0.2.169" libloading = "0.8.6" diff --git a/src/cloud_daemon.rs b/src/cloud_daemon.rs index 16d0435..58bde7e 100644 --- a/src/cloud_daemon.rs +++ b/src/cloud_daemon.rs @@ -201,7 +201,17 @@ async fn exporter( mut rx: mpsc::Receiver, ) -> std::io::Result<()> { let mut latency_file = if let Some(template) = profiler.config.latency_file.as_ref() { - let path = template.replace("%p", &format!("{}", profiler.pid)); + let mut path = template.replace("%p", &format!("{}", profiler.pid)); + if path.contains("%h") { + match gethostname::gethostname().into_string() { + Ok(hostname) => { + path = path.replace("%h", &hostname); + }, + Err(hostname) => { + error!("Failed to convert the host name {:?} into a string.", hostname); + }, + } + } build_bufwriter(path).await } else { None @@ -375,7 +385,7 @@ mod tests { let latency_template = format!("{}/latency-%p.txt", temp_dir_path); let summary_template = format!("{}/summary-%p.txt", temp_dir_path); // create a mock Profiler object - let mut profiler = Profiler::new(Version::V1); + let mut profiler = Profiler::new(Version::V1, None); profiler.pid = pid; profiler.config.track_group = true; profiler.config.track_ncclop = true; @@ -394,6 +404,7 @@ mod tests { Instant::now(), /* id = */ 0, /* comm_hash_override = */ None, + /* phase = */ 0, )); let proxyop_descr_casted = unsafe { proxyop_descr.cast_to_proxyop() }; let proxyops: Vec<_> = (0..N_PROXYOP) @@ -418,6 +429,7 @@ mod tests { start_time: 123, fifo_wait_dur_ns: None, dur_ns: 256, + phase: 0, }); thread_state .send_to_daemon(Message::StepBatch(proxyop.clone(), step_batch, true), true); @@ -468,7 +480,7 @@ mod tests { { let _lg = NCCLOP_TEST_MUTEX.lock().unwrap(); - let profiler = Box::new(Profiler::new(Version::V1)); + let profiler = Box::new(Profiler::new(Version::V1, None)); std::thread::scope(|s| { let (tx, rx) = mpsc::channel::(n_ncclop); @@ -507,6 +519,7 @@ mod tests { Instant::now(), op_idx, /* comm_hash_override= */ None, + /* phase = */ 0, )); thread_state.send_to_daemon(Message::NcclOp(coll), true); } @@ -530,6 +543,7 @@ mod tests { Instant::now() - NCCLOP_TIMEOUT, op_idx, /* comm_hash_override= */ None, + /* phase = */ 0, )); thread_state.send_to_daemon(Message::NcclOp(coll), true); } @@ -559,6 +573,7 @@ mod tests { Instant::now() + Duration::from_secs(3600), op_idx, /* comm_hash_override= */ None, + /* phase = */ 0, )); thread_state.send_to_daemon(Message::NcclOp(coll), true); } diff --git a/src/config.rs b/src/config.rs index 74c4c02..b20c3e3 100644 --- a/src/config.rs +++ b/src/config.rs @@ -20,6 +20,32 @@ use std::str::FromStr; use std::sync::LazyLock; use std::time::Duration; +/// Get the auto-generated pool name based on PGID and rank +pub fn auto_pool_name(rank: Option) -> String { + // Priority 1: Explicit override (testing, debugging) + if let Ok(name) = std::env::var("NCCL_PROFILER_TELEMETRY_POOL_NAME") { + return name; + } + + // Priority 2: Job ID (if launcher sets it) + if let Ok(job_id) = std::env::var("NCCL_PROFILER_JOB_ID") { + if let Some(r) = rank { + return format!("/nccl_telemetry_{}_r{}", job_id, r); + } else { + return format!("/nccl_telemetry_{}", job_id); + } + } + + // Priority 3: Auto-detect PGID (most common case) + let pgid = unsafe { libc::getpgid(0) }; + if let Some(r) = rank { + format!("/nccl_telemetry_j{}_r{}", pgid, r) + } else { + // Fallback if rank not known yet (shouldn't happen) + format!("/nccl_telemetry_j{}", pgid) + } +} + pub static CONFIG: LazyLock = LazyLock::new(Config::from_env); macro_rules! field_from_env { @@ -78,6 +104,12 @@ pub struct Config { // Telemetry uploading config pub gpuviz_lib: String, // copybara:strip(gpuviz) pub telemetry_mode: usize, + + // PhaseScope accounting + pub enable_phase_scope: bool, + pub telemetry_pool_name: Option, + pub telemetry_pool_capacity: usize, + pub debug_phase_tracking: bool, } impl Config { @@ -117,6 +149,12 @@ impl Config { // copybara:strip_end field_from_env!(s, "NCCL_TELEMETRY_MODE", telemetry_mode, 3); + // PhaseScope telemetry + field_from_env!(s, enable_phase_scope, false); + field_from_env!(s, telemetry_pool_name); + field_from_env!(s, telemetry_pool_capacity, 1024); + field_from_env!(s, debug_phase_tracking, false); + s } } diff --git a/src/daemon.rs b/src/daemon.rs index 0c8569a..3cff4f9 100644 --- a/src/daemon.rs +++ b/src/daemon.rs @@ -197,6 +197,12 @@ impl<'a> PollingContext<'a> { } fn reclaim_ncclop(&mut self, op: event::NcclOp) { + // Account NcclOp in phase metrics + // At this point ProxyOps are attached because reclaim happens after queue draining + if let Some(ref tracker) = self.profiler.phase_scope_tracker { + tracker.account_ncclop(&op); + } + self.pending_telemetry .push_back(Telemetry::NcclOp(Box::new(op))); } @@ -371,11 +377,12 @@ impl<'a> PollingContext<'a> { ) { let config = &self.profiler.config; let record_proxyop = config.track_proxyop || config.track_steps; + let attach_to_ncclop = record_proxyop || config.enable_phase_scope; if let Some(parent_handle) = info.parent() { if info.pid == self.profiler.pid { if let Some(ncclop) = self.get_ncclop(parent_handle) { ncclop_update(ncclop, end_time, Some(end_time)); - if record_proxyop { + if attach_to_ncclop { for p in proxyops { ncclop.add_proxyop(*p); } @@ -407,7 +414,8 @@ impl<'a> PollingContext<'a> { Message::NcclOp(op) => { let id = op.id(); let op = self.free_ncclop.take_and_free(op); - let _ = self.ncclops.insert(id, Box::new(op)); + let boxed_op = Box::new(op); + let _ = self.ncclops.insert(id, boxed_op); if self.free_ncclop.num_free() >= slab::FREELIST_BATCH { self.free_ncclop.try_publish(&self.profiler.free_ncclop); } @@ -607,6 +615,66 @@ fn ipc_shm_path(pid: libc::pid_t) -> String { format!("nccl-profiler-{}", pid) } +/// Scans PhaseMetrics HashMap for phases ready to export: +/// - Ready: (pending_coll == 0 && end_time_ns > 0) || timeout +fn check_and_export_ready_phases(ctx: &mut PollingContext) { + let tracker = match &ctx.profiler.phase_scope_tracker { + Some(t) => t, + None => return, + }; + + let now_ns = std::time::SystemTime::now() + .duration_since(std::time::SystemTime::UNIX_EPOCH) + .unwrap_or_default() + .as_nanos() as u64; + + let timeout_ns = ctx.profiler.config.ncclop_timeout.as_nanos() as u64; + + let mut ready_phases = Vec::new(); + + // Scan HashMap for ready phases + { + let map = tracker.phase_metrics.read().unwrap(); + for (phase_id, metrics) in map.iter() { + let end_time_ns = metrics.end_time_ns.load(std::sync::atomic::Ordering::Acquire); + + // Skip if still OPEN + if end_time_ns == 0 { + continue; + } + + let pending = metrics.pending_coll.load(std::sync::atomic::Ordering::Acquire); + let timeout_reached = now_ns >= end_time_ns + timeout_ns; + + // Ready conditions: all accounted OR timeout + if pending == 0 || timeout_reached { + if timeout_reached && pending > 0 && ctx.profiler.config.debug_phase_tracking { + eprintln!("[WARN] Phase 0x{:x} timeout with {} pending ops (timeout={:?})", + phase_id, pending, ctx.profiler.config.ncclop_timeout); + } + ready_phases.push(*phase_id); + } + } + } + + // Export ready phases + for phase_id in ready_phases { + if let Some(scope) = tracker.finalize_and_remove_phase(phase_id) { + // Export to TelemetryPool + if let Ok(pool_guard) = ctx.profiler.telemetry_pool.lock() { + if let Some(pool) = pool_guard.as_ref() { + if let Err(e) = pool.push(scope) { + eprintln!("[DAEMON] Failed to export PhaseScope 0x{:x}: {}", phase_id, e); + } else if ctx.profiler.config.debug_phase_tracking { + eprintln!("[DAEMON] Exported PhaseScope 0x{:x} (ops={}, transfer_ops={})", + phase_id, scope.nccl_op_count, scope.transfer_op_count); + } + } + } + } + } +} + const FIFO_FETCH_INTERVAL: Duration = Duration::from_secs(1); const FIFO_PROCESS_BATCH: usize = 512; const FIFO_RECV_BATCH: usize = profiler::EVENT_QUEUE_SZ; @@ -635,6 +703,9 @@ where let ncclop_timeout = ctx.profiler.config.ncclop_timeout; let ncclop_comp_delay = ctx.profiler.config.ncclop_completion_delay; + let mut check_counter = 0u64; + const CHECK_INTERVAL: u64 = 10; // Check every 10 iterations (~1ms) + while !ctx.stop.load(Ordering::Acquire) { while let Some(ctrl_msg) = ctx.profiler.ctrl_fifo.pop() { match ctrl_msg { @@ -710,6 +781,16 @@ where ctx.free_proxyop.try_publish(&ctx.profiler.free_proxyop); } + // Drain queues before reclaiming NcclOps to ensure ProxyOps are attached, required + // for PhaseScope accounting to work. account_ncclop() in reclaim_ncclop() + // needs ProxyOps for metric calculation + for thread in threads.iter_mut() { + thread.fifo.recv_many(FIFO_FETCH_INTERVAL, usize::MAX, false); + thread.fifo.process_many(usize::MAX, |msg| { + ctx.handle_fifo_message(msg, &mut thread.daemon_state); + }); + } + let mut n_processed = 0; let mut ncclops = std::mem::take(&mut ctx.ncclops); try_reclaim_ncclop( @@ -746,6 +827,13 @@ where } ctx.ncclops = ncclops; + // Periodic check for ready PhaseScopes + check_counter += 1; + if check_counter >= CHECK_INTERVAL { + check_counter = 0; + check_and_export_ready_phases(ctx); + } + exporter.export(ctx, None) } diff --git a/src/event.rs b/src/event.rs index f8c6dde..15256dd 100644 --- a/src/event.rs +++ b/src/event.rs @@ -128,7 +128,8 @@ pub struct NcclOp { child_start_time: Option, comm_hash: Option, // starting from v4 comm_hash is no longer part of the event descriptor descr: nccl_metadata::EventMetadata, - proxyops: Option>, + pub proxyops: Option>, // Public for PhaseScope accounting + phase: u64, // Current Phase tag captured at NcclOp creation time } #[derive(Debug, Clone)] @@ -183,7 +184,7 @@ pub struct ProxyOp { step_histograms: Vec>>, track_steps: bool, aggregate_steps: bool, - steps: Option>, + pub steps: Option>, // Public for PhaseScope accounting } #[derive(Debug, Clone)] @@ -243,6 +244,7 @@ impl NcclOp { time: Instant, id: usize, comm_hash_override: Option, + phase: u64, ) -> Self where E: nccl_metadata::Event, @@ -260,6 +262,7 @@ impl NcclOp { comm_hash: comm_hash_override, descr: descr.clone_to_metadata(), proxyops: None, + phase, } } @@ -267,6 +270,10 @@ impl NcclOp { self.id } + pub fn phase(&self) -> u64 { + self.phase + } + pub fn _get_descr(&self) -> &nccl_metadata::EventMetadata { &self.descr } @@ -592,6 +599,9 @@ impl ProxyStep { start_time: time_to_ns(&start_time), fifo_wait_dur_ns, dur_ns: (self.end_time.unwrap() - net_start_time).as_nanos() as _, + // V4 API path doesn't support phase tracking + // let's hope some day it will: https://github.com/NVIDIA/nccl/issues/1916 + phase: 0, } } } diff --git a/src/lib.rs b/src/lib.rs index 633f1fd..12f0ac8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -22,12 +22,16 @@ mod fixed_batch; mod gpuviz; mod histogram; mod nccl_metadata; +pub mod phase_api; +pub mod phase_scope; mod profiler; pub mod profiler_shim; mod shm_fifo; mod slab; mod spsc; -mod step_tracker; +pub mod step_tracker; +pub mod telemetry_ffi; +pub mod telemetry_pool; use std::sync::OnceLock; diff --git a/src/phase_api.rs b/src/phase_api.rs new file mode 100644 index 0000000..45937d7 --- /dev/null +++ b/src/phase_api.rs @@ -0,0 +1,160 @@ +//! Phase notification API for training code +//! +//! This module provides C FFI functions for training code to signal phase transitions +//! to the CoMMA profiler. These APIs enable phase-aware telemetry aggregation. +//! +//! # API Functions +//! +//! - `ncclProfilerBeginPhase(encoded_phase)` - Signal start of training phase +//! - `ncclProfilerEndPhase(encoded_phase)` - Signal end of training phase +//! +//! # Usage Example (C) +//! +//! ```c +//! // Training code signals phase transitions with opaque phase tags +//! // User can encode any information (e.g., step number, phase type) +//! uint64_t phase_tag = (step << 32) | phase_type; +//! ncclProfilerBeginPhase(phase_tag); +//! // ... forward pass NCCL operations ... +//! ncclProfilerEndPhase(phase_tag); +//! ``` +//! +//! # Python Usage +//! +//! ```python +//! from nccl_telemetry import encode_phase # Helper function (optional) +//! +//! # User can encode phase tags however they want +//! phase_tag = encode_phase(step=42, phase=1) # One suggested encoding +//! ncclProfilerBeginPhase(phase_tag) +//! # ... forward pass ... +//! ncclProfilerEndPhase(phase_tag) +//! ``` + +use crate::profiler_shim::ncclResult_t; +use std::sync::OnceLock; + +/// Global profiler reference for phase API +static PROFILER: OnceLock<&'static crate::profiler::Profiler> = OnceLock::new(); + +/// Initialize phase API with profiler reference +/// +/// This is called internally by profiler init handlers +pub fn init_phase_api(profiler: &'static crate::profiler::Profiler) { + let _ = PROFILER.set(profiler); +} + +/// Begin a training phase +/// +/// # Arguments +/// - `encoded_phase` - Opaque phase tag (interpretation is up to the user) +/// +/// # Returns +/// - ncclSuccess on success +/// - ncclInternalError if phase tracking not enabled or invalid phase (0 reserved) +/// +/// # Safety +/// This function is safe to call from any thread +#[no_mangle] +pub extern "C" fn ncclProfilerBeginPhase(encoded_phase: u64) -> ncclResult_t { + let profiler = match PROFILER.get() { + Some(p) => p, + None => { + eprintln!("ncclProfilerBeginPhase: Profiler not initialized"); + return crate::profiler_shim::ncclResult_t_ncclInternalError; + } + }; + + if !profiler.config.enable_phase_scope { + eprintln!("ncclProfilerBeginPhase: Phase tracking not enabled (set NCCL_PROFILER_ENABLE_PHASE_SCOPE=true)"); + return crate::profiler_shim::ncclResult_t_ncclInternalError; + } + + let tracker = match &profiler.phase_scope_tracker { + Some(t) => t, + None => { + eprintln!("ncclProfilerBeginPhase: PhaseScope tracker not initialized"); + return crate::profiler_shim::ncclResult_t_ncclInternalError; + } + }; + + match tracker.begin_phase(encoded_phase) { + Ok(_) => crate::profiler_shim::ncclResult_t_ncclSuccess, + Err(e) => { + eprintln!("ncclProfilerBeginPhase: {}", e); + crate::profiler_shim::ncclResult_t_ncclInvalidArgument + } + } +} + +/// End a training phase by marking it CLOSED +/// +/// # Arguments +/// - `encoded_phase` - Opaque phase tag (must match begin_phase call) +/// +/// # Returns +/// - ncclSuccess on success +/// - ncclInternalError if phase tracking not enabled +/// +/// # Safety +/// This function is safe to call from any thread +/// +/// This function marks the phase as CLOSED by setting end_time_ns in PhaseMetrics. +/// The daemon will periodically scan for ready phases and export them when: +/// 1. pending_coll == 0 (all NcclOps accounted) +/// 2. OR timeout reached (safety mechanism) +#[no_mangle] +pub extern "C" fn ncclProfilerEndPhase(encoded_phase: u64) -> ncclResult_t { + let profiler = match PROFILER.get() { + Some(p) => p, + None => { + eprintln!("ncclProfilerEndPhase: Profiler not initialized"); + return crate::profiler_shim::ncclResult_t_ncclInternalError; + } + }; + + if !profiler.config.enable_phase_scope { + eprintln!("ncclProfilerEndPhase: Phase tracking not enabled"); + return crate::profiler_shim::ncclResult_t_ncclInternalError; + } + + if let Some(ref tracker) = profiler.phase_scope_tracker { + // Capture end timestamp + let end_ns = std::time::SystemTime::now() + .duration_since(std::time::SystemTime::UNIX_EPOCH) + .unwrap_or_default() + .as_nanos() as u64; + + // Mark phase as CLOSED + let pending = tracker.mark_phase_closed(encoded_phase, end_ns); + + if profiler.config.debug_phase_tracking { + let rank_str = match profiler.rank { + Some(r) => format!("R{}", r), + None => "R?".to_string(), + }; + eprintln!("[PHASE {}] end_phase(0x{:x}): Marked CLOSED (pending={})", + rank_str, encoded_phase, pending); + } + } + + crate::profiler_shim::ncclResult_t_ncclSuccess +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_api_not_initialized() { + // Test that API returns error when profiler not initialized + let phase_tag = 100; // Opaque phase tag + + // These should fail gracefully with error messages + let result = ncclProfilerBeginPhase(phase_tag); + assert_eq!(result, crate::profiler_shim::ncclResult_t_ncclInternalError); + + let result = ncclProfilerEndPhase(phase_tag); + assert_eq!(result, crate::profiler_shim::ncclResult_t_ncclInternalError); + } +} diff --git a/src/phase_scope.rs b/src/phase_scope.rs new file mode 100644 index 0000000..d22ad46 --- /dev/null +++ b/src/phase_scope.rs @@ -0,0 +1,487 @@ +//! Phase-aware telemetry aggregation for NCCL operations +//! +//! # Design +//! +//! - Phase field is an opaque u64 value - interpretation is up to the user +//! - PhaseScope tracks all NCCL events with the same phase tag +//! - Finalized scopes include start/end timestamps and aggregated metrics +//! - TelemetryPool provides pull API for training code to export metrics +//! +//! # Phase Tag +//! +//! The phase field is an opaque u64 value. CoMMA does not interpret its meaning - +//! users can encode any information they need (e.g., step number, phase type, job ID, etc.). +//! +//! # Usage +//! +//! ```rust,no_run +//! use nccl_profiler::phase_scope::PhaseScopeTracker; +//! +//! let tracker = PhaseScopeTracker::new(); +//! +//! // User defines their own phase encoding (example: step in upper 32 bits) +//! let phase_tag = ((42u64) << 32) | 1; // e.g., step=42, phase=1 +//! tracker.begin_phase(phase_tag).unwrap(); +//! +//! // ... NCCL operations happen ... +//! +//! // End phase via FFI: ncclProfilerEndPhase() calls mark_phase_closed() +//! // Daemon exports via finalize_and_remove_phase() when ready +//! ``` + +use crate::event; +use std::collections::HashMap; +use std::sync::atomic::{AtomicU64, Ordering}; +use std::sync::{Arc, RwLock}; +use std::time::SystemTime; + +// Per-phase metrics accumulation + +/// Per-phase accumulated metrics +/// +/// NcclOp-based accounting only, no EventStep accounting +/// Reference counting for correct PhaseScope attribution +/// - start_time_ns: Immutable start timestamp +/// - end_time_ns: Phase state (0 = OPEN, non-zero = CLOSED) +/// - pending_coll: Reference counter (incremented on NcclOp creation, decremented on account_ncclop) +// - all fields are u64 +#[derive(Debug)] +pub struct PhaseMetrics { + /// Phase start timestamp (nanoseconds since UNIX epoch, immutable) + pub start_time_ns: u64, + + /// Phase end timestamp (nanoseconds since UNIX epoch) + /// State marker: 0 = OPEN (phase active), non-zero = CLOSED (endPhase called) + pub end_time_ns: AtomicU64, + + /// Pending NcclOp reference counter + /// Incremented: when NcclOp is created with this phase tag + /// Decremented: when account_ncclop() completes for this phase + /// Ready to export when: pending_coll == 0 AND end_time_ns > 0 + pub pending_coll: AtomicU64, + + /// Total end-to-end latency (network + FIFO wait) in nanoseconds + pub e2e_latency_sum_ns: AtomicU64, + + /// Network-only latency (excludes FIFO wait) in nanoseconds + pub net_latency_sum_ns: AtomicU64, + + /// Total bytes transferred + pub bytes_transferred: AtomicU64, + + /// Number of low-level transfer operations (chunks) + pub transfer_op_count: AtomicU64, + + /// Number of NCCL collective operations + pub nccl_op_count: AtomicU64, +} + +impl PhaseMetrics { + pub fn new(start_time_ns: u64) -> Self { + Self { + start_time_ns, + end_time_ns: AtomicU64::new(0), // OPEN state + pending_coll: AtomicU64::new(0), + e2e_latency_sum_ns: AtomicU64::new(0), + net_latency_sum_ns: AtomicU64::new(0), + bytes_transferred: AtomicU64::new(0), + transfer_op_count: AtomicU64::new(0), + nccl_op_count: AtomicU64::new(0), + } + } + + /// Reset all metrics and return snapshot + pub fn reset(&self) -> PhaseMetricsSnapshot { + PhaseMetricsSnapshot { + e2e_latency_sum_ns: self.e2e_latency_sum_ns.swap(0, Ordering::AcqRel), + net_latency_sum_ns: self.net_latency_sum_ns.swap(0, Ordering::AcqRel), + bytes_transferred: self.bytes_transferred.swap(0, Ordering::AcqRel), + transfer_op_count: self.transfer_op_count.swap(0, Ordering::AcqRel), + nccl_op_count: self.nccl_op_count.swap(0, Ordering::AcqRel), + } + } + + /// Read current metrics without resetting + pub fn read(&self) -> PhaseMetricsSnapshot { + PhaseMetricsSnapshot { + e2e_latency_sum_ns: self.e2e_latency_sum_ns.load(Ordering::Acquire), + net_latency_sum_ns: self.net_latency_sum_ns.load(Ordering::Acquire), + bytes_transferred: self.bytes_transferred.load(Ordering::Acquire), + transfer_op_count: self.transfer_op_count.load(Ordering::Acquire), + nccl_op_count: self.nccl_op_count.load(Ordering::Acquire), + } + } +} + +/// Snapshot of phase metrics at a point in time +#[derive(Debug, Clone, Copy, Default)] +#[repr(C)] +pub struct PhaseMetricsSnapshot { + pub e2e_latency_sum_ns: u64, + pub net_latency_sum_ns: u64, + pub bytes_transferred: u64, + pub transfer_op_count: u64, + pub nccl_op_count: u64, +} + +impl PhaseMetricsSnapshot { + pub fn avg_e2e_latency_ns(&self) -> u64 { + if self.nccl_op_count == 0 { + 0 + } else { + self.e2e_latency_sum_ns / self.nccl_op_count + } + } + + pub fn avg_net_latency_ns(&self) -> u64 { + if self.nccl_op_count == 0 { + 0 + } else { + self.net_latency_sum_ns / self.nccl_op_count + } + } + + pub fn avg_chunk_size(&self) -> u64 { + if self.nccl_op_count == 0 { + 0 + } else { + self.bytes_transferred / self.nccl_op_count + } + } +} + +// PhaseScope: Finalized phase with timestamps + +/// Finalized PhaseScope with start/end timestamps and aggregated metrics +/// +/// This structure represents a completed phase with all NCCL operations aggregated. +/// It includes timestamps to measure phase duration. +/// +/// # C FFI Compatibility +/// +/// This structure is #[repr(C)] for shared memory export via TelemetryPool. +#[derive(Debug, Clone, Copy)] +#[repr(C)] +pub struct PhaseScope { + /// Opaque phase tag (interpretation is up to the user) + pub phase: u64, + + /// Phase start timestamp (nanoseconds since epoch) + pub start_time_ns: u64, + + /// Phase end timestamp (nanoseconds since epoch) + pub end_time_ns: u64, + + /// Total end-to-end latency sum (nanoseconds) + pub e2e_latency_sum_ns: u64, + + /// Network-only latency sum (nanoseconds) + pub net_latency_sum_ns: u64, + + /// Total bytes transferred + pub bytes_transferred: u64, + + /// Number of low-level transfer operations (chunks) + pub transfer_op_count: u64, + + /// Number of NCCL collective operations + pub nccl_op_count: u64, +} + +impl PhaseScope { + /// Get phase duration in nanoseconds + pub fn duration_ns(&self) -> u64 { + self.end_time_ns.saturating_sub(self.start_time_ns) + } + + /// Get phase duration in milliseconds + pub fn duration_ms(&self) -> f64 { + self.duration_ns() as f64 / 1_000_000.0 + } + + /// Get average latency per operation (microseconds) + pub fn avg_latency_us(&self) -> f64 { + if self.nccl_op_count == 0 { + 0.0 + } else { + (self.e2e_latency_sum_ns as f64 / self.nccl_op_count as f64) / 1000.0 + } + } + + /// Get bandwidth in GB/s + pub fn bandwidth_gbps(&self) -> f64 { + let duration_s = self.duration_ns() as f64 / 1_000_000_000.0; + if duration_s > 0.0 { + (self.bytes_transferred as f64 / duration_s) / (1024.0_f64.powi(3)) + } else { + 0.0 + } + } +} + +// PhaseScope tracker with timestamp capture + +/// Global phase scope tracker with timestamp capture +/// +/// This tracker captures metrics for the currently active phase and provides +/// PhaseScope snapshots when phases end. The phase field is treated as opaque u64. +#[derive(Debug)] +pub struct PhaseScopeTracker { + /// Currently active phase (opaque u64 value) + /// 0 = no active phase + current_phase: AtomicU64, + + /// Start timestamp for active phase (nanoseconds since epoch) + /// Only valid when current_phase != 0 + start_time_ns: AtomicU64, + + /// Metrics accumulation for all active phases (key = phase tag) + pub phase_metrics: RwLock>>, + + /// NCCL rank for debug logging (None if rank not yet known) + rank: Option, +} + +impl PhaseScopeTracker { + pub fn new() -> Self { + Self { + current_phase: AtomicU64::new(0), + start_time_ns: AtomicU64::new(0), + phase_metrics: RwLock::new(HashMap::new()), + rank: None, + } + } + + pub fn new_with_rank(rank: Option) -> Self { + Self { + current_phase: AtomicU64::new(0), + start_time_ns: AtomicU64::new(0), + phase_metrics: RwLock::new(HashMap::new()), + rank, + } + } + + /// Lookup metrics for a phase without creating (returns None if not found) + fn try_get_metrics_for_phase(&self, phase: u64) -> Option> { + let map = self.phase_metrics.read().unwrap(); + map.get(&phase).cloned() + } + + /// Begin a new phase + /// + /// # Arguments + /// * `phase` - Opaque phase tag, no assumption about encoding + /// + /// # Returns + /// - Ok(()) on success + /// - Err if phase is 0 (reserved for "no active phase") + pub fn begin_phase(&self, phase: u64) -> Result<(), &'static str> { + if phase == 0 { + return Err("Phase tag 0 is reserved (no active phase)"); + } + + // Capture start timestamp (nanoseconds since UNIX epoch) + let start_ns = SystemTime::now() + .duration_since(SystemTime::UNIX_EPOCH) + .unwrap_or_default() + .as_nanos() as u64; + + // Create metrics entry for this phase under exclusive lock + { + let mut map = self.phase_metrics.write().unwrap(); + if map.contains_key(&phase) { + if let Some(r) = self.rank { + eprintln!("[WARN R{}] Phase 0x{:x} collision: reused before export", r, phase); + } + } + map.insert(phase, Arc::new(PhaseMetrics::new(start_ns))); + } + + // Store with Release ordering to ensure all previous writes are visible + self.current_phase.store(phase, Ordering::Release); + self.start_time_ns.store(start_ns, Ordering::Release); + + Ok(()) + } + + /// Get current active encoded phase (0 if none) + pub fn current_phase(&self) -> u64 { + self.current_phase.load(Ordering::Acquire) + } + + /// Get start timestamp of current phase (0 if none) + pub fn start_time_ns(&self) -> u64 { + self.start_time_ns.load(Ordering::Acquire) + } + + /// Increment pending NcclOp counter for a phase + /// + /// Called when NcclOp is created with this phase tag. + /// Must be balanced by account_ncclop() decrement! + pub fn increment_pending(&self, phase: u64) { + if let Some(metrics) = self.try_get_metrics_for_phase(phase) { + metrics.pending_coll.fetch_add(1, Ordering::Relaxed); + } + // If phase not found, it was already exported - skip silently + } + + /// Called by ncclProfilerEndPhase() to transition phase from OPEN to CLOSED. + /// Returns the current pending_coll value for debug logging (0 if phase not found). + pub fn mark_phase_closed(&self, phase: u64, end_time_ns: u64) -> u64 { + match self.try_get_metrics_for_phase(phase) { + Some(metrics) => { + metrics.end_time_ns.store(end_time_ns, Ordering::Release); + metrics.pending_coll.load(Ordering::Acquire) + } + None => 0, // Phase already exported by timeout + } + } + + /// Account for a completed NcclOp by aggregating all child ProxyOp and EventStep metrics + /// + /// This implements hierarchical accounting where NcclOp phase tag determines which + /// PhaseMetrics to update. All ProxyOp and EventStep metrics are summed locally, + /// then applied as a single batched atomic update. + /// + pub fn account_ncclop(&self, op: &event::NcclOp) { + let phase = op.phase(); + + // Skip if no phase tracking + if phase == 0 { + return; + } + + // Local accumulators (no atomics in loop) + let mut total_e2e_latency_ns = 0u64; + let mut total_net_latency_ns = 0u64; + let mut total_bytes = 0u64; + let mut total_steps = 0u64; + + // Sum all ProxyOp metrics + if let Some(proxyops) = op.proxyops.as_ref() { + for proxyop in proxyops { + if let Some(steps) = proxyop.steps.as_ref() { + for step in steps { + let e2e_ns = step.dur_ns as u64 + step.fifo_wait_dur_ns.unwrap_or(0) as u64; + let net_ns = step.dur_ns as u64; + + total_e2e_latency_ns += e2e_ns; + total_net_latency_ns += net_ns; + total_bytes += step.size as u64; + total_steps += 1; + } + } + } + } + + // Lookup metrics for this phase (don't create if already exported) + let metrics = match self.try_get_metrics_for_phase(phase) { + Some(m) => m, + None => { + // Phase was already exported by timeout - drop late operation + return; + } + }; + + metrics.e2e_latency_sum_ns.fetch_add(total_e2e_latency_ns, Ordering::Relaxed); + metrics.net_latency_sum_ns.fetch_add(total_net_latency_ns, Ordering::Relaxed); + metrics.bytes_transferred.fetch_add(total_bytes, Ordering::Relaxed); + metrics.transfer_op_count.fetch_add(total_steps, Ordering::Relaxed); + metrics.nccl_op_count.fetch_add(1, Ordering::Relaxed); + + // This balances the increment from NcclOp creation, see increment_pending() users + let prev = metrics.pending_coll.fetch_sub(1, Ordering::Release); + + // Sanity check: detect underflow + if prev == 0 { + if let Some(r) = self.rank { + eprintln!("[WARN R{}] Phase 0x{:x}: pending_coll underflow in account_ncclop!", r, phase); + } + } + } + + /// Finalize phase and remove from HashMap + /// + /// Called by daemon when phase is ready to export: + /// - pending_coll == 0 (all NcclOps accounted) + /// - end_time_ns > 0 (phase CLOSED) + /// + /// Returns None if phase not found or still OPEN + pub fn finalize_and_remove_phase(&self, phase: u64) -> Option { + let mut map = self.phase_metrics.write().unwrap(); + + let metrics = map.remove(&phase)?; + let end_time_ns = metrics.end_time_ns.load(Ordering::Acquire); + + // Sanity check: should be CLOSED + if end_time_ns == 0 { + eprintln!("[ERROR] Attempted to finalize OPEN phase 0x{:x}", phase); + return None; + } + + // Snapshot final metrics + Some(PhaseScope { + phase, + start_time_ns: metrics.start_time_ns, + end_time_ns, + e2e_latency_sum_ns: metrics.e2e_latency_sum_ns.load(Ordering::Relaxed), + net_latency_sum_ns: metrics.net_latency_sum_ns.load(Ordering::Relaxed), + bytes_transferred: metrics.bytes_transferred.load(Ordering::Relaxed), + transfer_op_count: metrics.transfer_op_count.load(Ordering::Relaxed), + nccl_op_count: metrics.nccl_op_count.load(Ordering::Relaxed), + }) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_phase_scope_duration() { + let scope = PhaseScope { + phase: 100, // Opaque phase tag + start_time_ns: 1_000_000_000, // 1 second + end_time_ns: 1_500_000_000, // 1.5 seconds + e2e_latency_sum_ns: 0, + net_latency_sum_ns: 0, + bytes_transferred: 0, + transfer_op_count: 0, + nccl_op_count: 0, + }; + + assert_eq!(scope.duration_ns(), 500_000_000); // 500ms + assert_eq!(scope.duration_ms(), 500.0); + } + + #[test] + fn test_tracker_begin_phase() { + let tracker = PhaseScopeTracker::new(); + + assert_eq!(tracker.current_phase(), 0); + + let phase = 100; // Opaque phase tag + tracker.begin_phase(phase).unwrap(); + assert_eq!(tracker.current_phase(), phase); + + // Phase 0 is reserved + assert!(tracker.begin_phase(0).is_err()); + } + + #[test] + fn test_scope_bandwidth_calculation() { + let scope = PhaseScope { + phase: 100, + start_time_ns: 0, + end_time_ns: 1_000_000_000, // 1 second + e2e_latency_sum_ns: 0, + net_latency_sum_ns: 0, + bytes_transferred: 1024 * 1024 * 1024, // 1 GiB + transfer_op_count: 1, + nccl_op_count: 1, + }; + + let bw = scope.bandwidth_gbps(); + assert!((bw - 1.0).abs() < 0.01); // ~1 GB/s + } +} diff --git a/src/profiler.rs b/src/profiler.rs index c4332a3..808a3a8 100644 --- a/src/profiler.rs +++ b/src/profiler.rs @@ -22,10 +22,12 @@ use crate::event_ffi::AsFFI as _; use crate::gpuviz; use crate::nccl_metadata; use crate::nccl_metadata::{Coll as _, Event as _, NcclOp as _, P2p as _, ProxyStep as _}; +use crate::phase_scope::PhaseScopeTracker; use crate::profiler_shim; use crate::slab; use crate::spsc; use crate::step_tracker::StepTracker; +use crate::telemetry_pool::TelemetryPool; use crate::NcclResult; use crossbeam::queue::ArrayQueue; @@ -53,6 +55,7 @@ pub struct Profiler { pub config: config::Config, pub version: Version, pub pid: libc::pid_t, + pub rank: Option, // NCCL rank (for telemetry pool naming) pub init_time: SystemTime, pub init_instant: Instant, pub gpuviz_lib: Option>, // copybara:strip(gpuviz) @@ -64,13 +67,17 @@ pub struct Profiler { pub free_proxyop: slab::AtomicFreeList, pub free_step_batch: slab::AtomicFreeList, pub cached_clock: clock::CachedClock, + + // PhaseScope accounting + pub phase_scope_tracker: Option, + pub telemetry_pool: Mutex>, } pub const EVENT_QUEUE_SZ: usize = 8192; -const CTRL_FIFO_SZ: usize = 256; +const CTRL_FIFO_SZ: usize = 4096; impl Profiler { - pub fn new(version: Version) -> Self { + pub fn new(version: Version, rank: Option) -> Self { let config = &*config::CONFIG; let init_time = SystemTime::now(); let init_instant = Instant::now(); @@ -78,6 +85,7 @@ impl Profiler { config: config.clone(), version, pid: unsafe { libc::getpid() }, + rank, init_time, init_instant, // copybara:strip_begin(gpuviz) @@ -103,6 +111,14 @@ impl Profiler { free_proxyop: slab::AtomicFreeList::default(), free_step_batch: slab::AtomicFreeList::default(), cached_clock: clock::CachedClock::new(init_instant), + + // Phase-aware telemetry accounting + phase_scope_tracker: if config.enable_phase_scope { + Some(PhaseScopeTracker::new_with_rank(rank)) + } else { + None + }, + telemetry_pool: Mutex::new(None), } } @@ -136,6 +152,24 @@ impl Profiler { let mut lg = self.daemon.lock().unwrap(); let daemon = daemon::Daemon::new(self); *lg = Some(daemon); + + // Initialize TelemetryPool if phase tracking enabled + if self.config.enable_phase_scope { + // Use auto-generated pool name based on PGID + rank + let pool_name = config::auto_pool_name(self.rank); + + match TelemetryPool::create(&pool_name, self.config.telemetry_pool_capacity as u64) { + Ok(pool) => { + let mut pool_lg = self.telemetry_pool.lock().unwrap(); + *pool_lg = Some(pool); + log::info!("TelemetryPool initialized: name={}, capacity={}, rank={:?}", + pool_name, self.config.telemetry_pool_capacity, self.rank); + } + Err(e) => { + log::error!("Failed to create TelemetryPool '{}': {}", pool_name, e); + } + } + } } pub fn join_daemon(&'static self) { @@ -148,9 +182,13 @@ impl Profiler { } pub fn register_thread(&self, thread_ctrl: daemon::ThreadControl) { - self.ctrl_fifo + if self + .ctrl_fifo .push(daemon::ControlMessage::NewThread(thread_ctrl)) - .unwrap(); + .is_err() + { + log::error!("ctrl_fifo full (capacity {}), thread registration dropped", CTRL_FIFO_SZ); + } } pub fn init_thread_state(&'static self) -> Box> { @@ -197,7 +235,9 @@ where { THREAD_STATE.with_borrow_mut(|state| { if state.is_none() { - let profiler = PROFILER.get().unwrap(); + let profiler = PROFILER.get().expect( + "PROFILER not initialized: event handler called before profiler_init completed" + ); *state = Some(profiler.init_thread_state()) } f(state.as_mut().unwrap()) @@ -278,7 +318,21 @@ impl ThreadLocalState<'_> { E: nccl_metadata::Event, { let id = self.profiler.ncclop_cnt.fetch_add(1, Ordering::Relaxed); - let op = event::NcclOp::from_descr(descr, time, id as _, comm_hash_override); + // Capture current phase for this NcclOp + let phase = if let Some(ref tracker) = self.profiler.phase_scope_tracker { + tracker.current_phase() + } else { + 0 + }; + + // Increment pending counter for this phase + if phase != 0 { + if let Some(ref tracker) = self.profiler.phase_scope_tracker { + tracker.increment_pending(phase); + } + } + + let op = event::NcclOp::from_descr(descr, time, id as _, comm_hash_override, phase); self.ncclop_free_list .alloc_new(op, Some(&self.profiler.free_ncclop), true) .map(|op| { @@ -385,8 +439,14 @@ pub fn init_handler( env_logger::init(); // after this point we should be able to use all the log macros - PROFILER.set(Profiler::new(version)).unwrap(); - PROFILER.get().unwrap().spawn_daemon(); + // get_or_init: creates Profiler only on first init, reuses on subsequent cycles + PROFILER.get_or_init(|| Profiler::new(version, None)); + // Always spawn daemon when INIT_FLAG=0 (daemon was stopped on last finalize) + let profiler = PROFILER.get().unwrap(); + profiler.spawn_daemon(); + + // Initialize phase API for training code + crate::phase_api::init_phase_api(profiler); } *lg += 1; @@ -405,15 +465,22 @@ pub fn init_handler_v4( comm_hash: u64, _n_nodes: i32, _n_ranks: i32, - _rank: i32, + rank: i32, version: Version, ) -> NcclResult> { let mut mask = 0; if config::CONFIG.telemetry_mode > 0 { let mut lg = INIT_FLAG.lock().unwrap(); if *lg == 0 { - PROFILER.set(Profiler::new(version)).unwrap(); - PROFILER.get().unwrap().spawn_daemon(); + // get_or_init: creates Profiler only on first init, reuses on subsequent cycles + // Pass rank to Profiler for auto-naming telemetry pool + PROFILER.get_or_init(|| Profiler::new(version, Some(rank))); + // Always spawn daemon when INIT_FLAG=0 (daemon was stopped on last finalize) + let profiler = PROFILER.get().unwrap(); + profiler.spawn_daemon(); + + // Initialize phase API for training code + crate::phase_api::init_phase_api(profiler); } *lg += 1; @@ -674,7 +741,12 @@ pub fn stop_event_handler(event: event::Event) -> NcclResult<()> { } event::Event::ProxyOp(mut data) => { thread_state.fifo.prefetch_next(); - if let Some(step) = data.step_tracker.finalize() { + let phase = if let Some(ref tracker) = thread_state.profiler.phase_scope_tracker { + tracker.current_phase() + } else { + 0 + }; + if let Some(step) = data.step_tracker.finalize(phase) { data.get_steps_mut(thread_state).push(step); } if thread_state.profiler.config.track_proxyop { @@ -714,7 +786,7 @@ pub fn stop_event_handler(event: event::Event) -> NcclResult<()> { thread_state.proxystep_free_list.free(data); } event::Event::Dummy(_) => (), - event::Event::SmallNcclOp(_) => (), + event::Event::SmallNcclOp(_) => {} event::Event::NcclOpLite(_) => {} event::Event::NcclOp(_) => {} }); @@ -731,10 +803,16 @@ where { if let event::Event::ProxyOp(data) = event { with_thread_state(|thread_state| { - // let n_steps = data.n_steps; + // Get current phase for tagging EventSteps + let phase = if let Some(ref tracker) = thread_state.profiler.phase_scope_tracker { + tracker.current_phase() + } else { + 0 + }; + let maybe_step = data.step_tracker.update_step(e_state, e_state_args, || { thread_state.profiler.recent_timer_ns() - }); + }, phase); if let Some(step) = maybe_step { data.n_steps += 1; @@ -794,6 +872,7 @@ pub fn finalize_handler(_comm: Box) -> NcclResult<()> { let mut lg = INIT_FLAG.lock().unwrap(); if *lg == 1 { let profiler = PROFILER.get().unwrap(); + profiler.join_daemon(); } *lg -= 1; diff --git a/src/step_tracker.rs b/src/step_tracker.rs index 225b82c..e427efd 100644 --- a/src/step_tracker.rs +++ b/src/step_tracker.rs @@ -24,6 +24,8 @@ pub struct EventStep { pub start_time: u64, pub fifo_wait_dur_ns: Option, pub dur_ns: u32, + /// Phase ID when this step was created (0 = no phase tracking) + pub phase: u64, } #[derive(Debug, Copy, Clone)] @@ -46,7 +48,7 @@ impl EventStepInProgress { } } - fn finalize(&self) -> EventStep { + fn finalize(&self, phase: u64) -> EventStep { let fifo_wait_dur_ns = self.fifo_ready_time.map(|t| (t - self.start_time) as u32); let net_start_time = self.fifo_ready_time.unwrap_or(self.start_time); EventStep { @@ -55,6 +57,7 @@ impl EventStepInProgress { start_time: self.start_time, fifo_wait_dur_ns, dur_ns: (self.end_time.unwrap() - net_start_time) as _, + phase, } } } @@ -88,6 +91,7 @@ impl StepTracker { state: profiler_shim::ncclProfilerEventState_v1_t, args: &S, get_time_ns: T, + phase: u64, ) -> Option where T: FnOnce() -> u64, @@ -123,7 +127,7 @@ impl StepTracker { // after filling in the size, it is safe to // "finalize" it let last = steps_in_progress.pop_back().unwrap(); - to_emit = Some(last.finalize()); + to_emit = Some(last.finalize(phase)); } } self.last_in_progress_size = args.trans_size(); @@ -161,7 +165,7 @@ impl StepTracker { let in_progress = steps_in_progress.get_mut(i); in_progress.end_time = Some(get_time_ns()); if in_progress.size.is_some() { - to_emit = steps_in_progress.remove_and_apply(i, |s| s.finalize()); + to_emit = steps_in_progress.remove_and_apply(i, |s| s.finalize(phase)); } self.accumulated_size = args.trans_size(); to_emit @@ -174,7 +178,7 @@ impl StepTracker { let in_progress = steps_in_progress.get_mut(i); in_progress.end_time = Some(get_time_ns()); in_progress.size = Some(args.trans_size() - self.accumulated_size); - to_emit = steps_in_progress.remove_and_apply(i, |s| s.finalize()); + to_emit = steps_in_progress.remove_and_apply(i, |s| s.finalize(phase)); self.accumulated_size = args.trans_size(); to_emit } @@ -182,14 +186,14 @@ impl StepTracker { } } - pub fn finalize(&mut self) -> Option { + pub fn finalize(&mut self, phase: u64) -> Option { if self.is_send && self.is_v1 { let steps_in_progress = &mut self.steps_in_progress; let accumulated_size = self.accumulated_size; let last = steps_in_progress.back_mut().unwrap(); last.size = Some(accumulated_size - self.last_in_progress_size); let in_progress = steps_in_progress.pop_back().unwrap(); - Some(in_progress.finalize()) + Some(in_progress.finalize(phase)) } else { None } @@ -219,6 +223,7 @@ mod tests { profiler_shim::proxy_event_state::v1::SEND_TRANSMITTED, &args, get_time, + 0, ) { steps.push(step); } @@ -227,12 +232,13 @@ mod tests { profiler_shim::proxy_event_state::v1::SEND_DONE, &args, get_time, + 0, ) { steps.push(step); } } - if let Some(step) = tracker.finalize() { + if let Some(step) = tracker.finalize(0) { steps.push(step); } @@ -262,6 +268,7 @@ mod tests { profiler_shim::proxy_event_state::v1::RECV_POSTED, &args, get_time, + 0, ) { steps.push(step); } @@ -272,13 +279,14 @@ mod tests { profiler_shim::proxy_event_state::v1::RECV_RECEIVED, &args, get_time, + 0, ) { steps.push(step); } } } - if let Some(step) = tracker.finalize() { + if let Some(step) = tracker.finalize(0) { steps.push(step); } diff --git a/src/telemetry_ffi.rs b/src/telemetry_ffi.rs new file mode 100644 index 0000000..c1c8315 --- /dev/null +++ b/src/telemetry_ffi.rs @@ -0,0 +1,236 @@ +//! C FFI interface for TelemetryPool pull API +//! +//! This module provides C-compatible functions for Python (via ctypes) to: +//! - Open TelemetryPool from shared memory +//! - Pull PhaseScopes +//! - Close handle +//! +//! # Python Usage Example +//! +//! ```python +//! import ctypes +//! +//! lib = ctypes.CDLL("libnccl_profiler.so") +//! +//! # Open pool +//! handle = lib.nccl_telemetry_open(b"/nccl_telemetry_${PGID}_${RANK}") +//! +//! # Pull scope +//! scope = CPhaseScope() +//! if lib.nccl_telemetry_pull(handle, ctypes.byref(scope)) == 1: +//! print(f"Step: {scope.step()}, Duration: {scope.duration_ms()}ms") +//! +//! # Close +//! lib.nccl_telemetry_close(handle) +//! ``` + +use crate::phase_scope::PhaseScope; +use crate::telemetry_pool::TelemetryPool; +use std::ffi::CStr; + +/// C-compatible PhaseScope structure (same as PhaseScope, already #[repr(C)]) +/// +/// This is exported as-is since PhaseScope is already #[repr(C)] +pub type CPhaseScope = PhaseScope; + +/// Opaque handle for TelemetryPool (C pointer) +pub type TelemetryPoolHandle = *mut TelemetryPool; + +/// Open TelemetryPool from shared memory (consumer) +/// +/// # Safety +/// - `name` must be a valid null-terminated C string +/// - Caller must call `nccl_telemetry_close()` to free resources +/// +/// # Returns +/// - Non-null handle on success +/// - Null on failure +#[no_mangle] +pub unsafe extern "C" fn nccl_telemetry_open(name: *const libc::c_char) -> TelemetryPoolHandle { + if name.is_null() { + eprintln!("nccl_telemetry_open: null name"); + return std::ptr::null_mut(); + } + + let name_str = match CStr::from_ptr(name).to_str() { + Ok(s) => s, + Err(e) => { + eprintln!("nccl_telemetry_open: invalid UTF-8 in name: {}", e); + return std::ptr::null_mut(); + } + }; + + match TelemetryPool::open(name_str) { + Ok(pool) => Box::into_raw(Box::new(pool)), + Err(e) => { + eprintln!("nccl_telemetry_open: {}", e); + std::ptr::null_mut() + } + } +} + +/// Pull one PhaseScope from TelemetryPool +/// +/// # Safety +/// - `handle` must be a valid handle from `nccl_telemetry_open()` +/// - `scope` must be a valid pointer to CPhaseScope +/// +/// # Returns +/// - 1 if PhaseScope was pulled (scope populated) +/// - 0 if ring buffer is empty (scope unchanged) +/// - -1 on error +#[no_mangle] +pub unsafe extern "C" fn nccl_telemetry_pull( + handle: TelemetryPoolHandle, + scope: *mut CPhaseScope, +) -> libc::c_int { + if handle.is_null() || scope.is_null() { + eprintln!("nccl_telemetry_pull: null handle or scope"); + return -1; + } + + let pool = &*handle; + + match pool.pull() { + Some(s) => { + std::ptr::write(scope, s); + 1 + } + None => 0, + } +} + +/// Get number of PhaseScopes available in buffer +/// +/// # Safety +/// - `handle` must be a valid handle from `nccl_telemetry_open()` +/// +/// # Returns +/// - Number of PhaseScopes available +/// - -1 on error +#[no_mangle] +pub unsafe extern "C" fn nccl_telemetry_len(handle: TelemetryPoolHandle) -> libc::c_int { + if handle.is_null() { + eprintln!("nccl_telemetry_len: null handle"); + return -1; + } + + let pool = &*handle; + pool.len() as libc::c_int +} + +/// Close TelemetryPool handle and free resources +/// +/// # Safety +/// - `handle` must be a valid handle from `nccl_telemetry_open()` +/// - Handle must not be used after calling this function +#[no_mangle] +pub unsafe extern "C" fn nccl_telemetry_close(handle: TelemetryPoolHandle) { + if !handle.is_null() { + let _ = Box::from_raw(handle); + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::ffi::CString; + + #[test] + fn test_ffi_open_close() { + let name = CString::new("/nccl_telemetry_ffi_test").unwrap(); + + // Create pool first + let pool = TelemetryPool::create(name.to_str().unwrap(), 16).unwrap(); + drop(pool); + + // Open via FFI + let handle = unsafe { nccl_telemetry_open(name.as_ptr()) }; + assert!(!handle.is_null()); + + // Close via FFI + unsafe { nccl_telemetry_close(handle) }; + + // Cleanup + unsafe { libc::shm_unlink(name.as_ptr()) }; + } + + #[test] + fn test_ffi_pull() { + let name = CString::new("/nccl_telemetry_ffi_pull_test").unwrap(); + + // Create and push scope + let pool = TelemetryPool::create(name.to_str().unwrap(), 16).unwrap(); + let scope = PhaseScope { + phase: 12345, // Opaque phase tag + start_time_ns: 1000, + end_time_ns: 2000, + e2e_latency_sum_ns: 500, + net_latency_sum_ns: 400, + bytes_transferred: 2048, + transfer_op_count: 10, + nccl_op_count: 10, + }; + pool.push(scope).unwrap(); + drop(pool); + + // Open via FFI + let handle = unsafe { nccl_telemetry_open(name.as_ptr()) }; + assert!(!handle.is_null()); + + // Pull via FFI + let mut pulled = PhaseScope { + phase: 0, + start_time_ns: 0, + end_time_ns: 0, + e2e_latency_sum_ns: 0, + net_latency_sum_ns: 0, + bytes_transferred: 0, + transfer_op_count: 0, + nccl_op_count: 0, + }; + let result = unsafe { nccl_telemetry_pull(handle, &mut pulled) }; + assert_eq!(result, 1); + assert_eq!(pulled.phase, 12345); + assert_eq!(pulled.bytes_transferred, 2048); + + // Pull again (empty) + let result = unsafe { nccl_telemetry_pull(handle, &mut pulled) }; + assert_eq!(result, 0); + + // Close + unsafe { nccl_telemetry_close(handle) }; + + // Cleanup + unsafe { libc::shm_unlink(name.as_ptr()) }; + } + + #[test] + fn test_ffi_len() { + let name = CString::new("/nccl_telemetry_ffi_len_test").unwrap(); + + let pool = TelemetryPool::create(name.to_str().unwrap(), 16).unwrap(); + let scope = PhaseScope { + phase: 100, + start_time_ns: 0, + end_time_ns: 0, + e2e_latency_sum_ns: 0, + net_latency_sum_ns: 0, + bytes_transferred: 0, + transfer_op_count: 0, + nccl_op_count: 0, + }; + pool.push(scope).unwrap(); + pool.push(scope).unwrap(); + drop(pool); + + let handle = unsafe { nccl_telemetry_open(name.as_ptr()) }; + assert!(!handle.is_null()); + + let len = unsafe { nccl_telemetry_len(handle) }; + assert_eq!(len, 2); + + unsafe { nccl_telemetry_close(handle) }; + unsafe { libc::shm_unlink(name.as_ptr()) }; + } +} diff --git a/src/telemetry_pool.rs b/src/telemetry_pool.rs new file mode 100644 index 0000000..3e82b59 --- /dev/null +++ b/src/telemetry_pool.rs @@ -0,0 +1,528 @@ +//! Shared memory TelemetryPool for pull-based PhaseScope export +//! +//! This module implements a lock-free SPSC (Single Producer Single Consumer) ring buffer +//! in shared memory for exporting PhaseScopes from CoMMA daemon to training code. +//! +//! # Architecture +//! +//! ```text +//! [CoMMA Daemon] --push--> [Shared Memory Ring Buffer] <--pull-- [Training Code] +//! (Producer) (TelemetryPool) (Consumer) +//! ``` +//! +//! - **Producer**: CoMMA daemon pushes completed PhaseScopes +//! - **Consumer**: Training code pulls PhaseScopes and exports to OTel/CloudWatch +//! - **Lock-free**: Atomic head/tail pointers for zero-contention access +//! +//! # Usage +//! +//! ## Producer (CoMMA daemon) +//! +//! ```rust,no_run +//! use nccl_profiler::telemetry_pool::TelemetryPool; +//! use nccl_profiler::phase_scope::PhaseScope; +//! +//! let pool = TelemetryPool::create("/nccl_telemetry_0", 1024).unwrap(); +//! +//! let scope = PhaseScope { +//! phase: 1, +//! start_time_ns: 0, +//! end_time_ns: 1000, +//! e2e_latency_sum_ns: 0, +//! net_latency_sum_ns: 0, +//! bytes_transferred: 0, +//! transfer_op_count: 0, +//! nccl_op_count: 0, +//! }; +//! pool.push(scope).unwrap(); +//! ``` +//! +//! ## Consumer (Training code via Python FFI) +//! +//! ```python +//! from nccl_telemetry import NCCLTelemetryClient +//! +//! client = NCCLTelemetryClient() +//! for scope in client.pull_all(): +//! print(f"Step {scope.step}, Phase {scope.phase.name}: {scope.duration_ms}ms") +//! ``` + +use crate::phase_scope::PhaseScope; +use std::sync::atomic::{AtomicU64, Ordering}; + +/// Magic number for TelemetryPool header validation +const TELEMETRY_POOL_MAGIC: u64 = 0x4E43434C54454C50; // "NCCLTLP" + +/// Shared memory ring buffer for PhaseScope export +/// +/// # Memory Layout +/// +/// ```text +/// +-------------------+ +/// | Header (64 bytes) | <- Magic, capacity, head, tail +/// +-------------------+ +/// | PhaseScope[0] | <- Ring buffer entries +/// | PhaseScope[1] | +/// | ... | +/// | PhaseScope[N-1] | +/// +-------------------+ +/// ``` +/// +/// # Lock-free SPSC Protocol +/// +/// - **head**: Write position (producer increments) +/// - **tail**: Read position (consumer increments) +/// - **Empty**: head == tail +/// - **Full**: (head + 1) % capacity == tail +#[repr(C)] +struct TelemetryPoolHeader { + /// Magic number for validation (TELEMETRY_POOL_MAGIC) + magic: u64, + + /// Ring buffer capacity (power of 2 for fast modulo) + capacity: u64, + + /// Write position (producer owns) + head: AtomicU64, + + /// Read position (consumer owns) + tail: AtomicU64, + + /// Padding to 64 bytes + _padding: [u64; 4], +} + +/// TelemetryPool handle for producer (CoMMA daemon) +#[derive(Debug)] +pub struct TelemetryPool { + /// Shared memory file descriptor + shm_fd: i32, + + /// Mapped memory region + ptr: *mut u8, + + /// Total mapped size (header + ring buffer) + size: usize, + + /// Header pointer + header: *mut TelemetryPoolHeader, + + /// Ring buffer pointer + ring: *mut PhaseScope, + + /// Ring buffer capacity (cached from header) + capacity: u64, +} + +unsafe impl Send for TelemetryPool {} +unsafe impl Sync for TelemetryPool {} + +impl TelemetryPool { + /// Create or open TelemetryPool in shared memory + /// + /// # Arguments + /// * `name` - Shared memory name (e.g., "/nccl_telemetry_0") + /// * `capacity` - Ring buffer capacity (power of 2 recommended) + /// + /// # Returns + /// - Ok(TelemetryPool) on success + /// - Err on shared memory failure + pub fn create(name: &str, capacity: u64) -> Result { + if capacity == 0 || capacity > (1 << 30) { + return Err("capacity must be 0 < capacity <= 2^30".to_string()); + } + + // Calculate total size with PAGE_SIZE alignment + let page_size = unsafe { libc::sysconf(libc::_SC_PAGESIZE) as usize }; + let header_size = std::mem::size_of::(); + let ring_size = capacity as usize * std::mem::size_of::(); + let total_size = (header_size + ring_size + page_size - 1) & !(page_size - 1); + + // Create shared memory (POSIX shm_open) + let cname = std::ffi::CString::new(name).unwrap(); + let shm_fd = unsafe { + libc::shm_open( + cname.as_ptr(), + libc::O_CREAT | libc::O_RDWR, + 0o600, + ) + }; + + if shm_fd < 0 { + return Err(format!("shm_open failed: {}", std::io::Error::last_os_error())); + } + + // Resize shared memory + if unsafe { libc::ftruncate(shm_fd, total_size as i64) } < 0 { + unsafe { libc::close(shm_fd) }; + return Err(format!("ftruncate failed: {}", std::io::Error::last_os_error())); + } + + // Memory map + let ptr = unsafe { + libc::mmap( + std::ptr::null_mut(), + total_size, + libc::PROT_READ | libc::PROT_WRITE, + libc::MAP_SHARED, + shm_fd, + 0, + ) + }; + + if ptr == libc::MAP_FAILED { + unsafe { libc::close(shm_fd) }; + return Err(format!("mmap failed: {}", std::io::Error::last_os_error())); + } + + let header = ptr as *mut TelemetryPoolHeader; + let ring = unsafe { ptr.add(header_size) } as *mut PhaseScope; + + // Initialize header (check if already initialized) + unsafe { + if (*header).magic != TELEMETRY_POOL_MAGIC { + // First initialization + std::ptr::write( + header, + TelemetryPoolHeader { + magic: TELEMETRY_POOL_MAGIC, + capacity, + head: AtomicU64::new(0), + tail: AtomicU64::new(0), + _padding: [0; 4], + }, + ); + } + } + + Ok(Self { + shm_fd, + ptr: ptr as *mut u8, + size: total_size, + header, + ring, + capacity, + }) + } + + /// Open existing TelemetryPool for consumer + /// + /// # Arguments + /// * `name` - Shared memory name + /// + /// # Returns + /// - Ok(TelemetryPool) on success + /// - Err if shared memory doesn't exist or invalid + pub fn open(name: &str) -> Result { + let cname = std::ffi::CString::new(name).unwrap(); + let shm_fd = unsafe { + libc::shm_open(cname.as_ptr(), libc::O_RDWR, 0o600) + }; + + if shm_fd < 0 { + return Err(format!("shm_open failed: {}", std::io::Error::last_os_error())); + } + + // Get shared memory size + let mut stat: libc::stat = unsafe { std::mem::zeroed() }; + if unsafe { libc::fstat(shm_fd, &mut stat) } < 0 { + unsafe { libc::close(shm_fd) }; + return Err(format!("fstat failed: {}", std::io::Error::last_os_error())); + } + + let total_size = stat.st_size as usize; + let header_size = std::mem::size_of::(); + + if total_size < header_size { + unsafe { libc::close(shm_fd) }; + return Err("shared memory too small for header".to_string()); + } + + // Memory map + let ptr = unsafe { + libc::mmap( + std::ptr::null_mut(), + total_size, + libc::PROT_READ | libc::PROT_WRITE, + libc::MAP_SHARED, + shm_fd, + 0, + ) + }; + + if ptr == libc::MAP_FAILED { + unsafe { libc::close(shm_fd) }; + return Err(format!("mmap failed: {}", std::io::Error::last_os_error())); + } + + let header = ptr as *mut TelemetryPoolHeader; + let ring = unsafe { ptr.add(header_size) } as *mut PhaseScope; + + // Validate header + unsafe { + if (*header).magic != TELEMETRY_POOL_MAGIC { + libc::munmap(ptr, total_size); + libc::close(shm_fd); + return Err("invalid magic number in shared memory".to_string()); + } + } + + let capacity = unsafe { (*header).capacity }; + + Ok(Self { + shm_fd, + ptr: ptr as *mut u8, + size: total_size, + header, + ring, + capacity, + }) + } + + /// Push PhaseScope to ring buffer (producer only) + /// + /// # Arguments + /// * `scope` - PhaseScope to push + /// + /// # Returns + /// - Ok(()) on success + /// - Err if ring buffer is full + pub fn push(&self, scope: PhaseScope) -> Result<(), &'static str> { + unsafe { + let head = (*self.header).head.load(Ordering::Acquire); + let tail = (*self.header).tail.load(Ordering::Acquire); + + // Check if full: (head + 1) % capacity == tail + let next_head = (head + 1) % self.capacity; + if next_head == tail { + return Err("ring buffer full"); + } + + // Write scope to ring[head] + let entry = self.ring.add(head as usize); + std::ptr::write(entry, scope); + + // Advance head with Release ordering + (*self.header).head.store(next_head, Ordering::Release); + } + + Ok(()) + } + + /// Pull PhaseScope from ring buffer (consumer only) + /// + /// # Returns + /// - Some(PhaseScope) if available + /// - None if ring buffer is empty + pub fn pull(&self) -> Option { + unsafe { + let head = (*self.header).head.load(Ordering::Acquire); + let tail = (*self.header).tail.load(Ordering::Acquire); + + // Check if empty: head == tail + if head == tail { + return None; + } + + // Read scope from ring[tail] + let entry = self.ring.add(tail as usize); + let scope = std::ptr::read(entry); + + // Advance tail with Release ordering + let next_tail = (tail + 1) % self.capacity; + (*self.header).tail.store(next_tail, Ordering::Release); + + Some(scope) + } + } + + /// Pull all available PhaseScopes (consumer convenience) + pub fn pull_all(&self) -> Vec { + let mut scopes = Vec::new(); + while let Some(scope) = self.pull() { + scopes.push(scope); + } + scopes + } + + /// Get current number of PhaseScopes in buffer + pub fn len(&self) -> usize { + unsafe { + let head = (*self.header).head.load(Ordering::Acquire); + let tail = (*self.header).tail.load(Ordering::Acquire); + + if head >= tail { + (head - tail) as usize + } else { + (self.capacity - tail + head) as usize + } + } + } + + /// Check if ring buffer is empty + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// Get ring buffer capacity + pub fn capacity(&self) -> u64 { + self.capacity + } +} + +impl Drop for TelemetryPool { + fn drop(&mut self) { + unsafe { + libc::munmap(self.ptr as *mut libc::c_void, self.size); + libc::close(self.shm_fd); + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_create_and_open() { + let name = "/nccl_telemetry_test_create"; + let pool = TelemetryPool::create(name, 16).unwrap(); + assert_eq!(pool.capacity(), 16); + assert!(pool.is_empty()); + + // Open from another handle + let pool2 = TelemetryPool::open(name).unwrap(); + assert_eq!(pool2.capacity(), 16); + assert!(pool2.is_empty()); + + // Cleanup + drop(pool); + drop(pool2); + unsafe { libc::shm_unlink(std::ffi::CString::new(name).unwrap().as_ptr()) }; + } + + #[test] + fn test_push_pull() { + let name = "/nccl_telemetry_test_push_pull"; + let pool = TelemetryPool::create(name, 16).unwrap(); + + let scope = PhaseScope { + phase: 100, // Opaque phase tag + start_time_ns: 1000, + end_time_ns: 2000, + e2e_latency_sum_ns: 500, + net_latency_sum_ns: 400, + bytes_transferred: 1024, + transfer_op_count: 10, + nccl_op_count: 10, + }; + + pool.push(scope).unwrap(); + assert_eq!(pool.len(), 1); + + let pulled = pool.pull().unwrap(); + assert_eq!(pulled.phase, scope.phase); + assert_eq!(pulled.bytes_transferred, 1024); + assert!(pool.is_empty()); + + // Cleanup + drop(pool); + unsafe { libc::shm_unlink(std::ffi::CString::new(name).unwrap().as_ptr()) }; + } + + #[test] + fn test_push_full() { + let name = "/nccl_telemetry_test_push_full"; + let pool = TelemetryPool::create(name, 4).unwrap(); + + let scope = PhaseScope { + phase: 100, + start_time_ns: 1000, + end_time_ns: 2000, + e2e_latency_sum_ns: 0, + net_latency_sum_ns: 0, + bytes_transferred: 0, + transfer_op_count: 0, + nccl_op_count: 0, + }; + + // Push 3 items (capacity - 1, since we lose one slot for full detection) + pool.push(scope).unwrap(); + pool.push(scope).unwrap(); + pool.push(scope).unwrap(); + + // Next push should fail (full) + assert!(pool.push(scope).is_err()); + assert_eq!(pool.len(), 3); + + // Cleanup + drop(pool); + unsafe { libc::shm_unlink(std::ffi::CString::new(name).unwrap().as_ptr()) }; + } + + #[test] + fn test_pull_all() { + let name = "/nccl_telemetry_test_pull_all"; + let pool = TelemetryPool::create(name, 16).unwrap(); + + for i in 0..5 { + let scope = PhaseScope { + phase: 100 + i, // Different phase tags + start_time_ns: 1000, + end_time_ns: 2000, + e2e_latency_sum_ns: 0, + net_latency_sum_ns: 0, + bytes_transferred: 0, + transfer_op_count: 0, + nccl_op_count: 0, + }; + pool.push(scope).unwrap(); + } + + assert_eq!(pool.len(), 5); + + let scopes = pool.pull_all(); + assert_eq!(scopes.len(), 5); + assert!(pool.is_empty()); + + for (i, scope) in scopes.iter().enumerate() { + assert_eq!(scope.phase, 100 + i as u64); + } + + // Cleanup + drop(pool); + unsafe { libc::shm_unlink(std::ffi::CString::new(name).unwrap().as_ptr()) }; + } + + #[test] + fn test_cross_process() { + let name = "/nccl_telemetry_test_cross_process"; + let pool = TelemetryPool::create(name, 16).unwrap(); + + let scope = PhaseScope { + phase: 12345, // Opaque phase tag + start_time_ns: 1000, + end_time_ns: 2000, + e2e_latency_sum_ns: 500, + net_latency_sum_ns: 400, + bytes_transferred: 2048, + transfer_op_count: 20, + nccl_op_count: 20, + }; + + pool.push(scope).unwrap(); + drop(pool); + + // Open from "another process" + let pool2 = TelemetryPool::open(name).unwrap(); + assert_eq!(pool2.len(), 1); + + let pulled = pool2.pull().unwrap(); + assert_eq!(pulled.phase, 12345); + assert_eq!(pulled.bytes_transferred, 2048); + + // Cleanup + drop(pool2); + unsafe { libc::shm_unlink(std::ffi::CString::new(name).unwrap().as_ptr()) }; + } +}