From 6b761fb23c672c0565a146a25dd6b97de9e69e58 Mon Sep 17 00:00:00 2001 From: paullegranddc Date: Fri, 12 Jun 2026 18:53:35 +0200 Subject: [PATCH 01/18] feat: agentless RC fetcher --- Cargo.lock | 223 +++-- libdd-common/src/lib.rs | 21 +- libdd-remote-config/Cargo.toml | 17 +- .../examples/remote_config_fetch.rs | 52 +- .../roots/prod/config_root.json | 63 ++ .../roots/prod/director_root.json | 63 ++ .../src/agentless_client/mod.rs | 766 ++++++++++++++++++ libdd-remote-config/src/fetch/fetcher.rs | 247 +++++- libdd-remote-config/src/fetch/shared.rs | 8 +- libdd-remote-config/src/fetch/single.rs | 35 +- libdd-remote-config/src/fetch/test_server.rs | 3 + libdd-remote-config/src/lib.rs | 2 + libdd-trace-protobuf/build.rs | 6 + .../src/pb/remoteconfig.proto | 210 +++++ libdd-trace-protobuf/src/remoteconfig.rs | 400 +++++++++ 15 files changed, 1994 insertions(+), 122 deletions(-) create mode 100644 libdd-remote-config/roots/prod/config_root.json create mode 100644 libdd-remote-config/roots/prod/director_root.json create mode 100644 libdd-remote-config/src/agentless_client/mod.rs diff --git a/Cargo.lock b/Cargo.lock index fff06dbaa3..8ccd0cae6a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -397,10 +397,10 @@ dependencies = [ "axum-core", "bytes", "futures-util", - "http", - "http-body", + "http 1.1.0", + "http-body 1.0.1", "http-body-util", - "itoa", + "itoa 1.0.11", "matchit", "memchr", "mime", @@ -421,8 +421,8 @@ checksum = "08c78f31d7b1291f7ee735c1c6780ccde7785daae9a9206026862dab7d8792d1" dependencies = [ "bytes", "futures-core", - "http", - "http-body", + "http 1.1.0", + "http-body 1.0.1", "http-body-util", "mime", "pin-project-lite", @@ -1226,7 +1226,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac574ff4d437a7b5ad237ef331c17ccca63c46479e5b5453eb8e10bb99a759fe" dependencies = [ "csv-core", - "itoa", + "itoa 1.0.11", "ryu", "serde", ] @@ -1447,7 +1447,7 @@ dependencies = [ "bytes", "constcat", "futures", - "http", + "http 1.1.0", 
"http-body-util", "libdd-common", "libdd-data-pipeline", @@ -1506,7 +1506,7 @@ dependencies = [ "datadog-live-debugger", "datadog-sidecar-macros", "futures", - "http", + "http 1.1.0", "http-body-util", "httpmock", "libc", @@ -1558,7 +1558,7 @@ dependencies = [ "datadog-ipc", "datadog-live-debugger", "datadog-sidecar", - "http", + "http 1.1.0", "libc", "libdd-common", "libdd-common-ffi", @@ -1636,6 +1636,15 @@ dependencies = [ "syn 2.0.87", ] +[[package]] +name = "derp" +version = "0.0.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c9b84cfd9b6fa437e498215e5625e9e3ae3bf9bb54d623028a181c40820db169" +dependencies = [ + "untrusted 0.7.1", +] + [[package]] name = "diff" version = "0.1.13" @@ -2135,7 +2144,7 @@ dependencies = [ "fnv", "futures-core", "futures-sink", - "http", + "http 1.1.0", "indexmap 2.12.1", "slab", "tokio", @@ -2223,7 +2232,7 @@ dependencies = [ "base64 0.21.7", "bytes", "headers-core", - "http", + "http 1.1.0", "httpdate", "mime", "sha1", @@ -2235,7 +2244,7 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "54b4a22553d4242c49fddb9ba998a99962b5cc6f22cb5a3482bec22522403ce4" dependencies = [ - "http", + "http 1.1.0", ] [[package]] @@ -2314,6 +2323,17 @@ dependencies = [ "tracing", ] +[[package]] +name = "http" +version = "0.2.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "601cbb57e577e2f5ef5be8e7b83f0f63994f25aa94d673e54a92d5c516d101f1" +dependencies = [ + "bytes", + "fnv", + "itoa 1.0.11", +] + [[package]] name = "http" version = "1.1.0" @@ -2322,7 +2342,18 @@ checksum = "21b9ddb458710bc376481b842f5da65cdf31522de232c1ca8146abce2a358258" dependencies = [ "bytes", "fnv", - "itoa", + "itoa 1.0.11", +] + +[[package]] +name = "http-body" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ceab25649e9960c0311ea418d17bee82c0dcec1bd053b5f9a66e265a693bed2" +dependencies = [ + "bytes", + "http 
0.2.12", + "pin-project-lite", ] [[package]] @@ -2332,7 +2363,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" dependencies = [ "bytes", - "http", + "http 1.1.0", ] [[package]] @@ -2343,8 +2374,8 @@ checksum = "793429d76616a256bcb62c2a2ec2bed781c8307e797e2598c50010f2bee2544f" dependencies = [ "bytes", "futures-util", - "http", - "http-body", + "http 1.1.0", + "http-body 1.0.1", "pin-project-lite", ] @@ -2377,9 +2408,9 @@ dependencies = [ "futures-timer", "futures-util", "headers", - "http", + "http 1.1.0", "http-body-util", - "hyper", + "hyper 1.6.0", "hyper-util", "lazy_static", "log", @@ -2402,6 +2433,29 @@ version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" +[[package]] +name = "hyper" +version = "0.14.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41dfc780fdec9373c01bae43289ea34c972e40ee3c9f6b3c8801a35f35586ce7" +dependencies = [ + "bytes", + "futures-channel", + "futures-core", + "futures-util", + "http 0.2.12", + "http-body 0.4.6", + "httparse", + "httpdate", + "itoa 1.0.11", + "pin-project-lite", + "socket2 0.5.10", + "tokio", + "tower-service", + "tracing", + "want", +] + [[package]] name = "hyper" version = "1.6.0" @@ -2412,11 +2466,11 @@ dependencies = [ "futures-channel", "futures-util", "h2", - "http", - "http-body", + "http 1.1.0", + "http-body 1.0.1", "httparse", "httpdate", - "itoa", + "itoa 1.0.11", "pin-project-lite", "smallvec", "tokio", @@ -2429,8 +2483,8 @@ version = "0.27.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3c93eb611681b207e1fe55d5a71ecf91572ec8a6705cdb6857f7d8d5242cf58" dependencies = [ - "http", - "hyper", + "http 1.1.0", + "hyper 1.6.0", "hyper-util", "rustls", "rustls-native-certs", @@ -2447,7 +2501,7 @@ version = "0.5.2" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "2b90d566bffbce6a75bd8b09a05aa8c2cb1fabb6cb348f8840c9e4c90a0d83b0" dependencies = [ - "hyper", + "hyper 1.6.0", "hyper-util", "pin-project-lite", "tokio", @@ -2465,9 +2519,9 @@ dependencies = [ "futures-channel", "futures-core", "futures-util", - "http", - "http-body", - "hyper", + "http 1.1.0", + "http-body 1.0.1", + "hyper 1.6.0", "ipnet", "libc", "percent-encoding", @@ -2734,6 +2788,12 @@ dependencies = [ "either", ] +[[package]] +name = "itoa" +version = "0.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b71991ff56294aa922b450139ee08b3bfc70982c6b2c7562771375cf73542dd4" + [[package]] name = "itoa" version = "1.0.11" @@ -2846,7 +2906,7 @@ version = "2.0.0" dependencies = [ "anyhow", "bytes", - "http", + "http 1.1.0", "thiserror 1.0.68", ] @@ -2855,7 +2915,7 @@ name = "libdd-capabilities-impl" version = "2.0.0" dependencies = [ "bytes", - "http", + "http 1.1.0", "http-body-util", "libdd-capabilities", "libdd-common", @@ -2875,11 +2935,11 @@ dependencies = [ "futures-core", "futures-util", "hex", - "http", - "http-body", + "http 1.1.0", + "http-body 1.0.1", "http-body-util", "httparse", - "hyper", + "hyper 1.6.0", "hyper-rustls", "hyper-util", "indexmap 2.12.1", @@ -2916,7 +2976,7 @@ dependencies = [ "chrono", "crossbeam-queue", "function_name", - "hyper", + "hyper 1.6.0", "libdd-common", "serde", ] @@ -2934,7 +2994,7 @@ dependencies = [ "cxx-build", "errno", "goblin", - "http", + "http 1.1.0", "libc", "libdd-common", "libdd-libunwind-sys", @@ -2990,7 +3050,7 @@ dependencies = [ "duplicate 2.0.1", "either", "getrandom 0.2.15", - "http", + "http 1.1.0", "http-body-util", "httpmock", "libdd-capabilities", @@ -3060,7 +3120,7 @@ version = "3.0.0" dependencies = [ "anyhow", "cadence", - "http", + "http 1.1.0", "libdd-common", "serde", "tokio", @@ -3075,7 +3135,7 @@ dependencies = [ "fastrand", "http-body-util", "httpmock", - "hyper", + "hyper 1.6.0", "hyper-util", 
"libdd-common", "reqwest", @@ -3182,7 +3242,7 @@ dependencies = [ "cxx-build", "futures", "hashbrown 0.16.1", - "http", + "http 1.1.0", "http-body-util", "httparse", "indexmap 2.12.1", @@ -3221,7 +3281,7 @@ dependencies = [ "function_name", "futures", "http-body-util", - "hyper", + "hyper 1.6.0", "libc", "libdd-common", "libdd-common-ffi", @@ -3257,14 +3317,17 @@ dependencies = [ "futures", "futures-util", "hashbrown 0.15.1", - "http", + "http 1.1.0", "http-body-util", - "hyper", + "hyper 1.6.0", "hyper-util", + "libdd-capabilities", + "libdd-capabilities-impl", "libdd-common", "libdd-remote-config", "libdd-trace-protobuf", "manual_future", + "prost", "serde", "serde_json", "serde_with", @@ -3274,6 +3337,7 @@ dependencies = [ "tokio", "tokio-util", "tracing", + "tuf", "uuid", ] @@ -3324,7 +3388,7 @@ dependencies = [ "bytes", "futures", "hashbrown 0.15.1", - "http", + "http 1.1.0", "http-body-util", "httpmock", "libc", @@ -3423,7 +3487,7 @@ dependencies = [ "async-trait", "criterion", "hashbrown 0.15.1", - "http", + "http 1.1.0", "httpmock", "libdd-capabilities", "libdd-capabilities-impl", @@ -3455,11 +3519,11 @@ dependencies = [ "flate2", "futures", "getrandom 0.2.15", - "http", - "http-body", + "http 1.1.0", + "http-body 1.0.1", "http-body-util", "httpmock", - "hyper", + "hyper 1.6.0", "indexmap 2.12.1", "libdd-capabilities", "libdd-capabilities-impl", @@ -3488,7 +3552,7 @@ version = "0.1.0" dependencies = [ "anyhow", "bytes", - "http", + "http 1.1.0", "httpmock", "libdd-common", "libdd-remote-config", @@ -3764,7 +3828,7 @@ dependencies = [ "bytes", "encoding_rs", "futures-util", - "http", + "http 1.1.0", "httparse", "memchr", "mime", @@ -4762,10 +4826,10 @@ dependencies = [ "futures-core", "futures-util", "hickory-resolver", - "http", - "http-body", + "http 1.1.0", + "http-body 1.0.1", "http-body-util", - "hyper", + "hyper 1.6.0", "hyper-rustls", "hyper-util", "js-sys", @@ -4805,7 +4869,7 @@ dependencies = [ "cfg-if", "getrandom 0.2.15", "libc", - "untrusted", + 
"untrusted 0.9.0", "windows-sys 0.52.0", ] @@ -4969,7 +5033,7 @@ dependencies = [ "aws-lc-rs", "ring", "rustls-pki-types", - "untrusted", + "untrusted 0.9.0", ] [[package]] @@ -5213,7 +5277,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d726bfaff4b320266d395898905d0eba0345aae23b54aee3a737e260fd46db03" dependencies = [ "indexmap 2.12.1", - "itoa", + "itoa 1.0.11", "memchr", "ryu", "serde", @@ -5275,7 +5339,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47" dependencies = [ "indexmap 2.12.1", - "itoa", + "itoa 1.0.11", "ryu", "serde", "unsafe-libyaml", @@ -5534,7 +5598,7 @@ version = "2.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6fe17b8deb33a9441280b4266c2d257e166bafbaea6e66b4b34ca139c91766d9" dependencies = [ - "itoa", + "itoa 1.0.11", "ryu", "sval", ] @@ -5545,7 +5609,7 @@ version = "2.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "854addb048a5bafb1f496c98e0ab5b9b581c3843f03ca07c034ae110d3b7c623" dependencies = [ - "itoa", + "itoa 1.0.11", "ryu", "sval", ] @@ -5829,7 +5893,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a7619e19bc266e0f9c5e6686659d394bc57973859340060a69221e57dbc0c40" dependencies = [ "deranged", - "itoa", + "itoa 1.0.11", "num-conv", "powerfmt", "serde", @@ -6018,10 +6082,10 @@ dependencies = [ "base64 0.22.1", "bytes", "h2", - "http", - "http-body", + "http 1.1.0", + "http-body 1.0.1", "http-body-util", - "hyper", + "hyper 1.6.0", "hyper-timeout", "hyper-util", "percent-encoding", @@ -6089,8 +6153,8 @@ dependencies = [ "bitflags", "bytes", "futures-util", - "http", - "http-body", + "http 1.1.0", + "http-body 1.0.1", "iri-string", "pin-project-lite", "tower", @@ -6202,6 +6266,31 @@ version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" +[[package]] +name = "tuf" +version = "0.3.0-beta10" +source = "git+https://github.com/DataDog/rust-tuf/?tag=0.3.0-beta10-opw-3#9e8d6077b0e67f13233ad0a347bb7d640705da04" +dependencies = [ + "chrono", + "data-encoding", + "derp", + "futures-io", + "futures-util", + "http 0.2.12", + "hyper 0.14.32", + "itoa 0.4.8", + "log", + "percent-encoding", + "ring", + "serde", + "serde_derive", + "serde_json", + "tempfile", + "thiserror 1.0.68", + "untrusted 0.7.1", + "url", +] + [[package]] name = "twox-hash" version = "1.6.3" @@ -6260,6 +6349,12 @@ version = "0.2.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "673aac59facbab8a9007c7f6108d11f63b603f7cabff99fabf650fea5c32b861" +[[package]] +name = "untrusted" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a156c684c91ea7d62626509bce3cb4e1d9ed5c4d978f7b4352658f96a4c26b4a" + [[package]] name = "untrusted" version = "0.9.0" @@ -6362,7 +6457,7 @@ checksum = "9170e001f458781e92711d2ad666110f153e4e50bfd5cbd02db6547625714187" dependencies = [ "float-cmp", "halfbrown", - "itoa", + "itoa 1.0.11", "ryu", ] diff --git a/libdd-common/src/lib.rs b/libdd-common/src/lib.rs index dbd2e4a090..54a6f03193 100644 --- a/libdd-common/src/lib.rs +++ b/libdd-common/src/lib.rs @@ -7,7 +7,8 @@ #![cfg_attr(not(test), deny(clippy::unimplemented))] use anyhow::Context; -use http::uri; +use http::uri::PathAndQuery; +use http::{uri, Uri}; use serde::de::Error; use serde::{Deserialize, Deserializer, Serialize, Serializer}; use std::sync::{Mutex, MutexGuard, RwLock, RwLockReadGuard, RwLockWriteGuard}; @@ -332,6 +333,24 @@ impl Endpoint { /// Default value for the timeout field in milliseconds. 
pub const DEFAULT_TIMEOUT: u64 = 3_000; + pub fn agentless(site: &str, api_key: String) -> anyhow::Result { + Ok(Self { + url: Uri::builder() + .scheme("https") + .authority( + uri::Authority::try_from(site) + .with_context(|| format!("dd_site is an invalid url: {site}"))?, + ) + .path_and_query(PathAndQuery::from_static("")) + .build() + .unwrap(), + api_key: Some(api_key.into()), + timeout_ms: Self::DEFAULT_TIMEOUT, + test_token: None, + use_system_resolver: true, + }) + } + /// Returns an iterator of optional endpoint-specific headers (api-key, test-token) /// as (header_name, header_value) string tuples for any that are available. pub fn get_optional_headers(&self) -> impl Iterator { diff --git a/libdd-remote-config/Cargo.toml b/libdd-remote-config/Cargo.toml index ae7988ceef..a42ff0030b 100644 --- a/libdd-remote-config/Cargo.toml +++ b/libdd-remote-config/Cargo.toml @@ -23,7 +23,7 @@ client = [ "tokio-util", "manual_future", "time", - "tracing" + "tracing", ] regex-lite = ["libdd-common/regex-lite"] @@ -36,24 +36,33 @@ test = ["hyper/server", "hyper-util"] [dependencies] anyhow = { version = "1.0" } libdd-common = { path = "../libdd-common", version = "4.2.0", default-features = false } +libdd-capabilities = { path = "../libdd-capabilities" } +libdd-capabilities-impl = { version = "2.0.0", path = "../libdd-capabilities-impl", features = ["https"]} libdd-trace-protobuf = { path = "../libdd-trace-protobuf", version = "3.0.2", optional = true } hyper = { workspace = true, optional = true, default-features = false } -http-body-util = {version = "0.1", optional = true } +http-body-util = { version = "0.1", optional = true } http = { version = "1.1", optional = true } base64 = { version = "0.22.1", optional = true } sha2 = { version = "0.10", optional = true } uuid = { version = "1.7.0", features = ["v4"], optional = true } futures-util = { version = "0.3", optional = true } tokio = { version = "1.36.0", optional = true } -tokio-util = { version = "0.7.10", optional 
= true } +tokio-util = { version = "0.7.10", optional = true } manual_future = { version = "0.1.1", optional = true } -time = { version = "0.3", features = ["parsing", "serde", "formatting"], optional = true } +time = { version = "0.3", features = [ + "parsing", + "serde", + "formatting", +], optional = true } tracing = { version = "0.1", default-features = false, optional = true } serde = "1.0" serde_json = { version = "1.0", features = ["raw_value"] } serde_with = "3" thiserror = "2" hashbrown = "0.15" +tuf = { git = "https://github.com/DataDog/rust-tuf/", tag = "0.3.0-beta10-opw-3" } +prost = "0.14.1" +futures = "0.3" # Test feature hyper-util = { workspace = true, features = ["service"], optional = true } diff --git a/libdd-remote-config/examples/remote_config_fetch.rs b/libdd-remote-config/examples/remote_config_fetch.rs index eb0c79ae95..66ac2033c6 100644 --- a/libdd-remote-config/examples/remote_config_fetch.rs +++ b/libdd-remote-config/examples/remote_config_fetch.rs @@ -8,6 +8,7 @@ use libdd_remote_config::file_change_tracker::{Change, FilePath}; use libdd_remote_config::file_storage::ParsedFileStorage; use libdd_remote_config::RemoteConfigProduct::ApmTracing; use libdd_remote_config::{RemoteConfigParsed, Target}; +use std::process::Command; use std::time::Duration; use tokio::time::sleep; @@ -16,8 +17,45 @@ const SERVICE: &str = "testservice"; const ENV: &str = "testenv"; const VERSION: &str = "1.2.3"; +fn get_hostname() -> String { + Command::new("hostname") + .output() + .ok() + .and_then(|o| String::from_utf8(o.stdout).ok()) + .map(|s| s.trim().to_string()) + .unwrap_or_else(|| "unknown".to_string()) +} + #[tokio::main(flavor = "current_thread")] async fn main() { + let hostname = get_hostname(); + println!("Hostname: {hostname}"); + + let dd_api_key = std::env::var("DD_API_KEY").ok(); + let dd_site = std::env::var("DD_SITE").ok(); + + let (endpoint, agentless_enabled) = match (dd_api_key, dd_site) { + (Some(api_key), Some(site)) => { + 
println!("DD_API_KEY and DD_SITE are set — enabling agentless mode (site: {site})"); + let endpoint = Endpoint::agentless(&site, api_key) + .expect("Failed to build agentless endpoint from DD_SITE"); + (endpoint, true) + } + _ => { + println!("DD_API_KEY / DD_SITE not set — connecting to local agent"); + ( + Endpoint { + url: http::Uri::from_static("http://localhost:8126"), + api_key: None, + timeout_ms: 5000, // custom timeout, defaults to 3 seconds + test_token: None, + ..Default::default() + }, + false, + ) + } + }; + // SingleChangesFetcher is ideal for a single static (runtime_id, service, env, version) tuple // Otherwise a SharedFetcher (or even a MultiTargetFetcher for a potentially high number of // targets) for multiple targets is needed. These can be manually wired together with a @@ -40,18 +78,16 @@ async fn main() { invariants: ConfigInvariants { language: "awesomelang".to_string(), tracer_version: "99.10.5".to_string(), - endpoint: Endpoint { - url: http::Uri::from_static("http://localhost:8126"), - api_key: None, - timeout_ms: 5000, // custom timeout, defaults to 3 seconds - test_token: None, - ..Default::default() - }, + endpoint, + hostname, + agentless_enabled, }, products: vec![ApmTracing], capabilities: vec![], }, - ); + ) + .await + .expect("Failed to create SingleChangesFetcher"); loop { match fetcher.fetch_changes().await { diff --git a/libdd-remote-config/roots/prod/config_root.json b/libdd-remote-config/roots/prod/config_root.json new file mode 100644 index 0000000000..d5a5eb7894 --- /dev/null +++ b/libdd-remote-config/roots/prod/config_root.json @@ -0,0 +1,63 @@ +{ + "signed": { + "_type": "root", + "spec_version": "1.0", + "version": 16, + "expires": "2026-10-31T17:00:00Z", + "keys": { + "620dacb7dc843acc731e4483c24ceb4121f4de5545f92d15dc2b13299b660e01": { + "keytype": "ed25519", + "scheme": "ed25519", + "keyid_hash_algorithms": ["sha256", "sha512"], + "keyval": { "public": "91d413c791907aae0be739d94a1e5e59c5d5ba65a8bbc1fb2153a5680f2d5958" 
} + }, + "e1fdd5827bb44defe9b87ed835c854be4b78a86ded013d1646bc416c1c89a9db": { + "keytype": "ed25519", + "scheme": "ed25519", + "keyid_hash_algorithms": ["sha256", "sha512"], + "keyval": { "public": "9323800f89d833ee263d3661c2616da89e405b92beeec334f21d54b5f60fbd85" } + } + }, + "roles": { + "root": { + "keyids": [ + "620dacb7dc843acc731e4483c24ceb4121f4de5545f92d15dc2b13299b660e01", + "e1fdd5827bb44defe9b87ed835c854be4b78a86ded013d1646bc416c1c89a9db" + ], + "threshold": 2 + }, + "snapshot": { + "keyids": [ + "620dacb7dc843acc731e4483c24ceb4121f4de5545f92d15dc2b13299b660e01", + "e1fdd5827bb44defe9b87ed835c854be4b78a86ded013d1646bc416c1c89a9db" + ], + "threshold": 2 + }, + "targets": { + "keyids": [ + "620dacb7dc843acc731e4483c24ceb4121f4de5545f92d15dc2b13299b660e01", + "e1fdd5827bb44defe9b87ed835c854be4b78a86ded013d1646bc416c1c89a9db" + ], + "threshold": 2 + }, + "timestamp": { + "keyids": [ + "620dacb7dc843acc731e4483c24ceb4121f4de5545f92d15dc2b13299b660e01", + "e1fdd5827bb44defe9b87ed835c854be4b78a86ded013d1646bc416c1c89a9db" + ], + "threshold": 2 + } + }, + "consistent_snapshot": true + }, + "signatures": [ + { + "keyid": "620dacb7dc843acc731e4483c24ceb4121f4de5545f92d15dc2b13299b660e01", + "sig": "a8b4ee59576c82bc1bc944df014bbeb90f5cba4ffbd8b7878461da2c934fd3bf93ac4c3b85a7936584da4a5a0cfe93b7150b559fc96423a98a70a11fc844f208" + }, + { + "keyid": "e1fdd5827bb44defe9b87ed835c854be4b78a86ded013d1646bc416c1c89a9db", + "sig": "3332e240a023dc267e87e210c7b46b9fa5772932d84936e3a7a5b5018b0f45fbf068ce60b97beb6e7e6c0c12a68d68a44461e590a934b577c71d4ff6dd94db09" + } + ] +} diff --git a/libdd-remote-config/roots/prod/director_root.json b/libdd-remote-config/roots/prod/director_root.json new file mode 100644 index 0000000000..f3882d62a0 --- /dev/null +++ b/libdd-remote-config/roots/prod/director_root.json @@ -0,0 +1,63 @@ +{ + "signed": { + "_type": "root", + "spec_version": "1.0", + "version": 15, + "expires": "2026-10-31T17:00:00Z", + "keys": { + 
"44d70fa8eae4c07f26c2767270827b6b9e11e7972926b3b419b5ea14ec32f796": { + "keytype": "ed25519", + "scheme": "ed25519", + "keyid_hash_algorithms": ["sha256", "sha512"], + "keyval": { "public": "286d6ae328365afec0f92519ceab68cd627e34072cde90b2f5d167badea970f2" } + }, + "b2b93a6dccc96d053e6db39181124c85ba4156d43503d4351b5500316fa084e8": { + "keytype": "ed25519", + "scheme": "ed25519", + "keyid_hash_algorithms": ["sha256", "sha512"], + "keyval": { "public": "afdd68be53815d67f8fa99cf101aac4589a358c660adf7dd4e179fe96834d3c9" } + } + }, + "roles": { + "root": { + "keyids": [ + "44d70fa8eae4c07f26c2767270827b6b9e11e7972926b3b419b5ea14ec32f796", + "b2b93a6dccc96d053e6db39181124c85ba4156d43503d4351b5500316fa084e8" + ], + "threshold": 2 + }, + "snapshot": { + "keyids": [ + "44d70fa8eae4c07f26c2767270827b6b9e11e7972926b3b419b5ea14ec32f796", + "b2b93a6dccc96d053e6db39181124c85ba4156d43503d4351b5500316fa084e8" + ], + "threshold": 2 + }, + "targets": { + "keyids": [ + "44d70fa8eae4c07f26c2767270827b6b9e11e7972926b3b419b5ea14ec32f796", + "b2b93a6dccc96d053e6db39181124c85ba4156d43503d4351b5500316fa084e8" + ], + "threshold": 2 + }, + "timestamp": { + "keyids": [ + "44d70fa8eae4c07f26c2767270827b6b9e11e7972926b3b419b5ea14ec32f796", + "b2b93a6dccc96d053e6db39181124c85ba4156d43503d4351b5500316fa084e8" + ], + "threshold": 2 + } + }, + "consistent_snapshot": true + }, + "signatures": [ + { + "keyid": "b2b93a6dccc96d053e6db39181124c85ba4156d43503d4351b5500316fa084e8", + "sig": "ccbe8cdd7dfb9a9d6b4bef8075a7aaf9baafe69a07100f22c04677a9737a23b24055ac3a0776c7021ae6a2fd175a251c0604164ea6705a0a896844766d2ecd07" + }, + { + "keyid": "44d70fa8eae4c07f26c2767270827b6b9e11e7972926b3b419b5ea14ec32f796", + "sig": "068a2e37e93688702e75ebb328b74cd8879832a63179ba1c54976aae4ee03a5e936c7b7274d4a6aa6755c27cfe800097984d94c83be901bde72103dccebcc008" + } + ] +} diff --git a/libdd-remote-config/src/agentless_client/mod.rs b/libdd-remote-config/src/agentless_client/mod.rs new file mode 100644 index 
0000000000..32b91af183 --- /dev/null +++ b/libdd-remote-config/src/agentless_client/mod.rs @@ -0,0 +1,766 @@ +// Copyright 2026-Present Datadog, Inc. https://www.datadoghq.com/ +// SPDX-License-Identifier: Apache-2.0 + +use std::{ + fmt, + future::Future, + time::{Duration, SystemTime, UNIX_EPOCH}, +}; + +use base64::Engine; +use futures::AsyncReadExt as _; +use hashbrown::{HashMap, HashSet}; +use http::{ + header, + uri::{Authority, PathAndQuery}, + Method, Request, Uri, +}; +use libdd_capabilities::{Bytes, HttpClientCapability}; +use libdd_common::Endpoint; +use libdd_trace_protobuf::remoteconfig; +use prost::Message; +use serde_json::Value; +use tracing::debug; +use tuf::repository::RepositoryStorage; +use tuf::{ + metadata::{ + Metadata, MetadataPath, MetadataVersion, RawSignedMetadata, TargetDescription, TargetPath, + }, + repository::RepositoryProvider as _, +}; + +#[allow(dead_code)] // used in tests and reserved for TUF config-repo init +const CONFIG_ROOT: &[u8] = include_bytes!("../../roots/prod/config_root.json"); +const CONFIG_ROOT_VERSION: u64 = 16; +const DIRECTOR_ROOT: &[u8] = include_bytes!("../../roots/prod/director_root.json"); +const DIRECTOR_ROOT_VERSION: u64 = 15; + +const FAKE_AGENT_VERSION: &'static str = "7.78.4"; + +type TUFRepo = tuf::repository::EphemeralRepository; +type TUFClient = tuf::client::Client; + +// Make a remote config API endpoint from and endpoint where `e.url` is the base dd site +// If the endpoint is not suitable (api key not set, not https), returns N +pub fn make_agentless_configs_endpoint(e: Endpoint) -> Option { + dbg!(&e); + if !(e.url.scheme_str().is_some_and(|s| s == "https") + && e.url.authority().is_some() + && e.api_key.is_some()) + { + return None; + } + + let mut parts = e.url.into_parts(); + parts.authority = + Some(Authority::try_from(format!("config.{}", parts.authority?.as_str())).ok()?); + parts.path_and_query = Some(PathAndQuery::from_static("/api/v0.1/configurations")); + + Some(Endpoint { + url: 
Uri::from_parts(parts).ok()?, + ..e + }) +} + +#[derive(Clone)] +pub struct AgentlessConfig { + pub hostname: String, +} + +struct CachedFile { + hash: Vec<(&'static tuf::crypto::HashAlgorithm, tuf::crypto::HashValue)>, + target_file: Vec, + version: u64, +} + +pub type NativeAgentlessFetcher = AgentlessFetcher; + +pub struct AgentlessFetcher { + http: C, + initialized: bool, + opaque_backend_state: Vec, + director_client: TUFClient, + config_client: TUFClient, + hostname: String, + products: HashSet, + refresh_interval: Duration, + endpoint: Endpoint, + // TODO: Not sure this is needed if the wrapper client already caches files? + target_cache: HashMap, +} + +pub struct ClientResponse<'a> { + pub root_version: u64, + pub target_version: u64, + pub opaque_backend_state: Vec, + pub targets: Vec<( + &'a str, + &'a [u8], + u64, + &'a Vec<(&'static tuf::crypto::HashAlgorithm, tuf::crypto::HashValue)>, + )>, +} + +struct BorrowedTarget<'a> { + pub path: &'a TargetPath, + pub desc: &'a TargetDescription, +} + +impl<'a> BorrowedTarget<'a> { + pub fn try_create(path: &'a TargetPath, desc: &'a TargetDescription) -> anyhow::Result { + if let Some(expiry) = desc.custom().get(CUSTOM_METADATA_EXPIRY_PATH) { + let expiry_ts = expiry + .as_u64() + .ok_or_else(|| anyhow::format_err!("expiry not a number"))?; + + if expiry_ts * 1000 <= now_unix_milli_ts() { + anyhow::bail!("expired target at path: {path}") + } + } + + Ok(Self { path, desc }) + } +} + +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct Target { + pub path: TargetPath, + pub desc: TargetDescription, +} + +const CUSTOM_METADATA_EXPIRY_PATH: &str = "expires"; + +impl Target { + /// Returns Ok(Target) when valid and unexpired; Err(Error) otherwise. 
+ pub fn try_create(path: &TargetPath, desc: &TargetDescription) -> anyhow::Result { + if let Some(expiry) = desc.custom().get(CUSTOM_METADATA_EXPIRY_PATH) { + let expiry_ts = expiry + .as_u64() + .ok_or_else(|| anyhow::format_err!("expiry not a number"))?; + + if expiry_ts * 1000 <= now_unix_milli_ts() { + anyhow::bail!("expired target at path: {path}") + } + } + + Ok(Self { + path: path.clone(), + desc: desc.clone(), + }) + } +} + +enum FetchTargetResult { + Cached, + New(CachedFile), +} + +impl AgentlessFetcher { + /// Create a new `RemoteConfig` client. + /// + /// # Errors + /// Returns an error if TUF root initialization fails. + pub async fn new(cfg: AgentlessConfig, endpoint: Endpoint) -> anyhow::Result { + Ok(Self { + endpoint, + http: C::new_client(), + director_client: TUFClient::with_trusted_root( + tuf::client::Config::default(), + &RawSignedMetadata::new(DIRECTOR_ROOT.to_vec()), + TUFRepo::new(), + TUFRepo::new(), + ) + .await?, + config_client: TUFClient::with_trusted_root( + tuf::client::Config::default(), + &RawSignedMetadata::new(CONFIG_ROOT.to_vec()), + TUFRepo::new(), + TUFRepo::new(), + ) + .await?, + hostname: cfg.hostname, + products: HashSet::new(), + target_cache: HashMap::new(), + + opaque_backend_state: Vec::new(), + refresh_interval: Duration::from_secs(5), + initialized: false, + }) + } + + /// Return the value of a particular target , checking both its length and + /// hashes against the metadata in the config repo. 
+ /// + /// If it is already in the cache, return `Cached` + async fn fetch_target(&self, target: &BorrowedTarget<'_>) -> anyhow::Result { + let expected_hashes = tuf::crypto::retain_supported_hashes(target.desc.hashes()); + if expected_hashes.is_empty() { + anyhow::bail!("no supported hash for path: {}", target.path); + } + let (target_hash_algo, target_hash) = &expected_hashes[0]; + let target_path = target.path; + + dbg!(target.desc.custom()); + let version = target + .desc + .custom() + .get("v") + .and_then(|v| v.as_u64()) + .unwrap_or(0); + + if let Some(item) = self.target_cache.get(target_path) { + if item + .hash + .iter() + .find(|(alg, _)| alg == target_hash_algo) + .is_some_and(|(_, h)| h == target_hash) + && item.target_file.len() as u64 == target.desc.length() + { + return Ok(FetchTargetResult::Cached); + } + } + + // Fetch from the content from the remote __Unverified__ repo + // This is fine as we are comparing the (hash + len) with a validated + // target + let mut read = self + .director_client + .remote_repo() + .fetch_target(target_path) + .await?; + let mut buf = Vec::new(); + read.read_to_end(&mut buf).await?; + + let expected_len = target.desc.length() as usize; + if buf.len() != expected_len { + anyhow::bail!("bad length for file at path: {}", target.path) + } + + { + let hash_algs = expected_hashes + .iter() + .map(|(alg, _val)| (*alg).clone()) + .collect::>(); + let actual_hashes = + tuf::crypto::calculate_hashes_from_slice(&buf, hash_algs.as_slice())?; + let expected: HashMap<_, _> = expected_hashes + .iter() + .map(|(alg, val)| (alg, val)) + .collect(); + + if !(actual_hashes.len() == expected.len() + && actual_hashes + .iter() + .all(|(k, v)| expected.get(&k).is_some_and(|e| *e == v))) + { + anyhow::bail!("hash did not match: {}", target.path) + } + } + + Ok(FetchTargetResult::New(CachedFile { + hash: expected_hashes, + target_file: buf, + version, + })) + } + + pub async fn fetch_config( + &mut self, + c: remoteconfig::Client, + ) -> 
anyhow::Result { + let ( + current_config_snapshot_version, + current_config_root_version, + current_director_root_version, + ) = if self.initialized { + ( + u64::from( + self.config_client + .database() + .trusted_snapshot() + .ok_or(anyhow::anyhow!("Missing snapshot data"))? + .version(), + ), + u64::from(self.config_client.database().trusted_root().version()), + u64::from(self.director_client.database().trusted_root().version()), + ) + } else { + (0, 0, 0) + }; + + let all_products = c.products.iter().fold(HashSet::new(), |mut acc, p| { + acc.get_or_insert_with(p, String::clone); + acc + }); + let new_products = all_products + .difference(&self.products) + .cloned() + .collect::>(); + let old_products = self + .products + .intersection(&all_products) + .cloned() + .collect::>(); + + let request = remoteconfig::LatestConfigsRequest { + hostname: self.hostname.clone(), + current_config_snapshot_version, + current_config_root_version, + current_director_root_version, + products: old_products, + new_products: new_products, + backend_client_state: self.opaque_backend_state.clone(), + active_clients: vec![c.clone()], + agent_version: FAKE_AGENT_VERSION.to_owned(), + has_error: false, + error: String::new(), + trace_agent_env: String::new(), + org_uuid: String::new(), + tags: vec![], + agent_uuid: String::new(), + }; + let response = self.get_latest_config(request).await?; + if !self.initialized { + self.initialized = true; + } + + self.apply(&response).await?; + self.products = all_products; + + // TODO: filter predicates ? + + Ok(ClientResponse { + root_version: u64::from(self.config_client.database().trusted_root().version()), + target_version: u64::from( + self.config_client + .database() + .trusted_targets() + .ok_or(anyhow::anyhow!("Missing target data"))? 
+ .version(), + ), + opaque_backend_state: self.opaque_backend_state.clone(), + targets: self + .target_cache + .iter() + .map(|(p, t)| (p.as_str(), t.target_file.as_slice(), t.version, &t.hash)) + .collect(), + }) + } + + /// Query the Remote Config org-status endpoint. + /// + /// # Errors + /// Returns an error if the HTTP request fails or the response cannot be decoded. + pub async fn get_org_status(&self) -> anyhow::Result { + let path = PathAndQuery::from_static("/api/v0.1/status"); + let res = self.send_request(Method::GET, path, Bytes::new()).await?; + parse_rc_response(res) + } + + pub async fn get_org_data(&self) -> anyhow::Result { + let path = PathAndQuery::from_static("/api/v0.1/org"); + let res = self.send_request(Method::GET, path, Bytes::new()).await?; + parse_rc_response(res) + } + + pub async fn init(&mut self) -> anyhow::Result<()> { + let response = self.initial_config().await?; + Ok(self.apply(&response).await?) + } + + /// Fetch the initial Remote Config payload for this tracer. + /// + /// # Errors + /// Returns an error if the HTTP request fails or the response cannot be decoded. + pub fn initial_config( + &self, + ) -> impl Future> + Send + use<'_, C> + { + let initial_request = remoteconfig::LatestConfigsRequest { + hostname: self.hostname.clone(), + current_config_snapshot_version: 0, + current_config_root_version: CONFIG_ROOT_VERSION, + current_director_root_version: DIRECTOR_ROOT_VERSION, + new_products: vec![], + active_clients: vec![], + backend_client_state: self.opaque_backend_state.clone(), + agent_version: FAKE_AGENT_VERSION.to_owned(), + products: vec![], + has_error: false, + error: String::new(), + trace_agent_env: String::new(), + tags: vec![], + agent_uuid: String::new(), + org_uuid: String::new(), + }; + self.get_latest_config(initial_request) + } + + /// Fetch the latest Remote Config for this client. + /// + /// # Errors + /// Returns an error if the HTTP request fails or the response cannot be decoded. 
+ #[allow(clippy::future_not_send)] + pub async fn get_latest_config( + &self, + req: remoteconfig::LatestConfigsRequest, + ) -> anyhow::Result { + dbg!(&req); + let path = PathAndQuery::from_static("/api/v0.1/configurations"); + let body = Bytes::from(req.encode_to_vec()); + let res = self.send_request(Method::POST, path, body).await?; + let res = parse_rc_response(res)?; + dbg!(debug_latest_configs_response(&res)); + Ok(res) + } + + #[allow(clippy::future_not_send)] + async fn send_request( + &self, + method: Method, + path: PathAndQuery, + body: Bytes, + ) -> anyhow::Result> { + let req = self + .endpoint + .set_standard_headers( + Request::builder(), + concat!("Libdatadog/", env!("CARGO_PKG_VERSION")), + ) + .header(header::CONTENT_TYPE, "application/x-protobuf") + .uri(url_with_path(self.endpoint.url.clone(), path)?) + .method(method) + .body(body)?; + Ok(self.http.request(req).await?) + } + + async fn apply( + &mut self, + response: &remoteconfig::LatestConfigsResponse, + ) -> anyhow::Result<()> { + // At a high level, what we're doing here is populating the "remote" repos with the metadata + // that we received from upstream (which does not validate it), and then using the clients' + // `update` methods to synchronize that metadata to the "local" repos, during which + // validation is performed. 
+ + let root_path = MetadataPath::root(); + let timestamp_path = MetadataPath::timestamp(); + let snapshot_path = MetadataPath::snapshot(); + let targets_path = MetadataPath::targets(); + + let repo = self.director_client.remote_repo_mut(); + *repo = TUFRepo::new(); + for target_file in &response.target_files { + // let trimmed_path = trim_hash_target_path(&target_file.path)?; + // let trimmed_target_path = TargetPath::new(&trimmed_path)?; + repo.store_target( + &TargetPath::new(&target_file.path)?, + &mut target_file.raw.as_slice(), + ) + .await?; + } + + let config_repo_mut = self.config_client.remote_repo_mut(); + *config_repo_mut = TUFRepo::new(); + let Some(metas) = response.config_metas.as_ref() else { + anyhow::bail!("missing config meta from LatestConfigsResponse") + }; + + store(config_repo_mut, &root_path, &metas.roots).await?; + store_noversion(config_repo_mut, ×tamp_path, &metas.timestamp).await?; + store(config_repo_mut, &snapshot_path, &metas.snapshot).await?; + store(config_repo_mut, &targets_path, &metas.top_targets).await?; + // TODO: We do not store the delegated targets metadata + // This will need to be revisited in order to support proper Uptane + // verification of the full configuration data. + // store(repo, &targets_path, &metas.delegated_targets).await?; + + let director_remote_repo = self.director_client.remote_repo_mut(); + let Some(metas) = response.director_metas.as_ref() else { + anyhow::bail!("missing director meta from LatestConfigsResponse") + }; + + store(director_remote_repo, &root_path, &metas.roots).await?; + store_noversion(director_remote_repo, ×tamp_path, &metas.timestamp).await?; + store(director_remote_repo, &snapshot_path, &metas.snapshot).await?; + store(director_remote_repo, &targets_path, &metas.targets).await?; + + self.config_client.update().await?; + self.director_client.update().await?; + + let mut new_target_path_set = HashSet::new(); + for target in trusted_targets(&self.director_client)? 
{ + new_target_path_set.insert(target.path); + match self.fetch_target(&target).await? { + FetchTargetResult::Cached => {} + FetchTargetResult::New(cached_target) => { + self.target_cache.insert(target.path.clone(), cached_target); + } + } + } + self.target_cache + .retain(|key, _| new_target_path_set.contains(key)); + + // The Remote Config service uses a `custom` field at the top-level of the targets metadata + // to store this field which we are supposed to echo back to the server. That `custom` field + // is not explicitly part of the TUF spec, which is why we need to pull it out of the + // `additional_fields` catch-all here. + if let Some((opaque_backend_state, refresh_interval)) = + get_director_custom(&self.director_client) + { + if let Some(opaque_backend_state) = opaque_backend_state { + self.opaque_backend_state = opaque_backend_state; + } + if let Some(refresh_interval) = refresh_interval { + self.refresh_interval = refresh_interval; + } + } + + Ok(()) + } +} + +fn get_director_custom(director_client: &TUFClient) -> Option<(Option>, Option)> { + let custom = director_client + .database() + .trusted_targets()? + .additional_fields() + .get("custom")?; + + Some(( + custom + .get("opaque_backend_state") + .and_then(Value::as_str) + .and_then(|s| base64::engine::general_purpose::STANDARD.decode(s).ok()), + custom + .get("agent_refresh_interval") + .and_then(Value::as_u64) + .map(Duration::from_secs), + )) +} + +fn url_with_path(base: http::Uri, path: PathAndQuery) -> anyhow::Result { + let mut parts = base.into_parts(); + parts.path_and_query = Some(path); + Ok(http::Uri::from_parts(parts)?) +} + +fn parse_rc_response( + response: http::Response, +) -> anyhow::Result { + let status = response.status().as_u16(); + let body = response.into_body(); + if !(200..300).contains(&status) { + anyhow::bail!( + "Non 2XX status code: {}\n{}", + status, + String::from_utf8_lossy(&body) + ) + } + + Ok(T::decode(body)?) 
+} + +fn now_unix_milli_ts() -> u64 { + u64::try_from( + SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap_or(Duration::ZERO) + .as_millis(), + ) + .unwrap_or(u64::MAX) +} + +/// Return the available, unexpired target paths and their descriptions based on the current metadata. +fn trusted_targets( + director_client: &TUFClient, +) -> anyhow::Result> + '_> { + Ok(director_client + .database() + .trusted_targets() + .ok_or_else(|| anyhow::format_err!("missing targets from TUF director client"))? + .targets() + .iter() + .filter_map(|(path, desc)| { + BorrowedTarget::try_create(path, desc) + .inspect_err(|e| { + debug!(%path, "Skipping target: error {}", e); + }) + .ok() + })) +} + +async fn store<'a, T>(repo: &mut TUFRepo, path: &MetadataPath, tms: T) -> anyhow::Result<()> +where + T: IntoIterator + 'a, +{ + for tm in tms { + repo.store_metadata( + path, + MetadataVersion::Number(tm.version as u32), + &mut tm.raw.as_slice(), + ) + .await?; + } + Ok(()) +} + +async fn store_noversion( + repo: &mut TUFRepo, + path: &MetadataPath, + tms: &Option, +) -> anyhow::Result<()> { + if let Some(tm) = tms { + repo.store_metadata(path, MetadataVersion::None, &mut tm.raw.as_slice()) + .await?; + } + Ok(()) +} + +// ── Debug helpers: render `raw: Vec` fields as JSON ──────────────────── + +struct RawJson<'a>(&'a [u8]); + +impl fmt::Debug for RawJson<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let RawJson(bytes) = self; + match serde_json::from_slice::(bytes) { + Ok(v) => write!(f, "{v:#}"), + Err(_) => write!(f, "<{} non-JSON bytes>", bytes.len()), + } + } +} + +struct DebugTopMeta<'a>(&'a remoteconfig::TopMeta); + +impl fmt::Debug for DebugTopMeta<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let remoteconfig::TopMeta { version, raw } = self.0; + f.debug_struct("TopMeta") + .field("version", version) + .field("raw", &RawJson(raw)) + .finish() + } +} + +struct DebugDelegatedMeta<'a>(&'a remoteconfig::DelegatedMeta); + +impl 
fmt::Debug for DebugDelegatedMeta<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let remoteconfig::DelegatedMeta { version, role, raw } = self.0; + f.debug_struct("DelegatedMeta") + .field("version", version) + .field("role", role) + .field("raw", &RawJson(raw)) + .finish() + } +} + +struct DebugFile<'a>(&'a remoteconfig::File); + +impl fmt::Debug for DebugFile<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let remoteconfig::File { path, raw } = self.0; + f.debug_struct("File") + .field("path", path) + .field("raw", &RawJson(raw)) + .finish() + } +} + +struct DebugConfigMetas<'a>(&'a remoteconfig::ConfigMetas); + +impl fmt::Debug for DebugConfigMetas<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let remoteconfig::ConfigMetas { + roots, + timestamp, + snapshot, + top_targets, + delegated_targets, + } = self.0; + f.debug_struct("ConfigMetas") + .field("roots", &roots.iter().map(DebugTopMeta).collect::>()) + .field("timestamp", ×tamp.as_ref().map(DebugTopMeta)) + .field("snapshot", &snapshot.as_ref().map(DebugTopMeta)) + .field("top_targets", &top_targets.as_ref().map(DebugTopMeta)) + .field( + "delegated_targets", + &delegated_targets + .iter() + .map(DebugDelegatedMeta) + .collect::>(), + ) + .finish() + } +} + +struct DebugDirectorMetas<'a>(&'a remoteconfig::DirectorMetas); + +impl fmt::Debug for DebugDirectorMetas<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let remoteconfig::DirectorMetas { + roots, + timestamp, + snapshot, + targets, + } = &self.0; + f.debug_struct("DirectorMetas") + .field("roots", &roots.iter().map(DebugTopMeta).collect::>()) + .field("timestamp", ×tamp.as_ref().map(DebugTopMeta)) + .field("snapshot", &snapshot.as_ref().map(DebugTopMeta)) + .field("targets", &targets.as_ref().map(DebugTopMeta)) + .finish() + } +} + +struct DebugLatestConfigsResponse<'a>(&'a remoteconfig::LatestConfigsResponse); + +impl fmt::Debug for DebugLatestConfigsResponse<'_> { + fn 
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let remoteconfig::LatestConfigsResponse { + config_metas, + director_metas, + target_files, + } = &self.0; + f.debug_struct("LatestConfigsResponse") + .field("config_metas", &config_metas.as_ref().map(DebugConfigMetas)) + .field( + "director_metas", + &director_metas.as_ref().map(DebugDirectorMetas), + ) + .field( + "target_files", + &target_files.iter().map(DebugFile).collect::>(), + ) + .finish() + } +} + +/// Returns a value that implements [`fmt::Debug`] for [`remoteconfig::LatestConfigsResponse`], +/// rendering every `raw` byte field as a parsed JSON value instead of a raw byte array. +/// +/// Use with the standard formatting machinery: +/// +/// ```rust,ignore +/// println!("{:#?}", debug_latest_configs_response(&response)); +/// ``` +pub fn debug_latest_configs_response( + resp: &remoteconfig::LatestConfigsResponse, +) -> impl fmt::Debug + '_ { + DebugLatestConfigsResponse(resp) +} + +#[cfg(test)] +mod tests { + use super::{CONFIG_ROOT, CONFIG_ROOT_VERSION, DIRECTOR_ROOT, DIRECTOR_ROOT_VERSION}; + + #[test] + fn test_root_version_match() { + let config_root: serde_json::Value = serde_json::from_slice(CONFIG_ROOT).unwrap(); + assert_eq!(config_root["signed"]["version"], CONFIG_ROOT_VERSION); + + let director_root: serde_json::Value = serde_json::from_slice(DIRECTOR_ROOT).unwrap(); + assert_eq!(director_root["signed"]["version"], DIRECTOR_ROOT_VERSION); + } +} diff --git a/libdd-remote-config/src/fetch/fetcher.rs b/libdd-remote-config/src/fetch/fetcher.rs index ffa55def0e..78c9260fed 100644 --- a/libdd-remote-config/src/fetch/fetcher.rs +++ b/libdd-remote-config/src/fetch/fetcher.rs @@ -1,6 +1,9 @@ // Copyright 2021-Present Datadog, Inc. 
https://www.datadoghq.com/ // SPDX-License-Identifier: Apache-2.0 +use crate::agentless_client::{ + make_agentless_configs_endpoint, AgentlessConfig, AgentlessFetcher, NativeAgentlessFetcher, +}; use crate::targets::{Root, TargetsList}; use crate::{RemoteConfigCapabilities, RemoteConfigPath, RemoteConfigProduct, Target}; use base64::Engine; @@ -50,7 +53,9 @@ pub trait FileStorage { pub struct ConfigInvariants { pub language: String, pub tracer_version: String, + pub hostname: String, pub endpoint: Endpoint, + pub agentless_enabled: bool, } struct StoredTargetFile { @@ -156,7 +161,15 @@ impl ConfigFetcherState { pub fn new(invariants: ConfigInvariants) -> Self { ConfigFetcherState { target_files_by_path: Default::default(), - endpoint: get_agent_configs_endpoint(&invariants.endpoint), + endpoint: if invariants.agentless_enabled { + if let Some(e) = make_agentless_configs_endpoint(invariants.endpoint.clone()) { + e + } else { + make_agent_configs_endpoint(&invariants.endpoint) + } + } else { + make_agent_configs_endpoint(&invariants.endpoint) + }, invariants, expire_unused_files: true, } @@ -201,9 +214,15 @@ impl ConfigFetcherState { } } +enum FetcherMode { + Agent, + Agentless(NativeAgentlessFetcher), +} + pub struct ConfigFetcher { pub file_storage: S, state: Arc>, + mode: FetcherMode, } pub struct ConfigClientState { @@ -237,11 +256,29 @@ impl ConfigClientState { } impl ConfigFetcher { - pub fn new(file_storage: S, state: Arc>) -> Self { - ConfigFetcher { + pub async fn new( + file_storage: S, + state: Arc>, + ) -> anyhow::Result { + let mode: FetcherMode = if dbg!(state.invariants.agentless_enabled) { + FetcherMode::Agentless( + AgentlessFetcher::new( + AgentlessConfig { + hostname: state.invariants.hostname.clone(), + }, + state.endpoint.clone(), + ) + .await?, + ) + } else { + FetcherMode::Agent + }; + + Ok(ConfigFetcher { file_storage, state, - } + mode, + }) } /// Sets the apply state on a stored file. 
@@ -317,39 +354,20 @@ impl ConfigFetcher { client_agent: None, last_seen: 0, capabilities: product_capabilities.encoded_capabilities.clone(), + is_updater: false, + client_updater: None, }), cached_target_files, } } - /// Quite generic fetching implementation: - /// - runs a request against the Remote Config Server, - /// - validates the data, - /// - removes unused files - /// - checks if the files are already known, - /// - stores new files, - /// - returns all currently active files. - /// - /// It also makes sure that old files are dropped before new files are inserted. - /// - /// Returns None if nothing changed. Otherwise Some(active configs). - pub async fn fetch_once( + async fn fetch_agent( &mut self, - runtime_id: &str, + config_req: ClientGetConfigsRequest, target: &Target, - product_capabilities: &ConfigProductCapabilities, - client_id: &str, client_state: &mut ConfigClientState, ) -> anyhow::Result>>> { - let config_req = self.build_config_request( - runtime_id, - target, - product_capabilities, - client_id, - &*client_state, - ); trace!("Submitting remote config request: {config_req:?}"); - let req = self .state .endpoint @@ -555,9 +573,164 @@ impl ConfigFetcher { client_state.last_config_paths = config_paths; Ok(Some(configs)) } + + /// Quite generic fetching implementation: + /// - runs a request against the Remote Config Server, + /// - validates the data, + /// - removes unused files + /// - checks if the files are already known, + /// - stores new files, + /// - returns all currently active files. + /// + /// It also makes sure that old files are dropped before new files are inserted. + /// + /// Returns None if nothing changed. Otherwise Some(active configs). 
+ pub async fn fetch_once( + &mut self, + runtime_id: &str, + target: &Target, + product_capabilities: &ConfigProductCapabilities, + client_id: &str, + client_state: &mut ConfigClientState, + ) -> anyhow::Result>>> { + let config_req = self.build_config_request( + runtime_id, + target, + product_capabilities, + client_id, + &*client_state, + ); + match &mut self.mode { + FetcherMode::Agent => self.fetch_agent(config_req, target, client_state).await, + FetcherMode::Agentless(agentless_fetcher) => { + let res = agentless_fetcher + .fetch_config(config_req.client.unwrap()) + .await?; + + client_state.root_version = res.root_version; + client_state.targets_version = res.target_version; + client_state.opaque_backend_state = res.opaque_backend_state; + client_state.last_error = None; + + let mut target_files = self.state.target_files_by_path.lock_or_panic(); + let mut config_paths = HashSet::new(); + for &(path, _, _, _) in &res.targets { + match RemoteConfigPath::try_parse(path) { + Ok(parsed) => { + config_paths.insert(parsed.into()); + } + Err(e) => warn!("Failed parsing remote config path: {path} - {e:?}"), + } + } + + if self.state.expire_unused_files { + target_files.retain(|k, _| config_paths.contains(k.as_ref())); + } + + for (path, target_file, version, hashes) in res.targets { + let parsed_path = match RemoteConfigPath::try_parse(path) { + Ok(parsed_path) => parsed_path, + Err(e) => { + warn!("Failed parsing remote config path: {path} - {e:?}"); + continue; + } + }; + let Some((_, hash)) = hashes + .iter() + .find(|(h, _)| *h == &tuf::crypto::HashAlgorithm::Sha256) + .or_else(|| { + hashes + .iter() + .find(|(h, _)| *h == &tuf::crypto::HashAlgorithm::Sha512) + }) + else { + // todo no supported hash algorithm? + continue; + }; + let hash = hash.to_string(); + + let handle = if let Some(StoredTargetFile { + hash: old_hash, + handle, + .. 
+ }) = target_files.get(&parsed_path) + { + if old_hash == &hash { + continue; + } + Some(handle.clone()) + } else { + None + }; + + let parsed_path: Arc = Arc::new(parsed_path.into()); + target_files.insert( + parsed_path.clone(), + StoredTargetFile { + hash, + state: ConfigState { + id: parsed_path.config_id.to_string(), + version, + product: parsed_path.product.to_string(), + apply_state: 2, // Acknowledged + apply_error: "".to_string(), + }, + meta: TargetFileMeta { + path: path.to_string(), + length: target_file.len() as i64, + hashes: hashes + .iter() + .map(|(algorithm, hash)| { + Ok(TargetFileHash { + algorithm: match algorithm { + tuf::crypto::HashAlgorithm::Sha256 => { + "sha256".to_string() + } + tuf::crypto::HashAlgorithm::Sha512 => { + "sha512".to_string() + } + tuf::crypto::HashAlgorithm::Unknown(u) => u.clone(), + _ => anyhow::bail!("unhandled has algorithm"), + }, + hash: hash.to_string(), + }) + }) + .collect::>()?, + }, + handle: if let Some(handle) = handle { + self.file_storage + .update(&handle, version, target_file.to_vec())?; + handle + } else { + self.file_storage.store( + version, + parsed_path, + target_file.to_vec(), + )? + }, + expiring: false, + }, + ); + } + let mut configs = Vec::with_capacity(config_paths.len()); + for config in config_paths.iter() { + if let Some(target_file) = target_files.get_mut(config) { + target_file.expiring = false; + configs.push(target_file.handle.clone()); + } else { + anyhow::bail!( + "Found {config} in client_configs response, but it isn't stored." 
+ ); + } + } + client_state.last_config_paths = config_paths; + Ok(Some(configs)) + } + } + } } -fn get_agent_configs_endpoint(endpoint: &Endpoint) -> Endpoint { +fn make_agent_configs_endpoint(endpoint: &Endpoint) -> Endpoint { let mut parts = endpoint.url.clone().into_parts(); parts.path_and_query = Some(PathAndQuery::from_static("/v0.7/config")); #[allow(clippy::unwrap_used)] @@ -689,7 +862,9 @@ pub mod tests { let mut fetcher = ConfigFetcher::new( storage.clone(), Arc::new(ConfigFetcherState::new(server.dummy_options().invariants)), - ); + ) + .await + .unwrap(); let mut opaque_state = ConfigClientState::default(); let mut response = http_common::empty_response(Response::builder()).unwrap(); @@ -727,6 +902,8 @@ pub mod tests { language: "php".to_string(), tracer_version: "1.2.3".to_string(), endpoint: server.endpoint.clone(), + hostname: "host".to_string(), + agentless_enabled: false, }; let product_capabilities = ConfigProductCapabilities::new( vec![ @@ -739,7 +916,9 @@ pub mod tests { let mut fetcher = ConfigFetcher::new( storage.clone(), Arc::new(ConfigFetcherState::new(invariants)), - ); + ) + .await + .unwrap(); let mut opaque_state = ConfigClientState::default(); { @@ -923,7 +1102,9 @@ pub mod tests { let mut fetcher = ConfigFetcher::new( storage, Arc::new(ConfigFetcherState::new(server.dummy_options().invariants)), - ); + ) + .await + .unwrap(); let mut opaque_state = ConfigClientState::default(); // Default: nothing set, agent receives an empty list. 
@@ -1021,7 +1202,9 @@ pub mod tests { let mut fetcher = ConfigFetcher::new( storage, Arc::new(ConfigFetcherState::new(server.dummy_options().invariants)), - ); + ) + .await + .unwrap(); let mut opaque_state = ConfigClientState::default(); let fetched = fetcher diff --git a/libdd-remote-config/src/fetch/shared.rs b/libdd-remote-config/src/fetch/shared.rs index baf70b2c46..f618b992f8 100644 --- a/libdd-remote-config/src/fetch/shared.rs +++ b/libdd-remote-config/src/fetch/shared.rs @@ -275,7 +275,13 @@ impl SharedFetcher { S::StoredFile: RefcountedFile, { let state = storage.state.clone(); - let mut fetcher = ConfigFetcher::new(storage, state); + let mut fetcher = match ConfigFetcher::new(storage, state).await { + Ok(f) => f, + Err(e) => { + error!("failed to create the fetcher{:?}", e); + return; + } + }; let mut opaque_state = ConfigClientState::default(); diff --git a/libdd-remote-config/src/fetch/single.rs b/libdd-remote-config/src/fetch/single.rs index 3d48c69183..f5baf16908 100644 --- a/libdd-remote-config/src/fetch/single.rs +++ b/libdd-remote-config/src/fetch/single.rs @@ -16,7 +16,7 @@ pub struct SingleFetcher { product_capabilities: ConfigProductCapabilities, runtime_id: String, client_id: String, - opaque_state: ConfigClientState, + client_state: ConfigClientState, } #[derive(Clone, Debug)] @@ -27,12 +27,18 @@ pub struct ConfigOptions { } impl SingleFetcher { - pub fn new(sink: S, target: Target, runtime_id: String, options: ConfigOptions) -> Self { - SingleFetcher { + pub async fn new( + sink: S, + target: Target, + runtime_id: String, + options: ConfigOptions, + ) -> anyhow::Result { + Ok(SingleFetcher { fetcher: ConfigFetcher::new( sink, Arc::new(ConfigFetcherState::new(options.invariants)), - ), + ) + .await?, target: Arc::new(target), product_capabilities: ConfigProductCapabilities::new( options.products, @@ -40,8 +46,8 @@ impl SingleFetcher { ), runtime_id, client_id: uuid::Uuid::new_v4().to_string(), - opaque_state: ConfigClientState::default(), - } 
+ client_state: ConfigClientState::default(), + }) } pub fn with_client_id(mut self, client_id: String) -> Self { @@ -57,7 +63,7 @@ impl SingleFetcher { &self.target, &self.product_capabilities, self.client_id.as_str(), - &mut self.opaque_state, + &mut self.client_state, ) .await } @@ -75,7 +81,7 @@ impl SingleFetcher { /// Sent to the agent on each subsequent poll so it can route configs targeting those /// services to this client. Replace-semantics: the new vec fully overrides the previous one. pub fn set_extra_services(&mut self, services: Vec) { - self.opaque_state.set_extra_services(services); + self.client_state.set_extra_services(services); } } @@ -91,11 +97,16 @@ impl SingleChangesFetcher where S::StoredFile: FilePath, { - pub fn new(sink: S, target: Target, runtime_id: String, options: ConfigOptions) -> Self { - SingleChangesFetcher { + pub async fn new( + sink: S, + target: Target, + runtime_id: String, + options: ConfigOptions, + ) -> anyhow::Result { + Ok(SingleChangesFetcher { changes: ChangeTracker::default(), - fetcher: SingleFetcher::new(sink, target, runtime_id, options), - } + fetcher: SingleFetcher::new(sink, target, runtime_id, options).await?, + }) } pub fn with_client_id(mut self, client_id: String) -> Self { diff --git a/libdd-remote-config/src/fetch/test_server.rs b/libdd-remote-config/src/fetch/test_server.rs index 1ed16ad0f3..ad25972b51 100644 --- a/libdd-remote-config/src/fetch/test_server.rs +++ b/libdd-remote-config/src/fetch/test_server.rs @@ -155,6 +155,7 @@ impl RemoteConfigServer { }) .collect(), client_configs: applied_files.keys().map(|k| k.to_string()).collect(), + config_status: 0, }; Response::new(http_common::Body::from( serde_json::to_vec(&response).unwrap(), @@ -215,6 +216,8 @@ impl RemoteConfigServer { language: "php".to_string(), tracer_version: "1.2.3".to_string(), endpoint: self.endpoint.clone(), + hostname: "localhost".to_string(), + agentless_enabled: false, }, products: vec![ RemoteConfigProduct::ApmTracing, diff 
--git a/libdd-remote-config/src/lib.rs b/libdd-remote-config/src/lib.rs index 462362d160..01ca7c4fc7 100644 --- a/libdd-remote-config/src/lib.rs +++ b/libdd-remote-config/src/lib.rs @@ -7,6 +7,8 @@ #![cfg_attr(not(test), deny(clippy::todo))] #![cfg_attr(not(test), deny(clippy::unimplemented))] +pub mod agentless_client; + pub mod config; #[cfg(feature = "client")] pub mod fetch; diff --git a/libdd-trace-protobuf/build.rs b/libdd-trace-protobuf/build.rs index c9c891a681..06714040f1 100644 --- a/libdd-trace-protobuf/build.rs +++ b/libdd-trace-protobuf/build.rs @@ -308,6 +308,12 @@ fn generate_protobuf() { "#[serde(default)]", ); + config.type_attribute("ClientUpdater", "#[derive(Deserialize, Serialize)]"); + config.type_attribute("PackageState", "#[derive(Deserialize, Serialize)]"); + config.type_attribute("PackageStateTask", "#[derive(Deserialize, Serialize)]"); + config.type_attribute("TaskError", "#[derive(Deserialize, Serialize)]"); + + config.include_file("_includes.rs"); config diff --git a/libdd-trace-protobuf/src/pb/remoteconfig.proto b/libdd-trace-protobuf/src/pb/remoteconfig.proto index 606bc851e7..cae7be854e 100644 --- a/libdd-trace-protobuf/src/pb/remoteconfig.proto +++ b/libdd-trace-protobuf/src/pb/remoteconfig.proto @@ -6,11 +6,73 @@ option go_package = "pkg/proto/pbgo/core"; // golang // Backend definitions +message ConfigMetas { + repeated TopMeta roots = 1; + TopMeta timestamp = 2; + TopMeta snapshot = 3; + TopMeta topTargets = 4; + repeated DelegatedMeta delegatedTargets = 5; +} + +message DirectorMetas { + repeated TopMeta roots = 1; + TopMeta timestamp = 2; + TopMeta snapshot = 3; + TopMeta targets = 4; +} + +message DelegatedMeta { + uint64 version = 1; + string role = 2; + bytes raw = 3; +} + +message TopMeta { + uint64 version = 1; + bytes raw = 2; +} + message File { string path = 1; bytes raw = 2; } +// Backend queries + +message LatestConfigsRequest { + string hostname = 1; + string agentVersion = 2; + // timestamp and snapshot versions move 
in tandem so they are the same. + uint64 current_config_snapshot_version = 3; + uint64 current_config_root_version = 9; + uint64 current_director_root_version = 8; + repeated string products = 4; + repeated string new_products = 5; + repeated Client active_clients = 6; + bytes backend_client_state = 10; + bool has_error = 11; + string error = 12; + string trace_agent_env = 13; + string org_uuid = 14; + repeated string tags = 15; + string agent_uuid = 16; +} + +message LatestConfigsResponse { + ConfigMetas config_metas = 1; + DirectorMetas director_metas = 2; + repeated File target_files = 3; +} + +message OrgDataResponse { + string uuid = 1; +} + +message OrgStatusResponse { + bool enabled = 1; + bool authorized = 2; +} + // Client definitions message Client { @@ -24,6 +86,9 @@ message Client { ClientAgent client_agent = 9; uint64 last_seen = 10; bytes capabilities = 11; + reserved 12, 13; + bool is_updater = 14; + ClientUpdater client_updater = 15; } message ClientTracer { @@ -47,6 +112,46 @@ message ClientAgent { repeated string cws_workloads = 5; } +message ClientUpdater { + repeated string tags = 1; + repeated PackageState packages = 2; + uint64 available_disk_space = 3; + string secrets_pub_key = 4; +} + +message PackageState { + string package = 1; + string stable_version = 2; + string experiment_version = 3; + PackageStateTask task = 4; + reserved 5, 6, 7, 8, 9, 10; + string stable_config_version = 11; + string experiment_config_version = 12; + string running_version = 13; + string running_config_version = 14; + uint64 heartbeat_timestamp = 15; + float completion = 16; +} + +message PackageStateTask { + string id = 1; + TaskState state = 2; + TaskError error = 3; +} + +enum TaskState { + IDLE = 0; + RUNNING = 1; + DONE = 2; + INVALID_STATE = 3; + ERROR = 4; +} + +message TaskError { + uint64 code = 1; + string message = 2; +} + message ConfigState { string id = 1; uint64 version = 2; @@ -78,14 +183,119 @@ message TargetFileMeta { repeated TargetFileHash 
hashes = 3; } +// ConfigSubscriptionProducts is used to targets specific products for tracking +// with a ConfigSubscriptionRequest. +enum ConfigSubscriptionProducts { + INVALID = 0; + + // LIVE_DEBUGGING corresponds to the LIVE_DEBUGING and LIVE_DEBUGGING_SYMBOLDB + // products. + LIVE_DEBUGGING = 1; +} + +// ConfigSubscriptionRequest is used to manage the state of the stream created +// using CreateConfigSubscription. +message ConfigSubscriptionRequest { + + enum Action { + INVALID = 0; + TRACK = 1; + UNTRACK = 2; + } + + + // RuntimeID of the client to track or untrack. + string runtime_id = 1; + + // Action indicates the action to take for the client with the given + // runtime_id. + Action action = 2; + + // If action is TRACK, products indicates the set of products for which the + // client is interested in receiving updates. + ConfigSubscriptionProducts products = 3; +} + +// ConfigSubscriptionResponse is streamed from CreateConfigSubscription with +// updates for matching clients and products. +message ConfigSubscriptionResponse { + + // Client is the client that was tracked or untracked. + Client client = 1; + + // Matched configs are all configs that were matched for the client given + // the subscription request. + // + // If a previously reported config is no longer matched, it will not be + // included in the response. + repeated string matched_configs = 2; + + // Target files are the target files that needs to be sent to the client. 
+ repeated File target_files = 3; +} + message ClientGetConfigsRequest { Client client = 1; repeated TargetFileMeta cached_target_files = 2; } +enum ConfigStatus { + CONFIG_STATUS_OK = 0; + CONFIG_STATUS_EXPIRED = 1; +} + message ClientGetConfigsResponse { repeated bytes roots = 1; bytes targets = 2; repeated File target_files = 3; repeated string client_configs = 4; + ConfigStatus config_status = 5; +} + +// Full state + +message FileMetaState { + uint64 version = 1; + string hash = 2; +} + +message GetStateConfigResponse { + map config_state = 1; + map director_state = 2; + map target_filenames = 3; + repeated Client active_clients = 4; + repeated ConfigSubscriptionState config_subscription_states = 5; +} + +// ConfigSubscriptionState describes the state of a config subscription. +message ConfigSubscriptionState { + message TrackedClient { + string runtime_id = 1; + bool seen_any = 2; + ConfigSubscriptionProducts products = 3; + } + + // SubscriptionID is a process-unique identifier for the subscription. + uint64 subscription_id = 1; + + // TrackedClients is the list of clients that are currently tracked by the + // subscription. + repeated TrackedClient tracked_clients = 2; +} + +message ResetStateConfigResponse {} + + +message TracerPredicateV1 { + string clientID = 1; + string service = 2; + string environment = 3; + string appVersion = 4; + string tracerVersion = 5; + string language = 6; + string runtimeID = 7; +} + +message TracerPredicates { + repeated TracerPredicateV1 tracer_predicates_v1 = 1; } diff --git a/libdd-trace-protobuf/src/remoteconfig.rs b/libdd-trace-protobuf/src/remoteconfig.rs index 9e0a62494a..894da87077 100644 --- a/libdd-trace-protobuf/src/remoteconfig.rs +++ b/libdd-trace-protobuf/src/remoteconfig.rs @@ -3,6 +3,46 @@ use serde::{Deserialize, Serialize}; // This file is @generated by prost-build. 
+#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ConfigMetas { + #[prost(message, repeated, tag = "1")] + pub roots: ::prost::alloc::vec::Vec, + #[prost(message, optional, tag = "2")] + pub timestamp: ::core::option::Option, + #[prost(message, optional, tag = "3")] + pub snapshot: ::core::option::Option, + #[prost(message, optional, tag = "4")] + pub top_targets: ::core::option::Option, + #[prost(message, repeated, tag = "5")] + pub delegated_targets: ::prost::alloc::vec::Vec, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct DirectorMetas { + #[prost(message, repeated, tag = "1")] + pub roots: ::prost::alloc::vec::Vec, + #[prost(message, optional, tag = "2")] + pub timestamp: ::core::option::Option, + #[prost(message, optional, tag = "3")] + pub snapshot: ::core::option::Option, + #[prost(message, optional, tag = "4")] + pub targets: ::core::option::Option, +} +#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] +pub struct DelegatedMeta { + #[prost(uint64, tag = "1")] + pub version: u64, + #[prost(string, tag = "2")] + pub role: ::prost::alloc::string::String, + #[prost(bytes = "vec", tag = "3")] + pub raw: ::prost::alloc::vec::Vec, +} +#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] +pub struct TopMeta { + #[prost(uint64, tag = "1")] + pub version: u64, + #[prost(bytes = "vec", tag = "2")] + pub raw: ::prost::alloc::vec::Vec, +} #[derive(Deserialize, Serialize)] #[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] pub struct File { @@ -12,6 +52,61 @@ pub struct File { #[serde(with = "serde_bytes")] pub raw: ::prost::alloc::vec::Vec, } +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct LatestConfigsRequest { + #[prost(string, tag = "1")] + pub hostname: ::prost::alloc::string::String, + #[prost(string, tag = "2")] + pub agent_version: ::prost::alloc::string::String, + /// timestamp and snapshot versions move in tandem so they are the same. 
+ #[prost(uint64, tag = "3")] + pub current_config_snapshot_version: u64, + #[prost(uint64, tag = "9")] + pub current_config_root_version: u64, + #[prost(uint64, tag = "8")] + pub current_director_root_version: u64, + #[prost(string, repeated, tag = "4")] + pub products: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, + #[prost(string, repeated, tag = "5")] + pub new_products: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, + #[prost(message, repeated, tag = "6")] + pub active_clients: ::prost::alloc::vec::Vec, + #[prost(bytes = "vec", tag = "10")] + pub backend_client_state: ::prost::alloc::vec::Vec, + #[prost(bool, tag = "11")] + pub has_error: bool, + #[prost(string, tag = "12")] + pub error: ::prost::alloc::string::String, + #[prost(string, tag = "13")] + pub trace_agent_env: ::prost::alloc::string::String, + #[prost(string, tag = "14")] + pub org_uuid: ::prost::alloc::string::String, + #[prost(string, repeated, tag = "15")] + pub tags: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, + #[prost(string, tag = "16")] + pub agent_uuid: ::prost::alloc::string::String, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct LatestConfigsResponse { + #[prost(message, optional, tag = "1")] + pub config_metas: ::core::option::Option, + #[prost(message, optional, tag = "2")] + pub director_metas: ::core::option::Option, + #[prost(message, repeated, tag = "3")] + pub target_files: ::prost::alloc::vec::Vec, +} +#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] +pub struct OrgDataResponse { + #[prost(string, tag = "1")] + pub uuid: ::prost::alloc::string::String, +} +#[derive(Clone, Copy, PartialEq, Eq, Hash, ::prost::Message)] +pub struct OrgStatusResponse { + #[prost(bool, tag = "1")] + pub enabled: bool, + #[prost(bool, tag = "2")] + pub authorized: bool, +} #[derive(Deserialize, Serialize)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct Client { @@ -34,6 +129,10 @@ pub struct Client { pub last_seen: u64, #[prost(bytes 
= "vec", tag = "11")] pub capabilities: ::prost::alloc::vec::Vec, + #[prost(bool, tag = "14")] + pub is_updater: bool, + #[prost(message, optional, tag = "15")] + pub client_updater: ::core::option::Option, } #[derive(Deserialize, Serialize)] #[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] @@ -74,6 +173,60 @@ pub struct ClientAgent { pub cws_workloads: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, } #[derive(Deserialize, Serialize)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ClientUpdater { + #[prost(string, repeated, tag = "1")] + pub tags: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, + #[prost(message, repeated, tag = "2")] + pub packages: ::prost::alloc::vec::Vec, + #[prost(uint64, tag = "3")] + pub available_disk_space: u64, + #[prost(string, tag = "4")] + pub secrets_pub_key: ::prost::alloc::string::String, +} +#[derive(Deserialize, Serialize)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct PackageState { + #[prost(string, tag = "1")] + pub package: ::prost::alloc::string::String, + #[prost(string, tag = "2")] + pub stable_version: ::prost::alloc::string::String, + #[prost(string, tag = "3")] + pub experiment_version: ::prost::alloc::string::String, + #[prost(message, optional, tag = "4")] + pub task: ::core::option::Option, + #[prost(string, tag = "11")] + pub stable_config_version: ::prost::alloc::string::String, + #[prost(string, tag = "12")] + pub experiment_config_version: ::prost::alloc::string::String, + #[prost(string, tag = "13")] + pub running_version: ::prost::alloc::string::String, + #[prost(string, tag = "14")] + pub running_config_version: ::prost::alloc::string::String, + #[prost(uint64, tag = "15")] + pub heartbeat_timestamp: u64, + #[prost(float, tag = "16")] + pub completion: f32, +} +#[derive(Deserialize, Serialize)] +#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] +pub struct PackageStateTask { + #[prost(string, tag = "1")] + pub id: ::prost::alloc::string::String, + 
#[prost(enumeration = "TaskState", tag = "2")] + pub state: i32, + #[prost(message, optional, tag = "3")] + pub error: ::core::option::Option, +} +#[derive(Deserialize, Serialize)] +#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] +pub struct TaskError { + #[prost(uint64, tag = "1")] + pub code: u64, + #[prost(string, tag = "2")] + pub message: ::prost::alloc::string::String, +} +#[derive(Deserialize, Serialize)] #[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] pub struct ConfigState { #[prost(string, tag = "1")] @@ -121,6 +274,82 @@ pub struct TargetFileMeta { #[prost(message, repeated, tag = "3")] pub hashes: ::prost::alloc::vec::Vec, } +/// ConfigSubscriptionRequest is used to manage the state of the stream created +/// using CreateConfigSubscription. +#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] +pub struct ConfigSubscriptionRequest { + /// RuntimeID of the client to track or untrack. + #[prost(string, tag = "1")] + pub runtime_id: ::prost::alloc::string::String, + /// Action indicates the action to take for the client with the given + /// runtime_id. + #[prost(enumeration = "config_subscription_request::Action", tag = "2")] + pub action: i32, + /// If action is TRACK, products indicates the set of products for which the + /// client is interested in receiving updates. + #[prost(enumeration = "ConfigSubscriptionProducts", tag = "3")] + pub products: i32, +} +/// Nested message and enum types in `ConfigSubscriptionRequest`. +pub mod config_subscription_request { + #[derive( + Clone, + Copy, + Debug, + PartialEq, + Eq, + Hash, + PartialOrd, + Ord, + ::prost::Enumeration + )] + #[repr(i32)] + pub enum Action { + Invalid = 0, + Track = 1, + Untrack = 2, + } + impl Action { + /// String value of the enum field names used in the ProtoBuf definition. + /// + /// The values are not transformed in any way and thus are considered stable + /// (if the ProtoBuf definition does not change) and safe for programmatic use. 
+ pub fn as_str_name(&self) -> &'static str { + match self { + Self::Invalid => "INVALID", + Self::Track => "TRACK", + Self::Untrack => "UNTRACK", + } + } + /// Creates an enum from field names used in the ProtoBuf definition. + pub fn from_str_name(value: &str) -> ::core::option::Option { + match value { + "INVALID" => Some(Self::Invalid), + "TRACK" => Some(Self::Track), + "UNTRACK" => Some(Self::Untrack), + _ => None, + } + } + } +} +/// ConfigSubscriptionResponse is streamed from CreateConfigSubscription with +/// updates for matching clients and products. +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ConfigSubscriptionResponse { + /// Client is the client that was tracked or untracked. + #[prost(message, optional, tag = "1")] + pub client: ::core::option::Option, + /// Matched configs are all configs that were matched for the client given + /// the subscription request. + /// + /// If a previously reported config is no longer matched, it will not be + /// included in the response. + #[prost(string, repeated, tag = "2")] + pub matched_configs: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, + /// Target files are the target files that needs to be sent to the client. 
+ #[prost(message, repeated, tag = "3")] + pub target_files: ::prost::alloc::vec::Vec, +} #[derive(Deserialize, Serialize)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct ClientGetConfigsRequest { @@ -146,4 +375,175 @@ pub struct ClientGetConfigsResponse { #[prost(string, repeated, tag = "4")] #[serde(default)] pub client_configs: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, + #[prost(enumeration = "ConfigStatus", tag = "5")] + pub config_status: i32, +} +#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] +pub struct FileMetaState { + #[prost(uint64, tag = "1")] + pub version: u64, + #[prost(string, tag = "2")] + pub hash: ::prost::alloc::string::String, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct GetStateConfigResponse { + #[prost(map = "string, message", tag = "1")] + pub config_state: ::std::collections::HashMap< + ::prost::alloc::string::String, + FileMetaState, + >, + #[prost(map = "string, message", tag = "2")] + pub director_state: ::std::collections::HashMap< + ::prost::alloc::string::String, + FileMetaState, + >, + #[prost(map = "string, string", tag = "3")] + pub target_filenames: ::std::collections::HashMap< + ::prost::alloc::string::String, + ::prost::alloc::string::String, + >, + #[prost(message, repeated, tag = "4")] + pub active_clients: ::prost::alloc::vec::Vec, + #[prost(message, repeated, tag = "5")] + pub config_subscription_states: ::prost::alloc::vec::Vec, +} +/// ConfigSubscriptionState describes the state of a config subscription. +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ConfigSubscriptionState { + /// SubscriptionID is a process-unique identifier for the subscription. + #[prost(uint64, tag = "1")] + pub subscription_id: u64, + /// TrackedClients is the list of clients that are currently tracked by the + /// subscription. 
+ #[prost(message, repeated, tag = "2")] + pub tracked_clients: ::prost::alloc::vec::Vec< + config_subscription_state::TrackedClient, + >, +} +/// Nested message and enum types in `ConfigSubscriptionState`. +pub mod config_subscription_state { + #[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] + pub struct TrackedClient { + #[prost(string, tag = "1")] + pub runtime_id: ::prost::alloc::string::String, + #[prost(bool, tag = "2")] + pub seen_any: bool, + #[prost(enumeration = "super::ConfigSubscriptionProducts", tag = "3")] + pub products: i32, + } +} +#[derive(Clone, Copy, PartialEq, Eq, Hash, ::prost::Message)] +pub struct ResetStateConfigResponse {} +#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] +pub struct TracerPredicateV1 { + #[prost(string, tag = "1")] + pub client_id: ::prost::alloc::string::String, + #[prost(string, tag = "2")] + pub service: ::prost::alloc::string::String, + #[prost(string, tag = "3")] + pub environment: ::prost::alloc::string::String, + #[prost(string, tag = "4")] + pub app_version: ::prost::alloc::string::String, + #[prost(string, tag = "5")] + pub tracer_version: ::prost::alloc::string::String, + #[prost(string, tag = "6")] + pub language: ::prost::alloc::string::String, + #[prost(string, tag = "7")] + pub runtime_id: ::prost::alloc::string::String, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct TracerPredicates { + #[prost(message, repeated, tag = "1")] + pub tracer_predicates_v1: ::prost::alloc::vec::Vec, +} +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] +#[repr(i32)] +pub enum TaskState { + Idle = 0, + Running = 1, + Done = 2, + InvalidState = 3, + Error = 4, +} +impl TaskState { + /// String value of the enum field names used in the ProtoBuf definition. + /// + /// The values are not transformed in any way and thus are considered stable + /// (if the ProtoBuf definition does not change) and safe for programmatic use. 
+ pub fn as_str_name(&self) -> &'static str { + match self { + Self::Idle => "IDLE", + Self::Running => "RUNNING", + Self::Done => "DONE", + Self::InvalidState => "INVALID_STATE", + Self::Error => "ERROR", + } + } + /// Creates an enum from field names used in the ProtoBuf definition. + pub fn from_str_name(value: &str) -> ::core::option::Option { + match value { + "IDLE" => Some(Self::Idle), + "RUNNING" => Some(Self::Running), + "DONE" => Some(Self::Done), + "INVALID_STATE" => Some(Self::InvalidState), + "ERROR" => Some(Self::Error), + _ => None, + } + } +} +/// ConfigSubscriptionProducts is used to targets specific products for tracking +/// with a ConfigSubscriptionRequest. +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] +#[repr(i32)] +pub enum ConfigSubscriptionProducts { + Invalid = 0, + /// LIVE_DEBUGGING corresponds to the LIVE_DEBUGING and LIVE_DEBUGGING_SYMBOLDB + /// products. + LiveDebugging = 1, +} +impl ConfigSubscriptionProducts { + /// String value of the enum field names used in the ProtoBuf definition. + /// + /// The values are not transformed in any way and thus are considered stable + /// (if the ProtoBuf definition does not change) and safe for programmatic use. + pub fn as_str_name(&self) -> &'static str { + match self { + Self::Invalid => "INVALID", + Self::LiveDebugging => "LIVE_DEBUGGING", + } + } + /// Creates an enum from field names used in the ProtoBuf definition. + pub fn from_str_name(value: &str) -> ::core::option::Option { + match value { + "INVALID" => Some(Self::Invalid), + "LIVE_DEBUGGING" => Some(Self::LiveDebugging), + _ => None, + } + } +} +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] +#[repr(i32)] +pub enum ConfigStatus { + Ok = 0, + Expired = 1, +} +impl ConfigStatus { + /// String value of the enum field names used in the ProtoBuf definition. 
+ /// + /// The values are not transformed in any way and thus are considered stable + /// (if the ProtoBuf definition does not change) and safe for programmatic use. + pub fn as_str_name(&self) -> &'static str { + match self { + Self::Ok => "CONFIG_STATUS_OK", + Self::Expired => "CONFIG_STATUS_EXPIRED", + } + } + /// Creates an enum from field names used in the ProtoBuf definition. + pub fn from_str_name(value: &str) -> ::core::option::Option { + match value { + "CONFIG_STATUS_OK" => Some(Self::Ok), + "CONFIG_STATUS_EXPIRED" => Some(Self::Expired), + _ => None, + } + } } From 1b42d8a3672af5137be42d1a8b6e009a70890990 Mon Sep 17 00:00:00 2001 From: paullegranddc Date: Mon, 15 Jun 2026 14:11:03 +0200 Subject: [PATCH 02/18] fix: cleanup code --- datadog-sidecar-ffi/src/lib.rs | 3 + datadog-sidecar/src/service/sidecar_server.rs | 3 + libdd-common/src/lib.rs | 2 +- libdd-remote-config/Cargo.toml | 2 +- .../src/agentless_client/mod.rs | 160 +++++++----------- libdd-remote-config/src/fetch/fetcher.rs | 59 +++++-- libdd-remote-config/src/fetch/single.rs | 34 ++++ libdd-trace-protobuf/build.rs | 1 - libdd-tracer-flare/src/lib.rs | 30 ++-- 9 files changed, 165 insertions(+), 129 deletions(-) diff --git a/datadog-sidecar-ffi/src/lib.rs b/datadog-sidecar-ffi/src/lib.rs index f3c5c97254..9ac92b6e7e 100644 --- a/datadog-sidecar-ffi/src/lib.rs +++ b/datadog-sidecar-ffi/src/lib.rs @@ -266,6 +266,9 @@ pub unsafe extern "C" fn ddog_remote_config_reader_for_endpoint<'a>( language: language.to_utf8_lossy().into(), tracer_version: tracer_version.to_utf8_lossy().into(), endpoint: endpoint.clone(), + // TODO: hostname will need to be added when agentess is enabled + hostname: String::new(), + agentless_enabled: false, }, &Arc::new(Target { service: service_name.to_utf8_lossy().into(), diff --git a/datadog-sidecar/src/service/sidecar_server.rs b/datadog-sidecar/src/service/sidecar_server.rs index 5b2d27b801..ab65c02039 100644 --- a/datadog-sidecar/src/service/sidecar_server.rs +++ 
b/datadog-sidecar/src/service/sidecar_server.rs @@ -779,6 +779,9 @@ impl SidecarInterface for ConnectionSidecarHandler { language: config.language, tracer_version: config.tracer_version, endpoint: config.endpoint, + // TODO: hostname will need to be added when agentess is enabled + hostname: String::new(), + agentless_enabled: false, }, products: config.remote_config_products, capabilities: config.remote_config_capabilities, diff --git a/libdd-common/src/lib.rs b/libdd-common/src/lib.rs index 54a6f03193..adfb568b84 100644 --- a/libdd-common/src/lib.rs +++ b/libdd-common/src/lib.rs @@ -343,7 +343,7 @@ impl Endpoint { ) .path_and_query(PathAndQuery::from_static("")) .build() - .unwrap(), + .with_context(|| format!("rc url is invalid for site: {site}"))?, api_key: Some(api_key.into()), timeout_ms: Self::DEFAULT_TIMEOUT, test_token: None, diff --git a/libdd-remote-config/Cargo.toml b/libdd-remote-config/Cargo.toml index a42ff0030b..1655877efb 100644 --- a/libdd-remote-config/Cargo.toml +++ b/libdd-remote-config/Cargo.toml @@ -62,7 +62,7 @@ thiserror = "2" hashbrown = "0.15" tuf = { git = "https://github.com/DataDog/rust-tuf/", tag = "0.3.0-beta10-opw-3" } prost = "0.14.1" -futures = "0.3" +futures = { version = "0.3", features = ["executor"] } # Test feature hyper-util = { workspace = true, features = ["service"], optional = true } diff --git a/libdd-remote-config/src/agentless_client/mod.rs b/libdd-remote-config/src/agentless_client/mod.rs index 32b91af183..938927d907 100644 --- a/libdd-remote-config/src/agentless_client/mod.rs +++ b/libdd-remote-config/src/agentless_client/mod.rs @@ -3,7 +3,6 @@ use std::{ fmt, - future::Future, time::{Duration, SystemTime, UNIX_EPOCH}, }; @@ -35,14 +34,15 @@ const CONFIG_ROOT_VERSION: u64 = 16; const DIRECTOR_ROOT: &[u8] = include_bytes!("../../roots/prod/director_root.json"); const DIRECTOR_ROOT_VERSION: u64 = 15; -const FAKE_AGENT_VERSION: &'static str = "7.78.4"; +const FAKE_AGENT_VERSION: &str = "7.78.4"; type TUFRepo = 
tuf::repository::EphemeralRepository; type TUFClient = tuf::client::Client; // Make a remote config API endpoint from and endpoint where `e.url` is the base dd site // If the endpoint is not suitable (api key not set, not https), returns N -pub fn make_agentless_configs_endpoint(e: Endpoint) -> Option { +pub fn make_agentless_configs_endpoint(e: &Endpoint) -> Option { + let e = e.clone(); dbg!(&e); if !(e.url.scheme_str().is_some_and(|s| s == "https") && e.url.authority().is_some() @@ -67,12 +67,6 @@ pub struct AgentlessConfig { pub hostname: String, } -struct CachedFile { - hash: Vec<(&'static tuf::crypto::HashAlgorithm, tuf::crypto::HashValue)>, - target_file: Vec, - version: u64, -} - pub type NativeAgentlessFetcher = AgentlessFetcher; pub struct AgentlessFetcher { @@ -85,54 +79,39 @@ pub struct AgentlessFetcher { products: HashSet, refresh_interval: Duration, endpoint: Endpoint, - // TODO: Not sure this is needed if the wrapper client already caches files? + // TODO: Not sure this is needed if the wrapped client already caches files? 
target_cache: HashMap, } -pub struct ClientResponse<'a> { - pub root_version: u64, - pub target_version: u64, - pub opaque_backend_state: Vec, - pub targets: Vec<( - &'a str, - &'a [u8], - u64, - &'a Vec<(&'static tuf::crypto::HashAlgorithm, tuf::crypto::HashValue)>, - )>, +struct CachedFile { + hashes: Vec<(&'static tuf::crypto::HashAlgorithm, tuf::crypto::HashValue)>, + target_file: Vec, + version: u64, } -struct BorrowedTarget<'a> { - pub path: &'a TargetPath, - pub desc: &'a TargetDescription, +pub struct ClientTargetResponse<'a> { + pub path: &'a str, + pub version: u64, + pub hashes: &'a [(&'static tuf::crypto::HashAlgorithm, tuf::crypto::HashValue)], + pub content: &'a [u8], } -impl<'a> BorrowedTarget<'a> { - pub fn try_create(path: &'a TargetPath, desc: &'a TargetDescription) -> anyhow::Result { - if let Some(expiry) = desc.custom().get(CUSTOM_METADATA_EXPIRY_PATH) { - let expiry_ts = expiry - .as_u64() - .ok_or_else(|| anyhow::format_err!("expiry not a number"))?; - - if expiry_ts * 1000 <= now_unix_milli_ts() { - anyhow::bail!("expired target at path: {path}") - } - } - - Ok(Self { path, desc }) - } +pub struct ClientResponse<'a> { + pub root_version: u64, + pub target_version: u64, + pub opaque_backend_state: &'a [u8], + pub targets: Vec>, } -#[derive(Clone, Debug, Eq, PartialEq)] -pub struct Target { - pub path: TargetPath, - pub desc: TargetDescription, +struct BorrowedTufTarget<'a> { + pub path: &'a tuf::metadata::TargetPath, + pub desc: &'a tuf::metadata::TargetDescription, } const CUSTOM_METADATA_EXPIRY_PATH: &str = "expires"; -impl Target { - /// Returns Ok(Target) when valid and unexpired; Err(Error) otherwise. 
- pub fn try_create(path: &TargetPath, desc: &TargetDescription) -> anyhow::Result { +impl<'a> BorrowedTufTarget<'a> { + pub fn try_create(path: &'a TargetPath, desc: &'a TargetDescription) -> anyhow::Result { if let Some(expiry) = desc.custom().get(CUSTOM_METADATA_EXPIRY_PATH) { let expiry_ts = expiry .as_u64() @@ -143,10 +122,7 @@ impl Target { } } - Ok(Self { - path: path.clone(), - desc: desc.clone(), - }) + Ok(Self { path, desc }) } } @@ -156,10 +132,11 @@ enum FetchTargetResult { } impl AgentlessFetcher { - /// Create a new `RemoteConfig` client. + /// Create a new `AgentlessFetcher` client. /// /// # Errors /// Returns an error if TUF root initialization fails. + /// This can happen for instance if the trust root certificates have expired pub async fn new(cfg: AgentlessConfig, endpoint: Endpoint) -> anyhow::Result { Ok(Self { endpoint, @@ -183,7 +160,7 @@ impl AgentlessFetcher { target_cache: HashMap::new(), opaque_backend_state: Vec::new(), - refresh_interval: Duration::from_secs(5), + refresh_interval: Duration::from_secs(60), initialized: false, }) } @@ -192,7 +169,10 @@ impl AgentlessFetcher { /// hashes against the metadata in the config repo. 
/// /// If it is already in the cache, return `Cached` - async fn fetch_target(&self, target: &BorrowedTarget<'_>) -> anyhow::Result { + async fn fetch_target( + &self, + target: &BorrowedTufTarget<'_>, + ) -> anyhow::Result { let expected_hashes = tuf::crypto::retain_supported_hashes(target.desc.hashes()); if expected_hashes.is_empty() { anyhow::bail!("no supported hash for path: {}", target.path); @@ -200,7 +180,6 @@ impl AgentlessFetcher { let (target_hash_algo, target_hash) = &expected_hashes[0]; let target_path = target.path; - dbg!(target.desc.custom()); let version = target .desc .custom() @@ -210,7 +189,7 @@ impl AgentlessFetcher { if let Some(item) = self.target_cache.get(target_path) { if item - .hash + .hashes .iter() .find(|(alg, _)| alg == target_hash_algo) .is_some_and(|(_, h)| h == target_hash) @@ -258,7 +237,7 @@ impl AgentlessFetcher { } Ok(FetchTargetResult::New(CachedFile { - hash: expected_hashes, + hashes: expected_hashes, target_file: buf, version, })) @@ -285,7 +264,7 @@ impl AgentlessFetcher { u64::from(self.director_client.database().trusted_root().version()), ) } else { - (0, 0, 0) + (0, CONFIG_ROOT_VERSION, DIRECTOR_ROOT_VERSION) }; let all_products = c.products.iter().fold(HashSet::new(), |mut acc, p| { @@ -302,15 +281,20 @@ impl AgentlessFetcher { .cloned() .collect::>(); + let now = now_unix_milli_ts(); + let request = remoteconfig::LatestConfigsRequest { hostname: self.hostname.clone(), current_config_snapshot_version, current_config_root_version, current_director_root_version, products: old_products, - new_products: new_products, + new_products, backend_client_state: self.opaque_backend_state.clone(), - active_clients: vec![c.clone()], + active_clients: vec![remoteconfig::Client { + last_seen: now, + ..c + }], agent_version: FAKE_AGENT_VERSION.to_owned(), has_error: false, error: String::new(), @@ -320,14 +304,18 @@ impl AgentlessFetcher { agent_uuid: String::new(), }; let response = self.get_latest_config(request).await?; + + 
self.apply(&response).await?; if !self.initialized { self.initialized = true; } - - self.apply(&response).await?; self.products = all_products; - // TODO: filter predicates ? + // TODO: + // In the future we will want to query configs for mutliple clients (for PHP, which can have + // many processes use the same rc client) + // This means we will need to dispatch the different files based on filter predicates + // which we currently do not parse Ok(ClientResponse { root_version: u64::from(self.config_client.database().trusted_root().version()), @@ -338,11 +326,16 @@ impl AgentlessFetcher { .ok_or(anyhow::anyhow!("Missing target data"))? .version(), ), - opaque_backend_state: self.opaque_backend_state.clone(), + opaque_backend_state: &self.opaque_backend_state, targets: self .target_cache .iter() - .map(|(p, t)| (p.as_str(), t.target_file.as_slice(), t.version, &t.hash)) + .map(|(p, t)| ClientTargetResponse { + path: p.as_str(), + version: t.version, + hashes: &t.hashes, + content: t.target_file.as_slice(), + }) .collect(), }) } @@ -363,45 +356,11 @@ impl AgentlessFetcher { parse_rc_response(res) } - pub async fn init(&mut self) -> anyhow::Result<()> { - let response = self.initial_config().await?; - Ok(self.apply(&response).await?) - } - - /// Fetch the initial Remote Config payload for this tracer. - /// - /// # Errors - /// Returns an error if the HTTP request fails or the response cannot be decoded. 
- pub fn initial_config( - &self, - ) -> impl Future> + Send + use<'_, C> - { - let initial_request = remoteconfig::LatestConfigsRequest { - hostname: self.hostname.clone(), - current_config_snapshot_version: 0, - current_config_root_version: CONFIG_ROOT_VERSION, - current_director_root_version: DIRECTOR_ROOT_VERSION, - new_products: vec![], - active_clients: vec![], - backend_client_state: self.opaque_backend_state.clone(), - agent_version: FAKE_AGENT_VERSION.to_owned(), - products: vec![], - has_error: false, - error: String::new(), - trace_agent_env: String::new(), - tags: vec![], - agent_uuid: String::new(), - org_uuid: String::new(), - }; - self.get_latest_config(initial_request) - } - /// Fetch the latest Remote Config for this client. /// /// # Errors /// Returns an error if the HTTP request fails or the response cannot be decoded. - #[allow(clippy::future_not_send)] - pub async fn get_latest_config( + async fn get_latest_config( &self, req: remoteconfig::LatestConfigsRequest, ) -> anyhow::Result { @@ -571,10 +530,11 @@ fn now_unix_milli_ts() -> u64 { .unwrap_or(u64::MAX) } -/// Return the available, unexpired target paths and their descriptions based on the current metadata. +/// Return the available, unexpired target paths and their descriptions based on the current +/// metadata. 
fn trusted_targets( director_client: &TUFClient, -) -> anyhow::Result> + '_> { +) -> anyhow::Result> + '_> { Ok(director_client .database() .trusted_targets() @@ -582,7 +542,7 @@ fn trusted_targets( .targets() .iter() .filter_map(|(path, desc)| { - BorrowedTarget::try_create(path, desc) + BorrowedTufTarget::try_create(path, desc) .inspect_err(|e| { debug!(%path, "Skipping target: error {}", e); }) diff --git a/libdd-remote-config/src/fetch/fetcher.rs b/libdd-remote-config/src/fetch/fetcher.rs index 78c9260fed..aad3a39852 100644 --- a/libdd-remote-config/src/fetch/fetcher.rs +++ b/libdd-remote-config/src/fetch/fetcher.rs @@ -2,7 +2,8 @@ // SPDX-License-Identifier: Apache-2.0 use crate::agentless_client::{ - make_agentless_configs_endpoint, AgentlessConfig, AgentlessFetcher, NativeAgentlessFetcher, + self, make_agentless_configs_endpoint, AgentlessConfig, AgentlessFetcher, + NativeAgentlessFetcher, }; use crate::targets::{Root, TargetsList}; use crate::{RemoteConfigCapabilities, RemoteConfigPath, RemoteConfigProduct, Target}; @@ -159,18 +160,31 @@ impl ConfigFetcherFilesLock<'_, S> { impl ConfigFetcherState { pub fn new(invariants: ConfigInvariants) -> Self { + let (endpoint, agentless_enabled) = if invariants.agentless_enabled { + match ( + make_agentless_configs_endpoint(&invariants.endpoint), + invariants.hostname.is_empty(), + ) { + (Some(e), false) => (e, true), + (Some(_), true) => { + warn!("rc_config_fetcher: agentless enabled but the hostname is empty. Downgrading to agent endpoint"); + (make_agent_configs_endpoint(&invariants.endpoint), false) + } + (None, _) => { + warn!("rc_config_fetcher: agentless enabled but the endpoint is invalid. 
Downgrading to agent endpoint"); + (make_agent_configs_endpoint(&invariants.endpoint), false) + } + } + } else { + (make_agent_configs_endpoint(&invariants.endpoint), false) + }; ConfigFetcherState { target_files_by_path: Default::default(), - endpoint: if invariants.agentless_enabled { - if let Some(e) = make_agentless_configs_endpoint(invariants.endpoint.clone()) { - e - } else { - make_agent_configs_endpoint(&invariants.endpoint) - } - } else { - make_agent_configs_endpoint(&invariants.endpoint) + endpoint, + invariants: ConfigInvariants { + agentless_enabled, + ..invariants }, - invariants, expire_unused_files: true, } } @@ -214,6 +228,7 @@ impl ConfigFetcherState { } } +#[allow(clippy::large_enum_variant)] enum FetcherMode { Agent, Agentless(NativeAgentlessFetcher), @@ -256,6 +271,9 @@ impl ConfigClientState { } impl ConfigFetcher { + /// Create a new config fetcher + /// This is guaranteed to be immediate (no await point) if `state.invariants.agentless_enabled` + /// is false pub async fn new( file_storage: S, state: Arc>, @@ -603,18 +621,25 @@ impl ConfigFetcher { match &mut self.mode { FetcherMode::Agent => self.fetch_agent(config_req, target, client_state).await, FetcherMode::Agentless(agentless_fetcher) => { + #[allow(clippy::expect_used)] let res = agentless_fetcher - .fetch_config(config_req.client.unwrap()) + .fetch_config( + config_req + .client + .expect("RC ConfigFetcher::build_config_request should always return a `Some` client"), + ) .await?; client_state.root_version = res.root_version; client_state.targets_version = res.target_version; - client_state.opaque_backend_state = res.opaque_backend_state; + if res.opaque_backend_state != client_state.opaque_backend_state { + client_state.opaque_backend_state = res.opaque_backend_state.to_vec(); + } client_state.last_error = None; let mut target_files = self.state.target_files_by_path.lock_or_panic(); let mut config_paths = HashSet::new(); - for &(path, _, _, _) in &res.targets { + for 
&agentless_client::ClientTargetResponse { path, .. } in &res.targets { match RemoteConfigPath::try_parse(path) { Ok(parsed) => { config_paths.insert(parsed.into()); @@ -627,7 +652,13 @@ impl ConfigFetcher { target_files.retain(|k, _| config_paths.contains(k.as_ref())); } - for (path, target_file, version, hashes) in res.targets { + for agentless_client::ClientTargetResponse { + path, + content: target_file, + version, + hashes, + } in res.targets + { let parsed_path = match RemoteConfigPath::try_parse(path) { Ok(parsed_path) => parsed_path, Err(e) => { diff --git a/libdd-remote-config/src/fetch/single.rs b/libdd-remote-config/src/fetch/single.rs index f5baf16908..c65ef046f4 100644 --- a/libdd-remote-config/src/fetch/single.rs +++ b/libdd-remote-config/src/fetch/single.rs @@ -50,6 +50,28 @@ impl SingleFetcher { }) } + pub fn new_no_agentless( + sink: S, + target: Target, + runtime_id: String, + options: ConfigOptions, + ) -> anyhow::Result { + Ok(SingleFetcher { + fetcher: futures::executor::block_on(ConfigFetcher::new( + sink, + Arc::new(ConfigFetcherState::new(options.invariants)), + ))?, + target: Arc::new(target), + product_capabilities: ConfigProductCapabilities::new( + options.products, + options.capabilities, + ), + runtime_id, + client_id: uuid::Uuid::new_v4().to_string(), + client_state: ConfigClientState::default(), + }) + } + pub fn with_client_id(mut self, client_id: String) -> Self { self.client_id = client_id; self @@ -109,6 +131,18 @@ where }) } + pub fn new_no_agentless( + sink: S, + target: Target, + runtime_id: String, + options: ConfigOptions, + ) -> anyhow::Result { + Ok(SingleChangesFetcher { + changes: ChangeTracker::default(), + fetcher: SingleFetcher::new_no_agentless(sink, target, runtime_id, options)?, + }) + } + pub fn with_client_id(mut self, client_id: String) -> Self { self.fetcher = self.fetcher.with_client_id(client_id); self diff --git a/libdd-trace-protobuf/build.rs b/libdd-trace-protobuf/build.rs index 06714040f1..a8a10fbee1 100644 
--- a/libdd-trace-protobuf/build.rs +++ b/libdd-trace-protobuf/build.rs @@ -313,7 +313,6 @@ fn generate_protobuf() { config.type_attribute("PackageStateTask", "#[derive(Deserialize, Serialize)]"); config.type_attribute("TaskError", "#[derive(Deserialize, Serialize)]"); - config.include_file("_includes.rs"); config diff --git a/libdd-tracer-flare/src/lib.rs b/libdd-tracer-flare/src/lib.rs index 3ff667326f..5eae806528 100644 --- a/libdd-tracer-flare/src/lib.rs +++ b/libdd-tracer-flare/src/lib.rs @@ -181,6 +181,9 @@ impl TracerFlareManager { language, tracer_version, endpoint: remote_config_endpoint, + // TODO: hostname will need to be added when agentess is enabled + hostname: String::new(), + agentless_enabled: false, }, products: vec![ RemoteConfigProduct::AgentConfig, @@ -189,18 +192,21 @@ impl TracerFlareManager { capabilities: vec![], }; - tracer_flare.listener = Some(SingleChangesFetcher::new( - ParsedFileStorage::default(), - Target { - service, - env, - app_version, - tags: vec![], - process_tags: vec![], - }, - runtime_id, - config_to_fetch, - )); + tracer_flare.listener = Some( + SingleChangesFetcher::new_no_agentless( + ParsedFileStorage::default(), + Target { + service, + env, + app_version, + tags: vec![], + process_tags: vec![], + }, + runtime_id, + config_to_fetch, + ) + .map_err(|e| FlareError::ListeningError(e.to_string()))?, + ); Ok(tracer_flare) } From b88902eec1c6e021cfe71e642f03463c49e42029 Mon Sep 17 00:00:00 2001 From: paullegranddc Date: Mon, 15 Jun 2026 15:54:56 +0200 Subject: [PATCH 03/18] fix: typos --- libdd-remote-config/src/agentless_client/mod.rs | 2 +- libdd-remote-config/src/fetch/fetcher.rs | 2 +- libdd-remote-config/src/fetch/shared.rs | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/libdd-remote-config/src/agentless_client/mod.rs b/libdd-remote-config/src/agentless_client/mod.rs index 938927d907..a1da31df9d 100644 --- a/libdd-remote-config/src/agentless_client/mod.rs +++ 
b/libdd-remote-config/src/agentless_client/mod.rs @@ -246,7 +246,7 @@ impl AgentlessFetcher { pub async fn fetch_config( &mut self, c: remoteconfig::Client, - ) -> anyhow::Result { + ) -> anyhow::Result> { let ( current_config_snapshot_version, current_config_root_version, diff --git a/libdd-remote-config/src/fetch/fetcher.rs b/libdd-remote-config/src/fetch/fetcher.rs index aad3a39852..c0504ab0be 100644 --- a/libdd-remote-config/src/fetch/fetcher.rs +++ b/libdd-remote-config/src/fetch/fetcher.rs @@ -721,7 +721,7 @@ impl ConfigFetcher { "sha512".to_string() } tuf::crypto::HashAlgorithm::Unknown(u) => u.clone(), - _ => anyhow::bail!("unhandled has algorithm"), + a => anyhow::bail!("unhandled hash algorithm: {a:?}"), }, hash: hash.to_string(), }) diff --git a/libdd-remote-config/src/fetch/shared.rs b/libdd-remote-config/src/fetch/shared.rs index f618b992f8..06e9df7553 100644 --- a/libdd-remote-config/src/fetch/shared.rs +++ b/libdd-remote-config/src/fetch/shared.rs @@ -278,7 +278,7 @@ impl SharedFetcher { let mut fetcher = match ConfigFetcher::new(storage, state).await { Ok(f) => f, Err(e) => { - error!("failed to create the fetcher{:?}", e); + error!("failed to create the fetcher: {:?}", e); return; } }; From d4767a3dc6e613038e24cb0b333acfd584acc89b Mon Sep 17 00:00:00 2001 From: paullegranddc Date: Mon, 15 Jun 2026 19:43:43 +0200 Subject: [PATCH 04/18] fix: trim rc target path --- .../src/agentless_client/mod.rs | 53 +++++++++++++++---- 1 file changed, 44 insertions(+), 9 deletions(-) diff --git a/libdd-remote-config/src/agentless_client/mod.rs b/libdd-remote-config/src/agentless_client/mod.rs index a1da31df9d..3d1dc2495f 100644 --- a/libdd-remote-config/src/agentless_client/mod.rs +++ b/libdd-remote-config/src/agentless_client/mod.rs @@ -6,6 +6,7 @@ use std::{ time::{Duration, SystemTime, UNIX_EPOCH}, }; +use anyhow::{bail, format_err}; use base64::Engine; use futures::AsyncReadExt as _; use hashbrown::{HashMap, HashSet}; @@ -115,10 +116,10 @@ impl<'a> 
BorrowedTufTarget<'a> { if let Some(expiry) = desc.custom().get(CUSTOM_METADATA_EXPIRY_PATH) { let expiry_ts = expiry .as_u64() - .ok_or_else(|| anyhow::format_err!("expiry not a number"))?; + .ok_or_else(|| format_err!("expiry not a number"))?; if expiry_ts * 1000 <= now_unix_milli_ts() { - anyhow::bail!("expired target at path: {path}") + bail!("expired target at path: {path}") } } @@ -175,7 +176,7 @@ impl AgentlessFetcher { ) -> anyhow::Result { let expected_hashes = tuf::crypto::retain_supported_hashes(target.desc.hashes()); if expected_hashes.is_empty() { - anyhow::bail!("no supported hash for path: {}", target.path); + bail!("no supported hash for path: {}", target.path); } let (target_hash_algo, target_hash) = &expected_hashes[0]; let target_path = target.path; @@ -212,7 +213,7 @@ impl AgentlessFetcher { let expected_len = target.desc.length() as usize; if buf.len() != expected_len { - anyhow::bail!("bad length for file at path: {}", target.path) + bail!("bad length for file at path: {}", target.path) } { @@ -232,7 +233,7 @@ impl AgentlessFetcher { .iter() .all(|(k, v)| expected.get(&k).is_some_and(|e| *e == v))) { - anyhow::bail!("hash did not match: {}", target.path) + bail!("hash did not match: {}", target.path) } } @@ -410,6 +411,11 @@ impl AgentlessFetcher { let repo = self.director_client.remote_repo_mut(); *repo = TUFRepo::new(); for target_file in &response.target_files { + let trimmed_path = trim_hash_target_path(&target_file.path)?; + let trimmed_target_path = TargetPath::new(&trimmed_path)?; + repo.store_target(&trimmed_target_path, &mut target_file.raw.as_slice()) + .await?; + // let trimmed_path = trim_hash_target_path(&target_file.path)?; // let trimmed_target_path = TargetPath::new(&trimmed_path)?; repo.store_target( @@ -422,7 +428,7 @@ impl AgentlessFetcher { let config_repo_mut = self.config_client.remote_repo_mut(); *config_repo_mut = TUFRepo::new(); let Some(metas) = response.config_metas.as_ref() else { - anyhow::bail!("missing config 
meta from LatestConfigsResponse") + bail!("missing config meta from LatestConfigsResponse") }; store(config_repo_mut, &root_path, &metas.roots).await?; @@ -436,7 +442,7 @@ impl AgentlessFetcher { let director_remote_repo = self.director_client.remote_repo_mut(); let Some(metas) = response.director_metas.as_ref() else { - anyhow::bail!("missing director meta from LatestConfigsResponse") + bail!("missing director meta from LatestConfigsResponse") }; store(director_remote_repo, &root_path, &metas.roots).await?; @@ -510,7 +516,7 @@ fn parse_rc_response( let status = response.status().as_u16(); let body = response.into_body(); if !(200..300).contains(&status) { - anyhow::bail!( + bail!( "Non 2XX status code: {}\n{}", status, String::from_utf8_lossy(&body) @@ -538,7 +544,7 @@ fn trusted_targets( Ok(director_client .database() .trusted_targets() - .ok_or_else(|| anyhow::format_err!("missing targets from TUF director client"))? + .ok_or_else(|| format_err!("missing targets from TUF director client"))? .targets() .iter() .filter_map(|(path, desc)| { @@ -577,6 +583,35 @@ async fn store_noversion( Ok(()) } +/// See https://datadoghq.atlassian.net/browse/RC-1859 for more information. +fn trim_hash_target_path(target_path: &str) -> anyhow::Result { + let path = std::path::Path::new(target_path); + // Get the last component + let last_component = path + .components() + .next_back() + .ok_or_else(|| format_err!("invalid target: {target_path}"))?; + let basename = match last_component { + std::path::Component::Normal(name) => name + .to_str() + .ok_or_else(|| format_err!("invalid target: {target_path}"))?, + _ => return Err(format_err!("invalid target: {target_path}")), + }; + + // Split the basename at the first occurrence of '.' 
+ let split: Vec<&str> = basename.splitn(2, '.').collect(); + let basename_trimmed = if split.len() > 1 { split[1] } else { basename }; + + // Reconstruct the whole path + let parent = path + .parent() + .ok_or_else(|| format_err!("invalid target: {target_path}"))?; + let mut result_path = parent.components().as_path().to_path_buf(); + result_path.push(basename_trimmed); + Ok(result_path.to_str().unwrap_or_default().to_string()) +} + + // ── Debug helpers: render `raw: Vec` fields as JSON ──────────────────── struct RawJson<'a>(&'a [u8]); From 02bb2ad194cb88a1c6f4d853492c6c93c54c822e Mon Sep 17 00:00:00 2001 From: paullegranddc Date: Wed, 17 Jun 2026 14:06:01 +0200 Subject: [PATCH 05/18] fix: respect agent refresh interval --- .../examples/remote_config_fetch.rs | 2 +- .../src/agentless_client/mod.rs | 3 ++- libdd-remote-config/src/fetch/fetcher.rs | 12 +++++++++- libdd-remote-config/src/fetch/shared.rs | 10 ++++++++- libdd-remote-config/src/fetch/single.rs | 22 +++++++++++++++++-- 5 files changed, 43 insertions(+), 6 deletions(-) diff --git a/libdd-remote-config/examples/remote_config_fetch.rs b/libdd-remote-config/examples/remote_config_fetch.rs index 66ac2033c6..785c99d312 100644 --- a/libdd-remote-config/examples/remote_config_fetch.rs +++ b/libdd-remote-config/examples/remote_config_fetch.rs @@ -118,7 +118,7 @@ async fn main() { } } - sleep(Duration::from_secs(1)).await; + sleep(fetcher.get_refresh_interval()).await; } } diff --git a/libdd-remote-config/src/agentless_client/mod.rs b/libdd-remote-config/src/agentless_client/mod.rs index 3d1dc2495f..52d28400c2 100644 --- a/libdd-remote-config/src/agentless_client/mod.rs +++ b/libdd-remote-config/src/agentless_client/mod.rs @@ -102,6 +102,7 @@ pub struct ClientResponse<'a> { pub target_version: u64, pub opaque_backend_state: &'a [u8], pub targets: Vec>, + pub refresh_interval: Duration, } struct BorrowedTufTarget<'a> { @@ -338,6 +339,7 @@ impl AgentlessFetcher { content: t.target_file.as_slice(), }) .collect(), + 
refresh_interval: self.refresh_interval, }) } @@ -611,7 +613,6 @@ fn trim_hash_target_path(target_path: &str) -> anyhow::Result { Ok(result_path.to_str().unwrap_or_default().to_string()) } - // ── Debug helpers: render `raw: Vec` fields as JSON ──────────────────── struct RawJson<'a>(&'a [u8]); diff --git a/libdd-remote-config/src/fetch/fetcher.rs b/libdd-remote-config/src/fetch/fetcher.rs index c0504ab0be..0d2dd34e11 100644 --- a/libdd-remote-config/src/fetch/fetcher.rs +++ b/libdd-remote-config/src/fetch/fetcher.rs @@ -249,6 +249,8 @@ pub struct ConfigClientState { /// Services discovered at runtime. Sent to the agent on each poll so it can route configs /// targeting those services to this client. Updated out-of-band by the consumer extra_services: Vec, + /// Server-recommended interval between consecutive polls. + refresh_interval: Option, } impl Default for ConfigClientState { @@ -260,6 +262,7 @@ impl Default for ConfigClientState { root_version: 1, last_error: None, extra_services: vec![], + refresh_interval: None, } } } @@ -268,6 +271,10 @@ impl ConfigClientState { pub fn set_extra_services(&mut self, services: Vec) { self.extra_services = services; } + + pub fn server_recommended_refresh_interval(&self) -> Option { + self.refresh_interval + } } impl ConfigFetcher { @@ -632,6 +639,7 @@ impl ConfigFetcher { client_state.root_version = res.root_version; client_state.targets_version = res.target_version; + client_state.refresh_interval = Some(res.refresh_interval); if res.opaque_backend_state != client_state.opaque_backend_state { client_state.opaque_backend_state = res.opaque_backend_state.to_vec(); } @@ -721,7 +729,9 @@ impl ConfigFetcher { "sha512".to_string() } tuf::crypto::HashAlgorithm::Unknown(u) => u.clone(), - a => anyhow::bail!("unhandled hash algorithm: {a:?}"), + a => { + anyhow::bail!("unhandled hash algorithm: {a:?}") + } }, hash: hash.to_string(), }) diff --git a/libdd-remote-config/src/fetch/shared.rs b/libdd-remote-config/src/fetch/shared.rs 
index 06e9df7553..8153aa8899 100644 --- a/libdd-remote-config/src/fetch/shared.rs +++ b/libdd-remote-config/src/fetch/shared.rs @@ -320,7 +320,9 @@ impl SharedFetcher { }; match fetched { - Ok(None) => clean_inactive(), // nothing changed + Ok(None) => { + clean_inactive(); + } Ok(Some(files)) => { if !files.is_empty() || !last_files.is_empty() { for file in files.iter() { @@ -355,6 +357,12 @@ impl SharedFetcher { } } + if let Some(interval) = opaque_state.server_recommended_refresh_interval() { + // Keep the run-loop interval in sync with the server-provided value + self.interval + .store(interval.as_nanos() as u64, Ordering::Relaxed); + } + select! { _ = self.cancellation.cancelled() => { break; } _ = sleep(Duration::from_nanos(self.interval.load(Ordering::Relaxed))) => {} diff --git a/libdd-remote-config/src/fetch/single.rs b/libdd-remote-config/src/fetch/single.rs index c65ef046f4..4e191591af 100644 --- a/libdd-remote-config/src/fetch/single.rs +++ b/libdd-remote-config/src/fetch/single.rs @@ -8,6 +8,9 @@ use crate::fetch::{ use crate::file_change_tracker::{Change, ChangeTracker, FilePath, UpdatedFiles}; use crate::{RemoteConfigCapabilities, RemoteConfigPath, RemoteConfigProduct, Target}; use std::sync::Arc; +use std::time::Duration; + +const DEFAULT_REFRESH_INTERVAL: Duration = Duration::from_secs(5); /// Simple implementation pub struct SingleFetcher { @@ -90,10 +93,19 @@ impl SingleFetcher { .await } - pub fn get_client_id(&self) -> &String { + pub fn get_client_id(&self) -> &str { &self.client_id } + /// Returns the server-recommended interval before the next poll. + /// In agentless mode this is updated after every successful fetch. + /// In agent mode it returns the default of 5 seconds. + pub fn get_refresh_interval(&self) -> Duration { + self.client_state + .server_recommended_refresh_interval() + .unwrap_or(DEFAULT_REFRESH_INTERVAL) + } + /// Sets the apply state on a stored file. 
pub fn set_config_state(&self, file: &RemoteConfigPath, state: ConfigApplyState) { self.fetcher.set_config_state(file, state) @@ -161,10 +173,16 @@ where }) } - pub fn get_client_id(&self) -> &String { + pub fn get_client_id(&self) -> &str { self.fetcher.get_client_id() } + /// Returns the interval before the next poll. + /// See [`SingleFetcher::get_refresh_interval`]. + pub fn get_refresh_interval(&self) -> Duration { + self.fetcher.get_refresh_interval() + } + /// Sets the apply state on a stored file. pub fn set_config_state(&self, file: &S::StoredFile, state: ConfigApplyState) { self.fetcher.set_config_state(file.path(), state) From d7468ec547828fd2d9f84c1167ac13971a350eba Mon Sep 17 00:00:00 2001 From: paullegranddc Date: Thu, 18 Jun 2026 14:17:54 +0200 Subject: [PATCH 06/18] feat: add extra TUF trust root --- datadog-sidecar-ffi/src/lib.rs | 4 +- datadog-sidecar/src/service/sidecar_server.rs | 4 +- .../examples/remote_config_fetch.rs | 17 +- .../roots/gov/config_root.json | 73 +++ .../roots/gov/director_root.json | 73 +++ .../roots/staging/config_root.json | 73 +++ .../roots/staging/director_root.json | 73 +++ .../src/agentless_client/mod.rs | 347 ++++++++++++--- libdd-remote-config/src/fetch/fetcher.rs | 76 ++-- libdd-remote-config/src/fetch/test_server.rs | 3 +- libdd-tracer-flare/src/lib.rs | 4 +- plans/agentless-vs-agent-rc-comparison.md | 418 ++++++++++++++++++ 12 files changed, 1061 insertions(+), 104 deletions(-) create mode 100644 libdd-remote-config/roots/gov/config_root.json create mode 100644 libdd-remote-config/roots/gov/director_root.json create mode 100644 libdd-remote-config/roots/staging/config_root.json create mode 100644 libdd-remote-config/roots/staging/director_root.json create mode 100644 plans/agentless-vs-agent-rc-comparison.md diff --git a/datadog-sidecar-ffi/src/lib.rs b/datadog-sidecar-ffi/src/lib.rs index 9ac92b6e7e..c04cc39ea6 100644 --- a/datadog-sidecar-ffi/src/lib.rs +++ b/datadog-sidecar-ffi/src/lib.rs @@ -266,9 +266,7 @@ pub 
unsafe extern "C" fn ddog_remote_config_reader_for_endpoint<'a>( language: language.to_utf8_lossy().into(), tracer_version: tracer_version.to_utf8_lossy().into(), endpoint: endpoint.clone(), - // TODO: hostname will need to be added when agentess is enabled - hostname: String::new(), - agentless_enabled: false, + agentless: None, }, &Arc::new(Target { service: service_name.to_utf8_lossy().into(), diff --git a/datadog-sidecar/src/service/sidecar_server.rs b/datadog-sidecar/src/service/sidecar_server.rs index ab65c02039..b21156a26e 100644 --- a/datadog-sidecar/src/service/sidecar_server.rs +++ b/datadog-sidecar/src/service/sidecar_server.rs @@ -779,9 +779,7 @@ impl SidecarInterface for ConnectionSidecarHandler { language: config.language, tracer_version: config.tracer_version, endpoint: config.endpoint, - // TODO: hostname will need to be added when agentess is enabled - hostname: String::new(), - agentless_enabled: false, + agentless: None, }, products: config.remote_config_products, capabilities: config.remote_config_capabilities, diff --git a/libdd-remote-config/examples/remote_config_fetch.rs b/libdd-remote-config/examples/remote_config_fetch.rs index 785c99d312..765a03dc26 100644 --- a/libdd-remote-config/examples/remote_config_fetch.rs +++ b/libdd-remote-config/examples/remote_config_fetch.rs @@ -3,13 +3,13 @@ use libdd_common::tag::Tag; use libdd_common::Endpoint; +use libdd_remote_config::agentless_client::AgentlessConfig; use libdd_remote_config::fetch::{ConfigInvariants, ConfigOptions, SingleChangesFetcher}; use libdd_remote_config::file_change_tracker::{Change, FilePath}; use libdd_remote_config::file_storage::ParsedFileStorage; use libdd_remote_config::RemoteConfigProduct::ApmTracing; use libdd_remote_config::{RemoteConfigParsed, Target}; use std::process::Command; -use std::time::Duration; use tokio::time::sleep; const RUNTIME_ID: &str = "23e76587-5ae1-410c-a05c-137cae600a10"; @@ -34,12 +34,18 @@ async fn main() { let dd_api_key = 
std::env::var("DD_API_KEY").ok(); let dd_site = std::env::var("DD_SITE").ok(); - let (endpoint, agentless_enabled) = match (dd_api_key, dd_site) { + let (endpoint, agentless) = match (dd_api_key, dd_site) { (Some(api_key), Some(site)) => { println!("DD_API_KEY and DD_SITE are set — enabling agentless mode (site: {site})"); let endpoint = Endpoint::agentless(&site, api_key) .expect("Failed to build agentless endpoint from DD_SITE"); - (endpoint, true) + ( + endpoint, + Some(AgentlessConfig { + hostname, + ..Default::default() + }), + ) } _ => { println!("DD_API_KEY / DD_SITE not set — connecting to local agent"); @@ -51,7 +57,7 @@ async fn main() { test_token: None, ..Default::default() }, - false, + None, ) } }; @@ -79,8 +85,7 @@ async fn main() { language: "awesomelang".to_string(), tracer_version: "99.10.5".to_string(), endpoint, - hostname, - agentless_enabled, + agentless, }, products: vec![ApmTracing], capabilities: vec![], diff --git a/libdd-remote-config/roots/gov/config_root.json b/libdd-remote-config/roots/gov/config_root.json new file mode 100644 index 0000000000..40402b5e70 --- /dev/null +++ b/libdd-remote-config/roots/gov/config_root.json @@ -0,0 +1,73 @@ +{ + "signed": { + "_type": "root", + "spec_version": "1.0", + "version": 1, + "expires": "0001-01-01T00:00:00Z", + "keys": { + "8907affe5835f969ee7680dda2e5b0ece95d839611d481483d89c22b2df42993": { + "keytype": "ecdsa", + "scheme": "ecdsa-sha2-nistp256", + "keyid_hash_algorithms": [ + "sha256", + "sha512" + ], + "keyval": { + "public": "-----BEGIN PUBLIC KEY-----\nMFkwEwYHKoZIzj0CAQYIKoZIzj0DAQcDQgAES2cvfa8r3HZ4AQeDUurdth7xqFk3\nqOuYtR877knUfOtJe+xU/F/ESVrl4B0ZMcyF3TaucgMsae4OVlc2lAW3Nw==\n-----END PUBLIC KEY-----\n" + } + }, + "fe9b1451a0446f049888c4ece57fa4c8127f50cc2401d0bb15712e9367953425": { + "keytype": "ecdsa", + "scheme": "ecdsa-sha2-nistp256", + "keyid_hash_algorithms": [ + "sha256", + "sha512" + ], + "keyval": { + "public": "-----BEGIN PUBLIC 
KEY-----\nMFkwEwYHKoZIzj0CAQYIKoZIzj0DAQcDQgAETSYtYSgft/owrcf+DsvGzdl+wpSr\nAVe0hmZL/hvdC0oogI98nYTYzycP0B5M5xBeP4ZfJm/mlFFsqCHHosQWkA==\n-----END PUBLIC KEY-----\n" + } + } + }, + "roles": { + "root": { + "keyids": [ + "fe9b1451a0446f049888c4ece57fa4c8127f50cc2401d0bb15712e9367953425", + "8907affe5835f969ee7680dda2e5b0ece95d839611d481483d89c22b2df42993" + ], + "threshold": 2 + }, + "snapshot": { + "keyids": [ + "fe9b1451a0446f049888c4ece57fa4c8127f50cc2401d0bb15712e9367953425", + "8907affe5835f969ee7680dda2e5b0ece95d839611d481483d89c22b2df42993" + ], + "threshold": 2 + }, + "targets": { + "keyids": [ + "fe9b1451a0446f049888c4ece57fa4c8127f50cc2401d0bb15712e9367953425", + "8907affe5835f969ee7680dda2e5b0ece95d839611d481483d89c22b2df42993" + ], + "threshold": 2 + }, + "timestamp": { + "keyids": [ + "fe9b1451a0446f049888c4ece57fa4c8127f50cc2401d0bb15712e9367953425", + "8907affe5835f969ee7680dda2e5b0ece95d839611d481483d89c22b2df42993" + ], + "threshold": 2 + } + }, + "consistent_snapshot": true + }, + "signatures": [ + { + "keyid": "fe9b1451a0446f049888c4ece57fa4c8127f50cc2401d0bb15712e9367953425", + "sig": "3046022100f693d8c4ec048f6ac08ab01f5a7cc641c92d3ccf9787b949897da91e57b5c7240221009841eb205814b96b31447bc2893234f691c9ef16d54676f24b94138954cb4d23" + }, + { + "keyid": "8907affe5835f969ee7680dda2e5b0ece95d839611d481483d89c22b2df42993", + "sig": "3044022060b37818fb24ddca63a7bde900572c833cbf4b157c856734ead79c2f5c28775902207a423e9207db7767aff5863817a089f330af20aa9d21cdeb7f0cc9de337a404f" + } + ] +} \ No newline at end of file diff --git a/libdd-remote-config/roots/gov/director_root.json b/libdd-remote-config/roots/gov/director_root.json new file mode 100644 index 0000000000..058ff2a3b4 --- /dev/null +++ b/libdd-remote-config/roots/gov/director_root.json @@ -0,0 +1,73 @@ +{ + "signed": { + "_type": "root", + "spec_version": "1.0", + "version": 1, + "expires": "0001-01-01T00:00:00Z", + "keys": { + "1bd43b99872bee114b2b1c33cff7afbdb8ccfc799751aaa11c2336f3540d1c10": { + 
"keytype": "ecdsa", + "scheme": "ecdsa-sha2-nistp256", + "keyid_hash_algorithms": [ + "sha256", + "sha512" + ], + "keyval": { + "public": "-----BEGIN PUBLIC KEY-----\nMFkwEwYHKoZIzj0CAQYIKoZIzj0DAQcDQgAEUDdWZozMy6DojzrxkhevLhLzom0E\nnW0C7JWPXgnoL58OHhqDTHhkiUP5H3+fGdVKZ33Vca686aWWSwZUY6xSRQ==\n-----END PUBLIC KEY-----\n" + } + }, + "3360f9a30c063542b2d193fe01854ea3b7ae92c641812b97ce01180bf150c835": { + "keytype": "ecdsa", + "scheme": "ecdsa-sha2-nistp256", + "keyid_hash_algorithms": [ + "sha256", + "sha512" + ], + "keyval": { + "public": "-----BEGIN PUBLIC KEY-----\nMFkwEwYHKoZIzj0CAQYIKoZIzj0DAQcDQgAEE/Evc+4Qx1yPwe0SyvP52C9z8inY\ncTH0eCXHRu+mzShDx7Ne8gyA/vU696i9jcc4pfsOwo1WpIkJsXuqP0jG6A==\n-----END PUBLIC KEY-----\n" + } + } + }, + "roles": { + "root": { + "keyids": [ + "1bd43b99872bee114b2b1c33cff7afbdb8ccfc799751aaa11c2336f3540d1c10", + "3360f9a30c063542b2d193fe01854ea3b7ae92c641812b97ce01180bf150c835" + ], + "threshold": 2 + }, + "snapshot": { + "keyids": [ + "1bd43b99872bee114b2b1c33cff7afbdb8ccfc799751aaa11c2336f3540d1c10", + "3360f9a30c063542b2d193fe01854ea3b7ae92c641812b97ce01180bf150c835" + ], + "threshold": 2 + }, + "targets": { + "keyids": [ + "1bd43b99872bee114b2b1c33cff7afbdb8ccfc799751aaa11c2336f3540d1c10", + "3360f9a30c063542b2d193fe01854ea3b7ae92c641812b97ce01180bf150c835" + ], + "threshold": 2 + }, + "timestamp": { + "keyids": [ + "1bd43b99872bee114b2b1c33cff7afbdb8ccfc799751aaa11c2336f3540d1c10", + "3360f9a30c063542b2d193fe01854ea3b7ae92c641812b97ce01180bf150c835" + ], + "threshold": 2 + } + }, + "consistent_snapshot": true + }, + "signatures": [ + { + "keyid": "1bd43b99872bee114b2b1c33cff7afbdb8ccfc799751aaa11c2336f3540d1c10", + "sig": "3045022016088517105a41506c4465e636483b2eb782d03322da78273a28dd427f2acc78022100cfd965ae1e32c86761d6256a67708e5808f3f98413976373febe23bfefec8da1" + }, + { + "keyid": "3360f9a30c063542b2d193fe01854ea3b7ae92c641812b97ce01180bf150c835", + "sig": 
"3045022007f880ebf48676740f6cb9d216c91e0f51427ef9f44a73f4a7de8da6e4b4b53d022100b0f87441a0761422d7ac57582215d9570de78a0c30dd86479accedf7a00a6a1b" + } + ] +} \ No newline at end of file diff --git a/libdd-remote-config/roots/staging/config_root.json b/libdd-remote-config/roots/staging/config_root.json new file mode 100644 index 0000000000..9d44633729 --- /dev/null +++ b/libdd-remote-config/roots/staging/config_root.json @@ -0,0 +1,73 @@ +{ + "signatures": [ + { + "keyid": "bd3ea764afdf757f07bab1e9e501a5fda1d49a8da3eaddc53a50dbe2aff92545", + "sig": "928d0b9de72a1a1c2fad453e52950509a434814ca0dc5fb43db5100fdbd732461b38b522051ffedc7c226426ce102c245bc69895fde0f0ca0d9615f84027c60f" + }, + { + "keyid": "6aac6a51efedb4e54915bf9fbd2cfb49fbf428d46052bcaf3c72409c33ecdf5e", + "sig": "146d301f5dd97125ddd34d13ad5c7b1f071bbd249d7c86d17a095c0fbfd680ed21737f45997361e14e79be973914cfb35da39c02ce58f81df12afd9eb49d0003" + } + ], + "signed": { + "_type": "root", + "consistent_snapshot": true, + "expires": "2025-12-01T17:00:00Z", + "keys": { + "6aac6a51efedb4e54915bf9fbd2cfb49fbf428d46052bcaf3c72409c33ecdf5e": { + "keyid_hash_algorithms": [ + "sha256", + "sha512" + ], + "keytype": "ed25519", + "keyval": { + "public": "09402247ef6252018e52c7ba6a3a484936f14dad6ae921c556a1d092f4a68f0f" + }, + "scheme": "ed25519" + }, + "bd3ea764afdf757f07bab1e9e501a5fda1d49a8da3eaddc53a50dbe2aff92545": { + "keyid_hash_algorithms": [ + "sha256", + "sha512" + ], + "keytype": "ed25519", + "keyval": { + "public": "cf248bc222a5dfc9676a2a3ef90526c84adb09649db56686705f69f42908d7d8" + }, + "scheme": "ed25519" + } + }, + "roles": { + "root": { + "keyids": [ + "6aac6a51efedb4e54915bf9fbd2cfb49fbf428d46052bcaf3c72409c33ecdf5e", + "bd3ea764afdf757f07bab1e9e501a5fda1d49a8da3eaddc53a50dbe2aff92545" + ], + "threshold": 2 + }, + "snapshot": { + "keyids": [ + "6aac6a51efedb4e54915bf9fbd2cfb49fbf428d46052bcaf3c72409c33ecdf5e", + "bd3ea764afdf757f07bab1e9e501a5fda1d49a8da3eaddc53a50dbe2aff92545" + ], + "threshold": 2 + }, + 
"targets": { + "keyids": [ + "6aac6a51efedb4e54915bf9fbd2cfb49fbf428d46052bcaf3c72409c33ecdf5e", + "bd3ea764afdf757f07bab1e9e501a5fda1d49a8da3eaddc53a50dbe2aff92545" + ], + "threshold": 2 + }, + "timestamp": { + "keyids": [ + "6aac6a51efedb4e54915bf9fbd2cfb49fbf428d46052bcaf3c72409c33ecdf5e", + "bd3ea764afdf757f07bab1e9e501a5fda1d49a8da3eaddc53a50dbe2aff92545" + ], + "threshold": 2 + } + }, + "spec_version": "1.0", + "version": 29 + } +} \ No newline at end of file diff --git a/libdd-remote-config/roots/staging/director_root.json b/libdd-remote-config/roots/staging/director_root.json new file mode 100644 index 0000000000..7361c7b9e9 --- /dev/null +++ b/libdd-remote-config/roots/staging/director_root.json @@ -0,0 +1,73 @@ +{ + "signatures": [ + { + "keyid": "233a529fe7c63b5b9081f6e0e2681cc227f85e04ad434d0a165a2f69b87255a6", + "sig": "6d7ddf4bcbd1ce223b5352cae4671ef42800d79f0c94dda905cf0dd8a6198ba69795a19201dc7230e4bd872cf109e827233678bf76389910933472417488320e" + }, + { + "keyid": "6ca796e7b4883af3bb3d522dc0009984dcbf5ad2a6c9ea354d30acc32d8b75d1", + "sig": "a1236d12903e1c4024fc6340c50a0f2fe9972e967eb2bace8d6594e156f0466f772bfc0c9f30e07067904073c0d7ba7d48ad00341405312daf0d7bc502ccc50f" + } + ], + "signed": { + "_type": "root", + "consistent_snapshot": true, + "expires": "1970-01-01T00:00:00Z", + "keys": { + "233a529fe7c63b5b9081f6e0e2681cc227f85e04ad434d0a165a2f69b87255a6": { + "keyid_hash_algorithms": [ + "sha256", + "sha512" + ], + "keytype": "ed25519", + "keyval": { + "public": "f7c278f32e69ce7d5ca5b81bd2cbe2b4b44177eee36ed025ec06bd19e47eaefe" + }, + "scheme": "ed25519" + }, + "6ca796e7b4883af3bb3d522dc0009984dcbf5ad2a6c9ea354d30acc32d8b75d1": { + "keyid_hash_algorithms": [ + "sha256", + "sha512" + ], + "keytype": "ed25519", + "keyval": { + "public": "47be15ec10499208aa5ef9a1e32010cc05c047a98d18ad084d6e4e51baa1b93c" + }, + "scheme": "ed25519" + } + }, + "roles": { + "root": { + "keyids": [ + "6ca796e7b4883af3bb3d522dc0009984dcbf5ad2a6c9ea354d30acc32d8b75d1", + 
"233a529fe7c63b5b9081f6e0e2681cc227f85e04ad434d0a165a2f69b87255a6" + ], + "threshold": 2 + }, + "snapshot": { + "keyids": [ + "6ca796e7b4883af3bb3d522dc0009984dcbf5ad2a6c9ea354d30acc32d8b75d1", + "233a529fe7c63b5b9081f6e0e2681cc227f85e04ad434d0a165a2f69b87255a6" + ], + "threshold": 2 + }, + "targets": { + "keyids": [ + "6ca796e7b4883af3bb3d522dc0009984dcbf5ad2a6c9ea354d30acc32d8b75d1", + "233a529fe7c63b5b9081f6e0e2681cc227f85e04ad434d0a165a2f69b87255a6" + ], + "threshold": 2 + }, + "timestamp": { + "keyids": [ + "6ca796e7b4883af3bb3d522dc0009984dcbf5ad2a6c9ea354d30acc32d8b75d1", + "233a529fe7c63b5b9081f6e0e2681cc227f85e04ad434d0a165a2f69b87255a6" + ], + "threshold": 2 + } + }, + "spec_version": "1.0", + "version": 1 + } +} \ No newline at end of file diff --git a/libdd-remote-config/src/agentless_client/mod.rs b/libdd-remote-config/src/agentless_client/mod.rs index 52d28400c2..a2f8d7704f 100644 --- a/libdd-remote-config/src/agentless_client/mod.rs +++ b/libdd-remote-config/src/agentless_client/mod.rs @@ -3,6 +3,8 @@ use std::{ fmt, + ops::RangeInclusive, + path::PathBuf, time::{Duration, SystemTime, UNIX_EPOCH}, }; @@ -29,11 +31,86 @@ use tuf::{ repository::RepositoryProvider as _, }; -#[allow(dead_code)] // used in tests and reserved for TUF config-repo init -const CONFIG_ROOT: &[u8] = include_bytes!("../../roots/prod/config_root.json"); -const CONFIG_ROOT_VERSION: u64 = 16; -const DIRECTOR_ROOT: &[u8] = include_bytes!("../../roots/prod/director_root.json"); -const DIRECTOR_ROOT_VERSION: u64 = 15; +// Embedded TUF trust roots, per site +const PROD_CONFIG_ROOT: &[u8] = include_bytes!("../../roots/prod/config_root.json"); +const PROD_CONFIG_ROOT_VERSION: u64 = 16; + +const PROD_DIRECTOR_ROOT: &[u8] = include_bytes!("../../roots/prod/director_root.json"); +const PROD_DIRECTOR_ROOT_VERSION: u64 = 15; + +const STAGING_CONFIG_ROOT: &[u8] = include_bytes!("../../roots/staging/config_root.json"); +const STAGING_CONFIG_ROOT_VERSION: u64 = 29; + +const 
STAGING_DIRECTOR_ROOT: &[u8] = include_bytes!("../../roots/staging/director_root.json"); +const STAGING_DIRECTOR_ROOT_VERSION: u64 = 1; + +const GOV_CONFIG_ROOT: &[u8] = include_bytes!("../../roots/gov/config_root.json"); +const GOV_CONFIG_ROOT_VERSION: u64 = 1; + +const GOV_DIRECTOR_ROOT: &[u8] = include_bytes!("../../roots/gov/director_root.json"); +const GOV_DIRECTOR_ROOT_VERSION: u64 = 1; + +/// Datadog site selection used to pick a default TUF trust-root pair. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +enum Site { + Prod, + Staging, + Gov, +} + +impl Site { + /// Map an endpoint authority/host to a Datadog site. + /// + /// The configured agentless endpoint authority looks like `config.` + /// (see `make_agentless_configs_endpoint`), so we strip a leading + /// `config.` prefix and apply the same rules the agent uses. + fn from_host(host: &str) -> Self { + let site = host.strip_prefix("config.").unwrap_or(host); + if site == "datad0g.com" || site.ends_with(".datad0g.com") { + Site::Staging + } else if site == "ddog-gov.com" || site.ends_with(".ddog-gov.com") { + Site::Gov + } else { + Site::Prod + } + } + + fn embedded_config_root(self) -> (&'static [u8], u64) { + match self { + Site::Prod => (PROD_CONFIG_ROOT, PROD_CONFIG_ROOT_VERSION), + Site::Staging => (STAGING_CONFIG_ROOT, STAGING_CONFIG_ROOT_VERSION), + Site::Gov => (GOV_CONFIG_ROOT, GOV_CONFIG_ROOT_VERSION), + } + } + + fn embedded_director_root(self) -> (&'static [u8], u64) { + match self { + Site::Prod => (PROD_DIRECTOR_ROOT, PROD_DIRECTOR_ROOT_VERSION), + Site::Staging => (STAGING_DIRECTOR_ROOT, STAGING_DIRECTOR_ROOT_VERSION), + Site::Gov => (GOV_DIRECTOR_ROOT, GOV_DIRECTOR_ROOT_VERSION), + } + } +} + +/// Extract the `version` integer from a signed TUF root JSON document. Used +/// only when loading an override root from disk; the embedded roots have +/// their versions hardcoded above. 
+fn parse_root_version(raw: &[u8]) -> anyhow::Result { + let v: Value = serde_json::from_slice(raw)?; + v.get("signed") + .and_then(|s| s.get("version")) + .and_then(Value::as_u64) + .ok_or_else(|| format_err!("missing or invalid signed.version in TUF root")) +} + +/// Read a TUF root override from disk, returning the bytes and their parsed +/// version +fn load_root(override_path: &std::path::Path) -> anyhow::Result<(Vec, u64)> { + let bytes = std::fs::read(override_path) + .map_err(|e| format_err!("failed to read TUF root override at {override_path:?}: {e}"))?; + let version = parse_root_version(&bytes)?; + Ok((bytes, version)) +} const FAKE_AGENT_VERSION: &str = "7.78.4"; @@ -63,9 +140,19 @@ pub fn make_agentless_configs_endpoint(e: &Endpoint) -> Option { }) } -#[derive(Clone)] +#[derive(Clone, Debug, Hash, Eq, PartialEq, Default)] pub struct AgentlessConfig { + /// Hostname reported to the RC backend in `LatestConfigsRequest.hostname`. + /// Required (must be non-empty) in agentless mode; an empty value causes + /// `ConfigFetcherState::new` to downgrade to agent mode. pub hostname: String, + /// Optional path to a TUF config-repo root JSON to use instead of the + /// embedded one. Useful for staging/private deployments where the trust + /// chain differs from the published defaults. + pub config_root_override_path: Option, + /// Optional path to a TUF director-repo root JSON to use instead of the + /// embedded one. + pub director_root_override_path: Option, } pub type NativeAgentlessFetcher = AgentlessFetcher; @@ -76,9 +163,13 @@ pub struct AgentlessFetcher { opaque_backend_state: Vec, director_client: TUFClient, config_client: TUFClient, + initial_config_root_version: u64, + initial_director_root_version: u64, hostname: String, products: HashSet, refresh_interval: Duration, + /// Number of consecutive `fetch_config` failures. Reset to 0 on success. 
+ consecutive_failures: u32, endpoint: Endpoint, // TODO: Not sure this is needed if the wrapped client already caches files? target_cache: HashMap, @@ -140,33 +231,73 @@ impl AgentlessFetcher { /// Returns an error if TUF root initialization fails. /// This can happen for instance if the trust root certificates have expired pub async fn new(cfg: AgentlessConfig, endpoint: Endpoint) -> anyhow::Result { + // Pick the default trust roots based on the endpoint's host. Overrides + // (if any) take precedence. + let site = endpoint + .url + .authority() + .map(|a| Site::from_host(a.as_str())) + .unwrap_or(Site::Prod); + + let (config_root_bytes, initial_config_root_version) = + match cfg.config_root_override_path.as_deref() { + Some(p) => load_root(p)?, + None => { + let (embedded, version) = site.embedded_config_root(); + (embedded.to_vec(), version) + } + }; + let (director_root_bytes, initial_director_root_version) = + match cfg.director_root_override_path.as_deref() { + Some(p) => load_root(p)?, + None => { + let (embedded, version) = site.embedded_director_root(); + (embedded.to_vec(), version) + } + }; + Ok(Self { endpoint, http: C::new_client(), director_client: TUFClient::with_trusted_root( tuf::client::Config::default(), - &RawSignedMetadata::new(DIRECTOR_ROOT.to_vec()), + &RawSignedMetadata::new(director_root_bytes), TUFRepo::new(), TUFRepo::new(), ) .await?, config_client: TUFClient::with_trusted_root( tuf::client::Config::default(), - &RawSignedMetadata::new(CONFIG_ROOT.to_vec()), + &RawSignedMetadata::new(config_root_bytes), TUFRepo::new(), TUFRepo::new(), ) .await?, + initial_config_root_version, + initial_director_root_version, hostname: cfg.hostname, products: HashSet::new(), target_cache: HashMap::new(), opaque_backend_state: Vec::new(), refresh_interval: Duration::from_secs(60), + consecutive_failures: 0, initialized: false, }) } + /// Number of consecutive failed `fetch_config` calls. `0` after a success. 
+ pub fn consecutive_failures(&self) -> u32 { + self.consecutive_failures + } + + /// Recommended delay before the next `fetch_config` attempt given the + /// current consecutive-failure count. Returns `None` when no backoff + /// applies (i.e. either no failures yet, or only a single one). + pub fn next_backoff(&self) -> Option { + compute_backoff(self.consecutive_failures) + } + /// Return the value of a particular target , checking both its length and /// hashes against the metadata in the config repo. /// @@ -266,7 +397,11 @@ impl AgentlessFetcher { u64::from(self.director_client.database().trusted_root().version()), ) } else { - (0, CONFIG_ROOT_VERSION, DIRECTOR_ROOT_VERSION) + ( + 0, + self.initial_config_root_version, + self.initial_director_root_version, + ) }; let all_products = c.products.iter().fold(HashSet::new(), |mut acc, p| { @@ -285,6 +420,11 @@ impl AgentlessFetcher { let now = now_unix_milli_ts(); + let (has_error, error) = match c.state.as_ref() { + Some(state) if state.has_error => (true, state.error.clone()), + _ => (false, String::new()), + }; + let request = remoteconfig::LatestConfigsRequest { hostname: self.hostname.clone(), current_config_snapshot_version, @@ -298,14 +438,21 @@ impl AgentlessFetcher { ..c }], agent_version: FAKE_AGENT_VERSION.to_owned(), - has_error: false, - error: String::new(), + has_error, + error, trace_agent_env: String::new(), org_uuid: String::new(), tags: vec![], agent_uuid: String::new(), }; - let response = self.get_latest_config(request).await?; + let response = match self.get_latest_config(request).await { + Ok(r) => r, + Err(e) => { + self.consecutive_failures = self.consecutive_failures.saturating_add(1); + return Err(e); + } + }; + self.consecutive_failures = 0; self.apply(&response).await?; if !self.initialized { @@ -400,7 +547,7 @@ impl AgentlessFetcher { &mut self, response: &remoteconfig::LatestConfigsResponse, ) -> anyhow::Result<()> { - // At a high level, what we're doing here is populating the 
"remote" repos with the metadata + // At a high level, we're populating the "remote" repos with the metadata // that we received from upstream (which does not validate it), and then using the clients' // `update` methods to synchronize that metadata to the "local" repos, during which // validation is performed. @@ -414,14 +561,8 @@ impl AgentlessFetcher { *repo = TUFRepo::new(); for target_file in &response.target_files { let trimmed_path = trim_hash_target_path(&target_file.path)?; - let trimmed_target_path = TargetPath::new(&trimmed_path)?; - repo.store_target(&trimmed_target_path, &mut target_file.raw.as_slice()) - .await?; - - // let trimmed_path = trim_hash_target_path(&target_file.path)?; - // let trimmed_target_path = TargetPath::new(&trimmed_path)?; repo.store_target( - &TargetPath::new(&target_file.path)?, + &TargetPath::new(&trimmed_path)?, &mut target_file.raw.as_slice(), ) .await?; @@ -487,6 +628,9 @@ impl AgentlessFetcher { } } +const REFRESH_INTERVAL_BOUNDS: RangeInclusive = + Duration::from_secs(1)..=Duration::from_secs(60); + fn get_director_custom(director_client: &TUFClient) -> Option<(Option>, Option)> { let custom = director_client .database() @@ -502,7 +646,9 @@ fn get_director_custom(director_client: &TUFClient) -> Option<(Option>, custom .get("agent_refresh_interval") .and_then(Value::as_u64) - .map(Duration::from_secs), + .map(Duration::from_secs) + // Mirror the agent: silently drop values outside `[1s, 1m]` + .filter(|d| REFRESH_INTERVAL_BOUNDS.contains(d)), )) } @@ -528,6 +674,36 @@ fn parse_rc_response( Ok(T::decode(body)?) } +/// Compute the backoff delay to wait before the next `fetch_config` attempt, +/// given the number of consecutive failures observed so far. 
+fn compute_backoff(consecutive_failures: u32) -> Option { + match consecutive_failures { + 0 | 1 => None, + 2 => Some(jitter_secs(30, 60)), + 3 => Some(jitter_secs(60, 120)), + _ => Some(Duration::from_secs(120)), + } +} + +/// Pseudo-random duration in `[min_secs, max_secs]`, derived from the current +/// wall-clock subsecond nanos. This is sufficient for jitter purposes (we do +/// not need cryptographic randomness here, and pulling in a `rand` dependency +/// for one usage is overkill). +fn jitter_secs(min_secs: u64, max_secs: u64) -> Duration { + let (lo, hi) = if min_secs <= max_secs { + (min_secs, max_secs) + } else { + (max_secs, min_secs) + }; + let span = hi.saturating_sub(lo); + let nanos = SystemTime::now() + .duration_since(UNIX_EPOCH) + .map(|d| u64::from(d.subsec_nanos())) + .unwrap_or(0); + let offset = if span == 0 { 0 } else { nanos % (span + 1) }; + Duration::from_secs(lo + offset) +} + fn now_unix_milli_ts() -> u64 { u64::try_from( SystemTime::now() @@ -585,32 +761,23 @@ async fn store_noversion( Ok(()) } +/// Strip the leading `.` prefix from the basename of a TUF target path. +/// For instance "datadog/2///.config"` => `"datadog/2///config"` +/// /// See https://datadoghq.atlassian.net/browse/RC-1859 for more information. fn trim_hash_target_path(target_path: &str) -> anyhow::Result { - let path = std::path::Path::new(target_path); - // Get the last component - let last_component = path - .components() - .next_back() + let (parent, basename) = target_path + .rsplit_once('/') .ok_or_else(|| format_err!("invalid target: {target_path}"))?; - let basename = match last_component { - std::path::Component::Normal(name) => name - .to_str() - .ok_or_else(|| format_err!("invalid target: {target_path}"))?, - _ => return Err(format_err!("invalid target: {target_path}")), - }; + if basename.is_empty() { + bail!("invalid target: {target_path}") + } - // Split the basename at the first occurrence of '.' 
- let split: Vec<&str> = basename.splitn(2, '.').collect(); - let basename_trimmed = if split.len() > 1 { split[1] } else { basename }; + // Strip the leading `.` component if present. If the basename + // contains no `.`, keep it as-is (matches the previous behaviour). + let basename_trimmed = basename.split_once('.').map_or(basename, |(_, rest)| rest); - // Reconstruct the whole path - let parent = path - .parent() - .ok_or_else(|| format_err!("invalid target: {target_path}"))?; - let mut result_path = parent.components().as_path().to_path_buf(); - result_path.push(basename_trimmed); - Ok(result_path.to_str().unwrap_or_default().to_string()) + Ok(format!("{parent}/{basename_trimmed}")) } // ── Debug helpers: render `raw: Vec` fields as JSON ──────────────────── @@ -735,12 +902,6 @@ impl fmt::Debug for DebugLatestConfigsResponse<'_> { /// Returns a value that implements [`fmt::Debug`] for [`remoteconfig::LatestConfigsResponse`], /// rendering every `raw` byte field as a parsed JSON value instead of a raw byte array. 
-/// -/// Use with the standard formatting machinery: -/// -/// ```rust,ignore -/// println!("{:#?}", debug_latest_configs_response(&response)); -/// ``` pub fn debug_latest_configs_response( resp: &remoteconfig::LatestConfigsResponse, ) -> impl fmt::Debug + '_ { @@ -749,14 +910,92 @@ pub fn debug_latest_configs_response( #[cfg(test)] mod tests { - use super::{CONFIG_ROOT, CONFIG_ROOT_VERSION, DIRECTOR_ROOT, DIRECTOR_ROOT_VERSION}; + use super::trim_hash_target_path; + use super::{ + Site, GOV_CONFIG_ROOT, GOV_CONFIG_ROOT_VERSION, GOV_DIRECTOR_ROOT, + GOV_DIRECTOR_ROOT_VERSION, PROD_CONFIG_ROOT, PROD_CONFIG_ROOT_VERSION, PROD_DIRECTOR_ROOT, + PROD_DIRECTOR_ROOT_VERSION, STAGING_CONFIG_ROOT, STAGING_CONFIG_ROOT_VERSION, + STAGING_DIRECTOR_ROOT, STAGING_DIRECTOR_ROOT_VERSION, + }; + + #[test] + fn strips_hash_prefix() { + assert_eq!( + trim_hash_target_path("datadog/2/APM_TRACING/abcd/deadbeef.config").unwrap(), + "datadog/2/APM_TRACING/abcd/config" + ); + } + + #[test] + fn no_hash_prefix_is_kept() { + assert_eq!( + trim_hash_target_path("datadog/2/APM_TRACING/abcd/config").unwrap(), + "datadog/2/APM_TRACING/abcd/config" + ); + } #[test] - fn test_root_version_match() { - let config_root: serde_json::Value = serde_json::from_slice(CONFIG_ROOT).unwrap(); - assert_eq!(config_root["signed"]["version"], CONFIG_ROOT_VERSION); + fn backslash_is_not_a_separator() { + // Windows-style separators must NOT be treated as path separators. + // The whole string is the basename here. 
+ assert!(trim_hash_target_path(r"datadog\2\foo.bar").is_err()); + } + + #[test] + fn empty_or_no_slash_is_error() { + assert!(trim_hash_target_path("").is_err()); + assert!(trim_hash_target_path("deadbeef.config").is_err()); + } + + #[test] + fn trailing_slash_is_error() { + assert!(trim_hash_target_path("datadog/2/foo/").is_err()); + } - let director_root: serde_json::Value = serde_json::from_slice(DIRECTOR_ROOT).unwrap(); - assert_eq!(director_root["signed"]["version"], DIRECTOR_ROOT_VERSION); + #[test] + fn test_root_versions_match() { + // Every embedded root's hardcoded version must match the + // `signed.version` field inside the JSON. If you bump a root file, + // bump the matching `_VERSION` constant in this module too. + for (raw, expected) in [ + (PROD_CONFIG_ROOT, PROD_CONFIG_ROOT_VERSION), + (PROD_DIRECTOR_ROOT, PROD_DIRECTOR_ROOT_VERSION), + (STAGING_CONFIG_ROOT, STAGING_CONFIG_ROOT_VERSION), + (STAGING_DIRECTOR_ROOT, STAGING_DIRECTOR_ROOT_VERSION), + (GOV_CONFIG_ROOT, GOV_CONFIG_ROOT_VERSION), + (GOV_DIRECTOR_ROOT, GOV_DIRECTOR_ROOT_VERSION), + ] { + let v: serde_json::Value = serde_json::from_slice(raw).unwrap(); + assert_eq!(v["signed"]["version"], expected); + } + } + + #[test] + fn test_compute_backoff() { + use super::compute_backoff; + use std::time::Duration; + + assert_eq!(compute_backoff(0), None); + assert_eq!(compute_backoff(1), None); + + let b2 = compute_backoff(2).unwrap(); + assert!((Duration::from_secs(30)..=Duration::from_secs(60)).contains(&b2)); + + let b3 = compute_backoff(3).unwrap(); + assert!((Duration::from_secs(60)..=Duration::from_secs(120)).contains(&b3)); + + assert_eq!(compute_backoff(4), Some(Duration::from_secs(120))); + assert_eq!(compute_backoff(42), Some(Duration::from_secs(120))); + } + + #[test] + fn test_site_from_host() { + assert_eq!(Site::from_host("config.datadoghq.com"), Site::Prod); + assert_eq!(Site::from_host("config.us3.datadoghq.com"), Site::Prod); + assert_eq!(Site::from_host("config.datadoghq.eu"), 
Site::Prod); + assert_eq!(Site::from_host("config.datad0g.com"), Site::Staging); + assert_eq!(Site::from_host("datad0g.com"), Site::Staging); + assert_eq!(Site::from_host("config.ddog-gov.com"), Site::Gov); + assert_eq!(Site::from_host("config.foo.ddog-gov.com"), Site::Gov); } } diff --git a/libdd-remote-config/src/fetch/fetcher.rs b/libdd-remote-config/src/fetch/fetcher.rs index 0d2dd34e11..3b3b4e2d56 100644 --- a/libdd-remote-config/src/fetch/fetcher.rs +++ b/libdd-remote-config/src/fetch/fetcher.rs @@ -54,9 +54,16 @@ pub trait FileStorage { pub struct ConfigInvariants { pub language: String, pub tracer_version: String, - pub hostname: String, pub endpoint: Endpoint, - pub agentless_enabled: bool, + /// Enables and configures agentless mode. If some the fetcher will + /// talk directly to the RC backend + pub agentless: Option, +} + +impl ConfigInvariants { + pub fn agentless_enabled(&self) -> bool { + self.agentless.is_some() + } } struct StoredTargetFile { @@ -160,29 +167,28 @@ impl ConfigFetcherFilesLock<'_, S> { impl ConfigFetcherState { pub fn new(invariants: ConfigInvariants) -> Self { - let (endpoint, agentless_enabled) = if invariants.agentless_enabled { - match ( + let (endpoint, agentless) = match &invariants.agentless { + Some(agentless_cfg) => match ( make_agentless_configs_endpoint(&invariants.endpoint), - invariants.hostname.is_empty(), + agentless_cfg.hostname.is_empty(), ) { - (Some(e), false) => (e, true), + (Some(e), false) => (e, Some(agentless_cfg.clone())), (Some(_), true) => { warn!("rc_config_fetcher: agentless enabled but the hostname is empty. Downgrading to agent endpoint"); - (make_agent_configs_endpoint(&invariants.endpoint), false) + (make_agent_configs_endpoint(&invariants.endpoint), None) } (None, _) => { warn!("rc_config_fetcher: agentless enabled but the endpoint is invalid. 
Downgrading to agent endpoint"); - (make_agent_configs_endpoint(&invariants.endpoint), false) + (make_agent_configs_endpoint(&invariants.endpoint), None) } - } - } else { - (make_agent_configs_endpoint(&invariants.endpoint), false) + }, + None => (make_agent_configs_endpoint(&invariants.endpoint), None), }; ConfigFetcherState { target_files_by_path: Default::default(), endpoint, invariants: ConfigInvariants { - agentless_enabled, + agentless, ..invariants }, expire_unused_files: true, @@ -285,18 +291,11 @@ impl ConfigFetcher { file_storage: S, state: Arc>, ) -> anyhow::Result { - let mode: FetcherMode = if dbg!(state.invariants.agentless_enabled) { - FetcherMode::Agentless( - AgentlessFetcher::new( - AgentlessConfig { - hostname: state.invariants.hostname.clone(), - }, - state.endpoint.clone(), - ) - .await?, - ) - } else { - FetcherMode::Agent + let mode: FetcherMode = match &state.invariants.agentless { + Some(agentless_cfg) => FetcherMode::Agentless( + AgentlessFetcher::new(agentless_cfg.clone(), state.endpoint.clone()).await?, + ), + None => FetcherMode::Agent, }; Ok(ConfigFetcher { @@ -629,13 +628,25 @@ impl ConfigFetcher { FetcherMode::Agent => self.fetch_agent(config_req, target, client_state).await, FetcherMode::Agentless(agentless_fetcher) => { #[allow(clippy::expect_used)] - let res = agentless_fetcher - .fetch_config( - config_req - .client - .expect("RC ConfigFetcher::build_config_request should always return a `Some` client"), - ) - .await?; + let client = config_req.client.expect( + "RC ConfigFetcher::build_config_request should always return a `Some` client", + ); + // Capture errors into `client_state.last_error` so the next + // call propagates `has_error` / `error` to the backend. 
+ let res = match agentless_fetcher.fetch_config(client).await { + Ok(res) => res, + Err(e) => { + client_state.last_error = Some(format!("{e:#}")); + // Surface the recommended backoff to the consumer of + // `ConfigClientState::server_recommended_refresh_interval` + // so it waits before the next attempt. `None` means + // "no extra backoff, use the regular interval". + if let Some(backoff) = agentless_fetcher.next_backoff() { + client_state.refresh_interval = Some(backoff); + } + return Err(e); + } + }; client_state.root_version = res.root_version; client_state.targets_version = res.target_version; @@ -943,8 +954,7 @@ pub mod tests { language: "php".to_string(), tracer_version: "1.2.3".to_string(), endpoint: server.endpoint.clone(), - hostname: "host".to_string(), - agentless_enabled: false, + agentless: None, }; let product_capabilities = ConfigProductCapabilities::new( vec![ diff --git a/libdd-remote-config/src/fetch/test_server.rs b/libdd-remote-config/src/fetch/test_server.rs index ad25972b51..cc2d5e751c 100644 --- a/libdd-remote-config/src/fetch/test_server.rs +++ b/libdd-remote-config/src/fetch/test_server.rs @@ -216,8 +216,7 @@ impl RemoteConfigServer { language: "php".to_string(), tracer_version: "1.2.3".to_string(), endpoint: self.endpoint.clone(), - hostname: "localhost".to_string(), - agentless_enabled: false, + agentless: None, }, products: vec![ RemoteConfigProduct::ApmTracing, diff --git a/libdd-tracer-flare/src/lib.rs b/libdd-tracer-flare/src/lib.rs index 5eae806528..af3a489e06 100644 --- a/libdd-tracer-flare/src/lib.rs +++ b/libdd-tracer-flare/src/lib.rs @@ -181,9 +181,7 @@ impl TracerFlareManager { language, tracer_version, endpoint: remote_config_endpoint, - // TODO: hostname will need to be added when agentess is enabled - hostname: String::new(), - agentless_enabled: false, + agentless: None, }, products: vec![ RemoteConfigProduct::AgentConfig, diff --git a/plans/agentless-vs-agent-rc-comparison.md b/plans/agentless-vs-agent-rc-comparison.md 
new file mode 100644 index 0000000000..17fe6a9034 --- /dev/null +++ b/plans/agentless-vs-agent-rc-comparison.md @@ -0,0 +1,418 @@ +# libdd-remote-config agentless client vs `pkg/config/remote` (Core Agent) — comparison + +## Context + +The goal is to use `libdd-remote-config`'s `AgentlessFetcher` +(`libdd-remote-config/src/agentless_client/mod.rs`) as a drop-in replacement +for the trace-agent's RC client, so a tracer/SDK can talk directly to the RC +backend (`config.<site>/api/v0.1/configurations`) instead of going through the +local Datadog agent. + +This document is the result of comparing the agentless implementation in +libdatadog with the Core Agent reference implementation in datadog-agent +(`pkg/config/remote/{service,api,uptane}`), and lists missing features, +discrepancies, missing configuration knobs, and potential bugs. + +## Files compared + +libdatadog (agentless / Rust): +- `libdd-remote-config/src/agentless_client/mod.rs` — TUF client + HTTP calls to `/api/v0.1/{configurations,status,org}` +- `libdd-remote-config/src/fetch/fetcher.rs` — `ConfigFetcher` that switches between agent (`/v0.7/config`) and agentless (`AgentlessFetcher`) modes +- `libdd-remote-config/roots/prod/{config,director}_root.json` — embedded TUF trust roots (prod site only) + +datadog-agent (reference / Go): +- `pkg/config/remote/service/service.go` — `CoreAgentService`, polling loop, backoff, org status, subscriptions +- `pkg/config/remote/service/util.go` — `buildLatestConfigsRequest`, RC key parsing, `targetsCustom` +- `pkg/config/remote/api/http.go` — HTTPClient with TLS guards, header rotation, retries +- `pkg/config/remote/uptane/client.go` — `CoreAgentClient`: Uptane verification, BoltDB cache, org-id check +- `pkg/config/remote/meta/{prod.,staging.,gov.}{config,director}.json` — TUF roots per site + +--- + +## High-level summary + +The libdd agentless client implements the **minimum** to talk to the RC backend +directly: it embeds prod TUF roots, posts `LatestConfigsRequest`, 
runs Uptane +verification in-memory, and returns parsed targets. The agent implementation is +significantly more featureful: persistent cache, backoff, retries, multi-site +roots, org-id / org-uuid verification, RC-key & PAR-JWT auth, dynamic +credentials rotation, telemetry, subscriptions, and cache-bypass rate limiting. + +The two are roughly equivalent for the **happy path** (fetch + verify a TUF +update), but the agentless client has several gaps that will bite production +use: + +- **Site/trust-root coverage is prod-only** (no staging, no gov, no override). +- **No persistent cache** — every process restart re-downloads full metadata. +- **No org UUID / org ID verification.** +- **No backoff / retry policy.** +- **`agent_version` is hardcoded to `"7.78.4"`.** +- **`dbg!` calls are left in code paths reachable in release builds.** +- **Single-client model only** (TODO in code). +- Several `client_tracer` / `LatestConfigsRequest` fields are silently empty. + +--- + +## Missing features + +### 1. Multi-site TUF trust roots +Agent ships `prod.{config,director}.json`, `staging.{config,director}.json`, +`gov.{config,director}.json` (see `pkg/config/remote/meta/`) and exposes +`WithConfigRootOverride(site, override)` / +`WithDirectorRootOverride(site, override)` so customers on EU/US3/US5/gov/staging +get the correct trust anchors. + +libdd embeds only `roots/prod/{config,director}_root.json`. There is no +`AgentlessConfig::site` field, no override option, and no staging/gov asset. +A staging or gov tenant cannot use the agentless client today — signature +verification would fail on first update. + +### 2. Persistent cache (BoltDB) +Agent uses `uptane/transactional_store.go` (BoltDB) to persist TUF metadata, +target files, and the org UUID across restarts. Restarts therefore continue +from the last `targets`/`snapshot`/`timestamp` version. + +libdd uses `tuf::repository::EphemeralRepository` for both `config` and +`director` clients. 
After any restart the request starts again at +`current_config_root_version = CONFIG_ROOT_VERSION` (16), forcing the backend +to re-emit the full root chain, snapshot, and targets. This costs bandwidth +and increases the cold-start window where no config is yet applied. + +### 3. Org UUID handshake & verification +Agent: +- Calls `/api/v0.1/org` via `newRCBackendOrgUUIDProvider` and stores the UUID + in BoltDB, keyed by `configLocalStore.GetMetaVersion(metaRoot)` so root + rotation can recover from a bad/locked-out UUID. +- Sends `LatestConfigsRequest.OrgUuid` in every request. +- In `verifyOrg`, parses the snapshot custom field and asserts + `snapshot.custom.OrgUUID == stored OrgUUID`. + +libdd: +- Implements `get_org_data()` returning `OrgDataResponse` but **never calls + it.** +- Always sends `org_uuid: String::new()`. +- Never reads `snapshot.custom.OrgUUID`. A maliciously or accidentally + cross-org-routed update would not be detected. + +### 4. Org ID verification (`WithOrgIDCheck`) +Agent supports asserting all `director` target paths belong to the configured +`orgID` (parsed from the legacy RC-key format), skipping `SourceEmployee` +paths. libdd has no equivalent — there is no `org_id` in `AgentlessConfig`. + +### 5. Org status polling (`/api/v0.1/status`) +Agent runs a separate `orgStatusPoller` every `defaultRefreshInterval` +(1 min) calling `/api/v0.1/status` and logs whether RC is enabled and +authorized. It also uses the result to decide whether to log refresh errors +at `Warn`/`Error` or just `Debug`. + +libdd: `get_org_status()` is implemented but never called anywhere in the +crate. There is no equivalent polling, and no degradation of log level when +RC is simply disabled for the org. + +### 6. RC-key (`DDRCM_*`) auth +Agent supports the legacy RC-key in `getRemoteConfigAuthKeys`: +- Base32-decodes `DDRCM_`, msgpack-decodes into `{AppKey, Datacenter, OrgID}`. +- Sends `DD-Application-Key` header in addition to `DD-Api-Key`. 
+- Uses the `OrgID` for `WithOrgIDCheck`. + +libdd only supports `DD-Api-Key` via `Endpoint::set_standard_headers`. No +`DDRCM_` support. + +### 7. PAR-JWT (Private Action Runner) auth +Agent supports `WithPARJWT(jwt)` and exposes `UpdatePARJWT(jwt)` to rotate the +token at runtime, adding the `DD-PAR-JWT` header. libdd has no equivalent. + +### 8. Dynamic credentials rotation +Agent: `UpdateAPIKey(string)` and `UpdatePARJWT(string)`. The `apiKeyUpdateCallback` +re-fetches `OrgData` and verifies the stored org UUID hasn't changed (catches +accidental key swaps to a different organization). + +libdd: API key is captured at construction inside `Endpoint`. There's no +public API to rotate it, and certainly no org-uuid-stability check. + +### 9. Backoff / retry / error reporting on the wire +Agent: +- `backoff.NewExpBackoffPolicy(2.0, 30.0, maxBackoff.Seconds(), 2, false)` — + exponential backoff with `[minimalMaxBackoffTime=2m, maximalMaxBackoffTime=5m]` + clamps; configurable via `WithMaxBackoffInterval`. +- `calculateRefreshInterval = defaultRefreshInterval + backoffTime`. +- Sets `LatestConfigsRequest.HasError = true` and `Error = err.Error()` on the + next poll after a failure, so the backend can observe client-side issues. +- Counts 503/504 separately to raise log level after threshold. +- Counts auth (401) errors and after `initialFetchErrorLog=5` downgrades them + to Debug. + +libdd: +- No backoff. The caller polls at the server-recommended `refresh_interval`. +- Always sends `has_error: false, error: ""`. The backend cannot tell the + client failed last poll. +- No HTTP status classification. Any non-2xx returns the raw body in + `parse_rc_response`. + +### 10. `flush` / `CONFIG_STATUS_EXPIRED` semantics +Agent: in `ClientGetConfigs`, when `directorLocalStore`'s `timestamp.json` has +expired (`TimestampExpires().Before(now)`), it returns a +`ClientGetConfigsResponse{ConfigStatus: CONFIG_STATUS_EXPIRED}` to force +downstream clients to drop their state. 
+ +libdd: only checks per-target `custom.expires` in +`BorrowedTufTarget::try_create`. There's no global "timestamp expired, drop +everything" gate, and no signaling channel back to a caller above +`ConfigFetcher` to indicate that state is stale. + +### 11. Delegated targets +`apply()` has an explicit TODO: +``` +// TODO: We do not store the delegated targets metadata +// This will need to be revisited in order to support proper Uptane +// verification of the full configuration data. +``` +Agent stores `delegated_targets` via `directorRemoteStore.update(response)` +and the go-tuf client walks delegations during verification. libdd's +verification is therefore not a complete Uptane verification today. + +### 12. Multi-client support +`fetch_config` has a TODO: only one `Client` is sent per request. The agent +sends `ActiveClients` (all currently-known tracer clients) and runs +`executeTracerPredicates` to filter director targets per client, so a single +agent process serves many tracers. PHP (multi-process) is the canonical case +the TODO calls out. + +### 13. Subscriptions / streaming +Agent has `CreateConfigSubscription` (gRPC stream) so internal agent +components (e.g. live-debugging, symbol DB) receive complete-view pushes. +N/A for the agentless client by design but worth noting. + +### 14. Telemetry +Agent has `RcTelemetryReporter` with `IncRateLimit`, `IncTimeout`, +`SetConfigSubscriptionsActive`, etc., plus `expvar`-exported state +(`orgEnabled`, `apiKeyScoped`, `lastError`). libdd has zero telemetry. + +### 15. TLS guards +Agent enforces: +- `baseURL.Scheme == "https"` unless `remote_configuration.no_tls=true`. +- Rejects `InsecureSkipVerify` unless `remote_configuration.no_tls_validation=true`. +- Forces `transport.IdleConnTimeout = 30s` (backend cuts idle at ~45s). + +libdd: `make_agentless_configs_endpoint` requires `https`. 
There's no escape +hatch for local-proxy / staging testing, no `IdleConnTimeout` tuning that we +can see (depends on `libdd_capabilities_impl::NativeHttpClient`). + +### 16. Cache bypass rate limiter +Agent: `refreshBypassLimiter` (token-bucket per-window) and +`refreshBypassCh` allow a new tracer to trigger an immediate refresh, bounded +by `WithClientCacheBypassLimit(limit, ...)` (default 5/window, [1,10]). +N/A in libdd because there is one consumer per fetcher. + +### 17. Refresh-interval validation & override semantics +Agent: +- `WithRefreshInterval` clamps to `>= minimalRefreshInterval (5s)`. +- `getRefreshIntervalLocked` only accepts server-recommended intervals in + `[1s, 1m]`, otherwise ignores. +- Server override only honored when caller didn't explicitly set it + (`refreshIntervalOverrideAllowed`). + +libdd: +- Always uses server-supplied `agent_refresh_interval` via + `Duration::from_secs` with **no bounds check** — backend can set it to any + u64. +- Default is hardcoded `Duration::from_secs(60)` — fine. +- No caller-facing override option. + +--- + +## Probable bugs + +### B1. Stray `dbg!` macros +Left in three places, will emit to stderr in release builds: +- `make_agentless_configs_endpoint` (line ~50): `dbg!(&e);` +- `ConfigFetcher::new` in fetcher.rs: `dbg!(state.invariants.agentless_enabled)` +- `get_latest_config`: `dbg!(&req);` and `dbg!(debug_latest_configs_response(&res));` + +These leak the full request (containing the API key indirectly via +endpoint info, and tracer telemetry) and the full TUF response to stderr. +Should be removed or behind `tracing::debug!`. + +### B2. Hardcoded `FAKE_AGENT_VERSION = "7.78.4"` +The backend uses `agent_version` for feature gating and telemetry. Pinning a +fake value means: +- Bug reports look like agent 7.78.4. +- The version will eventually fall behind any minimum-version gating. + +Should be `concat!("libdatadog/", env!("CARGO_PKG_VERSION"))` or accept a +caller-provided value. + +### B3. 
`trim_hash_target_path` uses `std::path::Path` +TUF paths are always `/`-separated. On Windows, `std::path::Path::components` +treats `\` as a separator too, which could mis-parse adversarial paths. Use +plain `str::rsplit_once('/')`. + +### B4. Target files are stored twice in the director "remote" repo +`apply()`: +```rust +repo.store_target(&trimmed_target_path, ...).await?; +// (duplicated, commented-out section above shows the intent) +repo.store_target(&TargetPath::new(&target_file.path)?, ...).await?; +``` +Each file is stored at both the original `{hash}.{name}` and the trimmed +`{name}` path. Then `fetch_target` reads back from `director_client.remote_repo()` +using the original target path. The trimmed copy is therefore unused. This +doubles memory for every target file per refresh. + +### B5. `fetch_target` reads from the **unverified** remote repo +The comment acknowledges this: "Fetch from the content from the remote +__Unverified__ repo. This is fine as we are comparing the (hash + len) with a +validated target." It is correct given the post-fetch hash check, but the +agent goes through `directorTUFClient.DownloadBatch`, which performs the +verification as part of the download (proper TUF). Both end up safe; the +libdd path is just unconventional. + +### B6. `opaque_backend_state` is only updated when present +```rust +if let Some(opaque_backend_state) = opaque_backend_state { + self.opaque_backend_state = opaque_backend_state; +} +``` +If the backend ever stops sending the field, libdd will keep echoing the +stale value forever. Agent overwrites unconditionally with whatever's in +`targetsCustom.OpaqueBackendState` (including empty). + +### B7. `active_clients[].last_seen` is overwritten with `now` +```rust +active_clients: vec![remoteconfig::Client { + last_seen: now, + ..c +}], +``` +Caller-provided `last_seen` is discarded. Not necessarily wrong (we only have +one client), but combined with the multi-client TODO it will need to be +fixed. + +### B8.
tm.version as u32)` truncates silently +`remoteconfig::TopMeta.version` is `u64` on the wire; cast to `u32` in +`MetadataVersion::Number`. Realistic versions are far below `u32::MAX` but a +malformed backend response would wrap silently. + +### B9. `target_cache` doubles the cached file content +The comment notes: +``` +// TODO: Not sure this is needed if the wrapped client already caches files? +target_cache: HashMap, +``` +Each verified file is held both inside the TUF repo's in-memory store **and** +in `target_cache`. For PHP-style multi-process or large config blobs this is +wasted memory. Worth deciding whether the upstream cache is authoritative. + +### B10. `make_agentless_configs_endpoint` rejects `api_key.is_none()` even if a PAR-JWT or app-key would suffice +The check is `e.api_key.is_some()`. There is no alternative auth scheme today +in libdd, but this hardcodes the assumption and would have to change for B7 +(RC key) and #7 (PAR JWT). + +### B11. `agentless_enabled` is silently downgraded to agent +In `ConfigFetcherState::new`: +```rust +warn!("agentless enabled but the hostname is empty. Downgrading to agent endpoint"); +warn!("agentless enabled but the endpoint is invalid. Downgrading to agent endpoint"); +``` +The caller asks for agentless, gets agent. Tracers that have no Datadog agent +to fall back to will silently fail with connection errors against +`/v0.7/config`. Should be a hard error or surface a status the caller can +react to. + +### B12. `#[allow(dead_code)]` on `CONFIG_ROOT` is misleading +The constant **is** used inside `AgentlessFetcher::new` to build the +`config_client`. The `dead_code` allow + stale comment ("reserved for TUF +config-repo init") is a leftover. + +### B13. `BorrowedTufTarget::try_create` interprets `custom.expires` as seconds, multiplied to ms +```rust +if expiry_ts * 1000 <= now_unix_milli_ts() +``` +- `expiry_ts` is read via `as_u64()`, so a JSON value already in ms would be + off by a factor of 1000. 
+- `expiry_ts * 1000` can wrap a `u64` for very large values (DoS via + malformed metadata; trivially unlikely in practice but worth checked-mul). + +The unit convention should be confirmed against what the backend / TUF spec +emits for that field. Agent uses go-tuf's standard expiry handling, not a +custom `expires` integer. + +### B14. `parse_rc_response` rejects only the body of non-2xx, no `Retry-After` handling +- 401 isn't distinguished — agent has `ErrUnauthorized` mapped to debug-level + logging. +- 503/504 aren't distinguished from 5xx generally. +- `Retry-After` header is ignored. + +### B15. `refresh_interval` not clamped +Agent caps `agent_refresh_interval` to `[1s, 1m]`. libdd accepts whatever the +server says, including 0 (would cause a tight loop in the consumer). + +--- + +## Missing configuration parameters (Option-for-Option) + +Agent `Option` → libdd equivalent today: + +| Agent `Option` | libdd equivalent | Notes | +|---|---|---| +| `WithTraceAgentEnv` | ❌ (sends empty `trace_agent_env`) | | +| `WithDatabaseFileName` | n/a (in-memory) | | +| `WithDatabasePath` | n/a | | +| `WithConfigRootOverride(site, override)` | ❌ — only prod root baked in | **blocker for non-prod sites** | +| `WithDirectorRootOverride(site, override)` | ❌ | **blocker for non-prod sites** | +| `WithRefreshInterval` | ❌ — only server-driven | | +| `WithOrgStatusRefreshInterval` | ❌ — org status never polled | | +| `WithMaxBackoffInterval` | ❌ — no backoff at all | | +| `WithRcKey(DDRCM_*)` | ❌ | | +| `WithAPIKey` | via `Endpoint.api_key` (immutable) | | +| `WithPARJWT` | ❌ | | +| `WithClientCacheBypassLimit` | n/a | | +| `WithClientTTL` | n/a (single client) | | +| `WithAgentPollLoopDisabled` | n/a | | +| `tagsGetter` | ❌ — `tags: vec![]` always | | +| `hostname` | ✅ `AgentlessConfig.hostname` | | +| `agentVersion` | ❌ — hardcoded `"7.78.4"` | | +| `cfg("api_key")` runtime updates | ❌ | | + +Additionally, agent exposes hostname/agent-uuid via `LatestConfigsRequest`: +- 
`agent_uuid` — always empty in libdd. +- `tags` — always empty. +- `trace_agent_env` — always empty. + +These are used by the backend for routing/diagnostics; missing them is not a +correctness issue but reduces observability and may break some product +features that target by env/host-tags. + +--- + +## Recommended next steps + +The list below is opinionated about what would be required to actually +replace the agent client. Ordering = highest impact first. + +- [ ] **Embed non-prod TUF roots** (staging, gov) and auto-select based on + the `site` in the configured endpoint (the `{site}` in + `https://config.{site}`). Additionally, expose an optional override + **file path** on `ConfigInvariants` so a caller can supply custom + roots from disk (`config_root_override_path`, + `director_root_override_path`). +- [ ] **Propagate `has_error` / `error`** from caller into the next request + (the `ConfigFetcher` already has `client_state.last_error` — wire it + into `agentless_fetcher.fetch_config`). +- [ ] **Add exponential backoff** on consecutive failures with the following + schedule (not the agent's `[2m, 5m]`): + - 1st error → no backoff + - 2nd error → random in `[30s, 60s]` + - 3rd error → random in `[60s, 120s]` + - 4th+ error → `120s` max +- [ ] **Clamp `agent_refresh_interval`** to `[1s, 1m]`, mirroring agent. +- [ ] **Replace `std::path::Path` in `trim_hash_target_path`** with explicit + `/` splitting. + +## Verification + +This is a comparison document, not a code change. Sign-off criteria: +- Agree with the assessment of which gaps are blockers for replacing the + agent client today (sites, org UUID, debug-print removal, agent version). +- Decide which of the "future" items must land before declaring agentless + GA, and which are acceptable carry-over. 
From f1c57858c079ef833c7a12b26acf1a8307f8f863 Mon Sep 17 00:00:00 2001 From: paullegranddc Date: Thu, 18 Jun 2026 14:19:42 +0200 Subject: [PATCH 07/18] remove extra file --- plans/agentless-vs-agent-rc-comparison.md | 418 ---------------------- 1 file changed, 418 deletions(-) delete mode 100644 plans/agentless-vs-agent-rc-comparison.md diff --git a/plans/agentless-vs-agent-rc-comparison.md b/plans/agentless-vs-agent-rc-comparison.md deleted file mode 100644 index 17fe6a9034..0000000000 --- a/plans/agentless-vs-agent-rc-comparison.md +++ /dev/null @@ -1,418 +0,0 @@ -# libdd-remote-config agentless client vs `pkg/config/remote` (Core Agent) — comparison - -## Context - -The goal is to use `libdd-remote-config`'s `AgentlessFetcher` -(`libdd-remote-config/src/agentless_client/mod.rs`) as a drop-in replacement -for the trace-agent's RC client, so a tracer/SDK can talk directly to the RC -backend (`config./api/v0.1/configurations`) instead of going through the -local Datadog agent. - -This document is the result of comparing the agentless implementation in -libdatadog with the Core Agent reference implementation in datadog-agent -(`pkg/config/remote/{service,api,uptane}`), and lists missing features, -discrepancies, missing configuration knobs, and potential bugs. 
- -## Files compared - -libdatadog (agentless / Rust): -- `libdd-remote-config/src/agentless_client/mod.rs` — TUF client + HTTP calls to `/api/v0.1/{configurations,status,org}` -- `libdd-remote-config/src/fetch/fetcher.rs` — `ConfigFetcher` that switches between agent (`/v0.7/config`) and agentless (`AgentlessFetcher`) modes -- `libdd-remote-config/roots/prod/{config,director}_root.json` — embedded TUF trust roots (prod site only) - -datadog-agent (reference / Go): -- `pkg/config/remote/service/service.go` — `CoreAgentService`, polling loop, backoff, org status, subscriptions -- `pkg/config/remote/service/util.go` — `buildLatestConfigsRequest`, RC key parsing, `targetsCustom` -- `pkg/config/remote/api/http.go` — HTTPClient with TLS guards, header rotation, retries -- `pkg/config/remote/uptane/client.go` — `CoreAgentClient`: Uptane verification, BoltDB cache, org-id check -- `pkg/config/remote/meta/{prod.,staging.,gov.}{config,director}.json` — TUF roots per site - ---- - -## High-level summary - -The libdd agentless client implements the **minimum** to talk to the RC backend -directly: it embeds prod TUF roots, posts `LatestConfigsRequest`, runs Uptane -verification in-memory, and returns parsed targets. The agent implementation is -significantly more featureful: persistent cache, backoff, retries, multi-site -roots, org-id / org-uuid verification, RC-key & PAR-JWT auth, dynamic -credentials rotation, telemetry, subscriptions, and cache-bypass rate limiting. - -The two are roughly equivalent for the **happy path** (fetch + verify a TUF -update), but the agentless client has several gaps that will bite production -use: - -- **Site/trust-root coverage is prod-only** (no staging, no gov, no override). -- **No persistent cache** — every process restart re-downloads full metadata. 
-- **No org UUID / org ID verification.** -- **No backoff / retry policy.** -- **`agent_version` is hardcoded to `"7.78.4"`.** -- **`dbg!` calls are left in code paths reachable in release builds.** -- **Single-client model only** (TODO in code). -- Several `client_tracer` / `LatestConfigsRequest` fields are silently empty. - ---- - -## Missing features - -### 1. Multi-site TUF trust roots -Agent ships `prod.{config,director}.json`, `staging.{config,director}.json`, -`gov.{config,director}.json` (see `pkg/config/remote/meta/`) and exposes -`WithConfigRootOverride(site, override)` / -`WithDirectorRootOverride(site, override)` so customers on EU/US3/US5/gov/staging -get the correct trust anchors. - -libdd embeds only `roots/prod/{config,director}_root.json`. There is no -`AgentlessConfig::site` field, no override option, and no staging/gov asset. -A staging or gov tenant cannot use the agentless client today — signature -verification would fail on first update. - -### 2. Persistent cache (BoltDB) -Agent uses `uptane/transactional_store.go` (BoltDB) to persist TUF metadata, -target files, and the org UUID across restarts. Restarts therefore continue -from the last `targets`/`snapshot`/`timestamp` version. - -libdd uses `tuf::repository::EphemeralRepository` for both `config` and -`director` clients. After any restart the request starts again at -`current_config_root_version = CONFIG_ROOT_VERSION` (16), forcing the backend -to re-emit the full root chain, snapshot, and targets. This costs bandwidth -and increases the cold-start window where no config is yet applied. - -### 3. Org UUID handshake & verification -Agent: -- Calls `/api/v0.1/org` via `newRCBackendOrgUUIDProvider` and stores the UUID - in BoltDB, keyed by `configLocalStore.GetMetaVersion(metaRoot)` so root - rotation can recover from a bad/locked-out UUID. -- Sends `LatestConfigsRequest.OrgUuid` in every request. 
-- In `verifyOrg`, parses the snapshot custom field and asserts - `snapshot.custom.OrgUUID == stored OrgUUID`. - -libdd: -- Implements `get_org_data()` returning `OrgDataResponse` but **never calls - it.** -- Always sends `org_uuid: String::new()`. -- Never reads `snapshot.custom.OrgUUID`. A maliciously or accidentally - cross-org-routed update would not be detected. - -### 4. Org ID verification (`WithOrgIDCheck`) -Agent supports asserting all `director` target paths belong to the configured -`orgID` (parsed from the legacy RC-key format), skipping `SourceEmployee` -paths. libdd has no equivalent — there is no `org_id` in `AgentlessConfig`. - -### 5. Org status polling (`/api/v0.1/status`) -Agent runs a separate `orgStatusPoller` every `defaultRefreshInterval` -(1 min) calling `/api/v0.1/status` and logs whether RC is enabled and -authorized. It also uses the result to decide whether to log refresh errors -at `Warn`/`Error` or just `Debug`. - -libdd: `get_org_status()` is implemented but never called anywhere in the -crate. There is no equivalent polling, and no degradation of log level when -RC is simply disabled for the org. - -### 6. RC-key (`DDRCM_*`) auth -Agent supports the legacy RC-key in `getRemoteConfigAuthKeys`: -- Base32-decodes `DDRCM_`, msgpack-decodes into `{AppKey, Datacenter, OrgID}`. -- Sends `DD-Application-Key` header in addition to `DD-Api-Key`. -- Uses the `OrgID` for `WithOrgIDCheck`. - -libdd only supports `DD-Api-Key` via `Endpoint::set_standard_headers`. No -`DDRCM_` support. - -### 7. PAR-JWT (Private Action Runner) auth -Agent supports `WithPARJWT(jwt)` and exposes `UpdatePARJWT(jwt)` to rotate the -token at runtime, adding the `DD-PAR-JWT` header. libdd has no equivalent. - -### 8. Dynamic credentials rotation -Agent: `UpdateAPIKey(string)` and `UpdatePARJWT(string)`. The `apiKeyUpdateCallback` -re-fetches `OrgData` and verifies the stored org UUID hasn't changed (catches -accidental key swaps to a different organization). 
- -libdd: API key is captured at construction inside `Endpoint`. There's no -public API to rotate it, and certainly no org-uuid-stability check. - -### 9. Backoff / retry / error reporting on the wire -Agent: -- `backoff.NewExpBackoffPolicy(2.0, 30.0, maxBackoff.Seconds(), 2, false)` — - exponential backoff with `[minimalMaxBackoffTime=2m, maximalMaxBackoffTime=5m]` - clamps; configurable via `WithMaxBackoffInterval`. -- `calculateRefreshInterval = defaultRefreshInterval + backoffTime`. -- Sets `LatestConfigsRequest.HasError = true` and `Error = err.Error()` on the - next poll after a failure, so the backend can observe client-side issues. -- Counts 503/504 separately to raise log level after threshold. -- Counts auth (401) errors and after `initialFetchErrorLog=5` downgrades them - to Debug. - -libdd: -- No backoff. The caller polls at the server-recommended `refresh_interval`. -- Always sends `has_error: false, error: ""`. The backend cannot tell the - client failed last poll. -- No HTTP status classification. Any non-2xx returns the raw body in - `parse_rc_response`. - -### 10. `flush` / `CONFIG_STATUS_EXPIRED` semantics -Agent: in `ClientGetConfigs`, when `directorLocalStore`'s `timestamp.json` has -expired (`TimestampExpires().Before(now)`), it returns a -`ClientGetConfigsResponse{ConfigStatus: CONFIG_STATUS_EXPIRED}` to force -downstream clients to drop their state. - -libdd: only checks per-target `custom.expires` in -`BorrowedTufTarget::try_create`. There's no global "timestamp expired, drop -everything" gate, and no signaling channel back to a caller above -`ConfigFetcher` to indicate that state is stale. - -### 11. Delegated targets -`apply()` has an explicit TODO: -``` -// TODO: We do not store the delegated targets metadata -// This will need to be revisited in order to support proper Uptane -// verification of the full configuration data. 
-``` -Agent stores `delegated_targets` via `directorRemoteStore.update(response)` -and the go-tuf client walks delegations during verification. libdd's -verification is therefore not a complete Uptane verification today. - -### 12. Multi-client support -`fetch_config` has a TODO: only one `Client` is sent per request. The agent -sends `ActiveClients` (all currently-known tracer clients) and runs -`executeTracerPredicates` to filter director targets per client, so a single -agent process serves many tracers. PHP (multi-process) is the canonical case -the TODO calls out. - -### 13. Subscriptions / streaming -Agent has `CreateConfigSubscription` (gRPC stream) so internal agent -components (e.g. live-debugging, symbol DB) receive complete-view pushes. -N/A for the agentless client by design but worth noting. - -### 14. Telemetry -Agent has `RcTelemetryReporter` with `IncRateLimit`, `IncTimeout`, -`SetConfigSubscriptionsActive`, etc., plus `expvar`-exported state -(`orgEnabled`, `apiKeyScoped`, `lastError`). libdd has zero telemetry. - -### 15. TLS guards -Agent enforces: -- `baseURL.Scheme == "https"` unless `remote_configuration.no_tls=true`. -- Rejects `InsecureSkipVerify` unless `remote_configuration.no_tls_validation=true`. -- Forces `transport.IdleConnTimeout = 30s` (backend cuts idle at ~45s). - -libdd: `make_agentless_configs_endpoint` requires `https`. There's no escape -hatch for local-proxy / staging testing, no `IdleConnTimeout` tuning that we -can see (depends on `libdd_capabilities_impl::NativeHttpClient`). - -### 16. Cache bypass rate limiter -Agent: `refreshBypassLimiter` (token-bucket per-window) and -`refreshBypassCh` allow a new tracer to trigger an immediate refresh, bounded -by `WithClientCacheBypassLimit(limit, ...)` (default 5/window, [1,10]). -N/A in libdd because there is one consumer per fetcher. - -### 17. Refresh-interval validation & override semantics -Agent: -- `WithRefreshInterval` clamps to `>= minimalRefreshInterval (5s)`. 
-- `getRefreshIntervalLocked` only accepts server-recommended intervals in - `[1s, 1m]`, otherwise ignores. -- Server override only honored when caller didn't explicitly set it - (`refreshIntervalOverrideAllowed`). - -libdd: -- Always uses server-supplied `agent_refresh_interval` via - `Duration::from_secs` with **no bounds check** — backend can set it to any - u64. -- Default is hardcoded `Duration::from_secs(60)` — fine. -- No caller-facing override option. - ---- - -## Probable bugs - -### B1. Stray `dbg!` macros -Left in three places, will emit to stderr in release builds: -- `make_agentless_configs_endpoint` (line ~50): `dbg!(&e);` -- `ConfigFetcher::new` in fetcher.rs: `dbg!(state.invariants.agentless_enabled)` -- `get_latest_config`: `dbg!(&req);` and `dbg!(debug_latest_configs_response(&res));` - -These leak the full request (containing the API key indirectly via -endpoint info, and tracer telemetry) and the full TUF response to stderr. -Should be removed or behind `tracing::debug!`. - -### B2. Hardcoded `FAKE_AGENT_VERSION = "7.78.4"` -The backend uses `agent_version` for feature gating and telemetry. Pinning a -fake value means: -- Bug reports look like agent 7.78.4. -- The version will eventually fall behind any minimum-version gating. - -Should be `concat!("libdatadog/", env!("CARGO_PKG_VERSION"))` or accept a -caller-provided value. - -### B3. `trim_hash_target_path` uses `std::path::Path` -TUF paths are always `/`-separated. On Windows, `std::path::Path::components` -treats `\` as a separator too, which could mis-parse adversarial paths. Use -plain `str::rsplit_once('/')`. - -### B4. Target files are stored twice in the director "remote" repo -`apply()`: -```rust -repo.store_target(&trimmed_target_path, ...).await?; -// (duplicated, commented-out section above shows the intent) -repo.store_target(&TargetPath::new(&target_file.path)?, ...).await?; -``` -Each file is stored at both the original `{hash}.{name}` and the trimmed -`{name}` path.
Then `fetch_target` reads back from `director_client.remote_repo()` -using the original target path. The trimmed copy is therefore unused. This -doubles memory for every target file per refresh. - -### B5. `fetch_target` reads from the **unverified** remote repo -The comment acknowledges this: "Fetch from the content from the remote -__Unverified__ repo. This is fine as we are comparing the (hash + len) with a -validated target." It is correct given the post-fetch hash check, but the -agent goes through `directorTUFClient.DownloadBatch`, which performs the -verification as part of the download (proper TUF). Both end up safe; the -libdd path is just unconventional. - -### B6. `opaque_backend_state` is only updated when present -```rust -if let Some(opaque_backend_state) = opaque_backend_state { - self.opaque_backend_state = opaque_backend_state; -} -``` -If the backend ever stops sending the field, libdd will keep echoing the -stale value forever. Agent overwrites unconditionally with whatever's in -`targetsCustom.OpaqueBackendState` (including empty). - -### B7. `active_clients[].last_seen` is overwritten with `now` -```rust -active_clients: vec![remoteconfig::Client { - last_seen: now, - ..c -}], -``` -Caller-provided `last_seen` is discarded. Not necessarily wrong (we only have -one client), but combined with the multi-client TODO it will need to be -fixed. - -### B8. `store(... tm.version as u32)` truncates silently -`remoteconfig::TopMeta.version` is `u64` on the wire; cast to `u32` in -`MetadataVersion::Number`. Realistic versions are far below `u32::MAX` but a -malformed backend response would wrap silently. - -### B9. `target_cache` doubles the cached file content -The comment notes: -``` -// TODO: Not sure this is needed if the wrapped client already caches files? -target_cache: HashMap, -``` -Each verified file is held both inside the TUF repo's in-memory store **and** -in `target_cache`. 
For PHP-style multi-process or large config blobs this is -wasted memory. Worth deciding whether the upstream cache is authoritative. - -### B10. `make_agentless_configs_endpoint` rejects `api_key.is_none()` even if a PAR-JWT or app-key would suffice -The check is `e.api_key.is_some()`. There is no alternative auth scheme today -in libdd, but this hardcodes the assumption and would have to change for #6 -(RC key) and #7 (PAR JWT). - -### B11. `agentless_enabled` is silently downgraded to agent -In `ConfigFetcherState::new`: -```rust -warn!("agentless enabled but the hostname is empty. Downgrading to agent endpoint"); -warn!("agentless enabled but the endpoint is invalid. Downgrading to agent endpoint"); -``` -The caller asks for agentless, gets agent. Tracers that have no Datadog agent -to fall back to will silently fail with connection errors against -`/v0.7/config`. Should be a hard error or surface a status the caller can -react to. - -### B12. `#[allow(dead_code)]` on `CONFIG_ROOT` is misleading -The constant **is** used inside `AgentlessFetcher::new` to build the -`config_client`. The `dead_code` allow + stale comment ("reserved for TUF -config-repo init") is a leftover. - -### B13. `BorrowedTufTarget::try_create` interprets `custom.expires` as seconds, multiplied to ms -```rust -if expiry_ts * 1000 <= now_unix_milli_ts() -``` -- `expiry_ts` is read via `as_u64()`, so a JSON value already in ms would be - off by a factor of 1000. -- `expiry_ts * 1000` can wrap a `u64` for very large values (DoS via - malformed metadata; trivially unlikely in practice but worth checked-mul). - -The unit convention should be confirmed against what the backend / TUF spec -emits for that field. Agent uses go-tuf's standard expiry handling, not a -custom `expires` integer. - -### B14. `parse_rc_response` rejects only the body of non-2xx, no `Retry-After` handling -- 401 isn't distinguished — agent has `ErrUnauthorized` mapped to debug-level - logging.
-- 503/504 aren't distinguished from 5xx generally. -- `Retry-After` header is ignored. - -### B15. `refresh_interval` not clamped -Agent caps `agent_refresh_interval` to `[1s, 1m]`. libdd accepts whatever the -server says, including 0 (would cause a tight loop in the consumer). - ---- - -## Missing configuration parameters (Option-for-Option) - -Agent `Option` → libdd equivalent today: - -| Agent `Option` | libdd equivalent | Notes | -|---|---|---| -| `WithTraceAgentEnv` | ❌ (sends empty `trace_agent_env`) | | -| `WithDatabaseFileName` | n/a (in-memory) | | -| `WithDatabasePath` | n/a | | -| `WithConfigRootOverride(site, override)` | ❌ — only prod root baked in | **blocker for non-prod sites** | -| `WithDirectorRootOverride(site, override)` | ❌ | **blocker for non-prod sites** | -| `WithRefreshInterval` | ❌ — only server-driven | | -| `WithOrgStatusRefreshInterval` | ❌ — org status never polled | | -| `WithMaxBackoffInterval` | ❌ — no backoff at all | | -| `WithRcKey(DDRCM_*)` | ❌ | | -| `WithAPIKey` | via `Endpoint.api_key` (immutable) | | -| `WithPARJWT` | ❌ | | -| `WithClientCacheBypassLimit` | n/a | | -| `WithClientTTL` | n/a (single client) | | -| `WithAgentPollLoopDisabled` | n/a | | -| `tagsGetter` | ❌ — `tags: vec![]` always | | -| `hostname` | ✅ `AgentlessConfig.hostname` | | -| `agentVersion` | ❌ — hardcoded `"7.78.4"` | | -| `cfg("api_key")` runtime updates | ❌ | | - -Additionally, agent exposes hostname/agent-uuid via `LatestConfigsRequest`: -- `agent_uuid` — always empty in libdd. -- `tags` — always empty. -- `trace_agent_env` — always empty. - -These are used by the backend for routing/diagnostics; missing them is not a -correctness issue but reduces observability and may break some product -features that target by env/host-tags. - ---- - -## Recommended next steps - -The list below is opinionated about what would be required to actually -replace the agent client. Ordering = highest impact first. 
- -- [ ] **Embed non-prod TUF roots** (staging, gov) and auto-select based on - the `site` in the configured endpoint (the `{site}` in - `https://config.{site}`). Additionally, expose an optional override - **file path** on `ConfigInvariants` so a caller can supply custom - roots from disk (`config_root_override_path`, - `director_root_override_path`). -- [ ] **Propagate `has_error` / `error`** from caller into the next request - (the `ConfigFetcher` already has `client_state.last_error` — wire it - into `agentless_fetcher.fetch_config`). -- [ ] **Add exponential backoff** on consecutive failures with the following - schedule (not the agent's `[2m, 5m]`): - - 1st error → no backoff - - 2nd error → random in `[30s, 60s]` - - 3rd error → random in `[60s, 120s]` - - 4th+ error → `120s` max -- [ ] **Clamp `agent_refresh_interval`** to `[1s, 1m]`, mirroring agent. -- [ ] **Replace `std::path::Path` in `trim_hash_target_path`** with explicit - `/` splitting. - -## Verification - -This is a comparison document, not a code change. Sign-off criteria: -- Agree with the assessment of which gaps are blockers for replacing the - agent client today (sites, org UUID, debug-print removal, agent version). -- Decide which of the "future" items must land before declaring agentless - GA, and which are acceptable carry-over. 
From d734ac04bf605477eb0d6bd74ed28737fc53394a Mon Sep 17 00:00:00 2001 From: paullegranddc Date: Fri, 19 Jun 2026 15:36:52 +0200 Subject: [PATCH 08/18] feat: get agent uuid from host --- libdd-common/Cargo.toml | 1 + libdd-common/src/lib.rs | 1 + libdd-common/src/machine_id/linux.rs | 99 ++++++++++++++ libdd-common/src/machine_id/macos.rs | 54 ++++++++ libdd-common/src/machine_id/mod.rs | 81 ++++++++++++ libdd-common/src/machine_id/windows.rs | 121 ++++++++++++++++++ .../src/agentless_client/mod.rs | 21 +-- 7 files changed, 370 insertions(+), 8 deletions(-) create mode 100644 libdd-common/src/machine_id/linux.rs create mode 100644 libdd-common/src/machine_id/macos.rs create mode 100644 libdd-common/src/machine_id/mod.rs create mode 100644 libdd-common/src/machine_id/windows.rs diff --git a/libdd-common/Cargo.toml b/libdd-common/Cargo.toml index b4bca918d6..ea9e800878 100644 --- a/libdd-common/Cargo.toml +++ b/libdd-common/Cargo.toml @@ -72,6 +72,7 @@ features = [ "Win32_Foundation", "Win32_System_Diagnostics_ToolHelp", "Win32_System_Performance", + "Win32_System_Registry", "Win32_System_Threading", ] diff --git a/libdd-common/src/lib.rs b/libdd-common/src/lib.rs index adfb568b84..476424f9d5 100644 --- a/libdd-common/src/lib.rs +++ b/libdd-common/src/lib.rs @@ -22,6 +22,7 @@ pub mod connector; #[cfg(feature = "reqwest")] pub mod dump_server; pub mod entity_id; +pub mod machine_id; pub mod regex_engine; #[macro_use] pub mod cstr; diff --git a/libdd-common/src/machine_id/linux.rs b/libdd-common/src/machine_id/linux.rs new file mode 100644 index 0000000000..1930d1fc54 --- /dev/null +++ b/libdd-common/src/machine_id/linux.rs @@ -0,0 +1,99 @@ +// Copyright 2021-Present Datadog, Inc. 
https://www.datadoghq.com/ +// SPDX-License-Identifier: Apache-2.0 + +use std::path::Path; + +const ETC_MACHINE_ID: &str = "/etc/machine-id"; +const DBUS_MACHINE_ID: &str = "/var/lib/dbus/machine-id"; + +/// Read and trim the contents of `path`, returning `None` on any I/O error or +/// if the resulting string is empty. +fn read_id(path: &Path) -> Option { + let raw = std::fs::read_to_string(path).ok()?; + let trimmed = raw.trim().to_owned(); + if trimmed.is_empty() { + None + } else { + Some(trimmed) + } +} + +/// Return the machine ID from the given paths. +/// +/// Tries `etc_path` first (mirrors `/etc/machine-id`), falls back to +/// `dbus_path` (mirrors `/var/lib/dbus/machine-id`). Returns an empty +/// `String` when both are unavailable, matching the Go agent's behaviour. +/// +/// Accepts explicit paths so tests can inject temporary files without needing +/// a feature flag. +pub fn get_machine_id_impl_paths(etc_path: &Path, dbus_path: &Path) -> String { + read_id(etc_path) + .or_else(|| read_id(dbus_path)) + .unwrap_or_default() +} + +/// Return the machine ID from `ETC_MACHINE_ID`, else `DBUS_MACHINE_ID`, else an empty `String`.
+pub fn get_machine_id_impl() -> String { + get_machine_id_impl_paths(Path::new(ETC_MACHINE_ID), Path::new(DBUS_MACHINE_ID)) +} + +#[cfg(test)] +mod tests { + use super::*; + use std::io::Write as _; + + #[test] + fn prefers_etc_machine_id() { + let dir = tempfile::tempdir().unwrap(); + let etc = dir.path().join("etc_machine_id"); + let dbus = dir.path().join("dbus_machine_id"); + std::fs::write(&etc, "aabbccdd\n").unwrap(); + std::fs::write(&dbus, "11223344\n").unwrap(); + assert_eq!(get_machine_id_impl_paths(&etc, &dbus), "aabbccdd"); + } + + #[test] + fn falls_back_to_dbus_when_etc_missing() { + let dir = tempfile::tempdir().unwrap(); + let dbus = dir.path().join("dbus_machine_id"); + std::fs::write(&dbus, "11223344\n").unwrap(); + assert_eq!( + get_machine_id_impl_paths(Path::new("/nonexistent_etc_mid"), &dbus), + "11223344" + ); + } + + #[test] + fn both_missing_returns_empty() { + assert_eq!( + get_machine_id_impl_paths( + Path::new("/nonexistent_etc_mid"), + Path::new("/nonexistent_dbus_mid"), + ), + "" + ); + } + + #[test] + fn trims_whitespace_and_newlines() { + let dir = tempfile::tempdir().unwrap(); + let etc = dir.path().join("etc_machine_id"); + std::fs::write(&etc, " deadbeef \n").unwrap(); + assert_eq!( + get_machine_id_impl_paths(&etc, Path::new("/nonexistent_dbus_mid")), + "deadbeef" + ); + } + + #[test] + fn empty_file_falls_back() { + let dir = tempfile::tempdir().unwrap(); + let etc = dir.path().join("etc_machine_id"); + let dbus = dir.path().join("dbus_machine_id"); + // etc exists but is whitespace-only; should fall back to dbus + let mut f = std::fs::File::create(&etc).unwrap(); + f.write_all(b" \n").unwrap(); + std::fs::write(&dbus, "fallback_id").unwrap(); + assert_eq!(get_machine_id_impl_paths(&etc, &dbus), "fallback_id"); + } +} diff --git a/libdd-common/src/machine_id/macos.rs b/libdd-common/src/machine_id/macos.rs new file mode 100644 index 0000000000..03e2f301f6 --- /dev/null +++ b/libdd-common/src/machine_id/macos.rs @@ -0,0 +1,54 @@
+// Copyright 2021-Present Datadog, Inc. https://www.datadoghq.com/ +// SPDX-License-Identifier: Apache-2.0 + +/// Return the platform UUID via the `gethostuuid(3)` BSD syscall. +/// +/// This returns the same 128-bit value that `IOPlatformUUID` exposes via +/// IOKit (`ioreg -rd1 -c IOPlatformExpertDevice`), which is what gopsutil +/// (and therefore the Go agent) returns on macOS. Using the syscall avoids a +/// fork+exec of `ioreg`. +/// +/// The UUID is formatted as uppercase hex with hyphens: +/// `XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX` +/// +/// Returns an empty `String` on syscall failure, matching the Go agent's +/// silent-empty behaviour. +pub fn get_machine_id_impl() -> String { + let mut uuid: [u8; 16] = [0u8; 16]; + // Passing a zero timespec requests an indefinite wait; in practice the + // call returns immediately (the UUID is available after very early boot). + let wait = libc::timespec { + tv_sec: 0, + tv_nsec: 0, + }; + let rc = unsafe { libc::gethostuuid(uuid.as_mut_ptr(), &wait) }; + if rc != 0 { + return String::new(); + } + format!( + "{:02X}{:02X}{:02X}{:02X}-{:02X}{:02X}-{:02X}{:02X}-{:02X}{:02X}-{:02X}{:02X}{:02X}{:02X}{:02X}{:02X}", + uuid[0], uuid[1], uuid[2], uuid[3], + uuid[4], uuid[5], + uuid[6], uuid[7], + uuid[8], uuid[9], + uuid[10], uuid[11], uuid[12], uuid[13], uuid[14], uuid[15], + ) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn returns_nonempty_uuid() { + // On any real macOS host (including CI) the host UUID is always set. + let id = get_machine_id_impl(); + assert!(!id.is_empty(), "expected a non-empty UUID on macOS"); + // Basic shape check: 36 chars, hyphens at positions 8, 13, 18, 23. 
+ assert_eq!(id.len(), 36); + assert_eq!(&id[8..9], "-"); + assert_eq!(&id[13..14], "-"); + assert_eq!(&id[18..19], "-"); + assert_eq!(&id[23..24], "-"); + } +} diff --git a/libdd-common/src/machine_id/mod.rs b/libdd-common/src/machine_id/mod.rs new file mode 100644 index 0000000000..94becbaad9 --- /dev/null +++ b/libdd-common/src/machine_id/mod.rs @@ -0,0 +1,81 @@ +// Copyright 2021-Present Datadog, Inc. https://www.datadoghq.com/ +// SPDX-License-Identifier: Apache-2.0 + +//! Host machine identifier, equivalent to `pkg/util/uuid.GetUUID()` in the +//! Go agent. +//! +//! The value is read once at first access, cached for the lifetime of the +//! process, and never replaced with a random UUID on failure. An empty +//! string is the correct fallback — the backend can detect a missing value +//! but cannot detect an incorrect one. +//! +//! # Per-platform source +//! +//! | Platform | Source | +//! |----------|--------| +//! | Linux | `/etc/machine-id` (preferred), fallback `/var/lib/dbus/machine-id` | +//! | macOS | `gethostuuid(3)` — same value as `IOPlatformUUID` | +//! | Windows | `HKLM\SOFTWARE\Microsoft\Cryptography\MachineGuid` | +//! | Other | `""` (matches Go agent failure behaviour) | + +use std::sync::LazyLock; + +#[cfg(target_os = "linux")] +mod linux; + +#[cfg(target_os = "macos")] +mod macos; + +#[cfg(windows)] +mod windows; + +/// Cached host machine ID, populated on first access. +static MACHINE_ID: LazyLock = LazyLock::new(|| { + #[cfg(target_os = "linux")] + { + linux::get_machine_id_impl() + } + #[cfg(target_os = "macos")] + { + macos::get_machine_id_impl() + } + #[cfg(windows)] + { + windows::get_machine_id_impl() + } + #[cfg(not(any(target_os = "linux", target_os = "macos", windows)))] + { + String::new() + } +}); + +/// Returns the host machine ID, cached for the process lifetime. +/// +/// Returns `""` on failure or on unsupported platforms (matches Go agent +/// behaviour — an empty string is preferable to a synthetic random UUID). 
+pub fn get_machine_id() -> &'static str { + MACHINE_ID.as_str() +} + +#[cfg(test)] +mod tests { + use super::*; + + /// Two successive calls must return the identical pointer/value (the + /// LazyLock must be stable). + #[test] + fn cached_value_is_stable() { + let a = get_machine_id(); + let b = get_machine_id(); + assert_eq!(a, b); + } + + /// The cached value must not contain leading/trailing whitespace or + /// newlines (each platform implementation is responsible for trimming, + /// but we assert it here as a cross-platform contract). + #[test] + fn value_is_trimmed() { + let id = get_machine_id(); + assert_eq!(id, id.trim()); + } +} diff --git a/libdd-common/src/machine_id/windows.rs b/libdd-common/src/machine_id/windows.rs new file mode 100644 index 0000000000..6137c6bfb6 --- /dev/null +++ b/libdd-common/src/machine_id/windows.rs @@ -0,0 +1,121 @@ +// Copyright 2021-Present Datadog, Inc. https://www.datadoghq.com/ +// SPDX-License-Identifier: Apache-2.0 + +use windows_sys::Win32::Foundation::{ERROR_SUCCESS, HKEY}; +use windows_sys::Win32::System::Registry::{ + RegCloseKey, RegOpenKeyExW, RegQueryValueExW, HKEY_LOCAL_MACHINE, KEY_READ, KEY_WOW64_64KEY, + REG_SZ, +}; +use windows_sys::Win32::System::Threading::{GetCurrentProcess, IsWow64Process}; + +/// Sub-key containing `MachineGuid`. +const SUBKEY: &str = "SOFTWARE\\Microsoft\\Cryptography"; +/// Value name holding the machine GUID. +const VALUE_NAME: &str = "MachineGuid"; + +/// Returns `true` when the current process is a 32-bit process running under +/// WOW64 on a 64-bit Windows host. We use this to request 64-bit registry +/// view access when reading `HKLM\SOFTWARE\Microsoft\Cryptography`, which only +/// exists in the 64-bit hive. +fn is_wow64() -> bool { + let mut result: i32 = 0; + let ok = unsafe { IsWow64Process(GetCurrentProcess(), &mut result) }; + ok != 0 && result != 0 +} + +/// Encode a Rust `&str` as a null-terminated UTF-16 (`Vec`). 
+fn to_wide_null(s: &str) -> Vec { + s.encode_utf16().chain(std::iter::once(0u16)).collect() +} + +/// Read the machine GUID from +/// `HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Cryptography\MachineGuid`. +/// +/// Returns an empty `String` on any failure (key/value missing, access +/// denied, or encoding error), matching the Go agent's behaviour. +pub fn get_machine_id_impl() -> String { + // On a 32-bit process running on 64-bit Windows we must request the 64-bit + // registry view; otherwise `RegOpenKeyExW` would redirect into the WOW64 + // 32-bit hive where `MachineGuid` does not exist. + let access = if cfg!(target_pointer_width = "32") && is_wow64() { + KEY_READ | KEY_WOW64_64KEY + } else { + KEY_READ + }; + + let subkey_wide = to_wide_null(SUBKEY); + + let mut hkey: HKEY = 0; + // SAFETY: all pointers are valid; no aliasing hazard. + let status = unsafe { + RegOpenKeyExW( + HKEY_LOCAL_MACHINE, + subkey_wide.as_ptr(), + 0, + access, + &mut hkey, + ) + }; + if status != ERROR_SUCCESS as i32 { + return String::new(); + } + + let result = read_string_value(hkey); + + // SAFETY: `hkey` is a valid open handle returned by `RegOpenKeyExW`. + unsafe { RegCloseKey(hkey) }; + + result +} + +/// Read the `MachineGuid` REG_SZ value from an already-opened registry key +/// handle, returning an empty `String` on any failure. +fn read_string_value(hkey: HKEY) -> String { + let value_wide = to_wide_null(VALUE_NAME); + + // First call: get the required buffer size. + let mut data_type: u32 = 0; + let mut data_len: u32 = 0; + let status = unsafe { + RegQueryValueExW( + hkey, + value_wide.as_ptr(), + std::ptr::null_mut(), + &mut data_type, + std::ptr::null_mut(), + &mut data_len, + ) + }; + if status != ERROR_SUCCESS as i32 || data_type != REG_SZ { + return String::new(); + } + + // `data_len` is in bytes (UTF-16 units × 2), including the null terminator. 
+ let num_u16 = (data_len as usize).div_ceil(2); + let mut buf: Vec = vec![0u16; num_u16]; + + // Second call: read the actual data. + let mut actual_len = data_len; + let status = unsafe { + RegQueryValueExW( + hkey, + value_wide.as_ptr(), + std::ptr::null_mut(), + &mut data_type, + buf.as_mut_ptr().cast(), + &mut actual_len, + ) + }; + if status != ERROR_SUCCESS as i32 { + return String::new(); + } + + // Strip the null terminator(s) and convert to a Rust String. + while buf.last() == Some(&0u16) { + buf.pop(); + } + String::from_utf16(&buf) + .unwrap_or_default() + .trim() + .to_owned() +} diff --git a/libdd-remote-config/src/agentless_client/mod.rs b/libdd-remote-config/src/agentless_client/mod.rs index a2f8d7704f..76d7b0c7d9 100644 --- a/libdd-remote-config/src/agentless_client/mod.rs +++ b/libdd-remote-config/src/agentless_client/mod.rs @@ -142,17 +142,16 @@ pub fn make_agentless_configs_endpoint(e: &Endpoint) -> Option { #[derive(Clone, Debug, Hash, Eq, PartialEq, Default)] pub struct AgentlessConfig { - /// Hostname reported to the RC backend in `LatestConfigsRequest.hostname`. - /// Required (must be non-empty) in agentless mode; an empty value causes + /// Hostname reported to the RC backend + /// Must be non empty in agentless mode; an empty value causes /// `ConfigFetcherState::new` to downgrade to agent mode. pub hostname: String, - /// Optional path to a TUF config-repo root JSON to use instead of the - /// embedded one. Useful for staging/private deployments where the trust - /// chain differs from the published defaults. + /// Optional path to a TUF repo root JSON to use instead of the + /// embedded one pub config_root_override_path: Option, - /// Optional path to a TUF director-repo root JSON to use instead of the - /// embedded one. pub director_root_override_path: Option, + /// Override the `agent_uuid` field sent to the RC backend. 
+ pub agent_uuid: Option, } pub type NativeAgentlessFetcher = AgentlessFetcher; @@ -166,6 +165,7 @@ pub struct AgentlessFetcher { initial_config_root_version: u64, initial_director_root_version: u64, hostname: String, + agent_uuid_override: Option, products: HashSet, refresh_interval: Duration, /// Number of consecutive `fetch_config` failures. Reset to 0 on success. @@ -276,6 +276,7 @@ impl AgentlessFetcher { initial_config_root_version, initial_director_root_version, hostname: cfg.hostname, + agent_uuid_override: cfg.agent_uuid, products: HashSet::new(), target_cache: HashMap::new(), @@ -443,7 +444,11 @@ impl AgentlessFetcher { trace_agent_env: String::new(), org_uuid: String::new(), tags: vec![], - agent_uuid: String::new(), + agent_uuid: self + .agent_uuid_override + .as_deref() + .unwrap_or_else(|| libdd_common::machine_id::get_machine_id()) + .to_owned(), }; let response = match self.get_latest_config(request).await { Ok(r) => r, From 2ac515a687e022c248d0622657e703c39f914257 Mon Sep 17 00:00:00 2001 From: paullegranddc Date: Fri, 19 Jun 2026 17:14:44 +0200 Subject: [PATCH 09/18] fix: always emit a lowercased uuid --- libdd-common/src/machine_id/linux.rs | 132 +++++++++++++---------- libdd-common/src/machine_id/macos.rs | 23 +--- libdd-common/src/machine_id/mod.rs | 142 +++++++++++++++++-------- libdd-common/src/machine_id/windows.rs | 58 +++------- 4 files changed, 193 insertions(+), 162 deletions(-) diff --git a/libdd-common/src/machine_id/linux.rs b/libdd-common/src/machine_id/linux.rs index 1930d1fc54..0888ab854f 100644 --- a/libdd-common/src/machine_id/linux.rs +++ b/libdd-common/src/machine_id/linux.rs @@ -3,97 +3,119 @@ use std::path::Path; -const ETC_MACHINE_ID: &str = "/etc/machine-id"; -const DBUS_MACHINE_ID: &str = "/var/lib/dbus/machine-id"; - -/// Read and trim the contents of `path`, returning `None` on any I/O error or -/// if the resulting string is empty. 
-fn read_id(path: &Path) -> Option { - let raw = std::fs::read_to_string(path).ok()?; - let trimmed = raw.trim().to_owned(); - if trimmed.is_empty() { +fn read_trimmed(path: &Path) -> Option { + let s = std::fs::read_to_string(path).ok()?; + let s = s.trim().to_owned(); + if s.is_empty() { None } else { - Some(trimmed) + Some(s) } } -/// Return the machine ID from the given paths. -/// -/// Tries `etc_path` first (mirrors `/etc/machine-id`), falls back to -/// `dbus_path` (mirrors `/var/lib/dbus/machine-id`). Returns an empty -/// `String` when both are unavailable, matching the Go agent's behaviour. -/// -/// Accepts explicit paths so tests can inject temporary files without needing -/// a feature flag. -pub fn get_machine_id_impl_paths(etc_path: &Path, dbus_path: &Path) -> String { - read_id(etc_path) - .or_else(|| read_id(dbus_path)) - .unwrap_or_default() +pub fn get_machine_id_impl_paths(dmi_path: &Path, etc_path: &Path, boot_path: &Path) -> String { + if let Some(id) = read_trimmed(dmi_path) { + return id; + } + // agent compatibility: + // gopsutil only accepts /etc/machine-id when it's exactly 32 chars (bare hex) + if let Some(id) = read_trimmed(etc_path) { + if id.len() == 32 { + return id; + } + } + read_trimmed(boot_path).unwrap_or_default() } -/// Return the machine ID using the standard system paths. 
pub fn get_machine_id_impl() -> String { - get_machine_id_impl_paths(Path::new(ETC_MACHINE_ID), Path::new(DBUS_MACHINE_ID)) + get_machine_id_impl_paths( + Path::new("/sys/class/dmi/id/product_uuid"), + Path::new("/etc/machine-id"), + Path::new("/proc/sys/kernel/random/boot_id"), + ) } #[cfg(test)] mod tests { use super::*; - use std::io::Write as _; + + fn write(path: &Path, content: &[u8]) { + std::fs::write(path, content).unwrap(); + } + + fn tmp_paths( + dir: &tempfile::TempDir, + ) -> (std::path::PathBuf, std::path::PathBuf, std::path::PathBuf) { + ( + dir.path().join("product_uuid"), + dir.path().join("machine_id"), + dir.path().join("boot_id"), + ) + } #[test] - fn prefers_etc_machine_id() { + fn level1_dmi_wins_when_present() { let dir = tempfile::tempdir().unwrap(); - let etc = dir.path().join("etc_machine_id"); - let dbus = dir.path().join("dbus_machine_id"); - std::fs::write(&etc, "aabbccdd\n").unwrap(); - std::fs::write(&dbus, "11223344\n").unwrap(); - assert_eq!(get_machine_id_impl_paths(&etc, &dbus), "aabbccdd"); + let (dmi, etc, boot) = tmp_paths(&dir); + write(&dmi, b"B08FA8A2-B01A-4D2B-BD95-FEC7E30C5AEC\n"); + write(&etc, b"aabbccddaabbccddaabbccddaabbccdd\n"); + write(&boot, b"cccccccccccccccccccccccccccccccc\n"); + assert_eq!( + get_machine_id_impl_paths(&dmi, &etc, &boot), + "B08FA8A2-B01A-4D2B-BD95-FEC7E30C5AEC" + ); } #[test] - fn falls_back_to_dbus_when_etc_missing() { + fn level2_etc_used_when_dmi_absent() { let dir = tempfile::tempdir().unwrap(); - let dbus = dir.path().join("dbus_machine_id"); - std::fs::write(&dbus, "11223344\n").unwrap(); + let (dmi, etc, boot) = tmp_paths(&dir); + write(&etc, b"aabbccddaabbccddaabbccddaabbccdd\n"); + write(&boot, b"cccccccccccccccccccccccccccccccc\n"); assert_eq!( - get_machine_id_impl_paths(Path::new("/nonexistent_etc_mid"), &dbus), - "11223344" + get_machine_id_impl_paths(&dmi, &etc, &boot), + "aabbccddaabbccddaabbccddaabbccdd" ); } #[test] - fn both_missing_returns_empty() { + fn 
level2_skipped_when_etc_not_32_chars() { + let dir = tempfile::tempdir().unwrap(); + let (dmi, etc, boot) = tmp_paths(&dir); + write(&etc, b"aabbccdd-aabb-ccdd-aabb-ccddaabbccdd\n"); + write(&boot, b"dddddddddddddddddddddddddddddddd\n"); assert_eq!( - get_machine_id_impl_paths( - Path::new("/nonexistent_etc_mid"), - Path::new("/nonexistent_dbus_mid"), - ), - "" + get_machine_id_impl_paths(&dmi, &etc, &boot), + "dddddddddddddddddddddddddddddddd" ); } #[test] - fn trims_whitespace_and_newlines() { + fn level3_boot_id_as_last_resort() { let dir = tempfile::tempdir().unwrap(); - let etc = dir.path().join("etc_machine_id"); - std::fs::write(&etc, " deadbeef \n").unwrap(); + let (dmi, etc, boot) = tmp_paths(&dir); + write(&boot, b"cccccccccccccccccccccccccccccccc\n"); assert_eq!( - get_machine_id_impl_paths(&etc, Path::new("/nonexistent_dbus_mid")), - "deadbeef" + get_machine_id_impl_paths(&dmi, &etc, &boot), + "cccccccccccccccccccccccccccccccc" ); } #[test] - fn empty_file_falls_back() { + fn all_absent_returns_empty() { + let dir = tempfile::tempdir().unwrap(); + let (dmi, etc, boot) = tmp_paths(&dir); + assert_eq!(get_machine_id_impl_paths(&dmi, &etc, &boot), ""); + } + + #[test] + fn trims_whitespace() { let dir = tempfile::tempdir().unwrap(); - let etc = dir.path().join("etc_machine_id"); - let dbus = dir.path().join("dbus_machine_id"); - // etc exists but is whitespace-only; should fall back to dbus - let mut f = std::fs::File::create(&etc).unwrap(); - f.write_all(b" \n").unwrap(); - std::fs::write(&dbus, "fallback_id").unwrap(); - assert_eq!(get_machine_id_impl_paths(&etc, &dbus), "fallback_id"); + let (dmi, etc, boot) = tmp_paths(&dir); + write(&etc, b" aabbccddaabbccddaabbccddaabbccdd \n"); + assert_eq!( + get_machine_id_impl_paths(&dmi, &etc, &boot), + "aabbccddaabbccddaabbccddaabbccdd" + ); } } diff --git a/libdd-common/src/machine_id/macos.rs b/libdd-common/src/machine_id/macos.rs index 03e2f301f6..3fd1e7a58e 100644 --- a/libdd-common/src/machine_id/macos.rs 
+++ b/libdd-common/src/machine_id/macos.rs @@ -1,22 +1,9 @@ // Copyright 2021-Present Datadog, Inc. https://www.datadoghq.com/ // SPDX-License-Identifier: Apache-2.0 -/// Return the platform UUID via the `gethostuuid(3)` BSD syscall. -/// -/// This returns the same 128-bit value that `IOPlatformUUID` exposes via -/// IOKit (`ioreg -rd1 -c IOPlatformExpertDevice`), which is what gopsutil -/// (and therefore the Go agent) returns on macOS. Using the syscall avoids a -/// fork+exec of `ioreg`. -/// -/// The UUID is formatted as uppercase hex with hyphens: -/// `XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX` -/// -/// Returns an empty `String` on syscall failure, matching the Go agent's -/// silent-empty behaviour. +/// Returns `IOPlatformUUID` via `gethostuuid(3)`, which avoids a fork+exec of `ioreg`. pub fn get_machine_id_impl() -> String { - let mut uuid: [u8; 16] = [0u8; 16]; - // Passing a zero timespec requests an indefinite wait; in practice the - // call returns immediately (the UUID is available after very early boot). + let mut uuid = [0u8; 16]; let wait = libc::timespec { tv_sec: 0, tv_nsec: 0, @@ -26,7 +13,7 @@ pub fn get_machine_id_impl() -> String { return String::new(); } format!( - "{:02X}{:02X}{:02X}{:02X}-{:02X}{:02X}-{:02X}{:02X}-{:02X}{:02X}-{:02X}{:02X}{:02X}{:02X}{:02X}{:02X}", + "{:02x}{:02x}{:02x}{:02x}-{:02x}{:02x}-{:02x}{:02x}-{:02x}{:02x}-{:02x}{:02x}{:02x}{:02x}{:02x}{:02x}", uuid[0], uuid[1], uuid[2], uuid[3], uuid[4], uuid[5], uuid[6], uuid[7], @@ -41,10 +28,8 @@ mod tests { #[test] fn returns_nonempty_uuid() { - // On any real macOS host (including CI) the host UUID is always set. let id = get_machine_id_impl(); - assert!(!id.is_empty(), "expected a non-empty UUID on macOS"); - // Basic shape check: 36 chars, hyphens at positions 8, 13, 18, 23. 
+ assert!(!id.is_empty()); assert_eq!(id.len(), 36); assert_eq!(&id[8..9], "-"); assert_eq!(&id[13..14], "-"); diff --git a/libdd-common/src/machine_id/mod.rs b/libdd-common/src/machine_id/mod.rs index 94becbaad9..e6de79d292 100644 --- a/libdd-common/src/machine_id/mod.rs +++ b/libdd-common/src/machine_id/mod.rs @@ -1,22 +1,18 @@ // Copyright 2021-Present Datadog, Inc. https://www.datadoghq.com/ // SPDX-License-Identifier: Apache-2.0 -//! Host machine identifier, equivalent to `pkg/util/uuid.GetUUID()` in the -//! Go agent. -//! -//! The value is read once at first access, cached for the lifetime of the -//! process, and never replaced with a random UUID on failure. An empty -//! string is the correct fallback — the backend can detect a missing value -//! but cannot detect an incorrect one. -//! -//! # Per-platform source +//! Host machine identifier, mirroring `pkg/util/uuid.GetUUID()` in the Go agent. //! //! | Platform | Source | //! |----------|--------| -//! | Linux | `/etc/machine-id` (preferred), fallback `/var/lib/dbus/machine-id` | -//! | macOS | `gethostuuid(3)` — same value as `IOPlatformUUID` | -//! | Windows | `HKLM\SOFTWARE\Microsoft\Cryptography\MachineGuid` | -//! | Other | `""` (matches Go agent failure behaviour) | +//! | Linux | `/sys/class/dmi/id/product_uuid` then `/etc/machine-id` → `/proc/sys/kernel/random/boot_id` | +//! | macOS | `gethostuuid(3)` | +//! | Windows | `HKLM\SOFTWARE\Microsoft\Cryptography\MachineGuid` | +//! | Other | `""` | +//! +//! All values are normalised to lowercase `xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx`. +//! Returns `""` on failure rather than a random UUID — the backend can detect +//! a missing value but not a wrong one. use std::sync::LazyLock; @@ -29,30 +25,54 @@ mod macos; #[cfg(windows)] mod windows; -/// Cached host machine ID, populated on first access. 
-static MACHINE_ID: LazyLock = LazyLock::new(|| { - #[cfg(target_os = "linux")] - { - linux::get_machine_id_impl() - } - #[cfg(target_os = "macos")] - { - macos::get_machine_id_impl() - } - #[cfg(windows)] - { - windows::get_machine_id_impl() - } - #[cfg(not(any(target_os = "linux", target_os = "macos", windows)))] - { - String::new() +/// Normalise a raw OS machine-id to a lowercase hyphenated UUID string. +/// Strips hyphens, filters to hex digits, lowercases, then re-inserts hyphens. +/// Returns `""` if the result is not exactly 32 hex digits. +pub(crate) fn normalize_uuid(raw: &str) -> String { + let hex: String = raw + .chars() + .filter(|c| c.is_ascii_hexdigit()) + .flat_map(char::to_lowercase) + .collect(); + + if hex.len() != 32 { + return String::new(); } + + format!( + "{}-{}-{}-{}-{}", + &hex[0..8], + &hex[8..12], + &hex[12..16], + &hex[16..20], + &hex[20..32], + ) +} + +static MACHINE_ID: LazyLock = LazyLock::new(|| { + let raw = { + #[cfg(target_os = "linux")] + { + linux::get_machine_id_impl() + } + #[cfg(target_os = "macos")] + { + macos::get_machine_id_impl() + } + #[cfg(windows)] + { + windows::get_machine_id_impl() + } + #[cfg(not(any(target_os = "linux", target_os = "macos", windows)))] + { + String::new() + } + }; + normalize_uuid(&raw) }); -/// Returns the host machine ID, cached for the process lifetime. -/// -/// Returns `""` on failure or on unsupported platforms (matches Go agent -/// behaviour — an empty string is preferable to a synthetic random UUID). +/// Returns the host machine ID as a lowercase hyphenated UUID, cached for the process lifetime. +/// Returns `""` on failure or unsupported platforms. pub fn get_machine_id() -> &'static str { MACHINE_ID.as_str() } @@ -61,21 +81,55 @@ pub fn get_machine_id() -> &'static str { mod tests { use super::*; - /// Two successive calls must return the identical pointer/value (the - /// LazyLock must be stable). 
#[test] fn cached_value_is_stable() { - let a = get_machine_id(); - let b = get_machine_id(); - assert_eq!(a, b); + assert_eq!(get_machine_id(), get_machine_id()); } - /// The cached value must not contain leading/trailing whitespace or - /// newlines (each platform implementation is responsible for trimming, - /// but we assert it here as a cross-platform contract). #[test] - fn value_is_trimmed() { + fn value_has_uuid_shape_if_nonempty() { let id = get_machine_id(); - assert_eq!(id, id.trim()); + if id.is_empty() { + return; + } + assert_eq!(id.len(), 36); + for (i, c) in id.chars().enumerate() { + if [8, 13, 18, 23].contains(&i) { + assert_eq!(c, '-'); + } else { + assert!(c.is_ascii_hexdigit() && !c.is_ascii_uppercase()); + } + } + } + + #[test] + fn normalize_bare_hex_inserts_hyphens() { + assert_eq!( + normalize_uuid("b08fa8a2b01a4d2bbd95fec7e30c5aec"), + "b08fa8a2-b01a-4d2b-bd95-fec7e30c5aec" + ); + } + + #[test] + fn normalize_uppercase_uuid_lowercased() { + assert_eq!( + normalize_uuid("B08FA8A2-B01A-4D2B-BD95-FEC7E30C5AEC"), + "b08fa8a2-b01a-4d2b-bd95-fec7e30c5aec" + ); + } + + #[test] + fn normalize_lowercase_uuid_unchanged() { + assert_eq!( + normalize_uuid("b08fa8a2-b01a-4d2b-bd95-fec7e30c5aec"), + "b08fa8a2-b01a-4d2b-bd95-fec7e30c5aec" + ); + } + + #[test] + fn normalize_invalid_returns_empty() { + assert_eq!(normalize_uuid(""), ""); + assert_eq!(normalize_uuid("b08fa8a2"), ""); + assert_eq!(normalize_uuid("zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz"), ""); } } diff --git a/libdd-common/src/machine_id/windows.rs b/libdd-common/src/machine_id/windows.rs index 6137c6bfb6..d8abab29f8 100644 --- a/libdd-common/src/machine_id/windows.rs +++ b/libdd-common/src/machine_id/windows.rs @@ -8,49 +8,29 @@ use windows_sys::Win32::System::Registry::{ }; use windows_sys::Win32::System::Threading::{GetCurrentProcess, IsWow64Process}; -/// Sub-key containing `MachineGuid`. -const SUBKEY: &str = "SOFTWARE\\Microsoft\\Cryptography"; -/// Value name holding the machine GUID. 
-const VALUE_NAME: &str = "MachineGuid"; +fn to_wide_null(s: &str) -> Vec { + s.encode_utf16().chain(std::iter::once(0u16)).collect() +} -/// Returns `true` when the current process is a 32-bit process running under -/// WOW64 on a 64-bit Windows host. We use this to request 64-bit registry -/// view access when reading `HKLM\SOFTWARE\Microsoft\Cryptography`, which only -/// exists in the 64-bit hive. fn is_wow64() -> bool { let mut result: i32 = 0; let ok = unsafe { IsWow64Process(GetCurrentProcess(), &mut result) }; ok != 0 && result != 0 } -/// Encode a Rust `&str` as a null-terminated UTF-16 (`Vec`). -fn to_wide_null(s: &str) -> Vec { - s.encode_utf16().chain(std::iter::once(0u16)).collect() -} - -/// Read the machine GUID from -/// `HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Cryptography\MachineGuid`. -/// -/// Returns an empty `String` on any failure (key/value missing, access -/// denied, or encoding error), matching the Go agent's behaviour. pub fn get_machine_id_impl() -> String { - // On a 32-bit process running on 64-bit Windows we must request the 64-bit - // registry view; otherwise `RegOpenKeyExW` would redirect into the WOW64 - // 32-bit hive where `MachineGuid` does not exist. let access = if cfg!(target_pointer_width = "32") && is_wow64() { KEY_READ | KEY_WOW64_64KEY } else { KEY_READ }; - let subkey_wide = to_wide_null(SUBKEY); - let mut hkey: HKEY = 0; - // SAFETY: all pointers are valid; no aliasing hazard. + // SAFETY: all pointers are valid. let status = unsafe { RegOpenKeyExW( HKEY_LOCAL_MACHINE, - subkey_wide.as_ptr(), + to_wide_null("SOFTWARE\\Microsoft\\Cryptography").as_ptr(), 0, access, &mut hkey, @@ -60,22 +40,11 @@ pub fn get_machine_id_impl() -> String { return String::new(); } - let result = read_string_value(hkey); + let value_wide = to_wide_null("MachineGuid"); - // SAFETY: `hkey` is a valid open handle returned by `RegOpenKeyExW`. 
- unsafe { RegCloseKey(hkey) }; - - result -} - -/// Read the `MachineGuid` REG_SZ value from an already-opened registry key -/// handle, returning an empty `String` on any failure. -fn read_string_value(hkey: HKEY) -> String { - let value_wide = to_wide_null(VALUE_NAME); - - // First call: get the required buffer size. let mut data_type: u32 = 0; let mut data_len: u32 = 0; + // SAFETY: null data pointer is valid for a size-query call. let status = unsafe { RegQueryValueExW( hkey, @@ -87,15 +56,14 @@ fn read_string_value(hkey: HKEY) -> String { ) }; if status != ERROR_SUCCESS as i32 || data_type != REG_SZ { + // SAFETY: hkey is a valid open handle. + unsafe { RegCloseKey(hkey) }; return String::new(); } - // `data_len` is in bytes (UTF-16 units × 2), including the null terminator. - let num_u16 = (data_len as usize).div_ceil(2); - let mut buf: Vec = vec![0u16; num_u16]; - - // Second call: read the actual data. + let mut buf: Vec = vec![0u16; (data_len as usize).div_ceil(2)]; let mut actual_len = data_len; + // SAFETY: buf has the capacity returned by the size-query call above. let status = unsafe { RegQueryValueExW( hkey, @@ -106,11 +74,13 @@ fn read_string_value(hkey: HKEY) -> String { &mut actual_len, ) }; + // SAFETY: hkey is a valid open handle. + unsafe { RegCloseKey(hkey) }; + if status != ERROR_SUCCESS as i32 { return String::new(); } - // Strip the null terminator(s) and convert to a Rust String. 
while buf.last() == Some(&0u16) { buf.pop(); } From 1baa6e154fd48336b7c82bb923d7e7781d82d2af Mon Sep 17 00:00:00 2001 From: paullegranddc Date: Mon, 22 Jun 2026 16:15:22 +0200 Subject: [PATCH 10/18] fix: remove intermediary cache --- .../examples/remote_config_fetch.rs | 5 +- .../mod.rs => fetch/agentless.rs} | 258 +++++++-------- libdd-remote-config/src/fetch/fetcher.rs | 294 ++++++++++-------- libdd-remote-config/src/fetch/mod.rs | 3 + libdd-remote-config/src/lib.rs | 2 - 5 files changed, 302 insertions(+), 260 deletions(-) rename libdd-remote-config/src/{agentless_client/mod.rs => fetch/agentless.rs} (85%) diff --git a/libdd-remote-config/examples/remote_config_fetch.rs b/libdd-remote-config/examples/remote_config_fetch.rs index 765a03dc26..cb2e23b4c6 100644 --- a/libdd-remote-config/examples/remote_config_fetch.rs +++ b/libdd-remote-config/examples/remote_config_fetch.rs @@ -3,8 +3,9 @@ use libdd_common::tag::Tag; use libdd_common::Endpoint; -use libdd_remote_config::agentless_client::AgentlessConfig; -use libdd_remote_config::fetch::{ConfigInvariants, ConfigOptions, SingleChangesFetcher}; +use libdd_remote_config::fetch::{ + AgentlessConfig, ConfigInvariants, ConfigOptions, SingleChangesFetcher, +}; use libdd_remote_config::file_change_tracker::{Change, FilePath}; use libdd_remote_config::file_storage::ParsedFileStorage; use libdd_remote_config::RemoteConfigProduct::ApmTracing; diff --git a/libdd-remote-config/src/agentless_client/mod.rs b/libdd-remote-config/src/fetch/agentless.rs similarity index 85% rename from libdd-remote-config/src/agentless_client/mod.rs rename to libdd-remote-config/src/fetch/agentless.rs index 76d7b0c7d9..63740d06b6 100644 --- a/libdd-remote-config/src/agentless_client/mod.rs +++ b/libdd-remote-config/src/fetch/agentless.rs @@ -1,6 +1,8 @@ // Copyright 2026-Present Datadog, Inc. 
https://www.datadoghq.com/ // SPDX-License-Identifier: Apache-2.0 +use crate::fetch::{AgentlessTargetCache, FileStorage, NewTarget}; + use std::{ fmt, ops::RangeInclusive, @@ -171,40 +173,38 @@ pub struct AgentlessFetcher { /// Number of consecutive `fetch_config` failures. Reset to 0 on success. consecutive_failures: u32, endpoint: Endpoint, - // TODO: Not sure this is needed if the wrapped client already caches files? - target_cache: HashMap, -} - -struct CachedFile { - hashes: Vec<(&'static tuf::crypto::HashAlgorithm, tuf::crypto::HashValue)>, - target_file: Vec, - version: u64, } -pub struct ClientTargetResponse<'a> { - pub path: &'a str, +pub struct ClientTargetRef { + pub path: String, pub version: u64, - pub hashes: &'a [(&'static tuf::crypto::HashAlgorithm, tuf::crypto::HashValue)], - pub content: &'a [u8], + pub primary_hash: String, + pub length: u64, } -pub struct ClientResponse<'a> { +pub struct ClientResponse { pub root_version: u64, pub target_version: u64, - pub opaque_backend_state: &'a [u8], - pub targets: Vec>, + pub opaque_backend_state: Vec, + /// All currently active targets; content is already stored in the outer cache. + pub targets: Vec, pub refresh_interval: Duration, } -struct BorrowedTufTarget<'a> { - pub path: &'a tuf::metadata::TargetPath, - pub desc: &'a tuf::metadata::TargetDescription, +/// A trusted, unexpired TUF target. Produced by [`trusted_targets`]. +struct TrustedTarget<'a> { + path: &'a tuf::metadata::TargetPath, + length: u64, + version: u64, + all_hashes: Vec<(&'static tuf::crypto::HashAlgorithm, tuf::crypto::HashValue)>, + /// Lowercase hex of the first supported hash; used for cache-hit comparisons. 
+ primary_hash: String, } const CUSTOM_METADATA_EXPIRY_PATH: &str = "expires"; -impl<'a> BorrowedTufTarget<'a> { - pub fn try_create(path: &'a TargetPath, desc: &'a TargetDescription) -> anyhow::Result { +impl<'a> TrustedTarget<'a> { + fn try_create(path: &'a TargetPath, desc: &'a TargetDescription) -> anyhow::Result { if let Some(expiry) = desc.custom().get(CUSTOM_METADATA_EXPIRY_PATH) { let expiry_ts = expiry .as_u64() @@ -215,13 +215,23 @@ impl<'a> BorrowedTufTarget<'a> { } } - Ok(Self { path, desc }) - } -} + let all_hashes = tuf::crypto::retain_supported_hashes(desc.hashes()); + if all_hashes.is_empty() { + bail!("no supported hash algorithm for target at path: {path}") + } + // retain_supported_hashes return order is deterministic. + let primary_hash = all_hashes[0].1.to_string(); + + let version = desc.custom().get("v").and_then(|v| v.as_u64()).unwrap_or(0); -enum FetchTargetResult { - Cached, - New(CachedFile), + Ok(Self { + path, + length: desc.length(), + version, + all_hashes, + primary_hash, + }) + } } impl AgentlessFetcher { @@ -278,7 +288,6 @@ impl AgentlessFetcher { hostname: cfg.hostname, agent_uuid_override: cfg.agent_uuid, products: HashSet::new(), - target_cache: HashMap::new(), opaque_backend_state: Vec::new(), refresh_interval: Duration::from_secs(60), @@ -299,43 +308,11 @@ impl AgentlessFetcher { compute_backoff(self.consecutive_failures) } - /// Return the value of a particular target , checking both its length and - /// hashes against the metadata in the config repo. 
- /// - /// If it is already in the cache, return `Cached` - async fn fetch_target( - &self, - target: &BorrowedTufTarget<'_>, - ) -> anyhow::Result { - let expected_hashes = tuf::crypto::retain_supported_hashes(target.desc.hashes()); - if expected_hashes.is_empty() { - bail!("no supported hash for path: {}", target.path); - } - let (target_hash_algo, target_hash) = &expected_hashes[0]; + async fn fetch_target(&self, target: &TrustedTarget<'_>) -> anyhow::Result> { let target_path = target.path; - let version = target - .desc - .custom() - .get("v") - .and_then(|v| v.as_u64()) - .unwrap_or(0); - - if let Some(item) = self.target_cache.get(target_path) { - if item - .hashes - .iter() - .find(|(alg, _)| alg == target_hash_algo) - .is_some_and(|(_, h)| h == target_hash) - && item.target_file.len() as u64 == target.desc.length() - { - return Ok(FetchTargetResult::Cached); - } - } - - // Fetch from the content from the remote __Unverified__ repo - // This is fine as we are comparing the (hash + len) with a validated - // target + // Fetch from the remote __unverified__ repo. + // This is fine because we compare the hash+len against TUF-validated metadata. 
let mut read = self .director_client .remote_repo() @@ -344,43 +321,40 @@ impl AgentlessFetcher { let mut buf = Vec::new(); read.read_to_end(&mut buf).await?; - let expected_len = target.desc.length() as usize; - if buf.len() != expected_len { + if buf.len() as u64 != target.length { bail!("bad length for file at path: {}", target.path) } - { - let hash_algs = expected_hashes - .iter() - .map(|(alg, _val)| (*alg).clone()) - .collect::>(); - let actual_hashes = - tuf::crypto::calculate_hashes_from_slice(&buf, hash_algs.as_slice())?; - let expected: HashMap<_, _> = expected_hashes + let hash_algs = target + .all_hashes + .iter() + .map(|(alg, _val)| (*alg).clone()) + .collect::>(); + let actual_hashes = tuf::crypto::calculate_hashes_from_slice(&buf, hash_algs.as_slice())?; + let expected: HashMap<_, _> = target + .all_hashes + .iter() + .map(|(alg, val)| (alg, val)) + .collect(); + + if !(actual_hashes.len() == expected.len() + && actual_hashes .iter() - .map(|(alg, val)| (alg, val)) - .collect(); - - if !(actual_hashes.len() == expected.len() - && actual_hashes - .iter() - .all(|(k, v)| expected.get(&k).is_some_and(|e| *e == v))) - { - bail!("hash did not match: {}", target.path) - } + .all(|(k, v)| expected.get(&k).is_some_and(|e| *e == v))) + { + bail!("hash did not match: {}", target.path) } - Ok(FetchTargetResult::New(CachedFile { - hashes: expected_hashes, - target_file: buf, - version, - })) + Ok(buf) } - pub async fn fetch_config( + /// Fetch remote config. Newly-downloaded target content is written directly into + /// `cache` rather than buffered inside the agentless fetcher. 
+ pub(crate) async fn fetch_config( &mut self, c: remoteconfig::Client, - ) -> anyhow::Result> { + cache: &AgentlessTargetCache<'_, S>, + ) -> anyhow::Result { let ( current_config_snapshot_version, current_config_root_version, @@ -459,17 +433,15 @@ impl AgentlessFetcher { }; self.consecutive_failures = 0; - self.apply(&response).await?; - if !self.initialized { - self.initialized = true; - } + let active_targets = self.apply(&response, cache).await?; + self.initialized = true; self.products = all_products; // TODO: - // In the future we will want to query configs for mutliple clients (for PHP, which can have - // many processes use the same rc client) + // In the future we will want to query configs for multiple clients (for PHP, which can have + // many processes use the same rc client). // This means we will need to dispatch the different files based on filter predicates - // which we currently do not parse + // which we currently do not parse. Ok(ClientResponse { root_version: u64::from(self.config_client.database().trusted_root().version()), @@ -480,17 +452,8 @@ impl AgentlessFetcher { .ok_or(anyhow::anyhow!("Missing target data"))? .version(), ), - opaque_backend_state: &self.opaque_backend_state, - targets: self - .target_cache - .iter() - .map(|(p, t)| ClientTargetResponse { - path: p.as_str(), - version: t.version, - hashes: &t.hashes, - content: t.target_file.as_slice(), - }) - .collect(), + opaque_backend_state: self.opaque_backend_state.clone(), + targets: active_targets, refresh_interval: self.refresh_interval, }) } @@ -548,10 +511,11 @@ impl AgentlessFetcher { Ok(self.http.request(req).await?) 
} - async fn apply( + async fn apply( &mut self, response: &remoteconfig::LatestConfigsResponse, - ) -> anyhow::Result<()> { + cache: &AgentlessTargetCache<'_, S>, + ) -> anyhow::Result> { // At a high level, we're populating the "remote" repos with the metadata // that we received from upstream (which does not validate it), and then using the clients' // `update` methods to synchronize that metadata to the "local" repos, during which @@ -601,23 +565,50 @@ impl AgentlessFetcher { self.config_client.update().await?; self.director_client.update().await?; - let mut new_target_path_set = HashSet::new(); - for target in trusted_targets(&self.director_client)? { - new_target_path_set.insert(target.path); - match self.fetch_target(&target).await? { - FetchTargetResult::Cached => {} - FetchTargetResult::New(cached_target) => { - self.target_cache.insert(target.path.clone(), cached_target); - } + let targets: Vec> = trusted_targets(&self.director_client)?.collect(); + + let cached_paths: hashbrown::HashSet<&str> = cache.is_cached_batch( + targets + .iter() + .map(|t| (t.path.as_str(), t.primary_hash.as_str(), t.length)), + ); + + let mut new_targets: Vec = Vec::new(); + for t in &targets { + if cached_paths.contains(t.path.as_str()) { + continue; } + let content = self.fetch_target(t).await?; + new_targets.push(NewTarget { + path: t.path.as_str().to_owned(), + version: t.version, + primary_hash: t.primary_hash.clone(), + hashes: t + .all_hashes + .iter() + .map(|(alg, hash)| (hash_algorithm_to_str(alg).to_owned(), hash.to_string())) + .collect(), + content, + }); } - self.target_cache - .retain(|key, _| new_target_path_set.contains(key)); - - // The Remote Config service uses a `custom` field at the top-level of the targets metadata - // to store this field which we are supposed to echo back to the server. That `custom` field - // is not explicitly part of the TUF spec, which is why we need to pull it out of the - // `additional_fields` catch-all here. 
+ cache.store_batch(new_targets)?; + + let active_path_strs: hashbrown::HashSet<&str> = + targets.iter().map(|t| t.path.as_str()).collect(); + cache.retain_only(&active_path_strs); + + let active_targets: Vec = targets + .iter() + .map(|t| ClientTargetRef { + path: t.path.as_str().to_owned(), + version: t.version, + primary_hash: t.primary_hash.clone(), + length: t.length, + }) + .collect(); + + // The Remote Config service uses a `custom` field at the top-level of the targets + // metadata to store this field which we are supposed to echo back to the server. if let Some((opaque_backend_state, refresh_interval)) = get_director_custom(&self.director_client) { @@ -629,7 +620,7 @@ impl AgentlessFetcher { } } - Ok(()) + Ok(active_targets) } } @@ -719,11 +710,11 @@ fn now_unix_milli_ts() -> u64 { .unwrap_or(u64::MAX) } -/// Return the available, unexpired target paths and their descriptions based on the current -/// metadata. +/// Return all currently trusted, unexpired targets. Targets that are expired or that lack a +/// supported hash algorithm are skipped with a debug log. fn trusted_targets( director_client: &TUFClient, -) -> anyhow::Result> + '_> { +) -> anyhow::Result> + '_> { Ok(director_client .database() .trusted_targets() @@ -731,7 +722,7 @@ fn trusted_targets( .targets() .iter() .filter_map(|(path, desc)| { - BorrowedTufTarget::try_create(path, desc) + TrustedTarget::try_create(path, desc) .inspect_err(|e| { debug!(%path, "Skipping target: error {}", e); }) @@ -766,6 +757,15 @@ async fn store_noversion( Ok(()) } +fn hash_algorithm_to_str(alg: &tuf::crypto::HashAlgorithm) -> &str { + match alg { + tuf::crypto::HashAlgorithm::Sha256 => "sha256", + tuf::crypto::HashAlgorithm::Sha512 => "sha512", + tuf::crypto::HashAlgorithm::Unknown(s) => s.as_str(), + _ => "unknown", + } +} + /// Strip the leading `.` prefix from the basename of a TUF target path. 
/// For instance "datadog/2///.config"` => `"datadog/2///config"` /// diff --git a/libdd-remote-config/src/fetch/fetcher.rs b/libdd-remote-config/src/fetch/fetcher.rs index 3b3b4e2d56..2d9c8e8368 100644 --- a/libdd-remote-config/src/fetch/fetcher.rs +++ b/libdd-remote-config/src/fetch/fetcher.rs @@ -1,14 +1,14 @@ // Copyright 2021-Present Datadog, Inc. https://www.datadoghq.com/ // SPDX-License-Identifier: Apache-2.0 -use crate::agentless_client::{ - self, make_agentless_configs_endpoint, AgentlessConfig, AgentlessFetcher, +use super::agentless::{ + make_agentless_configs_endpoint, AgentlessConfig, AgentlessFetcher, ClientTargetRef, NativeAgentlessFetcher, }; use crate::targets::{Root, TargetsList}; use crate::{RemoteConfigCapabilities, RemoteConfigPath, RemoteConfigProduct, Target}; use base64::Engine; -use hashbrown::HashMap; +use hashbrown::{HashMap, HashSet as HbHashSet}; use http::uri::PathAndQuery; use http::StatusCode; use http_body_util::BodyExt; @@ -66,12 +66,12 @@ impl ConfigInvariants { } } -struct StoredTargetFile { - hash: String, - handle: Arc, - state: ConfigState, - meta: TargetFileMeta, - expiring: bool, +pub(crate) struct StoredTargetFile { + pub(crate) hash: String, + pub(crate) handle: Arc, + pub(crate) state: ConfigState, + pub(crate) meta: TargetFileMeta, + pub(crate) expiring: bool, } pub enum ConfigApplyState { @@ -118,7 +118,7 @@ impl ConfigProductCapabilities { } pub struct ConfigFetcherState { - target_files_by_path: Mutex, StoredTargetFile>>, + pub(crate) target_files_by_path: Mutex, StoredTargetFile>>, pub invariants: ConfigInvariants, endpoint: Endpoint, pub expire_unused_files: bool, @@ -234,6 +234,150 @@ impl ConfigFetcherState { } } +pub(crate) struct NewTarget { + pub path: String, + pub version: u64, + /// Lowercase hex of the primary (sha256-preferred) hash. + pub primary_hash: String, + /// All `(algorithm_name, hex_hash)` pairs for the target. 
+ pub hashes: Vec<(String, String)>, + pub content: Vec, +} + +pub(crate) struct AgentlessTargetCache<'a, S: FileStorage> { + files: &'a Mutex, StoredTargetFile>>, + storage: &'a S, + expire_unused_files: bool, +} + +impl<'a, S: FileStorage> AgentlessTargetCache<'a, S> { + pub(crate) fn new(state: &'a ConfigFetcherState, storage: &'a S) -> Self { + AgentlessTargetCache { + files: &state.target_files_by_path, + storage, + expire_unused_files: state.expire_unused_files, + } + } + + /// Returns the TUF path strings whose `(primary_hash, len)` already matches the cache. + pub(crate) fn is_cached_batch<'b>( + &self, + candidates: impl IntoIterator, + ) -> HbHashSet<&'b str> { + let files = self.files.lock_or_panic(); + candidates + .into_iter() + .filter_map(|(path, primary_hash, len)| { + let parsed = RemoteConfigPath::try_parse(path).ok()?; + let stored = files.get(&parsed)?; + if stored.hash == primary_hash && stored.meta.length as u64 == len { + Some(path) + } else { + None + } + }) + .collect() + } + + pub(crate) fn store_batch( + &self, + targets: impl IntoIterator, + ) -> anyhow::Result<()> { + let mut files = self.files.lock_or_panic(); + for NewTarget { + path, + version, + primary_hash, + hashes, + content, + } in targets + { + let parsed_path = match RemoteConfigPath::try_parse(&path) { + Ok(p) => p, + Err(e) => { + warn!("store_batch: failed to parse remote config path {path}: {e:?}"); + continue; + } + }; + let parsed_path: Arc = Arc::new(parsed_path.into()); + let length = content.len() as i64; + let new_handle = if let Some(existing) = files.get(&parsed_path) { + self.storage + .update(&existing.handle, version, content) + .map(|()| existing.handle.clone())? + } else { + self.storage.store(version, parsed_path.clone(), content)? 
+ }; + files.insert( + parsed_path.clone(), + StoredTargetFile { + hash: primary_hash, + state: ConfigState { + id: parsed_path.config_id.to_string(), + version, + product: parsed_path.product.to_string(), + apply_state: 2, // Acknowledged + apply_error: String::new(), + }, + meta: TargetFileMeta { + path, + length, + hashes: hashes + .into_iter() + .map(|(algorithm, hash)| TargetFileHash { algorithm, hash }) + .collect(), + }, + handle: new_handle, + expiring: false, + }, + ); + } + Ok(()) + } + + /// Evict every entry whose TUF path is not in `active_paths`. No-op when + /// `expire_unused_files` is `false`. + pub(crate) fn retain_only(&self, active_paths: &HbHashSet<&str>) { + if !self.expire_unused_files { + return; + } + self.files + .lock_or_panic() + .retain(|_, stored| active_paths.contains(stored.meta.path.as_str())); + } + + /// Collect `Arc` handles for every target in `targets`, verifying + /// stored hash and length match, and marking each entry as non-expiring. + pub(crate) fn collect_handles( + &self, + targets: &[ClientTargetRef], + ) -> anyhow::Result>> { + let mut files = self.files.lock_or_panic(); + let mut handles = Vec::with_capacity(targets.len()); + for target in targets { + let parsed = RemoteConfigPath::try_parse(&target.path).map_err(|e| { + anyhow::format_err!("collect_handles: bad path {}: {e:?}", target.path) + })?; + let stored = files.get_mut(&parsed).ok_or_else(|| { + anyhow::format_err!( + "collect_handles: path {} not found in cache after fetch", + target.path + ) + })?; + if stored.hash != target.primary_hash || stored.meta.length as u64 != target.length { + anyhow::bail!( + "collect_handles: cache mismatch for {}: stored hash={} len={}, expected hash={} len={}", + target.path, stored.hash, stored.meta.length, + target.primary_hash, target.length + ); + } + stored.expiring = false; + handles.push(stored.handle.clone()); + } + Ok(handles) + } +} + #[allow(clippy::large_enum_variant)] enum FetcherMode { Agent, @@ -631,10 +775,10 @@ 
impl ConfigFetcher { let client = config_req.client.expect( "RC ConfigFetcher::build_config_request should always return a `Some` client", ); - // Capture errors into `client_state.last_error` so the next - // call propagates `has_error` / `error` to the backend. - let res = match agentless_fetcher.fetch_config(client).await { - Ok(res) => res, + + let cache = AgentlessTargetCache::new(&self.state, &self.file_storage); + let res = match agentless_fetcher.fetch_config(client, &cache).await { + Ok(r) => r, Err(e) => { client_state.last_error = Some(format!("{e:#}")); // Surface the recommended backoff to the consumer of @@ -652,129 +796,25 @@ impl ConfigFetcher { client_state.targets_version = res.target_version; client_state.refresh_interval = Some(res.refresh_interval); if res.opaque_backend_state != client_state.opaque_backend_state { - client_state.opaque_backend_state = res.opaque_backend_state.to_vec(); + client_state.opaque_backend_state = res.opaque_backend_state.clone(); } client_state.last_error = None; - let mut target_files = self.state.target_files_by_path.lock_or_panic(); - let mut config_paths = HashSet::new(); - for &agentless_client::ClientTargetResponse { path, .. 
} in &res.targets { - match RemoteConfigPath::try_parse(path) { + let mut config_paths: HashSet = HashSet::new(); + for target_ref in &res.targets { + match RemoteConfigPath::try_parse(&target_ref.path) { Ok(parsed) => { config_paths.insert(parsed.into()); } - Err(e) => warn!("Failed parsing remote config path: {path} - {e:?}"), + Err(e) => warn!( + "Failed parsing remote config path: {} - {e:?}", + target_ref.path + ), } } - if self.state.expire_unused_files { - target_files.retain(|k, _| config_paths.contains(k.as_ref())); - } + let configs = cache.collect_handles(&res.targets)?; - for agentless_client::ClientTargetResponse { - path, - content: target_file, - version, - hashes, - } in res.targets - { - let parsed_path = match RemoteConfigPath::try_parse(path) { - Ok(parsed_path) => parsed_path, - Err(e) => { - warn!("Failed parsing remote config path: {path} - {e:?}"); - continue; - } - }; - let Some((_, hash)) = hashes - .iter() - .find(|(h, _)| *h == &tuf::crypto::HashAlgorithm::Sha256) - .or_else(|| { - hashes - .iter() - .find(|(h, _)| *h == &tuf::crypto::HashAlgorithm::Sha512) - }) - else { - // todo no supported hash algorithm? - continue; - }; - let hash = hash.to_string(); - - let handle = if let Some(StoredTargetFile { - hash: old_hash, - handle, - .. 
- }) = target_files.get(&parsed_path) - { - if old_hash == &hash { - continue; - } - Some(handle.clone()) - } else { - None - }; - - let parsed_path: Arc = Arc::new(parsed_path.into()); - target_files.insert( - parsed_path.clone(), - StoredTargetFile { - hash, - state: ConfigState { - id: parsed_path.config_id.to_string(), - version, - product: parsed_path.product.to_string(), - apply_state: 2, // Acknowledged - apply_error: "".to_string(), - }, - meta: TargetFileMeta { - path: path.to_string(), - length: target_file.len() as i64, - hashes: hashes - .iter() - .map(|(algorithm, hash)| { - Ok(TargetFileHash { - algorithm: match algorithm { - tuf::crypto::HashAlgorithm::Sha256 => { - "sha256".to_string() - } - tuf::crypto::HashAlgorithm::Sha512 => { - "sha512".to_string() - } - tuf::crypto::HashAlgorithm::Unknown(u) => u.clone(), - a => { - anyhow::bail!("unhandled hash algorithm: {a:?}") - } - }, - hash: hash.to_string(), - }) - }) - .collect::>()?, - }, - handle: if let Some(handle) = handle { - self.file_storage - .update(&handle, version, target_file.to_vec())?; - handle - } else { - self.file_storage.store( - version, - parsed_path, - target_file.to_vec(), - )? - }, - expiring: false, - }, - ); - } - let mut configs = Vec::with_capacity(config_paths.len()); - for config in config_paths.iter() { - if let Some(target_file) = target_files.get_mut(config) { - target_file.expiring = false; - configs.push(target_file.handle.clone()); - } else { - anyhow::bail!( - "Found {config} in client_configs response, but it isn't stored." 
- ); - } - } client_state.last_config_paths = config_paths; Ok(Some(configs)) } diff --git a/libdd-remote-config/src/fetch/mod.rs b/libdd-remote-config/src/fetch/mod.rs index a143195871..2e6909dbcc 100644 --- a/libdd-remote-config/src/fetch/mod.rs +++ b/libdd-remote-config/src/fetch/mod.rs @@ -5,6 +5,8 @@ mod fetcher; mod multitarget; mod shared; mod single; +mod agentless; + #[cfg(any(test, feature = "test"))] pub mod test_server; @@ -14,3 +16,4 @@ pub use fetcher::*; pub use multitarget::*; pub use shared::*; pub use single::*; +pub use agentless::*; diff --git a/libdd-remote-config/src/lib.rs b/libdd-remote-config/src/lib.rs index 01ca7c4fc7..462362d160 100644 --- a/libdd-remote-config/src/lib.rs +++ b/libdd-remote-config/src/lib.rs @@ -7,8 +7,6 @@ #![cfg_attr(not(test), deny(clippy::todo))] #![cfg_attr(not(test), deny(clippy::unimplemented))] -pub mod agentless_client; - pub mod config; #[cfg(feature = "client")] pub mod fetch; From 45eac9630e68291f750de70e5e4bfe6d1be19a73 Mon Sep 17 00:00:00 2001 From: paullegranddc Date: Mon, 22 Jun 2026 16:17:27 +0200 Subject: [PATCH 11/18] fix: remove debug statemements --- libdd-remote-config/src/fetch/agentless.rs | 3 --- 1 file changed, 3 deletions(-) diff --git a/libdd-remote-config/src/fetch/agentless.rs b/libdd-remote-config/src/fetch/agentless.rs index 63740d06b6..fa9b03ee20 100644 --- a/libdd-remote-config/src/fetch/agentless.rs +++ b/libdd-remote-config/src/fetch/agentless.rs @@ -123,7 +123,6 @@ type TUFClient = tuf::client::Client; // If the endpoint is not suitable (api key not set, not https), returns N pub fn make_agentless_configs_endpoint(e: &Endpoint) -> Option { let e = e.clone(); - dbg!(&e); if !(e.url.scheme_str().is_some_and(|s| s == "https") && e.url.authority().is_some() && e.api_key.is_some()) @@ -482,12 +481,10 @@ impl AgentlessFetcher { &self, req: remoteconfig::LatestConfigsRequest, ) -> anyhow::Result { - dbg!(&req); let path = PathAndQuery::from_static("/api/v0.1/configurations"); let body = 
Bytes::from(req.encode_to_vec()); let res = self.send_request(Method::POST, path, body).await?; let res = parse_rc_response(res)?; - dbg!(debug_latest_configs_response(&res)); Ok(res) } From f16100e84b44d5ddda2c8172b2140a68418f51fd Mon Sep 17 00:00:00 2001 From: paullegranddc Date: Mon, 22 Jun 2026 16:26:04 +0200 Subject: [PATCH 12/18] remove unduplicated deps --- Cargo.lock | 79 ++++++++++------------------------ libdd-remote-config/Cargo.toml | 2 +- 2 files changed, 23 insertions(+), 58 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8ccd0cae6a..ca1f6dbbda 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -398,7 +398,7 @@ dependencies = [ "bytes", "futures-util", "http 1.1.0", - "http-body 1.0.1", + "http-body", "http-body-util", "itoa 1.0.11", "matchit", @@ -422,7 +422,7 @@ dependencies = [ "bytes", "futures-core", "http 1.1.0", - "http-body 1.0.1", + "http-body", "http-body-util", "mime", "pin-project-lite", @@ -2345,17 +2345,6 @@ dependencies = [ "itoa 1.0.11", ] -[[package]] -name = "http-body" -version = "0.4.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ceab25649e9960c0311ea418d17bee82c0dcec1bd053b5f9a66e265a693bed2" -dependencies = [ - "bytes", - "http 0.2.12", - "pin-project-lite", -] - [[package]] name = "http-body" version = "1.0.1" @@ -2375,7 +2364,7 @@ dependencies = [ "bytes", "futures-util", "http 1.1.0", - "http-body 1.0.1", + "http-body", "pin-project-lite", ] @@ -2410,7 +2399,7 @@ dependencies = [ "headers", "http 1.1.0", "http-body-util", - "hyper 1.6.0", + "hyper", "hyper-util", "lazy_static", "log", @@ -2433,29 +2422,6 @@ version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" -[[package]] -name = "hyper" -version = "0.14.32" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "41dfc780fdec9373c01bae43289ea34c972e40ee3c9f6b3c8801a35f35586ce7" -dependencies = [ - "bytes", - 
"futures-channel", - "futures-core", - "futures-util", - "http 0.2.12", - "http-body 0.4.6", - "httparse", - "httpdate", - "itoa 1.0.11", - "pin-project-lite", - "socket2 0.5.10", - "tokio", - "tower-service", - "tracing", - "want", -] - [[package]] name = "hyper" version = "1.6.0" @@ -2467,7 +2433,7 @@ dependencies = [ "futures-util", "h2", "http 1.1.0", - "http-body 1.0.1", + "http-body", "httparse", "httpdate", "itoa 1.0.11", @@ -2484,7 +2450,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3c93eb611681b207e1fe55d5a71ecf91572ec8a6705cdb6857f7d8d5242cf58" dependencies = [ "http 1.1.0", - "hyper 1.6.0", + "hyper", "hyper-util", "rustls", "rustls-native-certs", @@ -2501,7 +2467,7 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2b90d566bffbce6a75bd8b09a05aa8c2cb1fabb6cb348f8840c9e4c90a0d83b0" dependencies = [ - "hyper 1.6.0", + "hyper", "hyper-util", "pin-project-lite", "tokio", @@ -2520,8 +2486,8 @@ dependencies = [ "futures-core", "futures-util", "http 1.1.0", - "http-body 1.0.1", - "hyper 1.6.0", + "http-body", + "hyper", "ipnet", "libc", "percent-encoding", @@ -2936,10 +2902,10 @@ dependencies = [ "futures-util", "hex", "http 1.1.0", - "http-body 1.0.1", + "http-body", "http-body-util", "httparse", - "hyper 1.6.0", + "hyper", "hyper-rustls", "hyper-util", "indexmap 2.12.1", @@ -2976,7 +2942,7 @@ dependencies = [ "chrono", "crossbeam-queue", "function_name", - "hyper 1.6.0", + "hyper", "libdd-common", "serde", ] @@ -3135,7 +3101,7 @@ dependencies = [ "fastrand", "http-body-util", "httpmock", - "hyper 1.6.0", + "hyper", "hyper-util", "libdd-common", "reqwest", @@ -3281,7 +3247,7 @@ dependencies = [ "function_name", "futures", "http-body-util", - "hyper 1.6.0", + "hyper", "libc", "libdd-common", "libdd-common-ffi", @@ -3319,7 +3285,7 @@ dependencies = [ "hashbrown 0.15.1", "http 1.1.0", "http-body-util", - "hyper 1.6.0", + "hyper", "hyper-util", "libdd-capabilities", 
"libdd-capabilities-impl", @@ -3520,10 +3486,10 @@ dependencies = [ "futures", "getrandom 0.2.15", "http 1.1.0", - "http-body 1.0.1", + "http-body", "http-body-util", "httpmock", - "hyper 1.6.0", + "hyper", "indexmap 2.12.1", "libdd-capabilities", "libdd-capabilities-impl", @@ -4827,9 +4793,9 @@ dependencies = [ "futures-util", "hickory-resolver", "http 1.1.0", - "http-body 1.0.1", + "http-body", "http-body-util", - "hyper 1.6.0", + "hyper", "hyper-rustls", "hyper-util", "js-sys", @@ -6083,9 +6049,9 @@ dependencies = [ "bytes", "h2", "http 1.1.0", - "http-body 1.0.1", + "http-body", "http-body-util", - "hyper 1.6.0", + "hyper", "hyper-timeout", "hyper-util", "percent-encoding", @@ -6154,7 +6120,7 @@ dependencies = [ "bytes", "futures-util", "http 1.1.0", - "http-body 1.0.1", + "http-body", "iri-string", "pin-project-lite", "tower", @@ -6277,7 +6243,6 @@ dependencies = [ "futures-io", "futures-util", "http 0.2.12", - "hyper 0.14.32", "itoa 0.4.8", "log", "percent-encoding", diff --git a/libdd-remote-config/Cargo.toml b/libdd-remote-config/Cargo.toml index 1655877efb..b40753afdb 100644 --- a/libdd-remote-config/Cargo.toml +++ b/libdd-remote-config/Cargo.toml @@ -60,7 +60,7 @@ serde_json = { version = "1.0", features = ["raw_value"] } serde_with = "3" thiserror = "2" hashbrown = "0.15" -tuf = { git = "https://github.com/DataDog/rust-tuf/", tag = "0.3.0-beta10-opw-3" } +tuf = { git = "https://github.com/DataDog/rust-tuf/", tag = "0.3.0-beta10-opw-3", default-features = false } prost = "0.14.1" futures = { version = "0.3", features = ["executor"] } From d263f05862787875a0fdde79e6634527fef78926 Mon Sep 17 00:00:00 2001 From: paullegranddc Date: Tue, 23 Jun 2026 15:08:44 +0200 Subject: [PATCH 13/18] fix:feature gate agntless and fix ubunut tests --- datadog-sidecar-ffi/src/lib.rs | 1 - datadog-sidecar/src/service/sidecar_server.rs | 1 - libdd-remote-config/Cargo.toml | 5 +- libdd-remote-config/src/fetch/agentless.rs | 170 +++++++++++- 
libdd-remote-config/src/fetch/fetcher.rs | 244 +++++------------- libdd-remote-config/src/fetch/mod.rs | 4 + libdd-trace-protobuf/build.rs | 4 + libdd-tracer-flare/Cargo.toml | 1 + libdd-tracer-flare/src/lib.rs | 1 + 9 files changed, 244 insertions(+), 187 deletions(-) diff --git a/datadog-sidecar-ffi/src/lib.rs b/datadog-sidecar-ffi/src/lib.rs index a106f44062..25fa4550bc 100644 --- a/datadog-sidecar-ffi/src/lib.rs +++ b/datadog-sidecar-ffi/src/lib.rs @@ -262,7 +262,6 @@ pub unsafe extern "C" fn ddog_remote_config_reader_for_endpoint<'a>( language: language.to_utf8_lossy().into(), tracer_version: tracer_version.to_utf8_lossy().into(), endpoint: endpoint.clone(), - agentless: None, }, &Arc::new(Target { service: service_name.to_utf8_lossy().into(), diff --git a/datadog-sidecar/src/service/sidecar_server.rs b/datadog-sidecar/src/service/sidecar_server.rs index 5a0741b96d..7a5e095d13 100644 --- a/datadog-sidecar/src/service/sidecar_server.rs +++ b/datadog-sidecar/src/service/sidecar_server.rs @@ -779,7 +779,6 @@ impl SidecarInterface for ConnectionSidecarHandler { language: config.language, tracer_version: config.tracer_version, endpoint: config.endpoint, - agentless: None, }, products: config.remote_config_products, capabilities: config.remote_config_capabilities, diff --git a/libdd-remote-config/Cargo.toml b/libdd-remote-config/Cargo.toml index 9fccdbea8f..a6889574a4 100644 --- a/libdd-remote-config/Cargo.toml +++ b/libdd-remote-config/Cargo.toml @@ -11,6 +11,7 @@ description = "Datadog Remote Configuration client and config parsers" [features] default = ["client", "https"] +agentless = ["tuf", "client", "https"] client = [ "libdd-trace-protobuf", "http-body-util", @@ -37,7 +38,7 @@ test = ["hyper/server", "hyper-util"] anyhow = { version = "1.0" } libdd-common = { path = "../libdd-common", version = "5.0.0", default-features = false } libdd-capabilities = { path = "../libdd-capabilities", version = "2.0.0" } -libdd-capabilities-impl = { version = "2.0.0", path = 
"../libdd-capabilities-impl", features = ["https"]} +libdd-capabilities-impl = { version = "2.0.0", path = "../libdd-capabilities-impl", default-features = false } libdd-trace-protobuf = { path = "../libdd-trace-protobuf", version = "3.0.2", optional = true } hyper = { workspace = true, optional = true, default-features = false } http-body-util = { version = "0.1", optional = true } @@ -60,7 +61,7 @@ serde_json = { version = "1.0", features = ["raw_value"] } serde_with = "3" thiserror = "2" hashbrown = "0.15" -tuf = { git = "https://github.com/DataDog/rust-tuf/", tag = "0.3.0-beta10-opw-3", default-features = false } +tuf = { git = "https://github.com/DataDog/rust-tuf/", tag = "0.3.0-beta10-opw-3", default-features = false, optional = true } prost = "0.14.1" futures = { version = "0.3", features = ["executor"] } # `EnumIter` for external consumers to make struct available to runtime diff --git a/libdd-remote-config/src/fetch/agentless.rs b/libdd-remote-config/src/fetch/agentless.rs index fa9b03ee20..9e9b594309 100644 --- a/libdd-remote-config/src/fetch/agentless.rs +++ b/libdd-remote-config/src/fetch/agentless.rs @@ -1,7 +1,7 @@ // Copyright 2026-Present Datadog, Inc. https://www.datadoghq.com/ // SPDX-License-Identifier: Apache-2.0 -use crate::fetch::{AgentlessTargetCache, FileStorage, NewTarget}; +use crate::fetch::FileStorage; use std::{ fmt, @@ -349,10 +349,10 @@ impl AgentlessFetcher { /// Fetch remote config. Newly-downloaded target content is written directly into /// `cache` rather than buffered inside the agentless fetcher. 
- pub(crate) async fn fetch_config( + pub(crate) async fn fetch_config( &mut self, c: remoteconfig::Client, - cache: &AgentlessTargetCache<'_, S>, + cache: &TargetCache<'_, Storage>, ) -> anyhow::Result { let ( current_config_snapshot_version, @@ -511,7 +511,7 @@ impl AgentlessFetcher { async fn apply( &mut self, response: &remoteconfig::LatestConfigsResponse, - cache: &AgentlessTargetCache<'_, S>, + cache: &TargetCache<'_, S>, ) -> anyhow::Result> { // At a high level, we're populating the "remote" repos with the metadata // that we received from upstream (which does not validate it), and then using the clients' @@ -782,6 +782,168 @@ fn trim_hash_target_path(target_path: &str) -> anyhow::Result { Ok(format!("{parent}/{basename_trimmed}")) } +pub(crate) struct NewTarget { + pub path: String, + pub version: u64, + /// Lowercase hex of the primary (sha256-preferred) hash. + pub primary_hash: String, + /// All `(algorithm_name, hex_hash)` pairs for the target. + pub hashes: Vec<(String, String)>, + pub content: Vec, +} + +pub(crate) use cache::TargetCache; + +mod cache { + use std::sync::Arc; + + use hashbrown::HashMap; + use libdd_common::MutexExt as _; + use libdd_trace_protobuf::remoteconfig::{ConfigState, TargetFileHash, TargetFileMeta}; + use std::sync::Mutex; + use tracing::warn; + + use crate::{ + fetch::{ClientTargetRef, ConfigFetcherState, FileStorage, NewTarget, StoredTargetFile}, + RemoteConfigPath, + }; + + pub(crate) struct TargetCache<'a, Storage: FileStorage> { + files: &'a Mutex, StoredTargetFile>>, + storage: &'a Storage, + expire_unused_files: bool, + } + + impl<'a, S: FileStorage> TargetCache<'a, S> { + pub(crate) fn new(state: &'a ConfigFetcherState, storage: &'a S) -> Self { + TargetCache { + files: &state.target_files_by_path, + storage, + expire_unused_files: state.expire_unused_files, + } + } + + /// Returns the TUF path strings whose `(primary_hash, len)` already matches the cache. 
+ pub(crate) fn is_cached_batch<'b>( + &self, + candidates: impl IntoIterator, + ) -> hashbrown::HashSet<&'b str> { + let files = self.files.lock_or_panic(); + candidates + .into_iter() + .filter_map(|(path, primary_hash, len)| { + let parsed = RemoteConfigPath::try_parse(path).ok()?; + let stored = files.get(&parsed)?; + if stored.hash == primary_hash && stored.meta.length as u64 == len { + Some(path) + } else { + None + } + }) + .collect() + } + + pub(crate) fn store_batch( + &self, + targets: impl IntoIterator, + ) -> anyhow::Result<()> { + let mut files = self.files.lock_or_panic(); + for NewTarget { + path, + version, + primary_hash, + hashes, + content, + } in targets + { + let parsed_path = match RemoteConfigPath::try_parse(&path) { + Ok(p) => p, + Err(e) => { + warn!("store_batch: failed to parse remote config path {path}: {e:?}"); + continue; + } + }; + let parsed_path: Arc = Arc::new(parsed_path.into()); + let length = content.len() as i64; + let new_handle = if let Some(existing) = files.get(&parsed_path) { + self.storage + .update(&existing.handle, version, content) + .map(|()| existing.handle.clone())? + } else { + self.storage.store(version, parsed_path.clone(), content)? + }; + files.insert( + parsed_path.clone(), + StoredTargetFile { + hash: primary_hash, + state: ConfigState { + id: parsed_path.config_id.to_string(), + version, + product: parsed_path.product.to_string(), + apply_state: 2, // Acknowledged + apply_error: String::new(), + }, + meta: TargetFileMeta { + path, + length, + hashes: hashes + .into_iter() + .map(|(algorithm, hash)| TargetFileHash { algorithm, hash }) + .collect(), + }, + handle: new_handle, + expiring: false, + }, + ); + } + Ok(()) + } + + /// Evict every entry whose TUF path is not in `active_paths`. No-op when + /// `expire_unused_files` is `false`. 
+ pub(crate) fn retain_only(&self, active_paths: &hashbrown::HashSet<&str>) { + if !self.expire_unused_files { + return; + } + self.files + .lock_or_panic() + .retain(|_, stored| active_paths.contains(stored.meta.path.as_str())); + } + + /// Collect `Arc` handles for every target in `targets`, verifying + /// stored hash and length match, and marking each entry as non-expiring. + pub(crate) fn collect_handles( + &self, + targets: &[ClientTargetRef], + ) -> anyhow::Result>> { + let mut files = self.files.lock_or_panic(); + let mut handles = Vec::with_capacity(targets.len()); + for target in targets { + let parsed = RemoteConfigPath::try_parse(&target.path).map_err(|e| { + anyhow::format_err!("collect_handles: bad path {}: {e:?}", target.path) + })?; + let stored = files.get_mut(&parsed).ok_or_else(|| { + anyhow::format_err!( + "collect_handles: path {} not found in cache after fetch", + target.path + ) + })?; + if stored.hash != target.primary_hash || stored.meta.length as u64 != target.length + { + anyhow::bail!( + "collect_handles: cache mismatch for {}: stored hash={} len={}, expected hash={} len={}", + target.path, stored.hash, stored.meta.length, + target.primary_hash, target.length + ); + } + stored.expiring = false; + handles.push(stored.handle.clone()); + } + Ok(handles) + } + } +} + // ── Debug helpers: render `raw: Vec` fields as JSON ──────────────────── struct RawJson<'a>(&'a [u8]); diff --git a/libdd-remote-config/src/fetch/fetcher.rs b/libdd-remote-config/src/fetch/fetcher.rs index d78dee9935..eb974f2a37 100644 --- a/libdd-remote-config/src/fetch/fetcher.rs +++ b/libdd-remote-config/src/fetch/fetcher.rs @@ -1,14 +1,13 @@ // Copyright 2021-Present Datadog, Inc. 
https://www.datadoghq.com/ // SPDX-License-Identifier: Apache-2.0 -use super::agentless::{ - make_agentless_configs_endpoint, AgentlessConfig, AgentlessFetcher, ClientTargetRef, - NativeAgentlessFetcher, -}; +#[cfg(feature = "agentless")] +use super::agentless; + use crate::targets::{Root, TargetsList}; use crate::{RemoteConfigCapabilities, RemoteConfigPath, RemoteConfigProduct, Target}; use base64::Engine; -use hashbrown::{HashMap, HashSet as HbHashSet}; +use hashbrown::HashMap; use http::uri::PathAndQuery; use http::StatusCode; use http_body_util::BodyExt; @@ -55,14 +54,22 @@ pub struct ConfigInvariants { pub language: String, pub tracer_version: String, pub endpoint: Endpoint, + #[cfg(feature = "agentless")] /// Enables and configures agentless mode. If some the fetcher will /// talk directly to the RC backend - pub agentless: Option, + pub agentless: Option, } impl ConfigInvariants { pub fn agentless_enabled(&self) -> bool { - self.agentless.is_some() + #[cfg(feature = "agentless")] + { + self.agentless.is_some() + } + #[cfg(not(feature = "agentless"))] + { + false + } } } @@ -167,31 +174,44 @@ impl ConfigFetcherFilesLock<'_, S> { impl ConfigFetcherState { pub fn new(invariants: ConfigInvariants) -> Self { - let (endpoint, agentless) = match &invariants.agentless { - Some(agentless_cfg) => match ( - make_agentless_configs_endpoint(&invariants.endpoint), - agentless_cfg.hostname.is_empty(), - ) { - (Some(e), false) => (e, Some(agentless_cfg.clone())), - (Some(_), true) => { - warn!("rc_config_fetcher: agentless enabled but the hostname is empty. Downgrading to agent endpoint"); - (make_agent_configs_endpoint(&invariants.endpoint), None) - } - (None, _) => { - warn!("rc_config_fetcher: agentless enabled but the endpoint is invalid. 
Downgrading to agent endpoint"); - (make_agent_configs_endpoint(&invariants.endpoint), None) - } - }, - None => (make_agent_configs_endpoint(&invariants.endpoint), None), - }; - ConfigFetcherState { - target_files_by_path: Default::default(), - endpoint, - invariants: ConfigInvariants { - agentless, - ..invariants - }, - expire_unused_files: true, + #[cfg(feature = "agentless")] + { + use agentless::make_agentless_configs_endpoint; + let (endpoint, agentless) = match &invariants.agentless { + Some(agentless_cfg) => match ( + make_agentless_configs_endpoint(&invariants.endpoint), + agentless_cfg.hostname.is_empty(), + ) { + (Some(e), false) => (e, Some(agentless_cfg.clone())), + (Some(_), true) => { + warn!("rc_config_fetcher: agentless enabled but the hostname is empty. Downgrading to agent endpoint"); + (make_agent_configs_endpoint(&invariants.endpoint), None) + } + (None, _) => { + warn!("rc_config_fetcher: agentless enabled but the endpoint is invalid. Downgrading to agent endpoint"); + (make_agent_configs_endpoint(&invariants.endpoint), None) + } + }, + None => (make_agent_configs_endpoint(&invariants.endpoint), None), + }; + ConfigFetcherState { + target_files_by_path: Default::default(), + endpoint, + invariants: ConfigInvariants { + agentless, + ..invariants + }, + expire_unused_files: true, + } + } + #[cfg(not(feature = "agentless"))] + { + ConfigFetcherState { + target_files_by_path: Default::default(), + endpoint: make_agent_configs_endpoint(&invariants.endpoint), + invariants: ConfigInvariants { ..invariants }, + expire_unused_files: true, + } } } @@ -234,154 +254,11 @@ impl ConfigFetcherState { } } -pub(crate) struct NewTarget { - pub path: String, - pub version: u64, - /// Lowercase hex of the primary (sha256-preferred) hash. - pub primary_hash: String, - /// All `(algorithm_name, hex_hash)` pairs for the target. 
- pub hashes: Vec<(String, String)>, - pub content: Vec, -} - -pub(crate) struct AgentlessTargetCache<'a, S: FileStorage> { - files: &'a Mutex, StoredTargetFile>>, - storage: &'a S, - expire_unused_files: bool, -} - -impl<'a, S: FileStorage> AgentlessTargetCache<'a, S> { - pub(crate) fn new(state: &'a ConfigFetcherState, storage: &'a S) -> Self { - AgentlessTargetCache { - files: &state.target_files_by_path, - storage, - expire_unused_files: state.expire_unused_files, - } - } - - /// Returns the TUF path strings whose `(primary_hash, len)` already matches the cache. - pub(crate) fn is_cached_batch<'b>( - &self, - candidates: impl IntoIterator, - ) -> HbHashSet<&'b str> { - let files = self.files.lock_or_panic(); - candidates - .into_iter() - .filter_map(|(path, primary_hash, len)| { - let parsed = RemoteConfigPath::try_parse(path).ok()?; - let stored = files.get(&parsed)?; - if stored.hash == primary_hash && stored.meta.length as u64 == len { - Some(path) - } else { - None - } - }) - .collect() - } - - pub(crate) fn store_batch( - &self, - targets: impl IntoIterator, - ) -> anyhow::Result<()> { - let mut files = self.files.lock_or_panic(); - for NewTarget { - path, - version, - primary_hash, - hashes, - content, - } in targets - { - let parsed_path = match RemoteConfigPath::try_parse(&path) { - Ok(p) => p, - Err(e) => { - warn!("store_batch: failed to parse remote config path {path}: {e:?}"); - continue; - } - }; - let parsed_path: Arc = Arc::new(parsed_path.into()); - let length = content.len() as i64; - let new_handle = if let Some(existing) = files.get(&parsed_path) { - self.storage - .update(&existing.handle, version, content) - .map(|()| existing.handle.clone())? - } else { - self.storage.store(version, parsed_path.clone(), content)? 
- }; - files.insert( - parsed_path.clone(), - StoredTargetFile { - hash: primary_hash, - state: ConfigState { - id: parsed_path.config_id.to_string(), - version, - product: parsed_path.product.to_string(), - apply_state: 2, // Acknowledged - apply_error: String::new(), - }, - meta: TargetFileMeta { - path, - length, - hashes: hashes - .into_iter() - .map(|(algorithm, hash)| TargetFileHash { algorithm, hash }) - .collect(), - }, - handle: new_handle, - expiring: false, - }, - ); - } - Ok(()) - } - - /// Evict every entry whose TUF path is not in `active_paths`. No-op when - /// `expire_unused_files` is `false`. - pub(crate) fn retain_only(&self, active_paths: &HbHashSet<&str>) { - if !self.expire_unused_files { - return; - } - self.files - .lock_or_panic() - .retain(|_, stored| active_paths.contains(stored.meta.path.as_str())); - } - - /// Collect `Arc` handles for every target in `targets`, verifying - /// stored hash and length match, and marking each entry as non-expiring. - pub(crate) fn collect_handles( - &self, - targets: &[ClientTargetRef], - ) -> anyhow::Result>> { - let mut files = self.files.lock_or_panic(); - let mut handles = Vec::with_capacity(targets.len()); - for target in targets { - let parsed = RemoteConfigPath::try_parse(&target.path).map_err(|e| { - anyhow::format_err!("collect_handles: bad path {}: {e:?}", target.path) - })?; - let stored = files.get_mut(&parsed).ok_or_else(|| { - anyhow::format_err!( - "collect_handles: path {} not found in cache after fetch", - target.path - ) - })?; - if stored.hash != target.primary_hash || stored.meta.length as u64 != target.length { - anyhow::bail!( - "collect_handles: cache mismatch for {}: stored hash={} len={}, expected hash={} len={}", - target.path, stored.hash, stored.meta.length, - target.primary_hash, target.length - ); - } - stored.expiring = false; - handles.push(stored.handle.clone()); - } - Ok(handles) - } -} - #[allow(clippy::large_enum_variant)] enum FetcherMode { Agent, - 
Agentless(NativeAgentlessFetcher), + #[cfg(feature = "agentless")] + Agentless(agentless::NativeAgentlessFetcher), } pub struct ConfigFetcher { @@ -435,12 +312,19 @@ impl ConfigFetcher { file_storage: S, state: Arc>, ) -> anyhow::Result { + #[cfg(feature = "agentless")] let mode: FetcherMode = match &state.invariants.agentless { Some(agentless_cfg) => FetcherMode::Agentless( - AgentlessFetcher::new(agentless_cfg.clone(), state.endpoint.clone()).await?, + agentless::NativeAgentlessFetcher::new( + agentless_cfg.clone(), + state.endpoint.clone(), + ) + .await?, ), None => FetcherMode::Agent, }; + #[cfg(not(feature = "agentless"))] + let mode: FetcherMode = FetcherMode::Agent; Ok(ConfigFetcher { file_storage, @@ -770,13 +654,14 @@ impl ConfigFetcher { ); match &mut self.mode { FetcherMode::Agent => self.fetch_agent(config_req, target, client_state).await, + #[cfg(feature = "agentless")] FetcherMode::Agentless(agentless_fetcher) => { #[allow(clippy::expect_used)] let client = config_req.client.expect( "RC ConfigFetcher::build_config_request should always return a `Some` client", ); - let cache = AgentlessTargetCache::new(&self.state, &self.file_storage); + let cache = agentless::TargetCache::new(&self.state, &self.file_storage); let res = match agentless_fetcher.fetch_config(client, &cache).await { Ok(r) => r, Err(e) => { @@ -994,6 +879,7 @@ pub mod tests { language: "php".to_string(), tracer_version: "1.2.3".to_string(), endpoint: server.endpoint.clone(), + #[cfg(feature = "agentless")] agentless: None, }; let product_capabilities = ConfigProductCapabilities::new( diff --git a/libdd-remote-config/src/fetch/mod.rs b/libdd-remote-config/src/fetch/mod.rs index 075ace8beb..43a2ea0603 100644 --- a/libdd-remote-config/src/fetch/mod.rs +++ b/libdd-remote-config/src/fetch/mod.rs @@ -1,7 +1,9 @@ // Copyright 2021-Present Datadog, Inc. 
https://www.datadoghq.com/ // SPDX-License-Identifier: Apache-2.0 +#[cfg(feature = "agentless")] mod agentless; + mod fetcher; mod multitarget; mod shared; @@ -10,7 +12,9 @@ mod single; #[cfg(any(test, feature = "test"))] pub mod test_server; +#[cfg(feature = "agentless")] pub use agentless::*; + #[allow(clippy::useless_attribute)] // different clippy versions are differently picky #[cfg_attr(test, allow(ambiguous_glob_reexports))] // ignore mod tests re-export pub use fetcher::*; diff --git a/libdd-trace-protobuf/build.rs b/libdd-trace-protobuf/build.rs index a8a10fbee1..e0e716d787 100644 --- a/libdd-trace-protobuf/build.rs +++ b/libdd-trace-protobuf/build.rs @@ -307,6 +307,10 @@ fn generate_protobuf() { "ClientGetConfigsResponse.client_configs", "#[serde(default)]", ); + config.field_attribute( + "ClientGetConfigsResponse.config_status", + "#[serde(default)]", + ); config.type_attribute("ClientUpdater", "#[derive(Deserialize, Serialize)]"); config.type_attribute("PackageState", "#[derive(Deserialize, Serialize)]"); diff --git a/libdd-tracer-flare/Cargo.toml b/libdd-tracer-flare/Cargo.toml index 61044913b7..947f5e12da 100644 --- a/libdd-tracer-flare/Cargo.toml +++ b/libdd-tracer-flare/Cargo.toml @@ -35,4 +35,5 @@ tokio = { version = "1.36.0", features = ["time", "macros", "rt"] } [features] default = ["listener"] +agentless = ["libdd-remote-config/agentless"] listener = ["libdd-remote-config/client"] diff --git a/libdd-tracer-flare/src/lib.rs b/libdd-tracer-flare/src/lib.rs index af3a489e06..9d171e14b0 100644 --- a/libdd-tracer-flare/src/lib.rs +++ b/libdd-tracer-flare/src/lib.rs @@ -181,6 +181,7 @@ impl TracerFlareManager { language, tracer_version, endpoint: remote_config_endpoint, + #[cfg(feature = "agentless")] agentless: None, }, products: vec![ From 98e2d489d54f610ed17b37a054570f7505bd44e4 Mon Sep 17 00:00:00 2001 From: paullegranddc Date: Tue, 23 Jun 2026 15:58:31 +0200 Subject: [PATCH 14/18] fix: feature gate more things --- datadog-sidecar-ffi/src/lib.rs | 
1 + datadog-sidecar/src/service/sidecar_server.rs | 1 + .../examples/remote_config_fetch.rs | 42 +++++++++++++------ libdd-remote-config/src/fetch/fetcher.rs | 3 +- libdd-trace-protobuf/src/remoteconfig.rs | 1 + libdd-tracer-flare/src/lib.rs | 1 - 6 files changed, 34 insertions(+), 15 deletions(-) diff --git a/datadog-sidecar-ffi/src/lib.rs b/datadog-sidecar-ffi/src/lib.rs index 25fa4550bc..a106f44062 100644 --- a/datadog-sidecar-ffi/src/lib.rs +++ b/datadog-sidecar-ffi/src/lib.rs @@ -262,6 +262,7 @@ pub unsafe extern "C" fn ddog_remote_config_reader_for_endpoint<'a>( language: language.to_utf8_lossy().into(), tracer_version: tracer_version.to_utf8_lossy().into(), endpoint: endpoint.clone(), + agentless: None, }, &Arc::new(Target { service: service_name.to_utf8_lossy().into(), diff --git a/datadog-sidecar/src/service/sidecar_server.rs b/datadog-sidecar/src/service/sidecar_server.rs index 7a5e095d13..5a0741b96d 100644 --- a/datadog-sidecar/src/service/sidecar_server.rs +++ b/datadog-sidecar/src/service/sidecar_server.rs @@ -779,6 +779,7 @@ impl SidecarInterface for ConnectionSidecarHandler { language: config.language, tracer_version: config.tracer_version, endpoint: config.endpoint, + agentless: None, }, products: config.remote_config_products, capabilities: config.remote_config_capabilities, diff --git a/libdd-remote-config/examples/remote_config_fetch.rs b/libdd-remote-config/examples/remote_config_fetch.rs index cb2e23b4c6..d36ddd35d7 100644 --- a/libdd-remote-config/examples/remote_config_fetch.rs +++ b/libdd-remote-config/examples/remote_config_fetch.rs @@ -3,9 +3,7 @@ use libdd_common::tag::Tag; use libdd_common::Endpoint; -use libdd_remote_config::fetch::{ - AgentlessConfig, ConfigInvariants, ConfigOptions, SingleChangesFetcher, -}; +use libdd_remote_config::fetch::{ConfigInvariants, ConfigOptions, SingleChangesFetcher}; use libdd_remote_config::file_change_tracker::{Change, FilePath}; use libdd_remote_config::file_storage::ParsedFileStorage; use 
libdd_remote_config::RemoteConfigProduct::ApmTracing; @@ -37,16 +35,34 @@ async fn main() { let (endpoint, agentless) = match (dd_api_key, dd_site) { (Some(api_key), Some(site)) => { - println!("DD_API_KEY and DD_SITE are set — enabling agentless mode (site: {site})"); - let endpoint = Endpoint::agentless(&site, api_key) - .expect("Failed to build agentless endpoint from DD_SITE"); - ( - endpoint, - Some(AgentlessConfig { - hostname, - ..Default::default() - }), - ) + #[cfg(feature = "agentless")] + { + println!("DD_API_KEY and DD_SITE are set — enabling agentless mode (site: {site})"); + let endpoint = Endpoint::agentless(&site, api_key) + .expect("Failed to build agentless endpoint from DD_SITE"); + ( + endpoint, + Some(AgentlessConfig { + hostname, + ..Default::default() + }), + ) + } + #[cfg(not(feature = "agentless"))] + { + let _ = (api_key, site); + println!("DD_API_KEY and DD_SITE are set but agentless feature not enabled"); + ( + Endpoint { + url: http::Uri::from_static("http://localhost:8126"), + api_key: None, + timeout_ms: 5000, // custom timeout, defaults to 3 seconds + test_token: None, + ..Default::default() + }, + None, + ) + } } _ => { println!("DD_API_KEY / DD_SITE not set — connecting to local agent"); diff --git a/libdd-remote-config/src/fetch/fetcher.rs b/libdd-remote-config/src/fetch/fetcher.rs index eb974f2a37..7ed1b4a6d9 100644 --- a/libdd-remote-config/src/fetch/fetcher.rs +++ b/libdd-remote-config/src/fetch/fetcher.rs @@ -58,6 +58,8 @@ pub struct ConfigInvariants { /// Enables and configures agentless mode. 
If some the fetcher will /// talk directly to the RC backend pub agentless: Option, + #[cfg(not(feature = "agentless"))] + pub agentless: Option, } impl ConfigInvariants { @@ -879,7 +881,6 @@ pub mod tests { language: "php".to_string(), tracer_version: "1.2.3".to_string(), endpoint: server.endpoint.clone(), - #[cfg(feature = "agentless")] agentless: None, }; let product_capabilities = ConfigProductCapabilities::new( diff --git a/libdd-trace-protobuf/src/remoteconfig.rs b/libdd-trace-protobuf/src/remoteconfig.rs index 894da87077..469059f024 100644 --- a/libdd-trace-protobuf/src/remoteconfig.rs +++ b/libdd-trace-protobuf/src/remoteconfig.rs @@ -376,6 +376,7 @@ pub struct ClientGetConfigsResponse { #[serde(default)] pub client_configs: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, #[prost(enumeration = "ConfigStatus", tag = "5")] + #[serde(default)] pub config_status: i32, } #[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] diff --git a/libdd-tracer-flare/src/lib.rs b/libdd-tracer-flare/src/lib.rs index 9d171e14b0..af3a489e06 100644 --- a/libdd-tracer-flare/src/lib.rs +++ b/libdd-tracer-flare/src/lib.rs @@ -181,7 +181,6 @@ impl TracerFlareManager { language, tracer_version, endpoint: remote_config_endpoint, - #[cfg(feature = "agentless")] agentless: None, }, products: vec![ From f71677404b39a5e402b211c575057df03c7a2358 Mon Sep 17 00:00:00 2001 From: paullegranddc Date: Tue, 23 Jun 2026 16:10:52 +0200 Subject: [PATCH 15/18] fix: feature gate the right way --- libdd-remote-config/src/fetch/fetcher.rs | 54 +++++++++--------------- 1 file changed, 20 insertions(+), 34 deletions(-) diff --git a/libdd-remote-config/src/fetch/fetcher.rs b/libdd-remote-config/src/fetch/fetcher.rs index 7ed1b4a6d9..b1f2d01619 100644 --- a/libdd-remote-config/src/fetch/fetcher.rs +++ b/libdd-remote-config/src/fetch/fetcher.rs @@ -64,14 +64,7 @@ pub struct ConfigInvariants { impl ConfigInvariants { pub fn agentless_enabled(&self) -> bool { - #[cfg(feature = "agentless")] - { - 
self.agentless.is_some() - } - #[cfg(not(feature = "agentless"))] - { - false - } + self.agentless.is_some() } } @@ -176,12 +169,11 @@ impl ConfigFetcherFilesLock<'_, S> { impl ConfigFetcherState { pub fn new(invariants: ConfigInvariants) -> Self { - #[cfg(feature = "agentless")] - { - use agentless::make_agentless_configs_endpoint; - let (endpoint, agentless) = match &invariants.agentless { - Some(agentless_cfg) => match ( - make_agentless_configs_endpoint(&invariants.endpoint), + let (endpoint, agentless) = match &invariants.agentless { + Some(agentless_cfg) => { + #[cfg(feature = "agentless")] + match ( + agentless::make_agentless_configs_endpoint(&invariants.endpoint), agentless_cfg.hostname.is_empty(), ) { (Some(e), false) => (e, Some(agentless_cfg.clone())), @@ -193,27 +185,21 @@ impl ConfigFetcherState { warn!("rc_config_fetcher: agentless enabled but the endpoint is invalid. Downgrading to agent endpoint"); (make_agent_configs_endpoint(&invariants.endpoint), None) } - }, - None => (make_agent_configs_endpoint(&invariants.endpoint), None), - }; - ConfigFetcherState { - target_files_by_path: Default::default(), - endpoint, - invariants: ConfigInvariants { - agentless, - ..invariants - }, - expire_unused_files: true, - } - } - #[cfg(not(feature = "agentless"))] - { - ConfigFetcherState { - target_files_by_path: Default::default(), - endpoint: make_agent_configs_endpoint(&invariants.endpoint), - invariants: ConfigInvariants { ..invariants }, - expire_unused_files: true, + } + + #[cfg(not(feature = "agentless"))] + match *agentless_cfg {} } + None => (make_agent_configs_endpoint(&invariants.endpoint), None), + }; + ConfigFetcherState { + target_files_by_path: Default::default(), + endpoint, + invariants: ConfigInvariants { + agentless, + ..invariants + }, + expire_unused_files: true, } } From 54b4e97bad396cbdaa17fa64d8732e39d5590d15 Mon Sep 17 00:00:00 2001 From: paullegranddc Date: Tue, 23 Jun 2026 17:38:35 +0200 Subject: [PATCH 16/18] fix: path import --- 
libdd-remote-config/examples/remote_config_fetch.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/libdd-remote-config/examples/remote_config_fetch.rs b/libdd-remote-config/examples/remote_config_fetch.rs index d36ddd35d7..08e6548359 100644 --- a/libdd-remote-config/examples/remote_config_fetch.rs +++ b/libdd-remote-config/examples/remote_config_fetch.rs @@ -37,6 +37,7 @@ async fn main() { (Some(api_key), Some(site)) => { #[cfg(feature = "agentless")] { + use libdd_remote_config::fetch::AgentlessConfig; println!("DD_API_KEY and DD_SITE are set — enabling agentless mode (site: {site})"); let endpoint = Endpoint::agentless(&site, api_key) .expect("Failed to build agentless endpoint from DD_SITE"); From 66a32a13483198e147940161e3b6e1ded3bb1c5e Mon Sep 17 00:00:00 2001 From: paullegranddc Date: Thu, 25 Jun 2026 16:27:10 +0200 Subject: [PATCH 17/18] fix: add timeout --- libdd-remote-config/src/fetch/agentless.rs | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/libdd-remote-config/src/fetch/agentless.rs b/libdd-remote-config/src/fetch/agentless.rs index 9e9b594309..e7a2837c55 100644 --- a/libdd-remote-config/src/fetch/agentless.rs +++ b/libdd-remote-config/src/fetch/agentless.rs @@ -505,7 +505,16 @@ impl AgentlessFetcher { .uri(url_with_path(self.endpoint.url.clone(), path)?) .method(method) .body(body)?; - Ok(self.http.request(req).await?) 
+ let timeout = Duration::from_millis(self.endpoint.timeout_ms); + let response = tokio::time::timeout(timeout, self.http.request(req)) + .await + .map_err(|_| { + format_err!( + "Remote config request timed out after {}ms", + self.endpoint.timeout_ms, + ) + })??; + Ok(response) } async fn apply( From 368ddab344a2d672e3dde555f9cc646b2cb38545 Mon Sep 17 00:00:00 2001 From: paullegranddc Date: Thu, 25 Jun 2026 19:41:14 +0200 Subject: [PATCH 18/18] feat: macro bench example --- .../examples/remote_config_agentless_bench.rs | 305 ++++++++++++++++++ 1 file changed, 305 insertions(+) create mode 100644 libdd-remote-config/examples/remote_config_agentless_bench.rs diff --git a/libdd-remote-config/examples/remote_config_agentless_bench.rs b/libdd-remote-config/examples/remote_config_agentless_bench.rs new file mode 100644 index 0000000000..7451bba59a --- /dev/null +++ b/libdd-remote-config/examples/remote_config_agentless_bench.rs @@ -0,0 +1,305 @@ +// Copyright 2021-Present Datadog, Inc. https://www.datadoghq.com/ +// SPDX-License-Identifier: Apache-2.0 + +//! Benchmark for agentless Remote Config fetching. +//! +//! Measures, for three distinct phases, three quantities each: +//! +//! 1. Client init: `SingleChangesFetcher::new`, which performs the TUF root bootstrap when +//! running in agentless mode. +//! 2. Initial fetch: the first call to `fetch_changes` on a freshly built client. +//! 3. Refetch: the second call to `fetch_changes`, with the client already warm. +//! +//! For each phase we report: +//! +//! * Wall-clock time: total elapsed time, end-to-end. +//! * Poll/CPU time: sum of time spent inside `Future::poll` calls on the current thread. This +//! is an approximation of the active computation time (parsing, TUF verification, request +//! building, response decoding, ...). +//! * Await/IO time: `wall - poll`, the time the future spent suspended waiting for IO (DNS, +//! TCP, TLS handshake, server response, ...). +//! +//! 
The instrumentation works by polling the benchmarked future manually on a `current_thread` +//! tokio runtime and accumulating the duration of each `poll()` invocation. No additional +//! dependencies are required. +//! +//! Usage: +//! DD_API_KEY=... DD_SITE=datadoghq.com \ +//! cargo run --release --example remote_config_agentless_bench \ +//! -p libdd-remote-config --features agentless +//! +//! Without `DD_API_KEY` / `DD_SITE`, this example exits — agentless mode is required. + +use libdd_common::tag::Tag; +use libdd_common::Endpoint; +use libdd_remote_config::fetch::{ConfigInvariants, ConfigOptions, SingleChangesFetcher}; +use libdd_remote_config::file_storage::ParsedFileStorage; +use libdd_remote_config::RemoteConfigProduct::ApmTracing; +use libdd_remote_config::Target; +use std::future::Future; +use std::pin::Pin; +use std::process::Command; +use std::task::{Context, Poll}; +use std::time::{Duration, Instant}; + +#[cfg(feature = "agentless")] +use libdd_remote_config::fetch::AgentlessConfig; + +const RUNTIME_ID: &str = "23e76587-5ae1-410c-a05c-137cae600a10"; +const SERVICE: &str = "bench-service"; +const ENV: &str = "bench-env"; +const VERSION: &str = "1.2.3"; + +fn get_hostname() -> String { + Command::new("hostname") + .output() + .ok() + .and_then(|o| String::from_utf8(o.stdout).ok()) + .map(|s| s.trim().to_string()) + .unwrap_or_else(|| "unknown".to_string()) +} + +/// A future wrapper that accumulates the time spent inside each `poll()` call into +/// `*poll_time`. The wall time is the time elapsed between calling `Instrumented::new` (or just +/// before `.await`) and the future completing. 
+struct Instrumented<'a, F> { + inner: F, + poll_time: &'a mut Duration, +} + +impl<'a, F: Future + Unpin> Instrumented<'a, F> { + fn new(inner: F, poll_time: &'a mut Duration) -> Self { + *poll_time = Duration::ZERO; + Self { inner, poll_time } + } +} + +impl Future for Instrumented<'_, F> { + type Output = F::Output; + + fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { + let start = Instant::now(); + let res = Pin::new(&mut self.inner).poll(cx); + let elapsed = start.elapsed(); + *self.poll_time += elapsed; + res + } +} + +#[derive(Default, Clone, Copy)] +struct Sample { + wall: Duration, + poll: Duration, +} + +impl Sample { + fn io(&self) -> Duration { + self.wall.saturating_sub(self.poll) + } +} + +fn print_row(label: &str, s: Sample) { + let wall_ms = s.wall.as_secs_f64() * 1000.0; + let poll_ms = s.poll.as_secs_f64() * 1000.0; + let io_ms = s.io().as_secs_f64() * 1000.0; + let poll_pct = if s.wall.as_nanos() > 0 { + 100.0 * s.poll.as_secs_f64() / s.wall.as_secs_f64() + } else { + 0.0 + }; + println!( + " {label:<18} wall = {wall_ms:>9.3} ms poll/CPU = {poll_ms:>9.3} ms \ + await/IO = {io_ms:>9.3} ms ({poll_pct:>5.1}% poll)" + ); +} + +async fn run_one_iteration( + iter: usize, + endpoint: Endpoint, + #[cfg(feature = "agentless")] agentless: Option, + #[cfg(not(feature = "agentless"))] agentless: Option, +) -> anyhow::Result<(Sample, Sample, Sample)> { + let target = Target { + service: SERVICE.to_string(), + env: ENV.to_string(), + app_version: VERSION.to_string(), + tags: vec![Tag::new("bench", "true").unwrap()], + process_tags: vec![], + }; + + let options = ConfigOptions { + invariants: ConfigInvariants { + language: "benchlang".to_string(), + tracer_version: "0.0.1".to_string(), + endpoint, + agentless, + }, + products: vec![ApmTracing], + capabilities: vec![], + }; + + // --- 1. 
Client init: TUF root bootstrap in agentless mode --- + let mut init_poll = Duration::ZERO; + let init_wall_start = Instant::now(); + let fetcher_fut = Box::pin(SingleChangesFetcher::new( + ParsedFileStorage::default(), + target, + RUNTIME_ID.to_string(), + options, + )); + let mut fetcher = Instrumented::new(fetcher_fut, &mut init_poll).await?; + let init = Sample { + wall: init_wall_start.elapsed(), + poll: init_poll, + }; + + // --- 2. Initial fetch: first call to fetch_changes on a fresh client --- + let mut first_poll = Duration::ZERO; + let first_wall_start = Instant::now(); + // R is inferred from `ParsedFileStorage`'s `UpdatedFiles` impl. + let first_fut = Box::pin(fetcher.fetch_changes()); + let first_changes: Vec<_> = Instrumented::new(first_fut, &mut first_poll).await?; + let first = Sample { + wall: first_wall_start.elapsed(), + poll: first_poll, + }; + + // --- 3. Refetch: second call to fetch_changes (warm client) --- + let mut refetch_poll = Duration::ZERO; + let refetch_wall_start = Instant::now(); + let refetch_fut = Box::pin(fetcher.fetch_changes()); + let refetch_changes: Vec<_> = Instrumented::new(refetch_fut, &mut refetch_poll).await?; + let refetch = Sample { + wall: refetch_wall_start.elapsed(), + poll: refetch_poll, + }; + + println!( + "Iteration #{iter}: initial fetch returned {} change(s), refetch returned {} change(s)", + first_changes.len(), + refetch_changes.len(), + ); + + Ok((init, first, refetch)) +} + +#[tokio::main(flavor = "current_thread")] +async fn main() -> anyhow::Result<()> { + let hostname = get_hostname(); + println!("Hostname: {hostname}"); + + let dd_api_key = std::env::var("DD_API_KEY").ok(); + let dd_site = std::env::var("DD_SITE").ok(); + let iterations: usize = std::env::var("BENCH_ITERATIONS") + .ok() + .and_then(|v| v.parse().ok()) + .unwrap_or(5); + + let (endpoint, agentless): (Endpoint, Option<_>) = match (dd_api_key, dd_site) { + #[cfg(feature = "agentless")] + (Some(api_key), Some(site)) => { + 
println!("Agentless mode enabled (site: {site})"); + let endpoint = Endpoint::agentless(&site, api_key) + .expect("Failed to build agentless endpoint from DD_SITE"); + ( + endpoint, + Some(AgentlessConfig { + hostname: hostname.clone(), + ..Default::default() + }), + ) + } + #[cfg(not(feature = "agentless"))] + (Some(_), Some(_)) => { + eprintln!( + "This benchmark requires the `agentless` feature. \ + Re-run with: --features agentless" + ); + std::process::exit(1); + } + _ => { + eprintln!( + "DD_API_KEY and DD_SITE are required for the agentless benchmark.\n\ + Example:\n DD_API_KEY=... DD_SITE=datadoghq.com \\\n \ + cargo run --release --example remote_config_agentless_bench \\\n \ + -p libdd-remote-config --features agentless" + ); + std::process::exit(1); + } + }; + + println!("Running {iterations} iteration(s)\n"); + + let mut inits = Vec::with_capacity(iterations); + let mut firsts = Vec::with_capacity(iterations); + let mut refetches = Vec::with_capacity(iterations); + + for i in 0..iterations { + match run_one_iteration( + i, + endpoint.clone(), + agentless.clone(), + ) + .await + { + Ok((init, first, refetch)) => { + print_row(" client init", init); + print_row(" initial fetch", first); + print_row(" refetch", refetch); + println!(); + inits.push(init); + firsts.push(first); + refetches.push(refetch); + } + Err(e) => { + eprintln!("Iteration {i} failed: {e:?}"); + } + } + } + + if inits.is_empty() { + anyhow::bail!("All iterations failed"); + } + + println!( + "=== Summary over {} successful iteration(s) ===", + inits.len() + ); + print_summary("client init", &inits); + print_summary("initial fetch", &firsts); + print_summary("refetch", &refetches); + + Ok(()) +} + +fn print_summary(label: &str, samples: &[Sample]) { + let n = samples.len() as u32; + let sum_wall: Duration = samples.iter().map(|s| s.wall).sum(); + let sum_poll: Duration = samples.iter().map(|s| s.poll).sum(); + let avg = Sample { + wall: sum_wall / n, + poll: sum_poll / n, + }; + + let 
min_wall = samples.iter().map(|s| s.wall).min().unwrap(); + let max_wall = samples.iter().map(|s| s.wall).max().unwrap(); + let min_poll = samples.iter().map(|s| s.poll).min().unwrap(); + let max_poll = samples.iter().map(|s| s.poll).max().unwrap(); + + println!("{label}:"); + print_row("avg", avg); + print_row( + "min", + Sample { + wall: min_wall, + poll: min_poll, + }, + ); + print_row( + "max", + Sample { + wall: max_wall, + poll: max_poll, + }, + ); +}