From acbdbb864a6c37a06db43f8a954bb9cc62926250 Mon Sep 17 00:00:00 2001 From: O957 <127630341+O957@users.noreply.github.com> Date: Mon, 16 Mar 2026 13:45:33 -0400 Subject: [PATCH 01/10] remove included_locations references; update codebase with excluded_locations; add helper function --- NAMESPACE | 4 +- R/constants.R | 77 +---------- R/location_exclusions.R | 129 +++++++++++++++++++ R/summarize_ref_date_forecasts.R | 84 ++---------- R/update_hub_target_data.R | 36 +++--- R/write_ref_date_summary.R | 15 ++- R/write_viz_target_data.R | 20 ++- R/write_webtext.R | 53 +++++--- actions/generate-viz-data/action.yaml | 23 +++- actions/update-target-data/action.yaml | 13 ++ man/apply_location_exclusions.Rd | 35 +++++ man/check_hospital_reporting_latency.Rd | 9 +- man/default_excluded_locations.Rd | 19 +++ man/excluded_locations.Rd | 19 --- man/generate_webtext_block.Rd | 9 +- man/get_hubverse_format_nhsn_data.Rd | 8 +- man/get_hubverse_format_nssp_data.Rd | 8 +- man/included_locations.Rd | 20 --- man/summarize_ref_date_forecasts.Rd | 5 +- man/update_hub_target_data.Rd | 8 +- man/write_ref_date_summary.Rd | 5 +- man/write_ref_date_summary_all.Rd | 5 +- man/write_ref_date_summary_ens.Rd | 5 +- man/write_viz_target_data.Rd | 10 +- man/write_webtext.Rd | 9 +- tests/testthat/test_update_hub_target_data.R | 9 +- 26 files changed, 349 insertions(+), 288 deletions(-) create mode 100644 R/location_exclusions.R create mode 100644 man/apply_location_exclusions.Rd create mode 100644 man/default_excluded_locations.Rd delete mode 100644 man/excluded_locations.Rd delete mode 100644 man/included_locations.Rd diff --git a/NAMESPACE b/NAMESPACE index 7173711..df77858 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,9 +1,10 @@ # Generated by roxygen2: do not edit by hand +export(apply_location_exclusions) export(assert_data_up_to_date) export(check_authorized_users) export(check_changes_for_autoapproval) -export(excluded_locations) +export(default_excluded_locations) export(flatten_task) 
export(flatten_task_list) export(generate_hub_baseline) @@ -21,7 +22,6 @@ export(get_nhsn_col_name) export(get_nssp_col_name) export(get_target_data_type) export(get_target_label) -export(included_locations) export(is_ed_target) export(is_hosp_target) export(summarize_ref_date_forecasts) diff --git a/R/constants.R b/R/constants.R index 60522ae..af20b92 100644 --- a/R/constants.R +++ b/R/constants.R @@ -1,74 +1,9 @@ -#' Two digits FIPS codes for locations excluded from Hubs' -#' target data. +#' Default US state/territory abbreviations excluded +#' from hub data. #' -#' Excludes Virgin Islands (78), Northern Mariana -#' Islands (69), Guam (66), American Samoa (60), and Minor -#' Outlying Islands (74). +#' Excludes Virgin Islands (VI), Guam (GU), +#' American Samoa (AS), Northern Mariana Islands (MP), +#' and Minor Outlying Islands (UM). #' #' @export -excluded_locations <- c("78", "74", "69", "66", "60") - -#' Two digits FIPS codes for locations included in Hubs' -#' target data. -#' -#' Includes 50 states, US national, DC, and Puerto Rico -#' (PR). Excludes Virgin Islands (78), Northern Mariana -#' Islands (69), Guam (66), American Samoa (60), and Minor -#' Outlying Islands (74). -#' -#' @export -included_locations <- c( - "01", - "02", - "04", - "05", - "06", - "08", - "09", - "10", - "11", - "12", - "13", - "15", - "16", - "17", - "18", - "19", - "20", - "21", - "22", - "23", - "24", - "25", - "26", - "27", - "28", - "29", - "30", - "31", - "32", - "33", - "34", - "35", - "36", - "37", - "38", - "39", - "40", - "41", - "42", - "44", - "45", - "46", - "47", - "48", - "49", - "50", - "51", - "53", - "54", - "55", - "56", - "72", - "US" -) +default_excluded_locations <- c("VI", "GU", "AS", "MP", "UM") diff --git a/R/location_exclusions.R b/R/location_exclusions.R new file mode 100644 index 0000000..d97dd92 --- /dev/null +++ b/R/location_exclusions.R @@ -0,0 +1,129 @@ +#' Normalize excluded locations to a named list. 
+#' +#' Converts a character vector or named list of excluded +#' locations into a consistent named list format. +#' +#' @param excluded_locations NULL, character vector, or +#' named list of character vectors. +#' +#' @return Named list of character vectors. +#' @noRd +normalize_excluded_locations <- function(excluded_locations) { + if (is.null(excluded_locations)) { + return(list()) + } + if (is.character(excluded_locations)) { + return(list("all" = excluded_locations)) + } + if (is.list(excluded_locations)) { + return(excluded_locations) + } + cli::cli_abort( + "{.arg excluded_locations} must be NULL, a character vector, or a named list." + ) +} + + +#' Build a target-location exclusion data frame. +#' +#' Constructs a tibble of target/location pairs to +#' exclude. Entries keyed by "all" are expanded into +#' one row per supported target. Errors if any named +#' targets in the exclusion list are not in +#' `supported_targets`. +#' +#' @param excluded_locations Named list as returned by +#' `normalize_excluded_locations()`. +#' @param supported_targets character vector of targets +#' the hub accepts, as returned by +#' `get_hub_supported_targets()`. +#' +#' @return A tibble with columns "target" and "location" +#' (hub codes). +#' @noRd +build_exclusion_df <- function(excluded_locations, supported_targets) { + named_targets <- setdiff(names(excluded_locations), "all") + invalid_targets <- setdiff(named_targets, supported_targets) + if (length(invalid_targets) > 0) { + cli::cli_abort( + "{.arg excluded_locations} contains unknown target{?s}: {.val {invalid_targets}}." 
+ ) + } + + merged <- purrr::map( + purrr::set_names(supported_targets), + \(tgt) unique(c(excluded_locations[["all"]], excluded_locations[[tgt]])) + ) + + tibble::enframe(merged, name = "target", value = "location") |> + tidyr::unnest(cols = "location") |> + dplyr::mutate( + location = forecasttools::us_location_recode( + .data$location, + "abbr", + "hub" + ) + ) +} + + +#' Apply location exclusions to a data frame. +#' +#' Removes rows from a data frame based on excluded +#' location abbreviations. Target-specific +#' exclusions, when `supported_targets` is provided and +#' the data contains a "target" column, are +#' supported; otherwise, exclusions are applied +#' across all rows. +#' +#' @param data Data frame with a "location" column +#' containing hub-format location codes and, optionally, +#' a "target" column. +#' @param excluded_locations NULL, character vector, or +#' named list of US state/territory abbreviations to +#' exclude. If a character vector, locations are excluded +#' across all rows. If a named list, names should be +#' target names (or "all" for global exclusions) mapping +#' to character vectors of abbreviations. If NULL, no rows are removed. +#' @param supported_targets Character vector of valid +#' target names. Required for target-specific exclusions +#' when `excluded_locations` is a named list with +#' non-"all" keys. Default: NULL. +#' +#' @return Data frame with excluded rows removed. 
+#' @export +apply_location_exclusions <- function( + data, + excluded_locations, + supported_targets = NULL +) { + excluded_locations <- normalize_excluded_locations(excluded_locations) + if (length(excluded_locations) == 0) { + return(data) + } + + if (!is.null(supported_targets) && "target" %in% names(data)) { + exclusion_df <- build_exclusion_df( + excluded_locations, + supported_targets + ) + data <- dplyr::anti_join( + data, + exclusion_df, + by = c("target", "location") + ) + } else { + all_excluded <- unique(unlist(excluded_locations)) + excluded_codes <- forecasttools::us_location_recode( + all_excluded, + "abbr", + "hub" + ) + data <- dplyr::filter( + data, + !(.data$location %in% excluded_codes) + ) + } + + return(data) +} diff --git a/R/summarize_ref_date_forecasts.R b/R/summarize_ref_date_forecasts.R index b6fafb5..440c622 100644 --- a/R/summarize_ref_date_forecasts.R +++ b/R/summarize_ref_date_forecasts.R @@ -1,72 +1,3 @@ -#' Normalize excluded locations to a named list. -#' -#' Converts a character vector or named list of excluded -#' locations into a consistent named list format. -#' -#' @param excluded_locations NULL, character vector, or -#' named list of character vector. -#' -#' @return Named list of character vectors. -#' @noRd -normalize_excluded_locations <- function(excluded_locations) { - if (is.null(excluded_locations)) { - return(list()) - } - if (is.character(excluded_locations)) { - return(list("all" = excluded_locations)) - } - if (is.list(excluded_locations)) { - return(excluded_locations) - } - cli::cli_abort( - "{.arg excluded_locations} must be NULL, a character vector, or a named list." - ) -} - - -#' Build a target-location exclusion data frame. -#' -#' Constructs a tibble of target/location pairs to -#' exclude. Entries keyed by "all" are expanded into -#' one row per supported target. Errors if any named -#' targets in the exclusion list are not in -#' `supported_targets`. 
-#' -#' @param excluded_locations Named list as returned by -#' `normalize_excluded_locations()`. -#' @param supported_targets character vector of targets -#' the hub accepts, as returned by -#' `get_hub_supported_targets()`. -#' -#' @return A tibble with columns "target" and "location" -#' (hub codes). -#' @noRd -build_exclusion_df <- function(excluded_locations, supported_targets) { - named_targets <- setdiff(names(excluded_locations), "all") - invalid_targets <- setdiff(named_targets, supported_targets) - if (length(invalid_targets) > 0) { - cli::cli_abort( - "{.arg excluded_locations} contains unknown target{?s}: {.val {invalid_targets}}." - ) - } - - merged <- purrr::map( - purrr::set_names(supported_targets), - \(tgt) unique(c(excluded_locations[["all"]], excluded_locations[[tgt]])) - ) - - tibble::enframe(merged, name = "target", value = "location") |> - tidyr::unnest(cols = "location") |> - dplyr::mutate( - location = forecasttools::us_location_recode( - .data$location, - "abbr", - "hub" - ) - ) -} - - #' Summarize forecast hub data for a specific reference date. #' #' This function generates a tibble of forecast data @@ -90,7 +21,8 @@ build_exclusion_df <- function(excluded_locations, supported_targets) { #' targets. If a named list, names should be target names #' (or "all" for global exclusions) mapping to character #' vectors of abbreviations. Converted to hub codes -#' internally. Default: NULL. +#' internally. Default: +#' [hubhelpr::default_excluded_locations]. #' @param targets character vector, target name(s) to filter #' forecasts. If NULL (default), does not filter by target. #' @param model_ids character vector of model IDs to include. 
@@ -105,12 +37,11 @@ summarize_ref_date_forecasts <- function( disease, population_data, horizons_to_include = c(0, 1, 2), - excluded_locations = NULL, + excluded_locations = hubhelpr::default_excluded_locations, targets = NULL, model_ids = NULL ) { reference_date <- lubridate::as_date(reference_date) - excluded_locations <- normalize_excluded_locations(excluded_locations) model_metadata <- hubData::load_model_metadata( base_hub_path, @@ -131,10 +62,11 @@ summarize_ref_date_forecasts <- function( ) supported_targets <- get_hub_supported_targets(base_hub_path) - exclusion_df <- build_exclusion_df(excluded_locations, supported_targets) - - current_forecasts <- current_forecasts |> - dplyr::anti_join(exclusion_df, by = c("target", "location")) + current_forecasts <- apply_location_exclusions( + current_forecasts, + excluded_locations, + supported_targets + ) if (nrow(current_forecasts) == 0) { model_filter_msg <- if (!is.null(model_ids)) { diff --git a/R/update_hub_target_data.R b/R/update_hub_target_data.R index db6f1e6..4ae7f8c 100644 --- a/R/update_hub_target_data.R +++ b/R/update_hub_target_data.R @@ -69,9 +69,9 @@ merge_target_data <- function( #' @param disease Disease name ("covid" or "rsv"). #' @param as_of As-of date of the data pull. Default is #' the system date as determined by [lubridate::today()]. -#' @param included_locations Vector of location codes to -#' include in the output. -#' Default value `hubhelpr::included_locations`. +#' @param excluded_locations Character vector or named list +#' of US state/territory abbreviations to exclude. +#' Default: [hubhelpr::default_excluded_locations]. #' @param start_date First week-ending #' date to include for the NHSN dataset. Default value #' is NULL (no filtering). 
@@ -84,7 +84,7 @@ merge_target_data <- function( get_hubverse_format_nhsn_data <- function( disease, as_of = lubridate::today(), - included_locations = hubhelpr::included_locations, + excluded_locations = hubhelpr::default_excluded_locations, start_date = NULL, end_date = NULL ) { @@ -109,8 +109,8 @@ get_hubverse_format_nhsn_data <- function( as_of = !!as_of, target = glue::glue("wk inc {disease} hosp") ) |> - dplyr::filter(.data$location %in% !!included_locations) |> - dplyr::select(tidyselect::all_of(hubverse_ts_req_cols)) + dplyr::select(tidyselect::all_of(hubverse_ts_req_cols)) |> + apply_location_exclusions(excluded_locations) return(hubverse_format_nhsn_data) } @@ -124,9 +124,9 @@ get_hubverse_format_nhsn_data <- function( #' @param base_hub_path Path to the base hub directory. #' @param as_of As-of date of the data pull. Default is #' the system date as determined by [lubridate::today()]. -#' @param included_locations Vector of location codes to -#' include in the output. -#' Default value `hubhelpr::included_locations`. +#' @param excluded_locations Character vector or named list +#' of US state/territory abbreviations to exclude. +#' Default: [hubhelpr::default_excluded_locations]. #' @param nssp_update_local Logical. Whether to update NSSP #' data from local file `auxiliary-data/latest.parquet` #' (default: FALSE). 
@@ -143,7 +143,7 @@ get_hubverse_format_nssp_data <- function( disease, base_hub_path, as_of = lubridate::today(), - included_locations = hubhelpr::included_locations, + excluded_locations = hubhelpr::default_excluded_locations, nssp_update_local = FALSE, start_date = NULL, end_date = NULL @@ -194,9 +194,9 @@ get_hubverse_format_nssp_data <- function( as_of = !!as_of, target = glue::glue("wk inc {disease} prop ed visits") ) |> - dplyr::filter(.data$location %in% !!included_locations) |> dplyr::select(tidyselect::all_of(hubverse_ts_req_cols)) |> - dplyr::arrange(.data$date, .data$location) + dplyr::arrange(.data$date, .data$location) |> + apply_location_exclusions(excluded_locations) return(hubverse_format_nssp_data) } @@ -215,9 +215,9 @@ get_hubverse_format_nssp_data <- function( #' @param start_date First week-ending #' date to include for the NHSN dataset. Default value #' is "2024-11-09". -#' @param included_locations Vector of location codes to -#' include in the output. -#' Default value `hubhelpr::included_locations`. +#' @param excluded_locations Character vector or named list +#' of US state/territory abbreviations to exclude. +#' Default: [hubhelpr::default_excluded_locations]. #' @param legacy_file Logical. Whether to write legacy #' CSV output (default: FALSE). #' @param nssp_update_local Logical. 
Whether to update NSSP @@ -237,7 +237,7 @@ update_hub_target_data <- function( disease, as_of = lubridate::today(), start_date = lubridate::as_date("2024-11-09"), - included_locations = hubhelpr::included_locations, + excluded_locations = hubhelpr::default_excluded_locations, legacy_file = FALSE, nssp_update_local = FALSE, overwrite_existing = FALSE @@ -247,7 +247,7 @@ update_hub_target_data <- function( nhsn_data <- get_hubverse_format_nhsn_data( disease, as_of = as_of, - included_locations = included_locations, + excluded_locations = excluded_locations, start_date = start_date ) @@ -263,7 +263,7 @@ update_hub_target_data <- function( disease, base_hub_path, as_of = as_of, - included_locations = included_locations, + excluded_locations = excluded_locations, nssp_update_local = nssp_update_local ) diff --git a/R/write_ref_date_summary.R b/R/write_ref_date_summary.R index 96322f2..fc6526e 100644 --- a/R/write_ref_date_summary.R +++ b/R/write_ref_date_summary.R @@ -20,7 +20,8 @@ #' character vector, locations are excluded across all #' targets. If a named list, names should be target names #' (or "all" for global exclusions) mapping to character -#' vectors of abbreviations. Default: NULL. +#' vectors of abbreviations. Default: +#' [hubhelpr::default_excluded_locations]. #' @param output_format character, output file format. One #' of "csv", "tsv", or "parquet". Default: "csv". #' @param targets character vector, target name(s) to @@ -47,7 +48,7 @@ write_ref_date_summary <- function( disease, file_suffix, horizons_to_include = c(0, 1, 2), - excluded_locations = NULL, + excluded_locations = hubhelpr::default_excluded_locations, output_format = "csv", targets = NULL, model_ids = NULL, @@ -122,7 +123,8 @@ write_ref_date_summary <- function( #' character vector, locations are excluded across all #' targets. If a named list, names should be target names #' (or "all" for global exclusions) mapping to character -#' vectors of abbreviations. Default: NULL. 
+#' vectors of abbreviations. Default: +#' [hubhelpr::default_excluded_locations]. #' @param output_format character, output file format. One #' of "csv", "tsv", or "parquet". Default: "csv". #' @param targets character vector, target name(s) to @@ -142,7 +144,7 @@ write_ref_date_summary_ens <- function( disease, horizons_to_include = c(0, 1, 2), population_data = hubhelpr::population_data, - excluded_locations = NULL, + excluded_locations = hubhelpr::default_excluded_locations, output_format = "csv", targets = NULL, overwrite_existing = FALSE @@ -214,7 +216,8 @@ write_ref_date_summary_ens <- function( #' character vector, locations are excluded across all #' targets. If a named list, names should be target names #' (or "all" for global exclusions) mapping to character -#' vectors of abbreviations. Default: NULL. +#' vectors of abbreviations. Default: +#' [hubhelpr::default_excluded_locations]. #' @param output_format character, output file format. One #' of "csv", "tsv", or "parquet". Default: "csv". #' @param targets character vector, target name(s) to @@ -234,7 +237,7 @@ write_ref_date_summary_all <- function( disease, horizons_to_include = c(0, 1, 2), population_data = hubhelpr::population_data, - excluded_locations = NULL, + excluded_locations = hubhelpr::default_excluded_locations, output_format = "csv", targets = NULL, overwrite_existing = FALSE diff --git a/R/write_viz_target_data.R b/R/write_viz_target_data.R index b7bc06e..90fdace 100644 --- a/R/write_viz_target_data.R +++ b/R/write_viz_target_data.R @@ -27,16 +27,14 @@ #' @param end_date Date, latest date to include in data. #' Default: NULL (no filtering). Used only when #' use_hub_data = FALSE. -#' @param included_locations Character vector of location -#' codes to include in the output. Default -#' hubhelpr::included_locations. #' @param excluded_locations Character vector or named list #' specifying US state abbreviations to exclude. 
If a #' character vector, locations are excluded across all #' targets. If a named list, names should be target names #' (or "all" for global exclusions) mapping to character #' vectors of abbreviations. Converted to hub codes -#' internally. Default: NULL. +#' internally. Default: +#' [hubhelpr::default_excluded_locations]. #' @param output_format Character, output file format. One #' of "csv", "tsv", or "parquet". Default: "csv". #' @param overwrite_existing logical. If TRUE, overwrite @@ -55,15 +53,13 @@ write_viz_target_data <- function( as_of = "latest", start_date = NULL, end_date = NULL, - included_locations = hubhelpr::included_locations, - excluded_locations = NULL, + excluded_locations = hubhelpr::default_excluded_locations, output_format = "csv", overwrite_existing = FALSE ) { if (use_hub_data) { target_data <- hubData::connect_target_timeseries(base_hub_path) |> forecasttools::hub_target_data_as_of(as_of = as_of) |> - dplyr::filter(.data$location %in% !!included_locations) |> dplyr::collect() } else { nhsn_data <- get_hubverse_format_nhsn_data( @@ -89,12 +85,12 @@ write_viz_target_data <- function( target_data <- dplyr::bind_rows(nhsn_data, nssp_data) } - excluded_locations <- normalize_excluded_locations(excluded_locations) supported_targets <- get_hub_supported_targets(base_hub_path) - exclusion_df <- build_exclusion_df(excluded_locations, supported_targets) - - target_data <- target_data |> - dplyr::anti_join(exclusion_df, by = c("target", "location")) + target_data <- apply_location_exclusions( + target_data, + excluded_locations, + supported_targets + ) target_data <- target_data |> dplyr::mutate( diff --git a/R/write_webtext.R b/R/write_webtext.R index 71f0de8..1d86373 100644 --- a/R/write_webtext.R +++ b/R/write_webtext.R @@ -9,19 +9,30 @@ #' forecast. #' @param disease Character, disease name ("covid" or #' "rsv"). -#' @param included_locations Character vector of location -#' codes that are expected to report. 
Default -#' hubhelpr::included_locations. +#' @param excluded_locations Character vector of US +#' state/territory abbreviations to exclude from expected +#' reporting locations. Default: +#' [hubhelpr::default_excluded_locations]. #' #' @return Character string describing reporting issues, #' or empty string if no issues. check_hospital_reporting_latency <- function( reference_date, disease, - included_locations = hubhelpr::included_locations + excluded_locations = hubhelpr::default_excluded_locations ) { desired_weekendingdate <- as.Date(reference_date) - lubridate::dweeks(1) + excluded_codes <- forecasttools::us_location_recode( + excluded_locations, + "abbr", + "hub" + ) + expected_locations <- setdiff( + forecasttools::us_location_table$code, + excluded_codes + ) + disease_abbr <- dplyr::case_match( disease, "covid" ~ "c19", @@ -33,7 +44,7 @@ check_hospital_reporting_latency <- function( ) included_jurisdictions <- forecasttools::us_location_recode( - included_locations, + expected_locations, "code", "hrd" ) @@ -63,7 +74,7 @@ check_hospital_reporting_latency <- function( ) locations_in_data <- unique(percent_hosp_reporting_below80$location) - missing_locations <- setdiff(included_locations, locations_in_data) + missing_locations <- setdiff(expected_locations, locations_in_data) if (length(missing_locations) > 0) { missing_location_names <- forecasttools::us_location_recode( @@ -173,8 +184,8 @@ compute_change_direction <- function( #' @param all_model_metadata Data frame of model metadata. #' @param hub_name Character, hub name. #' @param reference_date Date, the reference date. -#' @param included_locations Character vector of location -#' codes. +#' @param excluded_locations Character vector of US +#' state/territory abbreviations to exclude. #' #' @return Named list of template placeholder values with #' keys prefixed by the target data type. 
@@ -188,7 +199,7 @@ compute_target_webtext_values <- function( all_model_metadata, hub_name, reference_date, - included_locations + excluded_locations ) { target_type <- get_target_data_type(target) @@ -293,7 +304,7 @@ compute_target_webtext_values <- function( values[["hosp_reporting_flag_text"]] <- check_hospital_reporting_latency( reference_date = reference_date, disease = disease, - included_locations = included_locations + excluded_locations = excluded_locations ) } @@ -317,9 +328,10 @@ compute_target_webtext_values <- function( #' with weekly summary files. #' @param targets Character vector of target names to #' generate text for. -#' @param included_locations Character vector of location -#' codes that are expected to report. Default -#' hubhelpr::included_locations. +#' @param excluded_locations Character vector of US +#' state/territory abbreviations to exclude from expected +#' reporting locations. Default: +#' [hubhelpr::default_excluded_locations]. #' @param input_format Character, input file format for #' reading summary data files. One of "csv", "tsv", or #' "parquet". Default: "csv". @@ -333,7 +345,7 @@ generate_webtext_block <- function( base_hub_path, weekly_data_path, targets, - included_locations = hubhelpr::included_locations, + excluded_locations = hubhelpr::default_excluded_locations, input_format = "csv" ) { checkmate::assert_choice(disease, choices = c("covid", "rsv")) @@ -395,7 +407,7 @@ generate_webtext_block <- function( all_model_metadata = all_model_metadata, hub_name = hub_name, reference_date = reference_date, - included_locations = included_locations + excluded_locations = excluded_locations ) |> purrr::list_flatten() @@ -453,9 +465,10 @@ generate_webtext_block <- function( #' @param targets Character vector of target names to #' generate text for. Default NULL discovers targets #' from hub time-series data. -#' @param included_locations Character vector of location -#' codes that are expected to report. 
Default -#' hubhelpr::included_locations. +#' @param excluded_locations Character vector of US +#' state/territory abbreviations to exclude from expected +#' reporting locations. Default: +#' [hubhelpr::default_excluded_locations]. #' @param input_format Character, input file format for #' reading summary data files. One of "csv", "tsv", or #' "parquet". Default: "csv". @@ -469,7 +482,7 @@ write_webtext <- function( base_hub_path, hub_reports_path, targets = NULL, - included_locations = hubhelpr::included_locations, + excluded_locations = hubhelpr::default_excluded_locations, input_format = "csv", overwrite_existing = FALSE ) { @@ -492,7 +505,7 @@ write_webtext <- function( base_hub_path = base_hub_path, weekly_data_path = weekly_data_path, targets = targets, - included_locations = included_locations, + excluded_locations = excluded_locations, input_format = input_format ) diff --git a/actions/generate-viz-data/action.yaml b/actions/generate-viz-data/action.yaml index ccf6a94..fca7133 100644 --- a/actions/generate-viz-data/action.yaml +++ b/actions/generate-viz-data/action.yaml @@ -27,6 +27,10 @@ inputs: description: "JSON array of full target names (e.g., '[\"wk inc covid hosp\", \"wk inc covid prop ed visits\"]'). Defaults to all unique targets in time-series data." required: false default: "" + excluded_locations: + description: "JSON array of US state/territory abbreviations to exclude from output (e.g., '[\"VI\", \"GU\", \"AS\", \"MP\", \"UM\"]'). Defaults to hubhelpr::default_excluded_locations." 
+ required: false + default: "" runs: using: "composite" @@ -54,25 +58,35 @@ runs: targets <- jsonlite::fromJSON(input_targets) } + input_excluded <- '${{ inputs.excluded_locations }}' + if (nchar(input_excluded) == 0) { + excluded_locations <- hubhelpr::default_excluded_locations + } else { + excluded_locations <- jsonlite::fromJSON(input_excluded) + } + hubhelpr::write_ref_date_summary_ens( reference_date = ref_date, base_hub_path = base_hub_path, hub_reports_path = hub_reports_path, - disease = disease + disease = disease, + excluded_locations = excluded_locations ) hubhelpr::write_ref_date_summary_all( reference_date = ref_date, base_hub_path = base_hub_path, hub_reports_path = hub_reports_path, - disease = disease + disease = disease, + excluded_locations = excluded_locations ) hubhelpr::write_viz_target_data( reference_date = ref_date, base_hub_path = base_hub_path, hub_reports_path = hub_reports_path, - disease = disease + disease = disease, + excluded_locations = excluded_locations ) hubhelpr::write_webtext( @@ -80,7 +94,8 @@ runs: disease = disease, base_hub_path = base_hub_path, hub_reports_path = hub_reports_path, - targets = targets + targets = targets, + excluded_locations = excluded_locations ) writeLines( diff --git a/actions/update-target-data/action.yaml b/actions/update-target-data/action.yaml index 8a4bac8..8cc189f 100644 --- a/actions/update-target-data/action.yaml +++ b/actions/update-target-data/action.yaml @@ -30,6 +30,10 @@ inputs: description: "Whether to overwrite existing target data files ('true' or 'false')." required: false default: "false" + excluded_locations: + description: "JSON array of US state/territory abbreviations to exclude from output (e.g., '[\"VI\", \"GU\", \"AS\", \"MP\", \"UM\"]'). Defaults to hubhelpr::default_excluded_locations." 
+ required: false + default: "" runs: using: "composite" @@ -42,10 +46,19 @@ runs: DATA_CDC_GOV_API_KEY_SECRET: ${{ inputs.api_key_secret }} run: | today <- lubridate::today() + + input_excluded <- '${{ inputs.excluded_locations }}' + if (nchar(input_excluded) == 0) { + excluded_locations <- hubhelpr::default_excluded_locations + } else { + excluded_locations <- jsonlite::fromJSON(input_excluded) + } + hubhelpr::update_hub_target_data( base_hub_path = "${{ inputs.base_hub_path }}", disease = "${{ inputs.disease }}", as_of = today, + excluded_locations = excluded_locations, legacy_file = as.logical("${{ inputs.legacy_file }}"), nssp_update_local = as.logical("${{ inputs.nssp_update_local }}"), overwrite_existing = as.logical("${{ inputs.overwrite_existing }}") diff --git a/man/apply_location_exclusions.Rd b/man/apply_location_exclusions.Rd new file mode 100644 index 0000000..39c7e8e --- /dev/null +++ b/man/apply_location_exclusions.Rd @@ -0,0 +1,35 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/location_exclusions.R +\name{apply_location_exclusions} +\alias{apply_location_exclusions} +\title{Apply location exclusions to a data frame.} +\usage{ +apply_location_exclusions(data, excluded_locations, supported_targets = NULL) +} +\arguments{ +\item{data}{Data frame with a "location" column +containing hub-format location codes, and optionally +a "target" column.} + +\item{excluded_locations}{NULL, character vector, or +named list of US state/territory abbreviations to +exclude. If a character vector, locations are excluded +across all rows. If a named list, names should be +target names (or "all" for global exclusions) mapping +to character vectors of abbreviations. Default: NULL.} + +\item{supported_targets}{Character vector of valid +target names. Required for target-specific exclusions +when \code{excluded_locations} is a named list with +non-"all" keys. Default: NULL.} +} +\value{ +Data frame with excluded rows removed. 
+} +\description{ +Removes rows from a data frame based on excluded +location abbreviations. Supports target-specific +exclusions when \code{supported_targets} is provided and +the data contains a "target" column; otherwise +applies blanket exclusion across all rows. +} diff --git a/man/check_hospital_reporting_latency.Rd b/man/check_hospital_reporting_latency.Rd index 0ee95f2..40da8c8 100644 --- a/man/check_hospital_reporting_latency.Rd +++ b/man/check_hospital_reporting_latency.Rd @@ -7,7 +7,7 @@ check_hospital_reporting_latency( reference_date, disease, - included_locations = hubhelpr::included_locations + excluded_locations = hubhelpr::default_excluded_locations ) } \arguments{ @@ -17,9 +17,10 @@ forecast.} \item{disease}{Character, disease name ("covid" or "rsv").} -\item{included_locations}{Character vector of location -codes that are expected to report. Default -hubhelpr::included_locations.} +\item{excluded_locations}{Character vector of US +state/territory abbreviations to exclude from expected +reporting locations. Default: +\link{default_excluded_locations}.} } \value{ Character string describing reporting issues, diff --git a/man/default_excluded_locations.Rd b/man/default_excluded_locations.Rd new file mode 100644 index 0000000..12c7ca5 --- /dev/null +++ b/man/default_excluded_locations.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/constants.R +\docType{data} +\name{default_excluded_locations} +\alias{default_excluded_locations} +\title{Default US state/territory abbreviations excluded +from hub data.} +\format{ +An object of class \code{character} of length 5. +} +\usage{ +default_excluded_locations +} +\description{ +Excludes Virgin Islands (VI), Guam (GU), +American Samoa (AS), Northern Mariana Islands (MP), +and Minor Outlying Islands (UM). 
+} +\keyword{datasets} diff --git a/man/excluded_locations.Rd b/man/excluded_locations.Rd deleted file mode 100644 index 8686e50..0000000 --- a/man/excluded_locations.Rd +++ /dev/null @@ -1,19 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/constants.R -\docType{data} -\name{excluded_locations} -\alias{excluded_locations} -\title{Two digits FIPS codes for locations excluded from Hubs' -target data.} -\format{ -An object of class \code{character} of length 5. -} -\usage{ -excluded_locations -} -\description{ -Excludes Virgin Islands (78), Northern Mariana -Islands (69), Guam (66), American Samoa (60), and Minor -Outlying Islands (74). -} -\keyword{datasets} diff --git a/man/generate_webtext_block.Rd b/man/generate_webtext_block.Rd index 4be93b1..28e7a26 100644 --- a/man/generate_webtext_block.Rd +++ b/man/generate_webtext_block.Rd @@ -10,7 +10,7 @@ generate_webtext_block( base_hub_path, weekly_data_path, targets, - included_locations = hubhelpr::included_locations, + excluded_locations = hubhelpr::default_excluded_locations, input_format = "csv" ) } @@ -30,9 +30,10 @@ with weekly summary files.} \item{targets}{Character vector of target names to generate text for.} -\item{included_locations}{Character vector of location -codes that are expected to report. Default -hubhelpr::included_locations.} +\item{excluded_locations}{Character vector of US +state/territory abbreviations to exclude from expected +reporting locations. Default: +\link{default_excluded_locations}.} \item{input_format}{Character, input file format for reading summary data files. 
One of "csv", "tsv", or diff --git a/man/get_hubverse_format_nhsn_data.Rd b/man/get_hubverse_format_nhsn_data.Rd index 405d684..d6e85e1 100644 --- a/man/get_hubverse_format_nhsn_data.Rd +++ b/man/get_hubverse_format_nhsn_data.Rd @@ -7,7 +7,7 @@ get_hubverse_format_nhsn_data( disease, as_of = lubridate::today(), - included_locations = hubhelpr::included_locations, + excluded_locations = hubhelpr::default_excluded_locations, start_date = NULL, end_date = NULL ) @@ -18,9 +18,9 @@ get_hubverse_format_nhsn_data( \item{as_of}{As-of date of the data pull. Default is the system date as determined by \code{\link[lubridate:now]{lubridate::today()}}.} -\item{included_locations}{Vector of location codes to -include in the output. -Default value \code{hubhelpr::included_locations}.} +\item{excluded_locations}{Character vector or named list +of US state/territory abbreviations to exclude. +Default: \link{default_excluded_locations}.} \item{start_date}{First week-ending date to include for the NHSN dataset. Default value diff --git a/man/get_hubverse_format_nssp_data.Rd b/man/get_hubverse_format_nssp_data.Rd index 099dc81..e8ddf23 100644 --- a/man/get_hubverse_format_nssp_data.Rd +++ b/man/get_hubverse_format_nssp_data.Rd @@ -8,7 +8,7 @@ get_hubverse_format_nssp_data( disease, base_hub_path, as_of = lubridate::today(), - included_locations = hubhelpr::included_locations, + excluded_locations = hubhelpr::default_excluded_locations, nssp_update_local = FALSE, start_date = NULL, end_date = NULL @@ -22,9 +22,9 @@ get_hubverse_format_nssp_data( \item{as_of}{As-of date of the data pull. Default is the system date as determined by \code{\link[lubridate:now]{lubridate::today()}}.} -\item{included_locations}{Vector of location codes to -include in the output. -Default value \code{hubhelpr::included_locations}.} +\item{excluded_locations}{Character vector or named list +of US state/territory abbreviations to exclude. 
+Default: \link{default_excluded_locations}.} \item{nssp_update_local}{Logical. Whether to update NSSP data from local file \code{auxiliary-data/latest.parquet} diff --git a/man/included_locations.Rd b/man/included_locations.Rd deleted file mode 100644 index 593e3e8..0000000 --- a/man/included_locations.Rd +++ /dev/null @@ -1,20 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/constants.R -\docType{data} -\name{included_locations} -\alias{included_locations} -\title{Two digits FIPS codes for locations included in Hubs' -target data.} -\format{ -An object of class \code{character} of length 53. -} -\usage{ -included_locations -} -\description{ -Includes 50 states, US national, DC, and Puerto Rico -(PR). Excludes Virgin Islands (78), Northern Mariana -Islands (69), Guam (66), American Samoa (60), and Minor -Outlying Islands (74). -} -\keyword{datasets} diff --git a/man/summarize_ref_date_forecasts.Rd b/man/summarize_ref_date_forecasts.Rd index 07659b3..6a369d8 100644 --- a/man/summarize_ref_date_forecasts.Rd +++ b/man/summarize_ref_date_forecasts.Rd @@ -10,7 +10,7 @@ summarize_ref_date_forecasts( disease, population_data, horizons_to_include = c(0, 1, 2), - excluded_locations = NULL, + excluded_locations = hubhelpr::default_excluded_locations, targets = NULL, model_ids = NULL ) @@ -37,7 +37,8 @@ character vector, locations are excluded across all targets. If a named list, names should be target names (or "all" for global exclusions) mapping to character vectors of abbreviations. Converted to hub codes -internally. Default: NULL.} +internally. Default: +\link{default_excluded_locations}.} \item{targets}{character vector, target name(s) to filter forecasts. 
If NULL (default), does not filter by target.} diff --git a/man/update_hub_target_data.Rd b/man/update_hub_target_data.Rd index 394abc3..35361e0 100644 --- a/man/update_hub_target_data.Rd +++ b/man/update_hub_target_data.Rd @@ -9,7 +9,7 @@ update_hub_target_data( disease, as_of = lubridate::today(), start_date = lubridate::as_date("2024-11-09"), - included_locations = hubhelpr::included_locations, + excluded_locations = hubhelpr::default_excluded_locations, legacy_file = FALSE, nssp_update_local = FALSE, overwrite_existing = FALSE @@ -27,9 +27,9 @@ the system date as determined by \code{\link[lubridate:now]{lubridate::today()}} date to include for the NHSN dataset. Default value is "2024-11-09".} -\item{included_locations}{Vector of location codes to -include in the output. -Default value \code{hubhelpr::included_locations}.} +\item{excluded_locations}{Character vector or named list +of US state/territory abbreviations to exclude. +Default: \link{default_excluded_locations}.} \item{legacy_file}{Logical. Whether to write legacy CSV output (default: FALSE).} diff --git a/man/write_ref_date_summary.Rd b/man/write_ref_date_summary.Rd index a5bd67b..0070226 100644 --- a/man/write_ref_date_summary.Rd +++ b/man/write_ref_date_summary.Rd @@ -11,7 +11,7 @@ write_ref_date_summary( disease, file_suffix, horizons_to_include = c(0, 1, 2), - excluded_locations = NULL, + excluded_locations = hubhelpr::default_excluded_locations, output_format = "csv", targets = NULL, model_ids = NULL, @@ -44,7 +44,8 @@ specifying US state abbreviations to exclude. If a character vector, locations are excluded across all targets. If a named list, names should be target names (or "all" for global exclusions) mapping to character -vectors of abbreviations. Default: NULL.} +vectors of abbreviations. Default: +\link{default_excluded_locations}.} \item{output_format}{character, output file format. One of "csv", "tsv", or "parquet". 
Default: "csv".} diff --git a/man/write_ref_date_summary_all.Rd b/man/write_ref_date_summary_all.Rd index b63d78a..9f9dcdd 100644 --- a/man/write_ref_date_summary_all.Rd +++ b/man/write_ref_date_summary_all.Rd @@ -11,7 +11,7 @@ write_ref_date_summary_all( disease, horizons_to_include = c(0, 1, 2), population_data = hubhelpr::population_data, - excluded_locations = NULL, + excluded_locations = hubhelpr::default_excluded_locations, output_format = "csv", targets = NULL, overwrite_existing = FALSE @@ -40,7 +40,8 @@ specifying US state abbreviations to exclude. If a character vector, locations are excluded across all targets. If a named list, names should be target names (or "all" for global exclusions) mapping to character -vectors of abbreviations. Default: NULL.} +vectors of abbreviations. Default: +\link{default_excluded_locations}.} \item{output_format}{character, output file format. One of "csv", "tsv", or "parquet". Default: "csv".} diff --git a/man/write_ref_date_summary_ens.Rd b/man/write_ref_date_summary_ens.Rd index 655eedc..4a8b915 100644 --- a/man/write_ref_date_summary_ens.Rd +++ b/man/write_ref_date_summary_ens.Rd @@ -11,7 +11,7 @@ write_ref_date_summary_ens( disease, horizons_to_include = c(0, 1, 2), population_data = hubhelpr::population_data, - excluded_locations = NULL, + excluded_locations = hubhelpr::default_excluded_locations, output_format = "csv", targets = NULL, overwrite_existing = FALSE @@ -40,7 +40,8 @@ specifying US state abbreviations to exclude. If a character vector, locations are excluded across all targets. If a named list, names should be target names (or "all" for global exclusions) mapping to character -vectors of abbreviations. Default: NULL.} +vectors of abbreviations. Default: +\link{default_excluded_locations}.} \item{output_format}{character, output file format. One of "csv", "tsv", or "parquet". 
Default: "csv".} diff --git a/man/write_viz_target_data.Rd b/man/write_viz_target_data.Rd index 97f6213..d7b5b3c 100644 --- a/man/write_viz_target_data.Rd +++ b/man/write_viz_target_data.Rd @@ -13,8 +13,7 @@ write_viz_target_data( as_of = "latest", start_date = NULL, end_date = NULL, - included_locations = hubhelpr::included_locations, - excluded_locations = NULL, + excluded_locations = hubhelpr::default_excluded_locations, output_format = "csv", overwrite_existing = FALSE ) @@ -48,17 +47,14 @@ use_hub_data = FALSE.} Default: NULL (no filtering). Used only when use_hub_data = FALSE.} -\item{included_locations}{Character vector of location -codes to include in the output. Default -hubhelpr::included_locations.} - \item{excluded_locations}{Character vector or named list specifying US state abbreviations to exclude. If a character vector, locations are excluded across all targets. If a named list, names should be target names (or "all" for global exclusions) mapping to character vectors of abbreviations. Converted to hub codes -internally. Default: NULL.} +internally. Default: +\link{default_excluded_locations}.} \item{output_format}{Character, output file format. One of "csv", "tsv", or "parquet". Default: "csv".} diff --git a/man/write_webtext.Rd b/man/write_webtext.Rd index ec29a61..5ff676b 100644 --- a/man/write_webtext.Rd +++ b/man/write_webtext.Rd @@ -11,7 +11,7 @@ write_webtext( base_hub_path, hub_reports_path, targets = NULL, - included_locations = hubhelpr::included_locations, + excluded_locations = hubhelpr::default_excluded_locations, input_format = "csv", overwrite_existing = FALSE ) @@ -32,9 +32,10 @@ reports directory.} generate text for. Default NULL discovers targets from hub time-series data.} -\item{included_locations}{Character vector of location -codes that are expected to report. Default -hubhelpr::included_locations.} +\item{excluded_locations}{Character vector of US +state/territory abbreviations to exclude from expected +reporting locations. 
Default: +\link{default_excluded_locations}.} \item{input_format}{Character, input file format for reading summary data files. One of "csv", "tsv", or diff --git a/tests/testthat/test_update_hub_target_data.R b/tests/testthat/test_update_hub_target_data.R index 8271635..2f5df25 100644 --- a/tests/testthat/test_update_hub_target_data.R +++ b/tests/testthat/test_update_hub_target_data.R @@ -40,7 +40,14 @@ purrr::walk(c("covid", "rsv"), function(disease) { ) expect_setequal( unique(target_ts$location), - setdiff(forecasttools::us_location_table$code, excluded_locations) + setdiff( + forecasttools::us_location_table$code, + forecasttools::us_location_recode( + hubhelpr::default_excluded_locations, + "abbr", + "hub" + ) + ) ) }) } From 5d756be5bd6fd70b4a601c7be084cebb4c078df7 Mon Sep 17 00:00:00 2001 From: O957 <127630341+O957@users.noreply.github.com> Date: Mon, 16 Mar 2026 14:18:56 -0400 Subject: [PATCH 02/10] synchronize reviews across prs --- actions/generate-viz-data/action.yaml | 13 +++++-------- actions/update-target-data/action.yaml | 13 +++++-------- 2 files changed, 10 insertions(+), 16 deletions(-) diff --git a/actions/generate-viz-data/action.yaml b/actions/generate-viz-data/action.yaml index fca7133..917350c 100644 --- a/actions/generate-viz-data/action.yaml +++ b/actions/generate-viz-data/action.yaml @@ -28,9 +28,9 @@ inputs: required: false default: "" excluded_locations: - description: "JSON array of US state/territory abbreviations to exclude from output (e.g., '[\"VI\", \"GU\", \"AS\", \"MP\", \"UM\"]'). Defaults to hubhelpr::default_excluded_locations." + description: "JSON array of US state/territory abbreviations to exclude from output (e.g., '[\"VI\", \"GU\", \"AS\", \"MP\", \"UM\"]'). Also accepts a JSON object for target-specific exclusions (e.g., '{\"all\": [\"UM\"], \"wk inc covid hosp\": [\"MA\"]}'). Defaults to hubhelpr::default_excluded_locations." 
required: false - default: "" + default: "[]" runs: using: "composite" @@ -58,12 +58,9 @@ runs: targets <- jsonlite::fromJSON(input_targets) } - input_excluded <- '${{ inputs.excluded_locations }}' - if (nchar(input_excluded) == 0) { - excluded_locations <- hubhelpr::default_excluded_locations - } else { - excluded_locations <- jsonlite::fromJSON(input_excluded) - } + excluded_locations <- jsonlite::fromJSON( + '${{ inputs.excluded_locations }}' + ) hubhelpr::write_ref_date_summary_ens( reference_date = ref_date, diff --git a/actions/update-target-data/action.yaml b/actions/update-target-data/action.yaml index 8cc189f..407fe8d 100644 --- a/actions/update-target-data/action.yaml +++ b/actions/update-target-data/action.yaml @@ -31,9 +31,9 @@ inputs: required: false default: "false" excluded_locations: - description: "JSON array of US state/territory abbreviations to exclude from output (e.g., '[\"VI\", \"GU\", \"AS\", \"MP\", \"UM\"]'). Defaults to hubhelpr::default_excluded_locations." + description: "JSON array of US state/territory abbreviations to exclude from output (e.g., '[\"VI\", \"GU\", \"AS\", \"MP\", \"UM\"]'). Also accepts a JSON object for target-specific exclusions (e.g., '{\"all\": [\"UM\"], \"wk inc covid hosp\": [\"MA\"]}'). Defaults to hubhelpr::default_excluded_locations." 
required: false - default: "" + default: "[]" runs: using: "composite" @@ -47,12 +47,9 @@ runs: run: | today <- lubridate::today() - input_excluded <- '${{ inputs.excluded_locations }}' - if (nchar(input_excluded) == 0) { - excluded_locations <- hubhelpr::default_excluded_locations - } else { - excluded_locations <- jsonlite::fromJSON(input_excluded) - } + excluded_locations <- jsonlite::fromJSON( + '${{ inputs.excluded_locations }}' + ) hubhelpr::update_hub_target_data( base_hub_path = "${{ inputs.base_hub_path }}", From c5765149fcd2d1b41dcfe620555102b283261936 Mon Sep 17 00:00:00 2001 From: O957 <127630341+O957@users.noreply.github.com> Date: Mon, 16 Mar 2026 14:49:17 -0400 Subject: [PATCH 03/10] Update R/constants.R Co-authored-by: Dylan H. Morris --- R/constants.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/constants.R b/R/constants.R index af20b92..016bdb3 100644 --- a/R/constants.R +++ b/R/constants.R @@ -1,5 +1,5 @@ -#' Default US state/territory abbreviations excluded -#' from hub data. +#' US state/territory abbreviations excluded +#' by default from hub data. 
#' #' Excludes Virgin Islands (VI), Guam (GU), #' American Samoa (AS), Northern Mariana Islands (MP), From e53102babf4aca12293ee8db074cb07650fa2de5 Mon Sep 17 00:00:00 2001 From: O957 <127630341+O957@users.noreply.github.com> Date: Wed, 18 Mar 2026 09:22:07 -0400 Subject: [PATCH 04/10] update typos --- _typos.toml | 7 ------- 1 file changed, 7 deletions(-) diff --git a/_typos.toml b/_typos.toml index cbf651c..aa5938f 100644 --- a/_typos.toml +++ b/_typos.toml @@ -1,10 +1,3 @@ -[default] -extend-ignore-identifiers-re = [ - "AttributeID.*Supress.*", -] - -[default.extend-identifiers] -AttributeIDSupressMenu = "AttributeIDSupressMenu" [default.extend-words] # words that should not be corrected From b91733c90b381c7c5fa84e1ca60ea94e8bba0bee Mon Sep 17 00:00:00 2001 From: O957 <127630341+O957@users.noreply.github.com> Date: Wed, 18 Mar 2026 17:00:29 -0400 Subject: [PATCH 05/10] update exclusion procedure --- NAMESPACE | 1 - R/constants.R | 10 +- R/location_exclusions.R | 131 +++---------------- R/summarize_ref_date_forecasts.R | 18 +-- R/update_hub_target_data.R | 24 ++-- R/write_ref_date_summary.R | 36 ++--- R/write_viz_target_data.R | 18 +-- R/write_webtext.R | 29 ++-- actions/generate-viz-data/action.yaml | 2 +- actions/update-target-data/action.yaml | 2 +- man/apply_location_exclusions.Rd | 25 +--- man/check_hospital_reporting_latency.Rd | 5 +- man/default_excluded_locations.Rd | 19 --- man/generate_webtext_block.Rd | 5 +- man/get_hubverse_format_nhsn_data.Rd | 8 +- man/get_hubverse_format_nssp_data.Rd | 8 +- man/summarize_ref_date_forecasts.Rd | 14 +- man/update_hub_target_data.Rd | 8 +- man/write_ref_date_summary.Rd | 12 +- man/write_ref_date_summary_all.Rd | 12 +- man/write_ref_date_summary_ens.Rd | 12 +- man/write_viz_target_data.Rd | 14 +- man/write_webtext.Rd | 5 +- tests/testthat/test_update_hub_target_data.R | 9 +- 24 files changed, 120 insertions(+), 307 deletions(-) delete mode 100644 man/default_excluded_locations.Rd diff --git a/NAMESPACE b/NAMESPACE 
index df77858..0d305f7 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,7 +4,6 @@ export(apply_location_exclusions) export(assert_data_up_to_date) export(check_authorized_users) export(check_changes_for_autoapproval) -export(default_excluded_locations) export(flatten_task) export(flatten_task_list) export(generate_hub_baseline) diff --git a/R/constants.R b/R/constants.R index 016bdb3..fd6c2a3 100644 --- a/R/constants.R +++ b/R/constants.R @@ -1,9 +1 @@ -#' US state/territory abbreviations excluded -#' by default from hub data. -#' -#' Excludes Virgin Islands (VI), Guam (GU), -#' American Samoa (AS), Northern Mariana Islands (MP), -#' and Minor Outlying Islands (UM). -#' -#' @export -default_excluded_locations <- c("VI", "GU", "AS", "MP", "UM") +# constants used across hubhelpr functions. diff --git a/R/location_exclusions.R b/R/location_exclusions.R index d97dd92..7e8ae45 100644 --- a/R/location_exclusions.R +++ b/R/location_exclusions.R @@ -1,129 +1,34 @@ -#' Normalize excluded locations to a named list. -#' -#' Converts a character vector or named list of excluded -#' locations into a consistent named list format. -#' -#' @param excluded_locations NULL, character vector, or -#' named list of character vectors. -#' -#' @return Named list of character vectors. -#' @noRd -normalize_excluded_locations <- function(excluded_locations) { - if (is.null(excluded_locations)) { - return(list()) - } - if (is.character(excluded_locations)) { - return(list("all" = excluded_locations)) - } - if (is.list(excluded_locations)) { - return(excluded_locations) - } - cli::cli_abort( - "{.arg excluded_locations} must be NULL, a character vector, or a named list." - ) -} - - -#' Build a target-location exclusion data frame. -#' -#' Constructs a tibble of target/location pairs to -#' exclude. Entries keyed by "all" are expanded into -#' one row per supported target. Errors if any named -#' targets in the exclusion list are not in -#' `supported_targets`. 
-#' -#' @param excluded_locations Named list as returned by -#' `normalize_excluded_locations()`. -#' @param supported_targets character vector of targets -#' the hub accepts, as returned by -#' `get_hub_supported_targets()`. -#' -#' @return A tibble with columns "target" and "location". -#' (hub codes). -#' @noRd -build_exclusion_df <- function(excluded_locations, supported_targets) { - named_targets <- setdiff(names(excluded_locations), "all") - invalid_targets <- setdiff(named_targets, supported_targets) - if (length(invalid_targets) > 0) { - cli::cli_abort( - "{.arg excluded_locations} contains unknown target{?s}: {.val {invalid_targets}}." - ) - } - - merged <- purrr::map( - purrr::set_names(supported_targets), - \(tgt) unique(c(excluded_locations[["all"]], excluded_locations[[tgt]])) - ) - - tibble::enframe(merged, name = "target", value = "location") |> - tidyr::unnest(cols = "location") |> - dplyr::mutate( - location = forecasttools::us_location_recode( - .data$location, - "abbr", - "hub" - ) - ) -} - - #' Apply location exclusions to a data frame. #' #' Removes rows from a data frame based on excluded -#' location abbreviations. Target-specific -#' exclusions, when `supported_targets` is provided and -#' the data contains a "target" column, are -#' supported; otherwise, exclusions are applied are -#' across all rows. +#' location abbreviations. Abbreviations are converted +#' to hub codes internally before filtering. #' #' @param data Data frame with a "location" column -#' containing hub-format location codes and, optionally, -#' a "target" column. -#' @param excluded_locations NULL, character vector, or -#' named list of US state/territory abbreviations to -#' exclude. If a character vector, locations are excluded -#' across all rows. If a named list, names should be -#' target names (or "all" for global exclusions) mapping -#' to character vectors of abbreviations. Default: NULL. -#' @param supported_targets Character vector of valid -#' target names. 
Required for target-specific exclusions -#' when `excluded_locations` is a named list with -#' non-"all" keys. Default: NULL. +#' containing hub-format location codes. +#' @param excluded_locations Character vector of US +#' state/territory abbreviations to exclude, or NULL +#' for no exclusions. #' #' @return Data frame with excluded rows removed. #' @export apply_location_exclusions <- function( data, - excluded_locations, - supported_targets = NULL + excluded_locations ) { - excluded_locations <- normalize_excluded_locations(excluded_locations) - if (length(excluded_locations) == 0) { + if (is.null(excluded_locations) || length(excluded_locations) == 0) { return(data) } + checkmate::assert_character(excluded_locations) - if (!is.null(supported_targets) && "target" %in% names(data)) { - exclusion_df <- build_exclusion_df( - excluded_locations, - supported_targets - ) - data <- dplyr::anti_join( - data, - exclusion_df, - by = c("target", "location") - ) - } else { - all_excluded <- unique(unlist(excluded_locations)) - excluded_codes <- forecasttools::us_location_recode( - all_excluded, - "abbr", - "hub" - ) - data <- dplyr::filter( - data, - !(.data$location %in% excluded_codes) - ) - } + excluded_codes <- forecasttools::us_location_recode( + excluded_locations, + "abbr", + "hub" + ) - return(data) + dplyr::filter( + data, + !(.data$location %in% excluded_codes) + ) } diff --git a/R/summarize_ref_date_forecasts.R b/R/summarize_ref_date_forecasts.R index 440c622..73d42e2 100644 --- a/R/summarize_ref_date_forecasts.R +++ b/R/summarize_ref_date_forecasts.R @@ -15,14 +15,10 @@ #' and "population". Adds population-based calculations. #' @param horizons_to_include integer vector, horizons to #' include in the output. Default: c(0, 1, 2). -#' @param excluded_locations character vector or named list -#' specifying US state abbreviations to exclude. If a -#' character vector, locations are excluded across all -#' targets. 
If a named list, names should be target names -#' (or "all" for global exclusions) mapping to character -#' vectors of abbreviations. Converted to hub codes -#' internally. Default: -#' [hubhelpr::default_excluded_locations]. +#' @param excluded_locations character vector of US +#' state/territory abbreviations to exclude. Converted +#' to hub codes internally. Default: NULL (no +#' exclusions). #' @param targets character vector, target name(s) to filter #' forecasts. If NULL (default), does not filter by target. #' @param model_ids character vector of model IDs to include. @@ -37,7 +33,7 @@ summarize_ref_date_forecasts <- function( disease, population_data, horizons_to_include = c(0, 1, 2), - excluded_locations = hubhelpr::default_excluded_locations, + excluded_locations = NULL, targets = NULL, model_ids = NULL ) { @@ -61,11 +57,9 @@ summarize_ref_date_forecasts <- function( forecasttools::nullable_comparison(.data$model_id, "%in%", !!model_ids) ) - supported_targets <- get_hub_supported_targets(base_hub_path) current_forecasts <- apply_location_exclusions( current_forecasts, - excluded_locations, - supported_targets + excluded_locations ) if (nrow(current_forecasts) == 0) { diff --git a/R/update_hub_target_data.R b/R/update_hub_target_data.R index 4ae7f8c..709b824 100644 --- a/R/update_hub_target_data.R +++ b/R/update_hub_target_data.R @@ -69,9 +69,9 @@ merge_target_data <- function( #' @param disease Disease name ("covid" or "rsv"). #' @param as_of As-of date of the data pull. Default is #' the system date as determined by [lubridate::today()]. -#' @param excluded_locations Character vector or named list -#' of US state/territory abbreviations to exclude. -#' Default: [hubhelpr::default_excluded_locations]. +#' @param excluded_locations Character vector of US +#' state/territory abbreviations to exclude. +#' Default: NULL (no exclusions). #' @param start_date First week-ending #' date to include for the NHSN dataset. Default value #' is NULL (no filtering). 
@@ -84,7 +84,7 @@ merge_target_data <- function( get_hubverse_format_nhsn_data <- function( disease, as_of = lubridate::today(), - excluded_locations = hubhelpr::default_excluded_locations, + excluded_locations = NULL, start_date = NULL, end_date = NULL ) { @@ -124,9 +124,9 @@ get_hubverse_format_nhsn_data <- function( #' @param base_hub_path Path to the base hub directory. #' @param as_of As-of date of the data pull. Default is #' the system date as determined by [lubridate::today()]. -#' @param excluded_locations Character vector or named list -#' of US state/territory abbreviations to exclude. -#' Default: [hubhelpr::default_excluded_locations]. +#' @param excluded_locations Character vector of US +#' state/territory abbreviations to exclude. +#' Default: NULL (no exclusions). #' @param nssp_update_local Logical. Whether to update NSSP #' data from local file `auxiliary-data/latest.parquet` #' (default: FALSE). @@ -143,7 +143,7 @@ get_hubverse_format_nssp_data <- function( disease, base_hub_path, as_of = lubridate::today(), - excluded_locations = hubhelpr::default_excluded_locations, + excluded_locations = NULL, nssp_update_local = FALSE, start_date = NULL, end_date = NULL @@ -215,9 +215,9 @@ get_hubverse_format_nssp_data <- function( #' @param start_date First week-ending #' date to include for the NHSN dataset. Default value #' is "2024-11-09". -#' @param excluded_locations Character vector or named list -#' of US state/territory abbreviations to exclude. -#' Default: [hubhelpr::default_excluded_locations]. +#' @param excluded_locations Character vector of US +#' state/territory abbreviations to exclude. +#' Default: NULL (no exclusions). #' @param legacy_file Logical. Whether to write legacy #' CSV output (default: FALSE). #' @param nssp_update_local Logical. 
Whether to update NSSP @@ -237,7 +237,7 @@ update_hub_target_data <- function( disease, as_of = lubridate::today(), start_date = lubridate::as_date("2024-11-09"), - excluded_locations = hubhelpr::default_excluded_locations, + excluded_locations = NULL, legacy_file = FALSE, nssp_update_local = FALSE, overwrite_existing = FALSE diff --git a/R/write_ref_date_summary.R b/R/write_ref_date_summary.R index fc6526e..1b3f635 100644 --- a/R/write_ref_date_summary.R +++ b/R/write_ref_date_summary.R @@ -15,13 +15,9 @@ #' filename (e.g., "map_data", "forecasts_data"). #' @param horizons_to_include integer vector, horizons to #' include in the output. Default: c(0, 1, 2). -#' @param excluded_locations character vector or named list -#' specifying US state abbreviations to exclude. If a -#' character vector, locations are excluded across all -#' targets. If a named list, names should be target names -#' (or "all" for global exclusions) mapping to character -#' vectors of abbreviations. Default: -#' [hubhelpr::default_excluded_locations]. +#' @param excluded_locations character vector of US +#' state/territory abbreviations to exclude. Default: +#' NULL (no exclusions). #' @param output_format character, output file format. One #' of "csv", "tsv", or "parquet". Default: "csv". #' @param targets character vector, target name(s) to @@ -48,7 +44,7 @@ write_ref_date_summary <- function( disease, file_suffix, horizons_to_include = c(0, 1, 2), - excluded_locations = hubhelpr::default_excluded_locations, + excluded_locations = NULL, output_format = "csv", targets = NULL, model_ids = NULL, @@ -118,13 +114,9 @@ write_ref_date_summary <- function( #' include in the output. Default: c(0, 1, 2). #' @param population_data data frame with columns #' "location" and "population". Default: population_data. -#' @param excluded_locations character vector or named list -#' specifying US state abbreviations to exclude. If a -#' character vector, locations are excluded across all -#' targets. 
If a named list, names should be target names -#' (or "all" for global exclusions) mapping to character -#' vectors of abbreviations. Default: -#' [hubhelpr::default_excluded_locations]. +#' @param excluded_locations character vector of US +#' state/territory abbreviations to exclude. Default: +#' NULL (no exclusions). #' @param output_format character, output file format. One #' of "csv", "tsv", or "parquet". Default: "csv". #' @param targets character vector, target name(s) to @@ -144,7 +136,7 @@ write_ref_date_summary_ens <- function( disease, horizons_to_include = c(0, 1, 2), population_data = hubhelpr::population_data, - excluded_locations = hubhelpr::default_excluded_locations, + excluded_locations = NULL, output_format = "csv", targets = NULL, overwrite_existing = FALSE @@ -211,13 +203,9 @@ write_ref_date_summary_ens <- function( #' include in the output. Default: c(0, 1, 2). #' @param population_data data frame with columns #' "location" and "population". Default: [population_data]. -#' @param excluded_locations character vector or named list -#' specifying US state abbreviations to exclude. If a -#' character vector, locations are excluded across all -#' targets. If a named list, names should be target names -#' (or "all" for global exclusions) mapping to character -#' vectors of abbreviations. Default: -#' [hubhelpr::default_excluded_locations]. +#' @param excluded_locations character vector of US +#' state/territory abbreviations to exclude. Default: +#' NULL (no exclusions). #' @param output_format character, output file format. One #' of "csv", "tsv", or "parquet". Default: "csv". 
#' @param targets character vector, target name(s) to @@ -237,7 +225,7 @@ write_ref_date_summary_all <- function( disease, horizons_to_include = c(0, 1, 2), population_data = hubhelpr::population_data, - excluded_locations = hubhelpr::default_excluded_locations, + excluded_locations = NULL, output_format = "csv", targets = NULL, overwrite_existing = FALSE diff --git a/R/write_viz_target_data.R b/R/write_viz_target_data.R index 90fdace..8a54fef 100644 --- a/R/write_viz_target_data.R +++ b/R/write_viz_target_data.R @@ -27,14 +27,10 @@ #' @param end_date Date, latest date to include in data. #' Default: NULL (no filtering). Used only when #' use_hub_data = FALSE. -#' @param excluded_locations Character vector or named list -#' specifying US state abbreviations to exclude. If a -#' character vector, locations are excluded across all -#' targets. If a named list, names should be target names -#' (or "all" for global exclusions) mapping to character -#' vectors of abbreviations. Converted to hub codes -#' internally. Default: -#' [hubhelpr::default_excluded_locations]. +#' @param excluded_locations Character vector of US +#' state/territory abbreviations to exclude. Converted +#' to hub codes internally. Default: NULL (no +#' exclusions). #' @param output_format Character, output file format. One #' of "csv", "tsv", or "parquet". Default: "csv". #' @param overwrite_existing logical. 
If TRUE, overwrite @@ -53,7 +49,7 @@ write_viz_target_data <- function( as_of = "latest", start_date = NULL, end_date = NULL, - excluded_locations = hubhelpr::default_excluded_locations, + excluded_locations = NULL, output_format = "csv", overwrite_existing = FALSE ) { @@ -85,11 +81,9 @@ write_viz_target_data <- function( target_data <- dplyr::bind_rows(nhsn_data, nssp_data) } - supported_targets <- get_hub_supported_targets(base_hub_path) target_data <- apply_location_exclusions( target_data, - excluded_locations, - supported_targets + excluded_locations ) target_data <- target_data |> diff --git a/R/write_webtext.R b/R/write_webtext.R index 1d86373..cb04a5f 100644 --- a/R/write_webtext.R +++ b/R/write_webtext.R @@ -11,23 +11,26 @@ #' "rsv"). #' @param excluded_locations Character vector of US #' state/territory abbreviations to exclude from expected -#' reporting locations. Default: -#' [hubhelpr::default_excluded_locations]. +#' reporting locations. Default: NULL (no exclusions). #' #' @return Character string describing reporting issues, #' or empty string if no issues. check_hospital_reporting_latency <- function( reference_date, disease, - excluded_locations = hubhelpr::default_excluded_locations + excluded_locations = NULL ) { desired_weekendingdate <- as.Date(reference_date) - lubridate::dweeks(1) - excluded_codes <- forecasttools::us_location_recode( - excluded_locations, - "abbr", - "hub" - ) + if (!is.null(excluded_locations) && length(excluded_locations) > 0) { + excluded_codes <- forecasttools::us_location_recode( + excluded_locations, + "abbr", + "hub" + ) + } else { + excluded_codes <- character(0) + } expected_locations <- setdiff( forecasttools::us_location_table$code, excluded_codes @@ -330,8 +333,7 @@ compute_target_webtext_values <- function( #' generate text for. #' @param excluded_locations Character vector of US #' state/territory abbreviations to exclude from expected -#' reporting locations. 
Default: -#' [hubhelpr::default_excluded_locations]. +#' reporting locations. Default: NULL (no exclusions). #' @param input_format Character, input file format for #' reading summary data files. One of "csv", "tsv", or #' "parquet". Default: "csv". @@ -345,7 +347,7 @@ generate_webtext_block <- function( base_hub_path, weekly_data_path, targets, - excluded_locations = hubhelpr::default_excluded_locations, + excluded_locations = NULL, input_format = "csv" ) { checkmate::assert_choice(disease, choices = c("covid", "rsv")) @@ -467,8 +469,7 @@ generate_webtext_block <- function( #' from hub time-series data. #' @param excluded_locations Character vector of US #' state/territory abbreviations to exclude from expected -#' reporting locations. Default: -#' [hubhelpr::default_excluded_locations]. +#' reporting locations. Default: NULL (no exclusions). #' @param input_format Character, input file format for #' reading summary data files. One of "csv", "tsv", or #' "parquet". Default: "csv". @@ -482,7 +483,7 @@ write_webtext <- function( base_hub_path, hub_reports_path, targets = NULL, - excluded_locations = hubhelpr::default_excluded_locations, + excluded_locations = NULL, input_format = "csv", overwrite_existing = FALSE ) { diff --git a/actions/generate-viz-data/action.yaml b/actions/generate-viz-data/action.yaml index 6ae81ba..1fe89b2 100644 --- a/actions/generate-viz-data/action.yaml +++ b/actions/generate-viz-data/action.yaml @@ -28,7 +28,7 @@ inputs: required: false default: "" excluded_locations: - description: "JSON array of US state/territory abbreviations to exclude from output (e.g., '[\"VI\", \"GU\", \"AS\", \"MP\", \"UM\"]'). Also accepts a JSON object for target-specific exclusions (e.g., '{\"all\": [\"UM\"], \"wk inc covid hosp\": [\"MA\"]}'). Defaults to no exclusions." + description: "JSON array of US state/territory abbreviations to exclude from output (e.g., '[\"VI\", \"GU\", \"AS\", \"MP\", \"UM\"]'). Defaults to no exclusions." 
required: false default: "[]" diff --git a/actions/update-target-data/action.yaml b/actions/update-target-data/action.yaml index 407fe8d..a7f0a09 100644 --- a/actions/update-target-data/action.yaml +++ b/actions/update-target-data/action.yaml @@ -31,7 +31,7 @@ inputs: required: false default: "false" excluded_locations: - description: "JSON array of US state/territory abbreviations to exclude from output (e.g., '[\"VI\", \"GU\", \"AS\", \"MP\", \"UM\"]'). Also accepts a JSON object for target-specific exclusions (e.g., '{\"all\": [\"UM\"], \"wk inc covid hosp\": [\"MA\"]}'). Defaults to hubhelpr::default_excluded_locations." + description: "JSON array of US state/territory abbreviations to exclude from output (e.g., '[\"VI\", \"GU\", \"AS\", \"MP\", \"UM\"]'). Defaults to no exclusions." required: false default: "[]" diff --git a/man/apply_location_exclusions.Rd b/man/apply_location_exclusions.Rd index 39c7e8e..f5ca969 100644 --- a/man/apply_location_exclusions.Rd +++ b/man/apply_location_exclusions.Rd @@ -4,32 +4,21 @@ \alias{apply_location_exclusions} \title{Apply location exclusions to a data frame.} \usage{ -apply_location_exclusions(data, excluded_locations, supported_targets = NULL) +apply_location_exclusions(data, excluded_locations) } \arguments{ \item{data}{Data frame with a "location" column -containing hub-format location codes, and optionally -a "target" column.} +containing hub-format location codes.} -\item{excluded_locations}{NULL, character vector, or -named list of US state/territory abbreviations to -exclude. If a character vector, locations are excluded -across all rows. If a named list, names should be -target names (or "all" for global exclusions) mapping -to character vectors of abbreviations. Default: NULL.} - -\item{supported_targets}{Character vector of valid -target names. Required for target-specific exclusions -when \code{excluded_locations} is a named list with -non-"all" keys. 
Default: NULL.} +\item{excluded_locations}{Character vector of US +state/territory abbreviations to exclude, or NULL +for no exclusions.} } \value{ Data frame with excluded rows removed. } \description{ Removes rows from a data frame based on excluded -location abbreviations. Supports target-specific -exclusions when \code{supported_targets} is provided and -the data contains a "target" column; otherwise -applies blanket exclusion across all rows. +location abbreviations. Abbreviations are converted +to hub codes internally before filtering. } diff --git a/man/check_hospital_reporting_latency.Rd b/man/check_hospital_reporting_latency.Rd index 40da8c8..a16e59f 100644 --- a/man/check_hospital_reporting_latency.Rd +++ b/man/check_hospital_reporting_latency.Rd @@ -7,7 +7,7 @@ check_hospital_reporting_latency( reference_date, disease, - excluded_locations = hubhelpr::default_excluded_locations + excluded_locations = NULL ) } \arguments{ @@ -19,8 +19,7 @@ forecast.} \item{excluded_locations}{Character vector of US state/territory abbreviations to exclude from expected -reporting locations. Default: -\link{default_excluded_locations}.} +reporting locations. Default: NULL (no exclusions).} } \value{ Character string describing reporting issues, diff --git a/man/default_excluded_locations.Rd b/man/default_excluded_locations.Rd deleted file mode 100644 index 12c7ca5..0000000 --- a/man/default_excluded_locations.Rd +++ /dev/null @@ -1,19 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/constants.R -\docType{data} -\name{default_excluded_locations} -\alias{default_excluded_locations} -\title{Default US state/territory abbreviations excluded -from hub data.} -\format{ -An object of class \code{character} of length 5. -} -\usage{ -default_excluded_locations -} -\description{ -Excludes Virgin Islands (VI), Guam (GU), -American Samoa (AS), Northern Mariana Islands (MP), -and Minor Outlying Islands (UM). 
-} -\keyword{datasets} diff --git a/man/generate_webtext_block.Rd b/man/generate_webtext_block.Rd index 28e7a26..c5dfd99 100644 --- a/man/generate_webtext_block.Rd +++ b/man/generate_webtext_block.Rd @@ -10,7 +10,7 @@ generate_webtext_block( base_hub_path, weekly_data_path, targets, - excluded_locations = hubhelpr::default_excluded_locations, + excluded_locations = NULL, input_format = "csv" ) } @@ -32,8 +32,7 @@ generate text for.} \item{excluded_locations}{Character vector of US state/territory abbreviations to exclude from expected -reporting locations. Default: -\link{default_excluded_locations}.} +reporting locations. Default: NULL (no exclusions).} \item{input_format}{Character, input file format for reading summary data files. One of "csv", "tsv", or diff --git a/man/get_hubverse_format_nhsn_data.Rd b/man/get_hubverse_format_nhsn_data.Rd index d6e85e1..b4fe2ce 100644 --- a/man/get_hubverse_format_nhsn_data.Rd +++ b/man/get_hubverse_format_nhsn_data.Rd @@ -7,7 +7,7 @@ get_hubverse_format_nhsn_data( disease, as_of = lubridate::today(), - excluded_locations = hubhelpr::default_excluded_locations, + excluded_locations = NULL, start_date = NULL, end_date = NULL ) @@ -18,9 +18,9 @@ get_hubverse_format_nhsn_data( \item{as_of}{As-of date of the data pull. Default is the system date as determined by \code{\link[lubridate:now]{lubridate::today()}}.} -\item{excluded_locations}{Character vector or named list -of US state/territory abbreviations to exclude. -Default: \link{default_excluded_locations}.} +\item{excluded_locations}{Character vector of US +state/territory abbreviations to exclude. +Default: NULL (no exclusions).} \item{start_date}{First week-ending date to include for the NHSN dataset. 
Default value diff --git a/man/get_hubverse_format_nssp_data.Rd b/man/get_hubverse_format_nssp_data.Rd index e8ddf23..719f31c 100644 --- a/man/get_hubverse_format_nssp_data.Rd +++ b/man/get_hubverse_format_nssp_data.Rd @@ -8,7 +8,7 @@ get_hubverse_format_nssp_data( disease, base_hub_path, as_of = lubridate::today(), - excluded_locations = hubhelpr::default_excluded_locations, + excluded_locations = NULL, nssp_update_local = FALSE, start_date = NULL, end_date = NULL @@ -22,9 +22,9 @@ get_hubverse_format_nssp_data( \item{as_of}{As-of date of the data pull. Default is the system date as determined by \code{\link[lubridate:now]{lubridate::today()}}.} -\item{excluded_locations}{Character vector or named list -of US state/territory abbreviations to exclude. -Default: \link{default_excluded_locations}.} +\item{excluded_locations}{Character vector of US +state/territory abbreviations to exclude. +Default: NULL (no exclusions).} \item{nssp_update_local}{Logical. Whether to update NSSP data from local file \code{auxiliary-data/latest.parquet} diff --git a/man/summarize_ref_date_forecasts.Rd b/man/summarize_ref_date_forecasts.Rd index 6a369d8..a96b998 100644 --- a/man/summarize_ref_date_forecasts.Rd +++ b/man/summarize_ref_date_forecasts.Rd @@ -10,7 +10,7 @@ summarize_ref_date_forecasts( disease, population_data, horizons_to_include = c(0, 1, 2), - excluded_locations = hubhelpr::default_excluded_locations, + excluded_locations = NULL, targets = NULL, model_ids = NULL ) @@ -31,14 +31,10 @@ and "population". Adds population-based calculations.} \item{horizons_to_include}{integer vector, horizons to include in the output. Default: c(0, 1, 2).} -\item{excluded_locations}{character vector or named list -specifying US state abbreviations to exclude. If a -character vector, locations are excluded across all -targets. If a named list, names should be target names -(or "all" for global exclusions) mapping to character -vectors of abbreviations. Converted to hub codes -internally. 
Default: -\link{default_excluded_locations}.} +\item{excluded_locations}{character vector of US +state/territory abbreviations to exclude. Converted +to hub codes internally. Default: NULL (no +exclusions).} \item{targets}{character vector, target name(s) to filter forecasts. If NULL (default), does not filter by target.} diff --git a/man/update_hub_target_data.Rd b/man/update_hub_target_data.Rd index 35361e0..bd4c24d 100644 --- a/man/update_hub_target_data.Rd +++ b/man/update_hub_target_data.Rd @@ -9,7 +9,7 @@ update_hub_target_data( disease, as_of = lubridate::today(), start_date = lubridate::as_date("2024-11-09"), - excluded_locations = hubhelpr::default_excluded_locations, + excluded_locations = NULL, legacy_file = FALSE, nssp_update_local = FALSE, overwrite_existing = FALSE @@ -27,9 +27,9 @@ the system date as determined by \code{\link[lubridate:now]{lubridate::today()}} date to include for the NHSN dataset. Default value is "2024-11-09".} -\item{excluded_locations}{Character vector or named list -of US state/territory abbreviations to exclude. -Default: \link{default_excluded_locations}.} +\item{excluded_locations}{Character vector of US +state/territory abbreviations to exclude. +Default: NULL (no exclusions).} \item{legacy_file}{Logical. Whether to write legacy CSV output (default: FALSE).} diff --git a/man/write_ref_date_summary.Rd b/man/write_ref_date_summary.Rd index 0070226..6c78a4b 100644 --- a/man/write_ref_date_summary.Rd +++ b/man/write_ref_date_summary.Rd @@ -11,7 +11,7 @@ write_ref_date_summary( disease, file_suffix, horizons_to_include = c(0, 1, 2), - excluded_locations = hubhelpr::default_excluded_locations, + excluded_locations = NULL, output_format = "csv", targets = NULL, model_ids = NULL, @@ -39,13 +39,9 @@ filename (e.g., "map_data", "forecasts_data").} \item{horizons_to_include}{integer vector, horizons to include in the output. 
Default: c(0, 1, 2).} -\item{excluded_locations}{character vector or named list -specifying US state abbreviations to exclude. If a -character vector, locations are excluded across all -targets. If a named list, names should be target names -(or "all" for global exclusions) mapping to character -vectors of abbreviations. Default: -\link{default_excluded_locations}.} +\item{excluded_locations}{character vector of US +state/territory abbreviations to exclude. Default: +NULL (no exclusions).} \item{output_format}{character, output file format. One of "csv", "tsv", or "parquet". Default: "csv".} diff --git a/man/write_ref_date_summary_all.Rd b/man/write_ref_date_summary_all.Rd index 9f9dcdd..7e8e6e2 100644 --- a/man/write_ref_date_summary_all.Rd +++ b/man/write_ref_date_summary_all.Rd @@ -11,7 +11,7 @@ write_ref_date_summary_all( disease, horizons_to_include = c(0, 1, 2), population_data = hubhelpr::population_data, - excluded_locations = hubhelpr::default_excluded_locations, + excluded_locations = NULL, output_format = "csv", targets = NULL, overwrite_existing = FALSE @@ -35,13 +35,9 @@ include in the output. Default: c(0, 1, 2).} \item{population_data}{data frame with columns "location" and "population". Default: \link{population_data}.} -\item{excluded_locations}{character vector or named list -specifying US state abbreviations to exclude. If a -character vector, locations are excluded across all -targets. If a named list, names should be target names -(or "all" for global exclusions) mapping to character -vectors of abbreviations. Default: -\link{default_excluded_locations}.} +\item{excluded_locations}{character vector of US +state/territory abbreviations to exclude. Default: +NULL (no exclusions).} \item{output_format}{character, output file format. One of "csv", "tsv", or "parquet". 
Default: "csv".} diff --git a/man/write_ref_date_summary_ens.Rd b/man/write_ref_date_summary_ens.Rd index 4a8b915..55d623b 100644 --- a/man/write_ref_date_summary_ens.Rd +++ b/man/write_ref_date_summary_ens.Rd @@ -11,7 +11,7 @@ write_ref_date_summary_ens( disease, horizons_to_include = c(0, 1, 2), population_data = hubhelpr::population_data, - excluded_locations = hubhelpr::default_excluded_locations, + excluded_locations = NULL, output_format = "csv", targets = NULL, overwrite_existing = FALSE @@ -35,13 +35,9 @@ include in the output. Default: c(0, 1, 2).} \item{population_data}{data frame with columns "location" and "population". Default: population_data.} -\item{excluded_locations}{character vector or named list -specifying US state abbreviations to exclude. If a -character vector, locations are excluded across all -targets. If a named list, names should be target names -(or "all" for global exclusions) mapping to character -vectors of abbreviations. Default: -\link{default_excluded_locations}.} +\item{excluded_locations}{character vector of US +state/territory abbreviations to exclude. Default: +NULL (no exclusions).} \item{output_format}{character, output file format. One of "csv", "tsv", or "parquet". Default: "csv".} diff --git a/man/write_viz_target_data.Rd b/man/write_viz_target_data.Rd index d7b5b3c..7e3b3c6 100644 --- a/man/write_viz_target_data.Rd +++ b/man/write_viz_target_data.Rd @@ -13,7 +13,7 @@ write_viz_target_data( as_of = "latest", start_date = NULL, end_date = NULL, - excluded_locations = hubhelpr::default_excluded_locations, + excluded_locations = NULL, output_format = "csv", overwrite_existing = FALSE ) @@ -47,14 +47,10 @@ use_hub_data = FALSE.} Default: NULL (no filtering). Used only when use_hub_data = FALSE.} -\item{excluded_locations}{Character vector or named list -specifying US state abbreviations to exclude. If a -character vector, locations are excluded across all -targets. 
If a named list, names should be target names -(or "all" for global exclusions) mapping to character -vectors of abbreviations. Converted to hub codes -internally. Default: -\link{default_excluded_locations}.} +\item{excluded_locations}{Character vector of US +state/territory abbreviations to exclude. Converted +to hub codes internally. Default: NULL (no +exclusions).} \item{output_format}{Character, output file format. One of "csv", "tsv", or "parquet". Default: "csv".} diff --git a/man/write_webtext.Rd b/man/write_webtext.Rd index 5ff676b..0767e20 100644 --- a/man/write_webtext.Rd +++ b/man/write_webtext.Rd @@ -11,7 +11,7 @@ write_webtext( base_hub_path, hub_reports_path, targets = NULL, - excluded_locations = hubhelpr::default_excluded_locations, + excluded_locations = NULL, input_format = "csv", overwrite_existing = FALSE ) @@ -34,8 +34,7 @@ from hub time-series data.} \item{excluded_locations}{Character vector of US state/territory abbreviations to exclude from expected -reporting locations. Default: -\link{default_excluded_locations}.} +reporting locations. Default: NULL (no exclusions).} \item{input_format}{Character, input file format for reading summary data files. 
One of "csv", "tsv", or diff --git a/tests/testthat/test_update_hub_target_data.R b/tests/testthat/test_update_hub_target_data.R index 2f5df25..79dd900 100644 --- a/tests/testthat/test_update_hub_target_data.R +++ b/tests/testthat/test_update_hub_target_data.R @@ -40,14 +40,7 @@ purrr::walk(c("covid", "rsv"), function(disease) { ) expect_setequal( unique(target_ts$location), - setdiff( - forecasttools::us_location_table$code, - forecasttools::us_location_recode( - hubhelpr::default_excluded_locations, - "abbr", - "hub" - ) - ) + forecasttools::us_location_table$code ) }) } From fbc11595ff15e38af18e4ca06a0b0bbd63609905 Mon Sep 17 00:00:00 2001 From: O957 <127630341+O957@users.noreply.github.com> Date: Wed, 18 Mar 2026 17:06:15 -0400 Subject: [PATCH 06/10] update tests --- tests/testthat/test_update_hub_target_data.R | 23 ++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/tests/testthat/test_update_hub_target_data.R b/tests/testthat/test_update_hub_target_data.R index 79dd900..0890e0a 100644 --- a/tests/testthat/test_update_hub_target_data.R +++ b/tests/testthat/test_update_hub_target_data.R @@ -11,6 +11,8 @@ if (fs::dir_exists(mockdir_tests)) { ) } +test_excluded_locations <- c("VI", "GU", "AS", "MP", "UM") + purrr::walk(c("covid", "rsv"), function(disease) { test_that( glue::glue("update_hub_target_data returns expected data for {disease}"), @@ -24,6 +26,7 @@ purrr::walk(c("covid", "rsv"), function(disease) { base_hub_path = base_hub_path, disease = disease, as_of = lubridate::as_date("2025-08-18"), + excluded_locations = test_excluded_locations ) target_ts <- forecasttools::read_tabular_file(output_file) @@ -38,9 +41,17 @@ purrr::walk(c("covid", "rsv"), function(disease) { glue::glue("wk inc {disease} hosp") ) ) + excluded_codes <- forecasttools::us_location_recode( + test_excluded_locations, + "abbr", + "hub" + ) expect_setequal( unique(target_ts$location), - forecasttools::us_location_table$code + setdiff( + 
forecasttools::us_location_table$code, + excluded_codes + ) ) }) } @@ -80,7 +91,8 @@ purrr::walk(c("covid", "rsv"), function(disease) { hubhelpr::update_hub_target_data( base_hub_path = base_hub_path, disease = disease, - as_of = lubridate::as_date("2025-08-18") + as_of = lubridate::as_date("2025-08-18"), + excluded_locations = test_excluded_locations ) # second run with same data errors by default @@ -88,7 +100,8 @@ purrr::walk(c("covid", "rsv"), function(disease) { hubhelpr::update_hub_target_data( base_hub_path = base_hub_path, disease = disease, - as_of = lubridate::as_date("2025-08-18") + as_of = lubridate::as_date("2025-08-18"), + excluded_locations = test_excluded_locations ), "overwrite" ) @@ -99,6 +112,7 @@ purrr::walk(c("covid", "rsv"), function(disease) { base_hub_path = base_hub_path, disease = disease, as_of = lubridate::as_date("2025-08-18"), + excluded_locations = test_excluded_locations, overwrite_existing = TRUE ) }) @@ -112,7 +126,8 @@ httptest2::with_mock_dir(mockdir_tests, { nhsn_mock <- hubhelpr::get_hubverse_format_nhsn_data( disease = "covid", as_of = lubridate::as_date("2025-08-18"), - start_date = lubridate::as_date("2024-11-09") + start_date = lubridate::as_date("2024-11-09"), + excluded_locations = test_excluded_locations ) }) From 641e497003db7d7d1c9056c5ac7ae6345ae1492e Mon Sep 17 00:00:00 2001 From: O957 <127630341+O957@users.noreply.github.com> Date: Tue, 24 Mar 2026 11:22:55 -0400 Subject: [PATCH 07/10] update location exclusions across codebase --- NAMESPACE | 2 +- R/location_exclusions.R | 148 ++++++++++++++++--- R/summarize_ref_date_forecasts.R | 17 ++- R/update_hub_target_data.R | 33 ++--- R/write_ref_date_summary.R | 30 ++-- R/write_viz_target_data.R | 17 ++- R/write_webtext.R | 32 ++-- actions/generate-viz-data/action.yaml | 2 +- man/apply_location_exclusions.Rd | 24 --- man/apply_target_location_exclusions.Rd | 36 +++++ man/check_hospital_reporting_latency.Rd | 4 +- man/generate_webtext_block.Rd | 10 +- 
man/get_hubverse_format_nhsn_data.Rd | 5 - man/get_hubverse_format_nssp_data.Rd | 5 - man/summarize_ref_date_forecasts.Rd | 11 +- man/update_hub_target_data.Rd | 10 +- man/write_ref_date_summary.Rd | 10 +- man/write_ref_date_summary_all.Rd | 10 +- man/write_ref_date_summary_ens.Rd | 10 +- man/write_viz_target_data.Rd | 11 +- man/write_webtext.Rd | 10 +- tests/testthat/test_update_hub_target_data.R | 3 +- 22 files changed, 305 insertions(+), 135 deletions(-) delete mode 100644 man/apply_location_exclusions.Rd create mode 100644 man/apply_target_location_exclusions.Rd diff --git a/NAMESPACE b/NAMESPACE index 0d305f7..f08ea1e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,6 +1,6 @@ # Generated by roxygen2: do not edit by hand -export(apply_location_exclusions) +export(apply_target_location_exclusions) export(assert_data_up_to_date) export(check_authorized_users) export(check_changes_for_autoapproval) diff --git a/R/location_exclusions.R b/R/location_exclusions.R index 7e8ae45..cfcf685 100644 --- a/R/location_exclusions.R +++ b/R/location_exclusions.R @@ -1,34 +1,142 @@ -#' Apply location exclusions to a data frame. +#' Normalize excluded locations to a named list. #' -#' Removes rows from a data frame based on excluded -#' location abbreviations. Abbreviations are converted -#' to hub codes internally before filtering. +#' Converts a character vector or named list of excluded +#' locations into a consistent named list format. #' -#' @param data Data frame with a "location" column -#' containing hub-format location codes. -#' @param excluded_locations Character vector of US -#' state/territory abbreviations to exclude, or NULL -#' for no exclusions. +#' @param excluded_locations NULL, character vector, or +#' named list of character vectors. +#' +#' @return Named list of character vectors, or NULL if +#' input is NULL or zero-length. 
+#' @noRd +normalize_excluded_locations <- function(excluded_locations) { + if (is.null(excluded_locations) || length(excluded_locations) == 0) { + return(NULL) + } + if (is.character(excluded_locations)) { + return(list("all" = excluded_locations)) + } + if (is.list(excluded_locations)) { + purrr::walk(excluded_locations, function(x) { + checkmate::assert_character( + x, + .var.name = "excluded_locations list values" + ) + }) + return(excluded_locations) + } + cli::cli_abort( + "{.arg excluded_locations} must be NULL, a character vector, or a named list." + ) +} + + +#' Build a target-location exclusion data frame. +#' +#' Constructs a tibble of target/location pairs to +#' exclude. Entries keyed by "all" are expanded into +#' one row per supported target. Errors if any named +#' targets in the exclusion list are not in +#' `supported_targets`. +#' +#' @param excluded_locations Named list as returned by +#' `normalize_excluded_locations()`. +#' @param supported_targets Character vector of targets +#' the hub accepts, as returned by +#' `get_hub_supported_targets()`. +#' +#' @return A tibble with columns "target" and "location" +#' (hub codes). +#' @noRd +build_exclusion_df <- function(excluded_locations, supported_targets) { + named_targets <- setdiff(names(excluded_locations), "all") + invalid_targets <- setdiff(named_targets, supported_targets) + if (length(invalid_targets) > 0) { + cli::cli_abort( + "{.arg excluded_locations} contains unknown target{?s}: {.val {invalid_targets}}." + ) + } + + merged <- purrr::map( + purrr::set_names(supported_targets), + \(tgt) unique(c(excluded_locations[["all"]], excluded_locations[[tgt]])) + ) + + tibble::enframe(merged, name = "target", value = "location") |> + tidyr::unnest(cols = "location") |> + dplyr::mutate( + location = forecasttools::us_location_recode( + .data$location, + "abbr", + "hub" + ) + ) +} + + +#' Flatten excluded locations to a character vector. 
+#' +#' Extracts all unique location abbreviations from an +#' excluded locations specification; some call +#' sites need a flat character vector of abbreviations +#' (e.g., functions operating on single-target data +#' without a target column). +#' +#' @param excluded_locations NULL, character vector, or +#' named list of character vectors. +#' +#' @return Character vector of unique abbreviations, or +#' NULL if input is NULL or zero-length. +#' @noRd +flatten_excluded_locations <- function(excluded_locations) { + normalized <- normalize_excluded_locations(excluded_locations) + if (is.null(normalized)) { + return(NULL) + } + unique(unlist(normalized, use.names = FALSE)) +} + + +#' Apply target-specific location exclusions to a data +#' frame. +#' +#' Removes rows from data frame based on target-specific +#' excluded location abbreviations. Supports uniform +#' exclusions (character vector applied to all targets) +#' and target-specific exclusions (named list with target +#' names as keys). Filters on the "target" and +#' "location" columns via anti-join. +#' +#' @param data Data frame with "target" and "location" +#' columns. +#' @param excluded_locations NULL, character vector, or +#' named list of US state/territory abbreviations to +#' exclude. If a character vector, locations are +#' excluded across all targets. If a named list, names +#' should be target names (or "all" for global +#' exclusions) mapping to character vectors of +#' abbreviations. Default: NULL (no exclusions). +#' @param supported_targets Character vector of valid +#' target names, as returned by +#' [get_hub_supported_targets()]. #' #' @return Data frame with excluded rows removed. 
#' @export -apply_location_exclusions <- function( +apply_target_location_exclusions <- function( data, - excluded_locations + excluded_locations, + supported_targets ) { - if (is.null(excluded_locations) || length(excluded_locations) == 0) { + excluded_locations <- normalize_excluded_locations(excluded_locations) + if (is.null(excluded_locations)) { return(data) } - checkmate::assert_character(excluded_locations) - excluded_codes <- forecasttools::us_location_recode( - excluded_locations, - "abbr", - "hub" - ) + exclusion_df <- build_exclusion_df(excluded_locations, supported_targets) - dplyr::filter( + dplyr::anti_join( data, - !(.data$location %in% excluded_codes) + exclusion_df, + by = c("target", "location") ) } diff --git a/R/summarize_ref_date_forecasts.R b/R/summarize_ref_date_forecasts.R index 73d42e2..2e4078a 100644 --- a/R/summarize_ref_date_forecasts.R +++ b/R/summarize_ref_date_forecasts.R @@ -15,10 +15,13 @@ #' and "population". Adds population-based calculations. #' @param horizons_to_include integer vector, horizons to #' include in the output. Default: c(0, 1, 2). -#' @param excluded_locations character vector of US -#' state/territory abbreviations to exclude. Converted -#' to hub codes internally. Default: NULL (no -#' exclusions). +#' @param excluded_locations NULL, character vector, or +#' named list of US state/territory abbreviations to +#' exclude. If a character vector, locations are excluded +#' across all targets. If a named list, names should be +#' target names (or "all" for global exclusions) mapping +#' to character vectors of abbreviations. Converted to +#' hub codes internally. Default: NULL (no exclusions). #' @param targets character vector, target name(s) to filter #' forecasts. If NULL (default), does not filter by target. #' @param model_ids character vector of model IDs to include. 
@@ -57,9 +60,11 @@ summarize_ref_date_forecasts <- function( forecasttools::nullable_comparison(.data$model_id, "%in%", !!model_ids) ) - current_forecasts <- apply_location_exclusions( + supported_targets <- get_hub_supported_targets(base_hub_path) + current_forecasts <- apply_target_location_exclusions( current_forecasts, - excluded_locations + excluded_locations, + supported_targets ) if (nrow(current_forecasts) == 0) { diff --git a/R/update_hub_target_data.R b/R/update_hub_target_data.R index 709b824..6b30dcc 100644 --- a/R/update_hub_target_data.R +++ b/R/update_hub_target_data.R @@ -69,9 +69,6 @@ merge_target_data <- function( #' @param disease Disease name ("covid" or "rsv"). #' @param as_of As-of date of the data pull. Default is #' the system date as determined by [lubridate::today()]. -#' @param excluded_locations Character vector of US -#' state/territory abbreviations to exclude. -#' Default: NULL (no exclusions). #' @param start_date First week-ending #' date to include for the NHSN dataset. Default value #' is NULL (no filtering). @@ -84,7 +81,6 @@ merge_target_data <- function( get_hubverse_format_nhsn_data <- function( disease, as_of = lubridate::today(), - excluded_locations = NULL, start_date = NULL, end_date = NULL ) { @@ -109,8 +105,7 @@ get_hubverse_format_nhsn_data <- function( as_of = !!as_of, target = glue::glue("wk inc {disease} hosp") ) |> - dplyr::select(tidyselect::all_of(hubverse_ts_req_cols)) |> - apply_location_exclusions(excluded_locations) + dplyr::select(tidyselect::all_of(hubverse_ts_req_cols)) return(hubverse_format_nhsn_data) } @@ -124,9 +119,6 @@ get_hubverse_format_nhsn_data <- function( #' @param base_hub_path Path to the base hub directory. #' @param as_of As-of date of the data pull. Default is #' the system date as determined by [lubridate::today()]. -#' @param excluded_locations Character vector of US -#' state/territory abbreviations to exclude. -#' Default: NULL (no exclusions). #' @param nssp_update_local Logical. 
Whether to update NSSP #' data from local file `auxiliary-data/latest.parquet` #' (default: FALSE). @@ -143,7 +135,6 @@ get_hubverse_format_nssp_data <- function( disease, base_hub_path, as_of = lubridate::today(), - excluded_locations = NULL, nssp_update_local = FALSE, start_date = NULL, end_date = NULL @@ -195,8 +186,7 @@ get_hubverse_format_nssp_data <- function( target = glue::glue("wk inc {disease} prop ed visits") ) |> dplyr::select(tidyselect::all_of(hubverse_ts_req_cols)) |> - dplyr::arrange(.data$date, .data$location) |> - apply_location_exclusions(excluded_locations) + dplyr::arrange(.data$date, .data$location) return(hubverse_format_nssp_data) } @@ -215,9 +205,13 @@ get_hubverse_format_nssp_data <- function( #' @param start_date First week-ending #' date to include for the NHSN dataset. Default value #' is "2024-11-09". -#' @param excluded_locations Character vector of US -#' state/territory abbreviations to exclude. -#' Default: NULL (no exclusions). +#' @param excluded_locations NULL, character vector, or +#' named list of US state/territory abbreviations to +#' exclude. If a character vector, locations are excluded +#' across all targets. If a named list, names should be +#' target names (or "all" for global exclusions) mapping +#' to character vectors of abbreviations. Default: NULL +#' (no exclusions). #' @param legacy_file Logical. Whether to write legacy #' CSV output (default: FALSE). #' @param nssp_update_local Logical. 
Whether to update NSSP @@ -247,7 +241,6 @@ update_hub_target_data <- function( nhsn_data <- get_hubverse_format_nhsn_data( disease, as_of = as_of, - excluded_locations = excluded_locations, start_date = start_date ) @@ -263,7 +256,6 @@ update_hub_target_data <- function( disease, base_hub_path, as_of = as_of, - excluded_locations = excluded_locations, nssp_update_local = nssp_update_local ) @@ -304,6 +296,13 @@ update_hub_target_data <- function( new_data <- dplyr::bind_rows(nhsn_data, nssp_data) + supported_targets <- get_hub_supported_targets(base_hub_path) + new_data <- apply_target_location_exclusions( + new_data, + excluded_locations, + supported_targets + ) + if (fs::file_exists(output_file)) { existing_data <- forecasttools::read_tabular(output_file) } else { diff --git a/R/write_ref_date_summary.R b/R/write_ref_date_summary.R index 1b3f635..44c1f5a 100644 --- a/R/write_ref_date_summary.R +++ b/R/write_ref_date_summary.R @@ -15,9 +15,13 @@ #' filename (e.g., "map_data", "forecasts_data"). #' @param horizons_to_include integer vector, horizons to #' include in the output. Default: c(0, 1, 2). -#' @param excluded_locations character vector of US -#' state/territory abbreviations to exclude. Default: -#' NULL (no exclusions). +#' @param excluded_locations NULL, character vector, or +#' named list of US state/territory abbreviations to +#' exclude. If a character vector, locations are excluded +#' across all targets. If a named list, names should be +#' target names (or "all" for global exclusions) mapping +#' to character vectors of abbreviations. Default: NULL +#' (no exclusions). #' @param output_format character, output file format. One #' of "csv", "tsv", or "parquet". Default: "csv". #' @param targets character vector, target name(s) to @@ -114,9 +118,13 @@ write_ref_date_summary <- function( #' include in the output. Default: c(0, 1, 2). #' @param population_data data frame with columns #' "location" and "population". Default: population_data. 
-#' @param excluded_locations character vector of US -#' state/territory abbreviations to exclude. Default: -#' NULL (no exclusions). +#' @param excluded_locations NULL, character vector, or +#' named list of US state/territory abbreviations to +#' exclude. If a character vector, locations are excluded +#' across all targets. If a named list, names should be +#' target names (or "all" for global exclusions) mapping +#' to character vectors of abbreviations. Default: NULL +#' (no exclusions). #' @param output_format character, output file format. One #' of "csv", "tsv", or "parquet". Default: "csv". #' @param targets character vector, target name(s) to @@ -203,9 +211,13 @@ write_ref_date_summary_ens <- function( #' include in the output. Default: c(0, 1, 2). #' @param population_data data frame with columns #' "location" and "population". Default: [population_data]. -#' @param excluded_locations character vector of US -#' state/territory abbreviations to exclude. Default: -#' NULL (no exclusions). +#' @param excluded_locations NULL, character vector, or +#' named list of US state/territory abbreviations to +#' exclude. If a character vector, locations are excluded +#' across all targets. If a named list, names should be +#' target names (or "all" for global exclusions) mapping +#' to character vectors of abbreviations. Default: NULL +#' (no exclusions). #' @param output_format character, output file format. One #' of "csv", "tsv", or "parquet". Default: "csv". #' @param targets character vector, target name(s) to diff --git a/R/write_viz_target_data.R b/R/write_viz_target_data.R index 8a54fef..080efad 100644 --- a/R/write_viz_target_data.R +++ b/R/write_viz_target_data.R @@ -27,10 +27,13 @@ #' @param end_date Date, latest date to include in data. #' Default: NULL (no filtering). Used only when #' use_hub_data = FALSE. -#' @param excluded_locations Character vector of US -#' state/territory abbreviations to exclude. Converted -#' to hub codes internally. 
Default: NULL (no -#' exclusions). +#' @param excluded_locations NULL, character vector, or +#' named list of US state/territory abbreviations to +#' exclude. If a character vector, locations are excluded +#' across all targets. If a named list, names should be +#' target names (or "all" for global exclusions) mapping +#' to character vectors of abbreviations. Converted to +#' hub codes internally. Default: NULL (no exclusions). #' @param output_format Character, output file format. One #' of "csv", "tsv", or "parquet". Default: "csv". #' @param overwrite_existing logical. If TRUE, overwrite @@ -81,9 +84,11 @@ write_viz_target_data <- function( target_data <- dplyr::bind_rows(nhsn_data, nssp_data) } - target_data <- apply_location_exclusions( + supported_targets <- get_hub_supported_targets(base_hub_path) + target_data <- apply_target_location_exclusions( target_data, - excluded_locations + excluded_locations, + supported_targets ) target_data <- target_data |> diff --git a/R/write_webtext.R b/R/write_webtext.R index cb04a5f..49a8662 100644 --- a/R/write_webtext.R +++ b/R/write_webtext.R @@ -9,9 +9,9 @@ #' forecast. #' @param disease Character, disease name ("covid" or #' "rsv"). -#' @param excluded_locations Character vector of US +#' @param excluded_locations NULL or character vector of US #' state/territory abbreviations to exclude from expected -#' reporting locations. Default: NULL (no exclusions). +#' reporting locations. #' #' @return Character string describing reporting issues, #' or empty string if no issues. @@ -187,8 +187,10 @@ compute_change_direction <- function( #' @param all_model_metadata Data frame of model metadata. #' @param hub_name Character, hub name. #' @param reference_date Date, the reference date. -#' @param excluded_locations Character vector of US -#' state/territory abbreviations to exclude. +#' @param excluded_locations NULL, character vector, or +#' named list of US state/territory abbreviations to +#' exclude. 
Flattened to a character vector for hospital +#' reporting latency checks. #' #' @return Named list of template placeholder values with #' keys prefixed by the target data type. @@ -307,7 +309,7 @@ compute_target_webtext_values <- function( values[["hosp_reporting_flag_text"]] <- check_hospital_reporting_latency( reference_date = reference_date, disease = disease, - excluded_locations = excluded_locations + excluded_locations = flatten_excluded_locations(excluded_locations) ) } @@ -331,9 +333,13 @@ compute_target_webtext_values <- function( #' with weekly summary files. #' @param targets Character vector of target names to #' generate text for. -#' @param excluded_locations Character vector of US -#' state/territory abbreviations to exclude from expected -#' reporting locations. Default: NULL (no exclusions). +#' @param excluded_locations NULL, character vector, or +#' named list of US state/territory abbreviations to +#' exclude. If a character vector, locations are excluded +#' across all targets. If a named list, names should be +#' target names (or "all" for global exclusions) mapping +#' to character vectors of abbreviations. Default: NULL +#' (no exclusions). #' @param input_format Character, input file format for #' reading summary data files. One of "csv", "tsv", or #' "parquet". Default: "csv". @@ -467,9 +473,13 @@ generate_webtext_block <- function( #' @param targets Character vector of target names to #' generate text for. Default NULL discovers targets #' from hub time-series data. -#' @param excluded_locations Character vector of US -#' state/territory abbreviations to exclude from expected -#' reporting locations. Default: NULL (no exclusions). +#' @param excluded_locations NULL, character vector, or +#' named list of US state/territory abbreviations to +#' exclude. If a character vector, locations are excluded +#' across all targets. 
If a named list, names should be +#' target names (or "all" for global exclusions) mapping +#' to character vectors of abbreviations. Default: NULL +#' (no exclusions). #' @param input_format Character, input file format for #' reading summary data files. One of "csv", "tsv", or #' "parquet". Default: "csv". diff --git a/actions/generate-viz-data/action.yaml b/actions/generate-viz-data/action.yaml index 1fe89b2..3e54226 100644 --- a/actions/generate-viz-data/action.yaml +++ b/actions/generate-viz-data/action.yaml @@ -28,7 +28,7 @@ inputs: required: false default: "" excluded_locations: - description: "JSON array of US state/territory abbreviations to exclude from output (e.g., '[\"VI\", \"GU\", \"AS\", \"MP\", \"UM\"]'). Defaults to no exclusions." + description: "JSON array or object of US state/territory abbreviations to exclude. Array (e.g., '[\"VI\", \"GU\"]') excludes from all targets. Object (e.g., '{\"all\": [\"VI\"], \"wk inc covid hosp\": [\"GU\"]}') supports target-specific exclusions. Defaults to no exclusions." required: false default: "[]" diff --git a/man/apply_location_exclusions.Rd b/man/apply_location_exclusions.Rd deleted file mode 100644 index f5ca969..0000000 --- a/man/apply_location_exclusions.Rd +++ /dev/null @@ -1,24 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/location_exclusions.R -\name{apply_location_exclusions} -\alias{apply_location_exclusions} -\title{Apply location exclusions to a data frame.} -\usage{ -apply_location_exclusions(data, excluded_locations) -} -\arguments{ -\item{data}{Data frame with a "location" column -containing hub-format location codes.} - -\item{excluded_locations}{Character vector of US -state/territory abbreviations to exclude, or NULL -for no exclusions.} -} -\value{ -Data frame with excluded rows removed. -} -\description{ -Removes rows from a data frame based on excluded -location abbreviations. Abbreviations are converted -to hub codes internally before filtering. 
-} diff --git a/man/apply_target_location_exclusions.Rd b/man/apply_target_location_exclusions.Rd new file mode 100644 index 0000000..a852804 --- /dev/null +++ b/man/apply_target_location_exclusions.Rd @@ -0,0 +1,36 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/location_exclusions.R +\name{apply_target_location_exclusions} +\alias{apply_target_location_exclusions} +\title{Apply target-specific location exclusions to a data +frame.} +\usage{ +apply_target_location_exclusions(data, excluded_locations, supported_targets) +} +\arguments{ +\item{data}{Data frame with "target" and "location" +columns.} + +\item{excluded_locations}{NULL, character vector, or +named list of US state/territory abbreviations to +exclude. If a character vector, locations are +excluded across all targets. If a named list, names +should be target names (or "all" for global +exclusions) mapping to character vectors of +abbreviations. Default: NULL (no exclusions).} + +\item{supported_targets}{Character vector of valid +target names, as returned by +\code{\link[=get_hub_supported_targets]{get_hub_supported_targets()}}.} +} +\value{ +Data frame with excluded rows removed. +} +\description{ +Removes rows from data frame based on target-specific +excluded location abbreviations. Supports uniform +exclusions (character vector applied to all targets) +and target-specific exclusions (named list with target +names as keys). Filters on the "target" and +"location" columns via anti-join. 
+} diff --git a/man/check_hospital_reporting_latency.Rd b/man/check_hospital_reporting_latency.Rd index a16e59f..ffb8a5b 100644 --- a/man/check_hospital_reporting_latency.Rd +++ b/man/check_hospital_reporting_latency.Rd @@ -17,9 +17,9 @@ forecast.} \item{disease}{Character, disease name ("covid" or "rsv").} -\item{excluded_locations}{Character vector of US +\item{excluded_locations}{NULL or character vector of US state/territory abbreviations to exclude from expected -reporting locations. Default: NULL (no exclusions).} +reporting locations.} } \value{ Character string describing reporting issues, diff --git a/man/generate_webtext_block.Rd b/man/generate_webtext_block.Rd index c5dfd99..3532c69 100644 --- a/man/generate_webtext_block.Rd +++ b/man/generate_webtext_block.Rd @@ -30,9 +30,13 @@ with weekly summary files.} \item{targets}{Character vector of target names to generate text for.} -\item{excluded_locations}{Character vector of US -state/territory abbreviations to exclude from expected -reporting locations. Default: NULL (no exclusions).} +\item{excluded_locations}{NULL, character vector, or +named list of US state/territory abbreviations to +exclude. If a character vector, locations are excluded +across all targets. If a named list, names should be +target names (or "all" for global exclusions) mapping +to character vectors of abbreviations. Default: NULL +(no exclusions).} \item{input_format}{Character, input file format for reading summary data files. One of "csv", "tsv", or diff --git a/man/get_hubverse_format_nhsn_data.Rd b/man/get_hubverse_format_nhsn_data.Rd index b4fe2ce..e152a75 100644 --- a/man/get_hubverse_format_nhsn_data.Rd +++ b/man/get_hubverse_format_nhsn_data.Rd @@ -7,7 +7,6 @@ get_hubverse_format_nhsn_data( disease, as_of = lubridate::today(), - excluded_locations = NULL, start_date = NULL, end_date = NULL ) @@ -18,10 +17,6 @@ get_hubverse_format_nhsn_data( \item{as_of}{As-of date of the data pull. 
Default is the system date as determined by \code{\link[lubridate:now]{lubridate::today()}}.} -\item{excluded_locations}{Character vector of US -state/territory abbreviations to exclude. -Default: NULL (no exclusions).} - \item{start_date}{First week-ending date to include for the NHSN dataset. Default value is NULL (no filtering).} diff --git a/man/get_hubverse_format_nssp_data.Rd b/man/get_hubverse_format_nssp_data.Rd index 719f31c..1685985 100644 --- a/man/get_hubverse_format_nssp_data.Rd +++ b/man/get_hubverse_format_nssp_data.Rd @@ -8,7 +8,6 @@ get_hubverse_format_nssp_data( disease, base_hub_path, as_of = lubridate::today(), - excluded_locations = NULL, nssp_update_local = FALSE, start_date = NULL, end_date = NULL @@ -22,10 +21,6 @@ get_hubverse_format_nssp_data( \item{as_of}{As-of date of the data pull. Default is the system date as determined by \code{\link[lubridate:now]{lubridate::today()}}.} -\item{excluded_locations}{Character vector of US -state/territory abbreviations to exclude. -Default: NULL (no exclusions).} - \item{nssp_update_local}{Logical. Whether to update NSSP data from local file \code{auxiliary-data/latest.parquet} (default: FALSE).} diff --git a/man/summarize_ref_date_forecasts.Rd b/man/summarize_ref_date_forecasts.Rd index a96b998..93d3ac3 100644 --- a/man/summarize_ref_date_forecasts.Rd +++ b/man/summarize_ref_date_forecasts.Rd @@ -31,10 +31,13 @@ and "population". Adds population-based calculations.} \item{horizons_to_include}{integer vector, horizons to include in the output. Default: c(0, 1, 2).} -\item{excluded_locations}{character vector of US -state/territory abbreviations to exclude. Converted -to hub codes internally. Default: NULL (no -exclusions).} +\item{excluded_locations}{NULL, character vector, or +named list of US state/territory abbreviations to +exclude. If a character vector, locations are excluded +across all targets. 
If a named list, names should be +target names (or "all" for global exclusions) mapping +to character vectors of abbreviations. Converted to +hub codes internally. Default: NULL (no exclusions).} \item{targets}{character vector, target name(s) to filter forecasts. If NULL (default), does not filter by target.} diff --git a/man/update_hub_target_data.Rd b/man/update_hub_target_data.Rd index bd4c24d..9d4efde 100644 --- a/man/update_hub_target_data.Rd +++ b/man/update_hub_target_data.Rd @@ -27,9 +27,13 @@ the system date as determined by \code{\link[lubridate:now]{lubridate::today()}} date to include for the NHSN dataset. Default value is "2024-11-09".} -\item{excluded_locations}{Character vector of US -state/territory abbreviations to exclude. -Default: NULL (no exclusions).} +\item{excluded_locations}{NULL, character vector, or +named list of US state/territory abbreviations to +exclude. If a character vector, locations are excluded +across all targets. If a named list, names should be +target names (or "all" for global exclusions) mapping +to character vectors of abbreviations. Default: NULL +(no exclusions).} \item{legacy_file}{Logical. Whether to write legacy CSV output (default: FALSE).} diff --git a/man/write_ref_date_summary.Rd b/man/write_ref_date_summary.Rd index 6c78a4b..0d9eb0b 100644 --- a/man/write_ref_date_summary.Rd +++ b/man/write_ref_date_summary.Rd @@ -39,9 +39,13 @@ filename (e.g., "map_data", "forecasts_data").} \item{horizons_to_include}{integer vector, horizons to include in the output. Default: c(0, 1, 2).} -\item{excluded_locations}{character vector of US -state/territory abbreviations to exclude. Default: -NULL (no exclusions).} +\item{excluded_locations}{NULL, character vector, or +named list of US state/territory abbreviations to +exclude. If a character vector, locations are excluded +across all targets. If a named list, names should be +target names (or "all" for global exclusions) mapping +to character vectors of abbreviations. 
Default: NULL +(no exclusions).} \item{output_format}{character, output file format. One of "csv", "tsv", or "parquet". Default: "csv".} diff --git a/man/write_ref_date_summary_all.Rd b/man/write_ref_date_summary_all.Rd index 7e8e6e2..3266a9e 100644 --- a/man/write_ref_date_summary_all.Rd +++ b/man/write_ref_date_summary_all.Rd @@ -35,9 +35,13 @@ include in the output. Default: c(0, 1, 2).} \item{population_data}{data frame with columns "location" and "population". Default: \link{population_data}.} -\item{excluded_locations}{character vector of US -state/territory abbreviations to exclude. Default: -NULL (no exclusions).} +\item{excluded_locations}{NULL, character vector, or +named list of US state/territory abbreviations to +exclude. If a character vector, locations are excluded +across all targets. If a named list, names should be +target names (or "all" for global exclusions) mapping +to character vectors of abbreviations. Default: NULL +(no exclusions).} \item{output_format}{character, output file format. One of "csv", "tsv", or "parquet". Default: "csv".} diff --git a/man/write_ref_date_summary_ens.Rd b/man/write_ref_date_summary_ens.Rd index 55d623b..dde0a10 100644 --- a/man/write_ref_date_summary_ens.Rd +++ b/man/write_ref_date_summary_ens.Rd @@ -35,9 +35,13 @@ include in the output. Default: c(0, 1, 2).} \item{population_data}{data frame with columns "location" and "population". Default: population_data.} -\item{excluded_locations}{character vector of US -state/territory abbreviations to exclude. Default: -NULL (no exclusions).} +\item{excluded_locations}{NULL, character vector, or +named list of US state/territory abbreviations to +exclude. If a character vector, locations are excluded +across all targets. If a named list, names should be +target names (or "all" for global exclusions) mapping +to character vectors of abbreviations. Default: NULL +(no exclusions).} \item{output_format}{character, output file format. One of "csv", "tsv", or "parquet". 
Default: "csv".} diff --git a/man/write_viz_target_data.Rd b/man/write_viz_target_data.Rd index 7e3b3c6..7802647 100644 --- a/man/write_viz_target_data.Rd +++ b/man/write_viz_target_data.Rd @@ -47,10 +47,13 @@ use_hub_data = FALSE.} Default: NULL (no filtering). Used only when use_hub_data = FALSE.} -\item{excluded_locations}{Character vector of US -state/territory abbreviations to exclude. Converted -to hub codes internally. Default: NULL (no -exclusions).} +\item{excluded_locations}{NULL, character vector, or +named list of US state/territory abbreviations to +exclude. If a character vector, locations are excluded +across all targets. If a named list, names should be +target names (or "all" for global exclusions) mapping +to character vectors of abbreviations. Converted to +hub codes internally. Default: NULL (no exclusions).} \item{output_format}{Character, output file format. One of "csv", "tsv", or "parquet". Default: "csv".} diff --git a/man/write_webtext.Rd b/man/write_webtext.Rd index 0767e20..c7d54b4 100644 --- a/man/write_webtext.Rd +++ b/man/write_webtext.Rd @@ -32,9 +32,13 @@ reports directory.} generate text for. Default NULL discovers targets from hub time-series data.} -\item{excluded_locations}{Character vector of US -state/territory abbreviations to exclude from expected -reporting locations. Default: NULL (no exclusions).} +\item{excluded_locations}{NULL, character vector, or +named list of US state/territory abbreviations to +exclude. If a character vector, locations are excluded +across all targets. If a named list, names should be +target names (or "all" for global exclusions) mapping +to character vectors of abbreviations. Default: NULL +(no exclusions).} \item{input_format}{Character, input file format for reading summary data files. 
One of "csv", "tsv", or diff --git a/tests/testthat/test_update_hub_target_data.R b/tests/testthat/test_update_hub_target_data.R index 0890e0a..18ac7b0 100644 --- a/tests/testthat/test_update_hub_target_data.R +++ b/tests/testthat/test_update_hub_target_data.R @@ -126,8 +126,7 @@ httptest2::with_mock_dir(mockdir_tests, { nhsn_mock <- hubhelpr::get_hubverse_format_nhsn_data( disease = "covid", as_of = lubridate::as_date("2025-08-18"), - start_date = lubridate::as_date("2024-11-09"), - excluded_locations = test_excluded_locations + start_date = lubridate::as_date("2024-11-09") ) }) From 9bd2bb1f54f09079233af6c4bcf8d2212692ed1f Mon Sep 17 00:00:00 2001 From: O957 <127630341+O957@users.noreply.github.com> Date: Tue, 24 Mar 2026 11:56:05 -0400 Subject: [PATCH 08/10] update to action language --- actions/update-target-data/action.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/actions/update-target-data/action.yaml b/actions/update-target-data/action.yaml index a7f0a09..fc3dabf 100644 --- a/actions/update-target-data/action.yaml +++ b/actions/update-target-data/action.yaml @@ -31,7 +31,7 @@ inputs: required: false default: "false" excluded_locations: - description: "JSON array of US state/territory abbreviations to exclude from output (e.g., '[\"VI\", \"GU\", \"AS\", \"MP\", \"UM\"]'). Defaults to no exclusions." + description: "JSON array or object of US state/territory abbreviations to exclude. Array (e.g., '[\"VI\", \"GU\"]') excludes from all targets. Object (e.g., '{\"all\": [\"VI\"], \"wk inc covid hosp\": [\"GU\"]}') supports target-specific exclusions. Defaults to no exclusions." 
required: false default: "[]" From 0df25740bdf1a64ee581284dbd39582b8fa59055 Mon Sep 17 00:00:00 2001 From: O957 <127630341+O957@users.noreply.github.com> Date: Tue, 24 Mar 2026 12:10:33 -0400 Subject: [PATCH 09/10] try updating tests --- R/update_hub_target_data.R | 14 ++++++++------ tests/testthat/test_update_hub_target_data.R | 16 ++++++++++++++++ 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/R/update_hub_target_data.R b/R/update_hub_target_data.R index 6b30dcc..ef5d4f4 100644 --- a/R/update_hub_target_data.R +++ b/R/update_hub_target_data.R @@ -296,12 +296,14 @@ update_hub_target_data <- function( new_data <- dplyr::bind_rows(nhsn_data, nssp_data) - supported_targets <- get_hub_supported_targets(base_hub_path) - new_data <- apply_target_location_exclusions( - new_data, - excluded_locations, - supported_targets - ) + if (!is.null(excluded_locations) && length(excluded_locations) > 0) { + supported_targets <- get_hub_supported_targets(base_hub_path) + new_data <- apply_target_location_exclusions( + new_data, + excluded_locations, + supported_targets + ) + } if (fs::file_exists(output_file)) { existing_data <- forecasttools::read_tabular(output_file) diff --git a/tests/testthat/test_update_hub_target_data.R b/tests/testthat/test_update_hub_target_data.R index 18ac7b0..fe8a32f 100644 --- a/tests/testthat/test_update_hub_target_data.R +++ b/tests/testthat/test_update_hub_target_data.R @@ -21,6 +21,14 @@ purrr::walk(c("covid", "rsv"), function(disease) { output_file <- fs::path(base_hub_path, "target-data/time-series.parquet") fs::dir_create(fs::path(base_hub_path, "target-data")) + test_targets <- c( + glue::glue("wk inc {disease} hosp"), + glue::glue("wk inc {disease} prop ed visits") + ) + local_mocked_bindings( + get_hub_supported_targets = function(...) 
test_targets + ) + httptest2::with_mock_dir(mockdir_tests, { hubhelpr::update_hub_target_data( base_hub_path = base_hub_path, @@ -86,6 +94,14 @@ purrr::walk(c("covid", "rsv"), function(disease) { ) fs::dir_create(fs::path(base_hub_path, "target-data")) + test_targets <- c( + glue::glue("wk inc {disease} hosp"), + glue::glue("wk inc {disease} prop ed visits") + ) + local_mocked_bindings( + get_hub_supported_targets = function(...) test_targets + ) + httptest2::with_mock_dir(mockdir_tests, { # first run succeeds hubhelpr::update_hub_target_data( From 5bc406c770d8f1f6a4cca7c6e7d4b9bc966407aa Mon Sep 17 00:00:00 2001 From: O957 <127630341+O957@users.noreply.github.com> Date: Tue, 24 Mar 2026 16:33:20 -0400 Subject: [PATCH 10/10] try new setup for hosp latency and dryifying across certain functions --- R/location_exclusions.R | 162 +++++++++++-------- R/summarize_ref_date_forecasts.R | 4 +- R/update_hub_target_data.R | 12 +- R/write_viz_target_data.R | 17 +- R/write_webtext.R | 17 +- man/apply_target_location_exclusions.Rd | 22 ++- man/check_hospital_reporting_latency.Rd | 7 +- tests/testthat/test_update_hub_target_data.R | 16 -- 8 files changed, 139 insertions(+), 118 deletions(-) diff --git a/R/location_exclusions.R b/R/location_exclusions.R index cfcf685..3c2213d 100644 --- a/R/location_exclusions.R +++ b/R/location_exclusions.R @@ -2,6 +2,8 @@ #' #' Converts a character vector or named list of excluded #' locations into a consistent named list format. +#' Validates that all abbreviations are valid US +#' state/territory abbreviations. #' #' @param excluded_locations NULL, character vector, or #' named list of character vectors. 
@@ -14,6 +16,7 @@ normalize_excluded_locations <- function(excluded_locations) { return(NULL) } if (is.character(excluded_locations)) { + assert_valid_location_abbrs(excluded_locations) return(list("all" = excluded_locations)) } if (is.list(excluded_locations)) { @@ -22,6 +25,7 @@ normalize_excluded_locations <- function(excluded_locations) { x, .var.name = "excluded_locations list values" ) + assert_valid_location_abbrs(x) }) return(excluded_locations) } @@ -31,81 +35,59 @@ normalize_excluded_locations <- function(excluded_locations) { } -#' Build a target-location exclusion data frame. +#' Assert that location abbreviations are valid. #' -#' Constructs a tibble of target/location pairs to -#' exclude. Entries keyed by "all" are expanded into -#' one row per supported target. Errors if any named -#' targets in the exclusion list are not in -#' `supported_targets`. +#' Checks that all provided abbreviations are present +#' in the US location table (from forecasttools). +#' Errors with a message listing any invalid +#' abbreviations. #' -#' @param excluded_locations Named list as returned by -#' `normalize_excluded_locations()`. -#' @param supported_targets Character vector of targets -#' the hub accepts, as returned by -#' `get_hub_supported_targets()`. +#' @param abbrs Character vector of abbreviations to +#' validate. #' -#' @return A tibble with columns "target" and "location" -#' (hub codes). +#' @return Invisible NULL. Called for side effects. #' @noRd -build_exclusion_df <- function(excluded_locations, supported_targets) { - named_targets <- setdiff(names(excluded_locations), "all") - invalid_targets <- setdiff(named_targets, supported_targets) - if (length(invalid_targets) > 0) { +assert_valid_location_abbrs <- function(abbrs) { + valid_abbrs <- forecasttools::us_location_table$abbr + invalid <- setdiff(abbrs, valid_abbrs) + if (length(invalid) > 0) { cli::cli_abort( - "{.arg excluded_locations} contains unknown target{?s}: {.val {invalid_targets}}." 
+ "{.arg excluded_locations} contains invalid abbreviation{?s}: {.val {invalid}}." ) } - - merged <- purrr::map( - purrr::set_names(supported_targets), - \(tgt) unique(c(excluded_locations[["all"]], excluded_locations[[tgt]])) - ) - - tibble::enframe(merged, name = "target", value = "location") |> - tidyr::unnest(cols = "location") |> - dplyr::mutate( - location = forecasttools::us_location_recode( - .data$location, - "abbr", - "hub" - ) - ) } -#' Flatten excluded locations to a character vector. +#' Get excluded abbreviations for a specific target. #' -#' Extracts all unique location abbreviations from an -#' excluded locations specification; some call -#' sites need a flat character vector of abbreviations -#' (e.g., functions operating on single-target data -#' without a target column). +#' Extracts the abbreviations that should be excluded +#' for a given target from a normalized exclusion list, +#' combining global ("all") exclusions with any +#' target-specific ones. #' -#' @param excluded_locations NULL, character vector, or -#' named list of character vectors. +#' @param normalized Named list as returned by +#' `normalize_excluded_locations()`. +#' @param target Character, the target name. #' -#' @return Character vector of unique abbreviations, or -#' NULL if input is NULL or zero-length. +#' @return Character vector of unique abbreviations to +#' exclude for this target. #' @noRd -flatten_excluded_locations <- function(excluded_locations) { - normalized <- normalize_excluded_locations(excluded_locations) - if (is.null(normalized)) { - return(NULL) - } - unique(unlist(normalized, use.names = FALSE)) +get_target_exclusions <- function(normalized, target) { + unique(c(normalized[["all"]], normalized[[target]])) } #' Apply target-specific location exclusions to a data #' frame. #' -#' Removes rows from data frame based on target-specific -#' excluded location abbreviations. 
Supports uniform -#' exclusions (character vector applied to all targets) -#' and target-specific exclusions (named list with target -#' names as keys). Filters on the "target" and -#' "location" columns via anti-join. +#' Removes rows from a data frame based on +#' target-specific excluded location abbreviations. +#' Supports uniform exclusions (character vector applied +#' to all targets) and target-specific exclusions (named +#' list with target names as keys). Validates target +#' names against the targets present in the data. +#' Filters on the "target" and "location" columns via +#' anti-join. #' #' @param data Data frame with "target" and "location" #' columns. @@ -115,24 +97,38 @@ flatten_excluded_locations <- function(excluded_locations) { #' excluded across all targets. If a named list, names #' should be target names (or "all" for global #' exclusions) mapping to character vectors of -#' abbreviations. Default: NULL (no exclusions). -#' @param supported_targets Character vector of valid -#' target names, as returned by -#' [get_hub_supported_targets()]. +#' abbreviations. #' #' @return Data frame with excluded rows removed. #' @export apply_target_location_exclusions <- function( data, - excluded_locations, - supported_targets + excluded_locations ) { - excluded_locations <- normalize_excluded_locations(excluded_locations) - if (is.null(excluded_locations)) { + normalized <- normalize_excluded_locations(excluded_locations) + if (is.null(normalized)) { return(data) } - exclusion_df <- build_exclusion_df(excluded_locations, supported_targets) + data_targets <- unique(data$target) + named_targets <- setdiff(names(normalized), "all") + unmatched <- setdiff(named_targets, data_targets) + if (length(unmatched) > 0) { + cli::cli_warn( + "{.arg excluded_locations} contains target{?s} not in data: {.val {unmatched}}." 
+ ) + } + + exclusion_df <- purrr::map_df(data_targets, \(tgt) { + excl_abbrs <- get_target_exclusions(normalized, tgt) + if (length(excl_abbrs) == 0) { + return(tibble::tibble(target = character(), location = character())) + } + tibble::tibble( + target = tgt, + location = forecasttools::us_location_recode(excl_abbrs, "abbr", "hub") + ) + }) dplyr::anti_join( data, @@ -140,3 +136,41 @@ apply_target_location_exclusions <- function( by = c("target", "location") ) } + + +#' Filter data to included locations only. +#' +#' Only keeps rows where location is in the set of +#' valid US locations minus any excluded locations for +#' that target. +#' +#' @param data Data frame with "target" and "location" +#' columns. +#' @param excluded_locations NULL, character vector, or +#' named list of US state/territory abbreviations to +#' exclude. +#' +#' @return Data frame filtered to included locations. +#' @noRd +filter_to_included_locations <- function( + data, + excluded_locations +) { + normalized <- normalize_excluded_locations(excluded_locations) + all_valid_codes <- forecasttools::us_location_table$code + + purrr::map_df(unique(data$target), \(tgt) { + if (!is.null(normalized)) { + excl_abbrs <- get_target_exclusions(normalized, tgt) + excl_codes <- forecasttools::us_location_recode( + excl_abbrs, + "abbr", + "hub" + ) + included_codes <- setdiff(all_valid_codes, excl_codes) + } else { + included_codes <- all_valid_codes + } + dplyr::filter(data, .data$target == tgt, .data$location %in% included_codes) + }) +} diff --git a/R/summarize_ref_date_forecasts.R b/R/summarize_ref_date_forecasts.R index 2e4078a..f8b07ea 100644 --- a/R/summarize_ref_date_forecasts.R +++ b/R/summarize_ref_date_forecasts.R @@ -60,11 +60,9 @@ summarize_ref_date_forecasts <- function( forecasttools::nullable_comparison(.data$model_id, "%in%", !!model_ids) ) - supported_targets <- get_hub_supported_targets(base_hub_path) current_forecasts <- apply_target_location_exclusions( current_forecasts, - 
excluded_locations, - supported_targets + excluded_locations ) if (nrow(current_forecasts) == 0) { diff --git a/R/update_hub_target_data.R b/R/update_hub_target_data.R index ef5d4f4..c7a6b08 100644 --- a/R/update_hub_target_data.R +++ b/R/update_hub_target_data.R @@ -296,14 +296,10 @@ update_hub_target_data <- function( new_data <- dplyr::bind_rows(nhsn_data, nssp_data) - if (!is.null(excluded_locations) && length(excluded_locations) > 0) { - supported_targets <- get_hub_supported_targets(base_hub_path) - new_data <- apply_target_location_exclusions( - new_data, - excluded_locations, - supported_targets - ) - } + new_data <- filter_to_included_locations( + new_data, + excluded_locations + ) if (fs::file_exists(output_file)) { existing_data <- forecasttools::read_tabular(output_file) diff --git a/R/write_viz_target_data.R b/R/write_viz_target_data.R index 080efad..07afd6c 100644 --- a/R/write_viz_target_data.R +++ b/R/write_viz_target_data.R @@ -84,12 +84,17 @@ write_viz_target_data <- function( target_data <- dplyr::bind_rows(nhsn_data, nssp_data) } - supported_targets <- get_hub_supported_targets(base_hub_path) - target_data <- apply_target_location_exclusions( - target_data, - excluded_locations, - supported_targets - ) + if (use_hub_data) { + target_data <- apply_target_location_exclusions( + target_data, + excluded_locations + ) + } else { + target_data <- filter_to_included_locations( + target_data, + excluded_locations + ) + } target_data <- target_data |> dplyr::mutate( diff --git a/R/write_webtext.R b/R/write_webtext.R index 49a8662..a135cd0 100644 --- a/R/write_webtext.R +++ b/R/write_webtext.R @@ -9,9 +9,10 @@ #' forecast. #' @param disease Character, disease name ("covid" or #' "rsv"). -#' @param excluded_locations NULL or character vector of US -#' state/territory abbreviations to exclude from expected -#' reporting locations. 
+#' @param excluded_locations Character vector of US +#' state/territory abbreviations to exclude from +#' expected reporting locations. Default: NULL (no +#' exclusions). #' #' @return Character string describing reporting issues, #' or empty string if no issues. @@ -189,8 +190,8 @@ compute_change_direction <- function( #' @param reference_date Date, the reference date. #' @param excluded_locations NULL, character vector, or #' named list of US state/territory abbreviations to -#' exclude. Flattened to a character vector for hospital -#' reporting latency checks. +#' exclude. Target-specific exclusions are resolved +#' before passing to hospital reporting latency checks. #' #' @return Named list of template placeholder values with #' keys prefixed by the target data type. @@ -306,10 +307,14 @@ compute_target_webtext_values <- function( # add hospital reporting flag if applicable if (is_hosp_target(target)) { + normalized <- normalize_excluded_locations(excluded_locations) + hosp_exclusions <- if (!is.null(normalized)) { + get_target_exclusions(normalized, target) + } values[["hosp_reporting_flag_text"]] <- check_hospital_reporting_latency( reference_date = reference_date, disease = disease, - excluded_locations = flatten_excluded_locations(excluded_locations) + excluded_locations = hosp_exclusions ) } diff --git a/man/apply_target_location_exclusions.Rd b/man/apply_target_location_exclusions.Rd index a852804..3531b9b 100644 --- a/man/apply_target_location_exclusions.Rd +++ b/man/apply_target_location_exclusions.Rd @@ -5,7 +5,7 @@ \title{Apply target-specific location exclusions to a data frame.} \usage{ -apply_target_location_exclusions(data, excluded_locations, supported_targets) +apply_target_location_exclusions(data, excluded_locations) } \arguments{ \item{data}{Data frame with "target" and "location" @@ -17,20 +17,18 @@ exclude. If a character vector, locations are excluded across all targets. 
If a named list, names should be target names (or "all" for global exclusions) mapping to character vectors of -abbreviations. Default: NULL (no exclusions).} - -\item{supported_targets}{Character vector of valid -target names, as returned by -\code{\link[=get_hub_supported_targets]{get_hub_supported_targets()}}.} +abbreviations.} } \value{ Data frame with excluded rows removed. } \description{ -Removes rows from data frame based on target-specific -excluded location abbreviations. Supports uniform -exclusions (character vector applied to all targets) -and target-specific exclusions (named list with target -names as keys). Filters on the "target" and -"location" columns via anti-join. +Removes rows from a data frame based on +target-specific excluded location abbreviations. +Supports uniform exclusions (character vector applied +to all targets) and target-specific exclusions (named +list with target names as keys). Validates target +names against the targets present in the data. +Filters on the "target" and "location" columns via +anti-join. } diff --git a/man/check_hospital_reporting_latency.Rd b/man/check_hospital_reporting_latency.Rd index ffb8a5b..1498584 100644 --- a/man/check_hospital_reporting_latency.Rd +++ b/man/check_hospital_reporting_latency.Rd @@ -17,9 +17,10 @@ forecast.} \item{disease}{Character, disease name ("covid" or "rsv").} -\item{excluded_locations}{NULL or character vector of US -state/territory abbreviations to exclude from expected -reporting locations.} +\item{excluded_locations}{Character vector of US +state/territory abbreviations to exclude from +expected reporting locations. 
Default: NULL (no +exclusions).} } \value{ Character string describing reporting issues, diff --git a/tests/testthat/test_update_hub_target_data.R b/tests/testthat/test_update_hub_target_data.R index fe8a32f..18ac7b0 100644 --- a/tests/testthat/test_update_hub_target_data.R +++ b/tests/testthat/test_update_hub_target_data.R @@ -21,14 +21,6 @@ purrr::walk(c("covid", "rsv"), function(disease) { output_file <- fs::path(base_hub_path, "target-data/time-series.parquet") fs::dir_create(fs::path(base_hub_path, "target-data")) - test_targets <- c( - glue::glue("wk inc {disease} hosp"), - glue::glue("wk inc {disease} prop ed visits") - ) - local_mocked_bindings( - get_hub_supported_targets = function(...) test_targets - ) - httptest2::with_mock_dir(mockdir_tests, { hubhelpr::update_hub_target_data( base_hub_path = base_hub_path, @@ -94,14 +86,6 @@ purrr::walk(c("covid", "rsv"), function(disease) { ) fs::dir_create(fs::path(base_hub_path, "target-data")) - test_targets <- c( - glue::glue("wk inc {disease} hosp"), - glue::glue("wk inc {disease} prop ed visits") - ) - local_mocked_bindings( - get_hub_supported_targets = function(...) test_targets - ) - httptest2::with_mock_dir(mockdir_tests, { # first run succeeds hubhelpr::update_hub_target_data(