diff --git a/NAMESPACE b/NAMESPACE index b3c28c1..690d3f5 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,10 +1,10 @@ # Generated by roxygen2: do not edit by hand +export(apply_target_location_exclusions) export(assert_data_up_to_date) export(check_authorized_users) export(check_changes_for_autoapproval) export(count_designated_models) -export(excluded_locations) export(generate_hub_baseline) export(generate_hub_ensemble) export(generate_oracle_output) @@ -23,7 +23,6 @@ export(get_nssp_col_name) export(get_round_ids_vec) export(get_target_data_type) export(get_target_label) -export(included_locations) export(is_ed_target) export(is_hosp_target) export(summarize_ref_date_forecasts) diff --git a/R/constants.R b/R/constants.R index 60522ae..fd6c2a3 100644 --- a/R/constants.R +++ b/R/constants.R @@ -1,74 +1 @@ -#' Two digits FIPS codes for locations excluded from Hubs' -#' target data. -#' -#' Excludes Virgin Islands (78), Northern Mariana -#' Islands (69), Guam (66), American Samoa (60), and Minor -#' Outlying Islands (74). -#' -#' @export -excluded_locations <- c("78", "74", "69", "66", "60") - -#' Two digits FIPS codes for locations included in Hubs' -#' target data. -#' -#' Includes 50 states, US national, DC, and Puerto Rico -#' (PR). Excludes Virgin Islands (78), Northern Mariana -#' Islands (69), Guam (66), American Samoa (60), and Minor -#' Outlying Islands (74). -#' -#' @export -included_locations <- c( - "01", - "02", - "04", - "05", - "06", - "08", - "09", - "10", - "11", - "12", - "13", - "15", - "16", - "17", - "18", - "19", - "20", - "21", - "22", - "23", - "24", - "25", - "26", - "27", - "28", - "29", - "30", - "31", - "32", - "33", - "34", - "35", - "36", - "37", - "38", - "39", - "40", - "41", - "42", - "44", - "45", - "46", - "47", - "48", - "49", - "50", - "51", - "53", - "54", - "55", - "56", - "72", - "US" -) +# constants used across hubhelpr functions. 
diff --git a/R/location_exclusions.R b/R/location_exclusions.R
new file mode 100644
index 0000000..3c2213d
--- /dev/null
+++ b/R/location_exclusions.R
@@ -0,0 +1,207 @@
+#' Normalize excluded locations to a named list.
+#'
+#' Converts a character vector or named list of excluded
+#' locations into a consistent named list format.
+#' Validates that all abbreviations are valid US
+#' state/territory abbreviations and that list input
+#' is uniquely named. Zero-length list entries are
+#' normalized to empty character vectors.
+#'
+#' @param excluded_locations NULL, character vector, or
+#' named list of character vectors.
+#'
+#' @return Named list of character vectors, or NULL if
+#' input is NULL or zero-length.
+#' @noRd
+normalize_excluded_locations <- function(excluded_locations) {
+  if (is.null(excluded_locations) || length(excluded_locations) == 0) {
+    return(NULL)
+  }
+  if (is.character(excluded_locations)) {
+    assert_valid_location_abbrs(excluded_locations)
+    return(list("all" = excluded_locations))
+  }
+  if (is.list(excluded_locations)) {
+    # an unnamed or duplicate-named list would otherwise be
+    # silently (partially) ignored by the `[[` lookups in
+    # get_target_exclusions().
+    checkmate::assert_list(
+      excluded_locations,
+      names = "unique",
+      .var.name = "excluded_locations"
+    )
+    normalized <- purrr::map(excluded_locations, function(x) {
+      # zero-length entries carry no exclusions; coerce them so
+      # that e.g. an empty list() does not fail the type check.
+      if (length(x) == 0) {
+        return(character(0))
+      }
+      checkmate::assert_character(
+        x,
+        .var.name = "excluded_locations list values"
+      )
+      assert_valid_location_abbrs(x)
+      x
+    })
+    return(normalized)
+  }
+  cli::cli_abort(
+    "{.arg excluded_locations} must be NULL, a character vector, or a named list."
+  )
+}
+
+
+#' Assert that location abbreviations are valid.
+#'
+#' Checks that all provided abbreviations are present
+#' in the US location table (from forecasttools).
+#' Errors with a message listing any invalid
+#' abbreviations.
+#'
+#' @param abbrs Character vector of abbreviations to
+#' validate.
+#'
+#' @return Invisible NULL. Called for side effects.
+#' @noRd
+assert_valid_location_abbrs <- function(abbrs) {
+  valid_abbrs <- forecasttools::us_location_table$abbr
+  invalid <- setdiff(abbrs, valid_abbrs)
+  if (length(invalid) > 0) {
+    cli::cli_abort(
+      "{.arg excluded_locations} contains invalid abbreviation{?s}: {.val {invalid}}."
+    )
+  }
+  invisible(NULL)
+}
+
+
+#' Get excluded abbreviations for a specific target.
+#'
+#' Extracts the abbreviations that should be excluded
+#' for a given target from a normalized exclusion list,
+#' combining global ("all") exclusions with any
+#' target-specific ones.
+#'
+#' @param normalized Named list as returned by
+#' `normalize_excluded_locations()`.
+#' @param target Character, the target name.
+#'
+#' @return Character vector of unique abbreviations to
+#' exclude for this target.
+#' @noRd
+get_target_exclusions <- function(normalized, target) {
+  unique(c(normalized[["all"]], normalized[[target]]))
+}
+
+
+#' Apply target-specific location exclusions to a data
+#' frame.
+#'
+#' Removes rows from a data frame based on
+#' target-specific excluded location abbreviations.
+#' Supports uniform exclusions (character vector applied
+#' to all targets) and target-specific exclusions (named
+#' list with target names as keys). Validates target
+#' names against the targets present in the data.
+#' Filters on the "target" and "location" columns via
+#' anti-join.
+#'
+#' @param data Data frame with "target" and "location"
+#' columns.
+#' @param excluded_locations NULL, character vector, or
+#' named list of US state/territory abbreviations to
+#' exclude. If a character vector, locations are
+#' excluded across all targets. If a named list, names
+#' should be target names (or "all" for global
+#' exclusions) mapping to character vectors of
+#' abbreviations.
+#'
+#' @return Data frame with excluded rows removed.
+#' @export
+apply_target_location_exclusions <- function(
+  data,
+  excluded_locations
+) {
+  normalized <- normalize_excluded_locations(excluded_locations)
+  if (is.null(normalized)) {
+    return(data)
+  }
+
+  checkmate::assert_names(
+    names(data),
+    must.include = c("target", "location"),
+    .var.name = "names(data)"
+  )
+
+  data_targets <- unique(data$target)
+  named_targets <- setdiff(names(normalized), "all")
+  unmatched <- setdiff(named_targets, data_targets)
+  if (length(unmatched) > 0) {
+    cli::cli_warn(
+      "{.arg excluded_locations} contains target{?s} not in data: {.val {unmatched}}."
+    )
+  }
+
+  # seed with a typed zero-row tibble so the join columns
+  # exist even when `data` has no rows (and hence no
+  # targets); binding an empty list alone would yield a
+  # zero-column tibble and make the anti-join error.
+  empty_exclusions <- tibble::tibble(
+    target = character(),
+    location = character()
+  )
+  exclusion_df <- dplyr::bind_rows(
+    empty_exclusions,
+    purrr::map(data_targets, \(tgt) {
+      excl_abbrs <- get_target_exclusions(normalized, tgt)
+      if (length(excl_abbrs) == 0) {
+        return(empty_exclusions)
+      }
+      tibble::tibble(
+        target = tgt,
+        location = forecasttools::us_location_recode(
+          excl_abbrs,
+          "abbr",
+          "hub"
+        )
+      )
+    })
+  )
+
+  dplyr::anti_join(
+    data,
+    exclusion_df,
+    by = c("target", "location")
+  )
+}
+
+
+#' Filter data to included locations only.
+#'
+#' Only keeps rows where location is in the set of
+#' valid US locations minus any excluded locations for
+#' that target. Row order and columns of `data` are
+#' preserved, including for zero-row input.
+#'
+#' @param data Data frame with "target" and "location"
+#' columns.
+#' @param excluded_locations NULL, character vector, or
+#' named list of US state/territory abbreviations to
+#' exclude.
+#'
+#' @return Data frame filtered to included locations.
+#' @noRd
+filter_to_included_locations <- function(
+  data,
+  excluded_locations
+) {
+  all_valid_codes <- forecasttools::us_location_table$code
+
+  # restrict to known locations first, then drop the
+  # per-target exclusions; delegating keeps validation and
+  # unmatched-target warnings consistent with
+  # apply_target_location_exclusions().
+  data |>
+    dplyr::filter(.data$location %in% !!all_valid_codes) |>
+    apply_target_location_exclusions(excluded_locations)
+}
diff --git a/R/summarize_ref_date_forecasts.R b/R/summarize_ref_date_forecasts.R
index b6fafb5..f8b07ea 100644
--- a/R/summarize_ref_date_forecasts.R
+++ b/R/summarize_ref_date_forecasts.R
@@ -1,72 +1,3 @@
-#' Normalize excluded locations to a named list.
-#'
-#' Converts a character vector or named list of excluded
-#' locations into a consistent named list format.
-#'
-#' @param excluded_locations NULL, character vector, or
-#' named list of character vector.
-#' -#' @return Named list of character vectors. -#' @noRd -normalize_excluded_locations <- function(excluded_locations) { - if (is.null(excluded_locations)) { - return(list()) - } - if (is.character(excluded_locations)) { - return(list("all" = excluded_locations)) - } - if (is.list(excluded_locations)) { - return(excluded_locations) - } - cli::cli_abort( - "{.arg excluded_locations} must be NULL, a character vector, or a named list." - ) -} - - -#' Build a target-location exclusion data frame. -#' -#' Constructs a tibble of target/location pairs to -#' exclude. Entries keyed by "all" are expanded into -#' one row per supported target. Errors if any named -#' targets in the exclusion list are not in -#' `supported_targets`. -#' -#' @param excluded_locations Named list as returned by -#' `normalize_excluded_locations()`. -#' @param supported_targets character vector of targets -#' the hub accepts, as returned by -#' `get_hub_supported_targets()`. -#' -#' @return A tibble with columns "target" and "location" -#' (hub codes). -#' @noRd -build_exclusion_df <- function(excluded_locations, supported_targets) { - named_targets <- setdiff(names(excluded_locations), "all") - invalid_targets <- setdiff(named_targets, supported_targets) - if (length(invalid_targets) > 0) { - cli::cli_abort( - "{.arg excluded_locations} contains unknown target{?s}: {.val {invalid_targets}}." - ) - } - - merged <- purrr::map( - purrr::set_names(supported_targets), - \(tgt) unique(c(excluded_locations[["all"]], excluded_locations[[tgt]])) - ) - - tibble::enframe(merged, name = "target", value = "location") |> - tidyr::unnest(cols = "location") |> - dplyr::mutate( - location = forecasttools::us_location_recode( - .data$location, - "abbr", - "hub" - ) - ) -} - - #' Summarize forecast hub data for a specific reference date. #' #' This function generates a tibble of forecast data @@ -84,13 +15,13 @@ build_exclusion_df <- function(excluded_locations, supported_targets) { #' and "population". 
Adds population-based calculations. #' @param horizons_to_include integer vector, horizons to #' include in the output. Default: c(0, 1, 2). -#' @param excluded_locations character vector or named list -#' specifying US state abbreviations to exclude. If a -#' character vector, locations are excluded across all -#' targets. If a named list, names should be target names -#' (or "all" for global exclusions) mapping to character -#' vectors of abbreviations. Converted to hub codes -#' internally. Default: NULL. +#' @param excluded_locations NULL, character vector, or +#' named list of US state/territory abbreviations to +#' exclude. If a character vector, locations are excluded +#' across all targets. If a named list, names should be +#' target names (or "all" for global exclusions) mapping +#' to character vectors of abbreviations. Converted to +#' hub codes internally. Default: NULL (no exclusions). #' @param targets character vector, target name(s) to filter #' forecasts. If NULL (default), does not filter by target. #' @param model_ids character vector of model IDs to include. 
@@ -110,7 +41,6 @@ summarize_ref_date_forecasts <- function( model_ids = NULL ) { reference_date <- lubridate::as_date(reference_date) - excluded_locations <- normalize_excluded_locations(excluded_locations) model_metadata <- hubData::load_model_metadata( base_hub_path, @@ -130,11 +60,10 @@ summarize_ref_date_forecasts <- function( forecasttools::nullable_comparison(.data$model_id, "%in%", !!model_ids) ) - supported_targets <- get_hub_supported_targets(base_hub_path) - exclusion_df <- build_exclusion_df(excluded_locations, supported_targets) - - current_forecasts <- current_forecasts |> - dplyr::anti_join(exclusion_df, by = c("target", "location")) + current_forecasts <- apply_target_location_exclusions( + current_forecasts, + excluded_locations + ) if (nrow(current_forecasts) == 0) { model_filter_msg <- if (!is.null(model_ids)) { diff --git a/R/update_hub_target_data.R b/R/update_hub_target_data.R index db6f1e6..c7a6b08 100644 --- a/R/update_hub_target_data.R +++ b/R/update_hub_target_data.R @@ -69,9 +69,6 @@ merge_target_data <- function( #' @param disease Disease name ("covid" or "rsv"). #' @param as_of As-of date of the data pull. Default is #' the system date as determined by [lubridate::today()]. -#' @param included_locations Vector of location codes to -#' include in the output. -#' Default value `hubhelpr::included_locations`. #' @param start_date First week-ending #' date to include for the NHSN dataset. Default value #' is NULL (no filtering). 
@@ -84,7 +81,6 @@ merge_target_data <- function( get_hubverse_format_nhsn_data <- function( disease, as_of = lubridate::today(), - included_locations = hubhelpr::included_locations, start_date = NULL, end_date = NULL ) { @@ -109,7 +105,6 @@ get_hubverse_format_nhsn_data <- function( as_of = !!as_of, target = glue::glue("wk inc {disease} hosp") ) |> - dplyr::filter(.data$location %in% !!included_locations) |> dplyr::select(tidyselect::all_of(hubverse_ts_req_cols)) return(hubverse_format_nhsn_data) @@ -124,9 +119,6 @@ get_hubverse_format_nhsn_data <- function( #' @param base_hub_path Path to the base hub directory. #' @param as_of As-of date of the data pull. Default is #' the system date as determined by [lubridate::today()]. -#' @param included_locations Vector of location codes to -#' include in the output. -#' Default value `hubhelpr::included_locations`. #' @param nssp_update_local Logical. Whether to update NSSP #' data from local file `auxiliary-data/latest.parquet` #' (default: FALSE). @@ -143,7 +135,6 @@ get_hubverse_format_nssp_data <- function( disease, base_hub_path, as_of = lubridate::today(), - included_locations = hubhelpr::included_locations, nssp_update_local = FALSE, start_date = NULL, end_date = NULL @@ -194,7 +185,6 @@ get_hubverse_format_nssp_data <- function( as_of = !!as_of, target = glue::glue("wk inc {disease} prop ed visits") ) |> - dplyr::filter(.data$location %in% !!included_locations) |> dplyr::select(tidyselect::all_of(hubverse_ts_req_cols)) |> dplyr::arrange(.data$date, .data$location) @@ -215,9 +205,13 @@ get_hubverse_format_nssp_data <- function( #' @param start_date First week-ending #' date to include for the NHSN dataset. Default value #' is "2024-11-09". -#' @param included_locations Vector of location codes to -#' include in the output. -#' Default value `hubhelpr::included_locations`. +#' @param excluded_locations NULL, character vector, or +#' named list of US state/territory abbreviations to +#' exclude. 
If a character vector, locations are excluded +#' across all targets. If a named list, names should be +#' target names (or "all" for global exclusions) mapping +#' to character vectors of abbreviations. Default: NULL +#' (no exclusions). #' @param legacy_file Logical. Whether to write legacy #' CSV output (default: FALSE). #' @param nssp_update_local Logical. Whether to update NSSP @@ -237,7 +231,7 @@ update_hub_target_data <- function( disease, as_of = lubridate::today(), start_date = lubridate::as_date("2024-11-09"), - included_locations = hubhelpr::included_locations, + excluded_locations = NULL, legacy_file = FALSE, nssp_update_local = FALSE, overwrite_existing = FALSE @@ -247,7 +241,6 @@ update_hub_target_data <- function( nhsn_data <- get_hubverse_format_nhsn_data( disease, as_of = as_of, - included_locations = included_locations, start_date = start_date ) @@ -263,7 +256,6 @@ update_hub_target_data <- function( disease, base_hub_path, as_of = as_of, - included_locations = included_locations, nssp_update_local = nssp_update_local ) @@ -304,6 +296,11 @@ update_hub_target_data <- function( new_data <- dplyr::bind_rows(nhsn_data, nssp_data) + new_data <- filter_to_included_locations( + new_data, + excluded_locations + ) + if (fs::file_exists(output_file)) { existing_data <- forecasttools::read_tabular(output_file) } else { diff --git a/R/write_ref_date_summary.R b/R/write_ref_date_summary.R index dcdc53b..94e5793 100644 --- a/R/write_ref_date_summary.R +++ b/R/write_ref_date_summary.R @@ -81,12 +81,13 @@ write_ref_date_summary <- function( #' include in the output. Default: c(0, 1, 2). #' @param population_data data frame with columns #' "location" and "population". Default: population_data. -#' @param excluded_locations character vector or named list -#' specifying US state abbreviations to exclude. If a -#' character vector, locations are excluded across all -#' targets. 
If a named list, names should be target names -#' (or "all" for global exclusions) mapping to character -#' vectors of abbreviations. Default: NULL. +#' @param excluded_locations NULL, character vector, or +#' named list of US state/territory abbreviations to +#' exclude. If a character vector, locations are excluded +#' across all targets. If a named list, names should be +#' target names (or "all" for global exclusions) mapping +#' to character vectors of abbreviations. Default: NULL +#' (no exclusions). #' @param output_format character, output file format. One #' of "csv", "tsv", or "parquet". Default: "csv". #' @param targets character vector, target name(s) to @@ -196,12 +197,13 @@ write_ref_date_summary_ens <- function( #' include in the output. Default: c(0, 1, 2). #' @param population_data data frame with columns #' "location" and "population". Default: [population_data]. -#' @param excluded_locations character vector or named list -#' specifying US state abbreviations to exclude. If a -#' character vector, locations are excluded across all -#' targets. If a named list, names should be target names -#' (or "all" for global exclusions) mapping to character -#' vectors of abbreviations. Default: NULL. +#' @param excluded_locations NULL, character vector, or +#' named list of US state/territory abbreviations to +#' exclude. If a character vector, locations are excluded +#' across all targets. If a named list, names should be +#' target names (or "all" for global exclusions) mapping +#' to character vectors of abbreviations. Default: NULL +#' (no exclusions). #' @param output_format character, output file format. One #' of "csv", "tsv", or "parquet". Default: "csv". #' @param targets character vector, target name(s) to diff --git a/R/write_viz_target_data.R b/R/write_viz_target_data.R index b7bc06e..07afd6c 100644 --- a/R/write_viz_target_data.R +++ b/R/write_viz_target_data.R @@ -27,16 +27,13 @@ #' @param end_date Date, latest date to include in data. 
#' Default: NULL (no filtering). Used only when #' use_hub_data = FALSE. -#' @param included_locations Character vector of location -#' codes to include in the output. Default -#' hubhelpr::included_locations. -#' @param excluded_locations Character vector or named list -#' specifying US state abbreviations to exclude. If a -#' character vector, locations are excluded across all -#' targets. If a named list, names should be target names -#' (or "all" for global exclusions) mapping to character -#' vectors of abbreviations. Converted to hub codes -#' internally. Default: NULL. +#' @param excluded_locations NULL, character vector, or +#' named list of US state/territory abbreviations to +#' exclude. If a character vector, locations are excluded +#' across all targets. If a named list, names should be +#' target names (or "all" for global exclusions) mapping +#' to character vectors of abbreviations. Converted to +#' hub codes internally. Default: NULL (no exclusions). #' @param output_format Character, output file format. One #' of "csv", "tsv", or "parquet". Default: "csv". #' @param overwrite_existing logical. 
If TRUE, overwrite @@ -55,7 +52,6 @@ write_viz_target_data <- function( as_of = "latest", start_date = NULL, end_date = NULL, - included_locations = hubhelpr::included_locations, excluded_locations = NULL, output_format = "csv", overwrite_existing = FALSE @@ -63,7 +59,6 @@ write_viz_target_data <- function( if (use_hub_data) { target_data <- hubData::connect_target_timeseries(base_hub_path) |> forecasttools::hub_target_data_as_of(as_of = as_of) |> - dplyr::filter(.data$location %in% !!included_locations) |> dplyr::collect() } else { nhsn_data <- get_hubverse_format_nhsn_data( @@ -89,12 +84,17 @@ write_viz_target_data <- function( target_data <- dplyr::bind_rows(nhsn_data, nssp_data) } - excluded_locations <- normalize_excluded_locations(excluded_locations) - supported_targets <- get_hub_supported_targets(base_hub_path) - exclusion_df <- build_exclusion_df(excluded_locations, supported_targets) - - target_data <- target_data |> - dplyr::anti_join(exclusion_df, by = c("target", "location")) + if (use_hub_data) { + target_data <- apply_target_location_exclusions( + target_data, + excluded_locations + ) + } else { + target_data <- filter_to_included_locations( + target_data, + excluded_locations + ) + } target_data <- target_data |> dplyr::mutate( diff --git a/R/write_webtext.R b/R/write_webtext.R index 75c0846..8c5b4b3 100644 --- a/R/write_webtext.R +++ b/R/write_webtext.R @@ -9,19 +9,34 @@ #' forecast. #' @param disease Character, disease name ("covid" or #' "rsv"). -#' @param included_locations Character vector of location -#' codes that are expected to report. Default -#' hubhelpr::included_locations. +#' @param excluded_locations Character vector of US +#' state/territory abbreviations to exclude from +#' expected reporting locations. Default: NULL (no +#' exclusions). #' #' @return Character string describing reporting issues, #' or empty string if no issues. 
check_hospital_reporting_rate <- function( reference_date, disease, - included_locations = hubhelpr::included_locations + excluded_locations = NULL ) { desired_weekendingdate <- as.Date(reference_date) - lubridate::dweeks(1) + if (!is.null(excluded_locations) && length(excluded_locations) > 0) { + excluded_codes <- forecasttools::us_location_recode( + excluded_locations, + "abbr", + "hub" + ) + } else { + excluded_codes <- character(0) + } + expected_locations <- setdiff( + forecasttools::us_location_table$code, + excluded_codes + ) + disease_abbr <- dplyr::case_match( disease, "covid" ~ "c19", @@ -33,7 +48,7 @@ check_hospital_reporting_rate <- function( ) included_jurisdictions <- forecasttools::us_location_recode( - included_locations, + expected_locations, "code", "hrd" ) @@ -63,7 +78,7 @@ check_hospital_reporting_rate <- function( ) locations_in_data <- unique(percent_hosp_reporting_below80$location) - missing_locations <- setdiff(included_locations, locations_in_data) + missing_locations <- setdiff(expected_locations, locations_in_data) if (length(missing_locations) > 0) { missing_location_names <- forecasttools::us_location_recode( @@ -173,8 +188,10 @@ compute_change_direction <- function( #' @param all_model_metadata Data frame of model metadata. #' @param hub_name Character, hub name. #' @param reference_date Date, the reference date. -#' @param included_locations Character vector of location -#' codes. +#' @param excluded_locations NULL, character vector, or +#' named list of US state/territory abbreviations to +#' exclude. Target-specific exclusions are resolved +#' before passing to hospital reporting latency checks. #' #' @return Named list of template placeholder values with #' keys prefixed by the target data type. 
@@ -188,7 +205,7 @@ compute_target_webtext_values <- function( all_model_metadata, hub_name, reference_date, - included_locations + excluded_locations ) { target_type <- get_target_data_type(target) @@ -290,10 +307,14 @@ compute_target_webtext_values <- function( # add hospital reporting flag if applicable if (is_hosp_target(target)) { + normalized <- normalize_excluded_locations(excluded_locations) + hosp_exclusions <- if (!is.null(normalized)) { + get_target_exclusions(normalized, target) + } values[["hosp_reporting_flag_text"]] <- check_hospital_reporting_rate( reference_date = reference_date, disease = disease, - included_locations = included_locations + excluded_locations = hosp_exclusions ) } @@ -317,9 +338,13 @@ compute_target_webtext_values <- function( #' with weekly summary files. #' @param targets Character vector of target names to #' generate text for. -#' @param included_locations Character vector of location -#' codes that are expected to report. Default -#' hubhelpr::included_locations. +#' @param excluded_locations NULL, character vector, or +#' named list of US state/territory abbreviations to +#' exclude. If a character vector, locations are excluded +#' across all targets. If a named list, names should be +#' target names (or "all" for global exclusions) mapping +#' to character vectors of abbreviations. Default: NULL +#' (no exclusions). #' @param input_format Character, input file format for #' reading summary data files. One of "csv", "tsv", or #' "parquet". Default: "csv". 
@@ -333,7 +358,7 @@ generate_webtext_block <- function( base_hub_path, weekly_data_path, targets, - included_locations = hubhelpr::included_locations, + excluded_locations = NULL, input_format = "csv" ) { checkmate::assert_choice(disease, choices = c("covid", "rsv")) @@ -395,7 +420,7 @@ generate_webtext_block <- function( all_model_metadata = all_model_metadata, hub_name = hub_name, reference_date = reference_date, - included_locations = included_locations + excluded_locations = excluded_locations ) |> purrr::list_flatten() @@ -453,9 +478,13 @@ generate_webtext_block <- function( #' @param targets Character vector of target names to #' generate text for. Default NULL discovers targets #' from hub time-series data. -#' @param included_locations Character vector of location -#' codes that are expected to report. Default -#' hubhelpr::included_locations. +#' @param excluded_locations NULL, character vector, or +#' named list of US state/territory abbreviations to +#' exclude. If a character vector, locations are excluded +#' across all targets. If a named list, names should be +#' target names (or "all" for global exclusions) mapping +#' to character vectors of abbreviations. Default: NULL +#' (no exclusions). #' @param input_format Character, input file format for #' reading summary data files. One of "csv", "tsv", or #' "parquet". Default: "csv". 
@@ -469,7 +498,7 @@ write_webtext <- function( base_hub_path, hub_reports_path, targets = NULL, - included_locations = hubhelpr::included_locations, + excluded_locations = NULL, input_format = "csv", overwrite_existing = FALSE ) { @@ -492,7 +521,7 @@ write_webtext <- function( base_hub_path = base_hub_path, weekly_data_path = weekly_data_path, targets = targets, - included_locations = included_locations, + excluded_locations = excluded_locations, input_format = input_format ) diff --git a/_typos.toml b/_typos.toml index cbf651c..aa5938f 100644 --- a/_typos.toml +++ b/_typos.toml @@ -1,10 +1,3 @@ -[default] -extend-ignore-identifiers-re = [ - "AttributeID.*Supress.*", -] - -[default.extend-identifiers] -AttributeIDSupressMenu = "AttributeIDSupressMenu" [default.extend-words] # words that should not be corrected diff --git a/actions/generate-viz-data/action.yaml b/actions/generate-viz-data/action.yaml index 707dcce..3e54226 100644 --- a/actions/generate-viz-data/action.yaml +++ b/actions/generate-viz-data/action.yaml @@ -28,7 +28,7 @@ inputs: required: false default: "" excluded_locations: - description: "JSON array of US state/territory abbreviations to exclude from output (e.g., '[\"VI\", \"GU\", \"AS\", \"MP\", \"UM\"]'). Also accepts a JSON object for target-specific exclusions (e.g., '{\"all\": [\"UM\"], \"wk inc covid hosp\": [\"MA\"]}'). Defaults to no exclusions." + description: "JSON array or object of US state/territory abbreviations to exclude. Array (e.g., '[\"VI\", \"GU\"]') excludes from all targets. Object (e.g., '{\"all\": [\"VI\"], \"wk inc covid hosp\": [\"GU\"]}') supports target-specific exclusions. Defaults to no exclusions." 
required: false default: "[]" @@ -91,7 +91,8 @@ runs: disease = disease, base_hub_path = base_hub_path, hub_reports_path = hub_reports_path, - targets = targets + targets = targets, + excluded_locations = excluded_locations ) writeLines( diff --git a/actions/update-target-data/action.yaml b/actions/update-target-data/action.yaml index 8a4bac8..fc3dabf 100644 --- a/actions/update-target-data/action.yaml +++ b/actions/update-target-data/action.yaml @@ -30,6 +30,10 @@ inputs: description: "Whether to overwrite existing target data files ('true' or 'false')." required: false default: "false" + excluded_locations: + description: "JSON array or object of US state/territory abbreviations to exclude. Array (e.g., '[\"VI\", \"GU\"]') excludes from all targets. Object (e.g., '{\"all\": [\"VI\"], \"wk inc covid hosp\": [\"GU\"]}') supports target-specific exclusions. Defaults to no exclusions." + required: false + default: "[]" runs: using: "composite" @@ -42,10 +46,16 @@ runs: DATA_CDC_GOV_API_KEY_SECRET: ${{ inputs.api_key_secret }} run: | today <- lubridate::today() + + excluded_locations <- jsonlite::fromJSON( + '${{ inputs.excluded_locations }}' + ) + hubhelpr::update_hub_target_data( base_hub_path = "${{ inputs.base_hub_path }}", disease = "${{ inputs.disease }}", as_of = today, + excluded_locations = excluded_locations, legacy_file = as.logical("${{ inputs.legacy_file }}"), nssp_update_local = as.logical("${{ inputs.nssp_update_local }}"), overwrite_existing = as.logical("${{ inputs.overwrite_existing }}") diff --git a/man/apply_target_location_exclusions.Rd b/man/apply_target_location_exclusions.Rd new file mode 100644 index 0000000..3531b9b --- /dev/null +++ b/man/apply_target_location_exclusions.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/location_exclusions.R +\name{apply_target_location_exclusions} +\alias{apply_target_location_exclusions} +\title{Apply target-specific location exclusions to a data +frame.} 
+\usage{ +apply_target_location_exclusions(data, excluded_locations) +} +\arguments{ +\item{data}{Data frame with "target" and "location" +columns.} + +\item{excluded_locations}{NULL, character vector, or +named list of US state/territory abbreviations to +exclude. If a character vector, locations are +excluded across all targets. If a named list, names +should be target names (or "all" for global +exclusions) mapping to character vectors of +abbreviations.} +} +\value{ +Data frame with excluded rows removed. +} +\description{ +Removes rows from a data frame based on +target-specific excluded location abbreviations. +Supports uniform exclusions (character vector applied +to all targets) and target-specific exclusions (named +list with target names as keys). Validates target +names against the targets present in the data. +Filters on the "target" and "location" columns via +anti-join. +} diff --git a/man/check_hospital_reporting_rate.Rd b/man/check_hospital_reporting_rate.Rd index 491db40..03835bb 100644 --- a/man/check_hospital_reporting_rate.Rd +++ b/man/check_hospital_reporting_rate.Rd @@ -7,7 +7,7 @@ check_hospital_reporting_rate( reference_date, disease, - included_locations = hubhelpr::included_locations + excluded_locations = NULL ) } \arguments{ @@ -17,9 +17,10 @@ forecast.} \item{disease}{Character, disease name ("covid" or "rsv").} -\item{included_locations}{Character vector of location -codes that are expected to report. Default -hubhelpr::included_locations.} +\item{excluded_locations}{Character vector of US +state/territory abbreviations to exclude from +expected reporting locations. 
Default: NULL (no +exclusions).} } \value{ Character string describing reporting issues, diff --git a/man/excluded_locations.Rd b/man/excluded_locations.Rd deleted file mode 100644 index 8686e50..0000000 --- a/man/excluded_locations.Rd +++ /dev/null @@ -1,19 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/constants.R -\docType{data} -\name{excluded_locations} -\alias{excluded_locations} -\title{Two digits FIPS codes for locations excluded from Hubs' -target data.} -\format{ -An object of class \code{character} of length 5. -} -\usage{ -excluded_locations -} -\description{ -Excludes Virgin Islands (78), Northern Mariana -Islands (69), Guam (66), American Samoa (60), and Minor -Outlying Islands (74). -} -\keyword{datasets} diff --git a/man/generate_webtext_block.Rd b/man/generate_webtext_block.Rd index 4be93b1..3532c69 100644 --- a/man/generate_webtext_block.Rd +++ b/man/generate_webtext_block.Rd @@ -10,7 +10,7 @@ generate_webtext_block( base_hub_path, weekly_data_path, targets, - included_locations = hubhelpr::included_locations, + excluded_locations = NULL, input_format = "csv" ) } @@ -30,9 +30,13 @@ with weekly summary files.} \item{targets}{Character vector of target names to generate text for.} -\item{included_locations}{Character vector of location -codes that are expected to report. Default -hubhelpr::included_locations.} +\item{excluded_locations}{NULL, character vector, or +named list of US state/territory abbreviations to +exclude. If a character vector, locations are excluded +across all targets. If a named list, names should be +target names (or "all" for global exclusions) mapping +to character vectors of abbreviations. Default: NULL +(no exclusions).} \item{input_format}{Character, input file format for reading summary data files. 
One of "csv", "tsv", or diff --git a/man/get_hubverse_format_nhsn_data.Rd b/man/get_hubverse_format_nhsn_data.Rd index 405d684..e152a75 100644 --- a/man/get_hubverse_format_nhsn_data.Rd +++ b/man/get_hubverse_format_nhsn_data.Rd @@ -7,7 +7,6 @@ get_hubverse_format_nhsn_data( disease, as_of = lubridate::today(), - included_locations = hubhelpr::included_locations, start_date = NULL, end_date = NULL ) @@ -18,10 +17,6 @@ get_hubverse_format_nhsn_data( \item{as_of}{As-of date of the data pull. Default is the system date as determined by \code{\link[lubridate:now]{lubridate::today()}}.} -\item{included_locations}{Vector of location codes to -include in the output. -Default value \code{hubhelpr::included_locations}.} - \item{start_date}{First week-ending date to include for the NHSN dataset. Default value is NULL (no filtering).} diff --git a/man/get_hubverse_format_nssp_data.Rd b/man/get_hubverse_format_nssp_data.Rd index 099dc81..1685985 100644 --- a/man/get_hubverse_format_nssp_data.Rd +++ b/man/get_hubverse_format_nssp_data.Rd @@ -8,7 +8,6 @@ get_hubverse_format_nssp_data( disease, base_hub_path, as_of = lubridate::today(), - included_locations = hubhelpr::included_locations, nssp_update_local = FALSE, start_date = NULL, end_date = NULL @@ -22,10 +21,6 @@ get_hubverse_format_nssp_data( \item{as_of}{As-of date of the data pull. Default is the system date as determined by \code{\link[lubridate:now]{lubridate::today()}}.} -\item{included_locations}{Vector of location codes to -include in the output. -Default value \code{hubhelpr::included_locations}.} - \item{nssp_update_local}{Logical. 
Whether to update NSSP data from local file \code{auxiliary-data/latest.parquet} (default: FALSE).} diff --git a/man/included_locations.Rd b/man/included_locations.Rd deleted file mode 100644 index 593e3e8..0000000 --- a/man/included_locations.Rd +++ /dev/null @@ -1,20 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/constants.R -\docType{data} -\name{included_locations} -\alias{included_locations} -\title{Two digits FIPS codes for locations included in Hubs' -target data.} -\format{ -An object of class \code{character} of length 53. -} -\usage{ -included_locations -} -\description{ -Includes 50 states, US national, DC, and Puerto Rico -(PR). Excludes Virgin Islands (78), Northern Mariana -Islands (69), Guam (66), American Samoa (60), and Minor -Outlying Islands (74). -} -\keyword{datasets} diff --git a/man/summarize_ref_date_forecasts.Rd b/man/summarize_ref_date_forecasts.Rd index 07659b3..93d3ac3 100644 --- a/man/summarize_ref_date_forecasts.Rd +++ b/man/summarize_ref_date_forecasts.Rd @@ -31,13 +31,13 @@ and "population". Adds population-based calculations.} \item{horizons_to_include}{integer vector, horizons to include in the output. Default: c(0, 1, 2).} -\item{excluded_locations}{character vector or named list -specifying US state abbreviations to exclude. If a -character vector, locations are excluded across all -targets. If a named list, names should be target names -(or "all" for global exclusions) mapping to character -vectors of abbreviations. Converted to hub codes -internally. Default: NULL.} +\item{excluded_locations}{NULL, character vector, or +named list of US state/territory abbreviations to +exclude. If a character vector, locations are excluded +across all targets. If a named list, names should be +target names (or "all" for global exclusions) mapping +to character vectors of abbreviations. Converted to +hub codes internally. 
Default: NULL (no exclusions).} \item{targets}{character vector, target name(s) to filter forecasts. If NULL (default), does not filter by target.} diff --git a/man/update_hub_target_data.Rd b/man/update_hub_target_data.Rd index 394abc3..9d4efde 100644 --- a/man/update_hub_target_data.Rd +++ b/man/update_hub_target_data.Rd @@ -9,7 +9,7 @@ update_hub_target_data( disease, as_of = lubridate::today(), start_date = lubridate::as_date("2024-11-09"), - included_locations = hubhelpr::included_locations, + excluded_locations = NULL, legacy_file = FALSE, nssp_update_local = FALSE, overwrite_existing = FALSE @@ -27,9 +27,13 @@ the system date as determined by \code{\link[lubridate:now]{lubridate::today()}} date to include for the NHSN dataset. Default value is "2024-11-09".} -\item{included_locations}{Vector of location codes to -include in the output. -Default value \code{hubhelpr::included_locations}.} +\item{excluded_locations}{NULL, character vector, or +named list of US state/territory abbreviations to +exclude. If a character vector, locations are excluded +across all targets. If a named list, names should be +target names (or "all" for global exclusions) mapping +to character vectors of abbreviations. Default: NULL +(no exclusions).} \item{legacy_file}{Logical. Whether to write legacy CSV output (default: FALSE).} diff --git a/man/write_ref_date_summary_all.Rd b/man/write_ref_date_summary_all.Rd index b63d78a..3266a9e 100644 --- a/man/write_ref_date_summary_all.Rd +++ b/man/write_ref_date_summary_all.Rd @@ -35,12 +35,13 @@ include in the output. Default: c(0, 1, 2).} \item{population_data}{data frame with columns "location" and "population". Default: \link{population_data}.} -\item{excluded_locations}{character vector or named list -specifying US state abbreviations to exclude. If a -character vector, locations are excluded across all -targets. 
If a named list, names should be target names -(or "all" for global exclusions) mapping to character -vectors of abbreviations. Default: NULL.} +\item{excluded_locations}{NULL, character vector, or +named list of US state/territory abbreviations to +exclude. If a character vector, locations are excluded +across all targets. If a named list, names should be +target names (or "all" for global exclusions) mapping +to character vectors of abbreviations. Default: NULL +(no exclusions).} \item{output_format}{character, output file format. One of "csv", "tsv", or "parquet". Default: "csv".} diff --git a/man/write_ref_date_summary_ens.Rd b/man/write_ref_date_summary_ens.Rd index fabd11d..0e95a4f 100644 --- a/man/write_ref_date_summary_ens.Rd +++ b/man/write_ref_date_summary_ens.Rd @@ -36,12 +36,13 @@ include in the output. Default: c(0, 1, 2).} \item{population_data}{data frame with columns "location" and "population". Default: population_data.} -\item{excluded_locations}{character vector or named list -specifying US state abbreviations to exclude. If a -character vector, locations are excluded across all -targets. If a named list, names should be target names -(or "all" for global exclusions) mapping to character -vectors of abbreviations. Default: NULL.} +\item{excluded_locations}{NULL, character vector, or +named list of US state/territory abbreviations to +exclude. If a character vector, locations are excluded +across all targets. If a named list, names should be +target names (or "all" for global exclusions) mapping +to character vectors of abbreviations. Default: NULL +(no exclusions).} \item{output_format}{character, output file format. One of "csv", "tsv", or "parquet". 
Default: "csv".} diff --git a/man/write_viz_target_data.Rd b/man/write_viz_target_data.Rd index 97f6213..7802647 100644 --- a/man/write_viz_target_data.Rd +++ b/man/write_viz_target_data.Rd @@ -13,7 +13,6 @@ write_viz_target_data( as_of = "latest", start_date = NULL, end_date = NULL, - included_locations = hubhelpr::included_locations, excluded_locations = NULL, output_format = "csv", overwrite_existing = FALSE @@ -48,17 +47,13 @@ use_hub_data = FALSE.} Default: NULL (no filtering). Used only when use_hub_data = FALSE.} -\item{included_locations}{Character vector of location -codes to include in the output. Default -hubhelpr::included_locations.} - -\item{excluded_locations}{Character vector or named list -specifying US state abbreviations to exclude. If a -character vector, locations are excluded across all -targets. If a named list, names should be target names -(or "all" for global exclusions) mapping to character -vectors of abbreviations. Converted to hub codes -internally. Default: NULL.} +\item{excluded_locations}{NULL, character vector, or +named list of US state/territory abbreviations to +exclude. If a character vector, locations are excluded +across all targets. If a named list, names should be +target names (or "all" for global exclusions) mapping +to character vectors of abbreviations. Converted to +hub codes internally. Default: NULL (no exclusions).} \item{output_format}{Character, output file format. One of "csv", "tsv", or "parquet". Default: "csv".} diff --git a/man/write_webtext.Rd b/man/write_webtext.Rd index ec29a61..c7d54b4 100644 --- a/man/write_webtext.Rd +++ b/man/write_webtext.Rd @@ -11,7 +11,7 @@ write_webtext( base_hub_path, hub_reports_path, targets = NULL, - included_locations = hubhelpr::included_locations, + excluded_locations = NULL, input_format = "csv", overwrite_existing = FALSE ) @@ -32,9 +32,13 @@ reports directory.} generate text for. 
Default NULL discovers targets from hub time-series data.} -\item{included_locations}{Character vector of location -codes that are expected to report. Default -hubhelpr::included_locations.} +\item{excluded_locations}{NULL, character vector, or +named list of US state/territory abbreviations to +exclude. If a character vector, locations are excluded +across all targets. If a named list, names should be +target names (or "all" for global exclusions) mapping +to character vectors of abbreviations. Default: NULL +(no exclusions).} \item{input_format}{Character, input file format for reading summary data files. One of "csv", "tsv", or diff --git a/tests/testthat/test_update_hub_target_data.R b/tests/testthat/test_update_hub_target_data.R index 8271635..18ac7b0 100644 --- a/tests/testthat/test_update_hub_target_data.R +++ b/tests/testthat/test_update_hub_target_data.R @@ -11,6 +11,8 @@ if (fs::dir_exists(mockdir_tests)) { ) } +test_excluded_locations <- c("VI", "GU", "AS", "MP", "UM") + purrr::walk(c("covid", "rsv"), function(disease) { test_that( glue::glue("update_hub_target_data returns expected data for {disease}"), @@ -24,6 +26,7 @@ purrr::walk(c("covid", "rsv"), function(disease) { base_hub_path = base_hub_path, disease = disease, as_of = lubridate::as_date("2025-08-18"), + excluded_locations = test_excluded_locations ) target_ts <- forecasttools::read_tabular_file(output_file) @@ -38,9 +41,17 @@ purrr::walk(c("covid", "rsv"), function(disease) { glue::glue("wk inc {disease} hosp") ) ) + excluded_codes <- forecasttools::us_location_recode( + test_excluded_locations, + "abbr", + "hub" + ) expect_setequal( unique(target_ts$location), - setdiff(forecasttools::us_location_table$code, excluded_locations) + setdiff( + forecasttools::us_location_table$code, + excluded_codes + ) ) }) } @@ -80,7 +91,8 @@ purrr::walk(c("covid", "rsv"), function(disease) { hubhelpr::update_hub_target_data( base_hub_path = base_hub_path, disease = disease, - as_of = 
lubridate::as_date("2025-08-18") + as_of = lubridate::as_date("2025-08-18"), + excluded_locations = test_excluded_locations ) # second run with same data errors by default @@ -88,7 +100,8 @@ purrr::walk(c("covid", "rsv"), function(disease) { hubhelpr::update_hub_target_data( base_hub_path = base_hub_path, disease = disease, - as_of = lubridate::as_date("2025-08-18") + as_of = lubridate::as_date("2025-08-18"), + excluded_locations = test_excluded_locations ), "overwrite" ) @@ -99,6 +112,7 @@ purrr::walk(c("covid", "rsv"), function(disease) { base_hub_path = base_hub_path, disease = disease, as_of = lubridate::as_date("2025-08-18"), + excluded_locations = test_excluded_locations, overwrite_existing = TRUE ) })