From f4e9deb4277fc6726de1582e5bc260f9fb7b5368 Mon Sep 17 00:00:00 2001 From: hsonne Date: Sat, 11 Jun 2022 03:29:10 +0200 Subject: [PATCH 1/2] Clean baseFillUp.R - replace ## with # - use kwb.utils::selectColumns() - use df[[j]][i] instead of df[i, j]. It should be much faster - use kwb.utils::inRange() - return early - use lapply() and do.call(rbind, ...) - simplify names - numberOfNoStepMultiple -> n - colNumbers -> j - use drop = FALSE --- NAMESPACE | 1 + R/baseFillUp.R | 196 +++++++++++++++++++++++++------------------------ 2 files changed, 100 insertions(+), 97 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 14eb612..9ef55bd 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -66,6 +66,7 @@ importFrom(kwb.utils,isNullOrEmpty) importFrom(kwb.utils,posixColumnAtPosition) importFrom(kwb.utils,preparePdfIf) importFrom(kwb.utils,printIf) +importFrom(kwb.utils,selectColumns) importFrom(kwb.utils,warningDeprecated) importFrom(stats,aggregate) importFrom(stats,approx) diff --git a/R/baseFillUp.R b/R/baseFillUp.R index 67a887d..b3a59ab 100644 --- a/R/baseFillUp.R +++ b/R/baseFillUp.R @@ -25,7 +25,7 @@ #' @param dbg If TRUE, debug messages are shown #' @export #' @return Returns a data.frame -#' @importFrom kwb.utils catIf posixColumnAtPosition +#' @importFrom kwb.utils catIf posixColumnAtPosition selectColumns hsFillUp <- function( tseries, tsField = names(tseries)[kwb.utils::posixColumnAtPosition(tseries)[1]], step_s = 60, forceStep = TRUE, limits = NULL, interpol = TRUE, @@ -35,76 +35,73 @@ hsFillUp <- function( # Stop if there are unexpected argument values .stopOnBadArguments(tseries, step_s, limits) - ## Check if there are timestamps that are not at exact minutes (seconds = 0) - .warnOnNoStepMultiples(timestamps = tseries[[tsField]], step_s = step_s) + # Select the vector of times + timestamps <- kwb.utils::selectColumns(tseries, tsField) + + # Check if there are timestamps that are not at exact minutes (seconds = 0) + .warnOnNoStepMultiples(timestamps = timestamps, step_s = step_s) # If no limits are given use the full time range of the timestamps in the # given data frame as "artificial" limits if (is.null(limits)) { - timeRange <- range(tseries[[tsField]]) - limits <- data.frame(from = timeRange[1], to = timeRange[2]) + timeRange <- range(timestamps) + limits <- data.frame(from = timeRange[1L], to = timeRange[2L]) } - ## Initialise result (will become a data.frame) + # Initialise result (will become a data.frame) result <- NULL - ## Call fillup for each data block defined by the intervals given in limits + # Call fillup for each data block defined by the intervals given in limits # (may be only one interval) - for (i in seq_len(nrow(limits))) { + blocks <- lapply(seq_len(nrow(limits)), function(i) { - tbeg <- limits[i, 1] - tend <- limits[i, 2] + tbeg <- limits[[1L]][i] + tend <- limits[[2L]][i] kwb.utils::catIf(dbg, sprintf("tbeg: %s, tend: %s\n", tbeg, tend)) - ## Cut block of rows representing timestamps between tbeg and tend from - ## tseries - selected <- tseries[[tsField]] >= tbeg & tseries[[tsField]] <= tend + # Cut block of rows representing timestamps between tbeg and tend + inRange <- kwb.utils::inRange(timestamps, tbeg, tend) # Skip empty areas - if (any(selected)) { - - ## Call fillup for the data block, not giving limits - blockResult <- fillup( - tseries = tseries[selected, ], - tsField = tsField, - step_s = step_s, - forceStep = forceStep, - interpol = interpol, - includeOrig = includeOrig, - default = default, - dbg = dbg - ) - - ## add filled-up data block to result data.frame - result <- rbind(result, blockResult) - } - else { + if (!any(inRange)) { warning("No data available between the given limits: ", tbeg, " and ", tend, "!") + return() } - } + + # Call fillup for the whole data block, not giving limits + fillup( + tseries = tseries[inRange, , drop = FALSE], + tsField = tsField, + step_s = step_s, + forceStep = forceStep, + interpol = interpol, + includeOrig = includeOrig, + default = default, + dbg = dbg + ) + }) - result + do.call(rbind, blocks) } # .stopOnBadArguments ---------------------------------------------------------- .stopOnBadArguments <- function(tseries, step_s, limits) { - ## The timeseries must be given as a data.frame - if (class(tseries)[1] != "data.frame") { - stop(paste("In tseries, a data.frame must be given, containing timestamps", - "in the first column.\n")) + # The timeseries must be given as a data.frame + if (!inherits(tseries, "data.frame")) { + stop("In tseries, a data.frame must be given, containing timestamps", + "in the first column.") } - ## The time-step must be given as a number - if (! is.numeric(step_s)) { + # The time-step must be given as a number + if (!is.numeric(step_s)) { stop(sprintf("step_s must be numeric (is %s).", class(step_s))) } - ## If limits are given they must be of type matrix or data.frame - if (! is.null(limits) && class(limits) != "data.frame" - && class(limits) != "matrix") { + # If limits are given they must be of type matrix or data.frame + if (!is.null(limits) && !(class(limits) %in% c("data.frame", "matrix"))) { stop(sprintf("limits must be data.frame or matrix (is %s).", class(limits))) } } @@ -113,11 +110,10 @@ hsFillUp <- function( .warnOnNoStepMultiples <- function(timestamps, step_s) { isNoStepMultiple <- as.integer(timestamps) %% step_s != 0 - numberOfNoStepMultiples <- sum(isNoStepMultiple) - if (numberOfNoStepMultiples > 0) { + if (n <- sum(isNoStepMultiple)) { - cat("There are", numberOfNoStepMultiples, "timestamps", + cat("There are", n, "timestamps", "(out of a total of", length(timestamps), ") that are not multiples", "of the timestep (", step_s, "seconds ):\n") @@ -152,57 +148,57 @@ hsFillUp <- function( #' @return Returns a data.frame #' @export #' @importFrom kwb.datetime roundTime +#' @importFrom kwb.utils selectColumns fillup <- function( tseries, tsField, step_s, forceStep, interpol, includeOrig, default = NA, dbg = FALSE ) { - ## Initialise result (will become a data.frame) + # Initialise result (will become a data.frame) result <- NULL - ## Fill-up between the first and the last timestamp of the given time series - timeRange <- range(tseries[[tsField]]) + # Fill-up between the first and the last timestamp of the given time series + timeRange <- range(kwb.utils::selectColumns(tseries, tsField)) - ## If needed, get lower minimum or greater maximum representing multiples - ## of the time step. - tbeg <- kwb.datetime::roundTime(timeRange[1], step_s, 1) - tend <- kwb.datetime::roundTime(timeRange[2], step_s, 0) + # If needed, get lower minimum or greater maximum representing multiples + # of the time step. + tbeg <- kwb.datetime::roundTime(timeRange[1L], step_s, direction = 1L) + tend <- kwb.datetime::roundTime(timeRange[2L], step_s, direction = 0L) - ## Generate the complete series of "regular" timestamps (multiples of - ## time-step) between tbeg and tend + # Generate the complete series of "regular" timestamps (multiples of + # time-step) between tbeg and tend - ## Handle the special case of only one value - if (tbeg == tend) { - timestamps <- tbeg + # Handle the special case of only one value + timestamps <- if (tbeg == tend) { + tbeg } else { - timestamps <- seq(tbeg, tend, by = step_s) + seq(tbeg, tend, by = step_s) } tsBlock <- tseries if (dbg) { - .showStartAndEndOfBlock(tsBlock, tbeg, tend, timestamps) } - ## Merge all timestamps with time series block by joining the regular - ## timestamps (multiples of time-step) with the timestamps of the given - ## timeseries block: - ## - If forcStep is TRUE we do a "left join", where the result only contains - ## the "regular" timestamps (multiples of time-step). - ## - If forcStep is FALSE we do a "left or right join", where the result - ## contains both all "regular" timestamps (multiples of time-step) and all - ## timestamps contained in the original timeseries. + # Merge all timestamps with time series block by joining the regular + # timestamps (multiples of time-step) with the timestamps of the given + # timeseries block: + # - If forceStep is TRUE we do a "left join", where the result only contains + # the "regular" timestamps (multiples of time-step). + # - If forceStep is FALSE we do a "left or right join", where the result + # contains both all "regular" timestamps (multiples of time-step) and all + # timestamps contained in the original timeseries. tsBlock <- merge( x = data.frame(timestamps = timestamps), y = tsBlock, by.x = "timestamps", by.y = tsField, all.x = TRUE, - all.y = ! forceStep + all.y = !forceStep ) - ## Interpolate values, if desired + # Interpolate values, if desired if (interpol) { tsBlock <- .interpolateAllColumns( tsBlock = tsBlock, @@ -213,13 +209,13 @@ fillup <- function( ) } - ## Set result data frame + # Set result data frame result <- tsBlock - ## Get column names without timestamp column + # Get column names without timestamp column dataColumnNames <- setdiff(colnames(tseries), tsField) - ## Set column names in result data.frame + # Set column names in result data.frame names(result) <- .newColumnNames( columnNames = dataColumnNames, tsField = tsField, @@ -234,7 +230,7 @@ fillup <- function( includeOrig = includeOrig ) - result[, columns] + result[, columns, drop = FALSE] } # .showStartAndEndOfBlock ------------------------------------------------------ @@ -253,6 +249,7 @@ fillup <- function( } # .interpolateAllColumns ------------------------------------------------------- +#' @importFrom kwb.utils catIf selectColumns .interpolateAllColumns <- function( tsBlock, tseries, tsField, default = NA, dbg = FALSE ) @@ -260,20 +257,20 @@ fillup <- function( # Skip the timestamp field itself fields <- setdiff(colnames(tseries), tsField) - ## For each value field + # For each value field for (field in fields) { kwb.utils::catIf(dbg, sprintf("Interpolating field: %s\n", field)) interpolated <- .getInterpolatedValues( - timestamps = tseries[[tsField]], - values = tseries[[field]], + timestamps = kwb.utils::selectColumns(tseries, tsField), + values = kwb.utils::selectColumns(tseries, field), requiredTimestamps = tsBlock$timestamps, default = default, dbg = dbg ) - ## Add column with interpolated values to the result data.frame + # Add column with interpolated values to the result data.frame tsBlock <- cbind(tsBlock, interpolated) } @@ -285,47 +282,50 @@ fillup <- function( #' @noRd #' @noMd #' @importFrom stats approx -.getInterpolatedValues <- function -( +#' @importFrom kwb.utils catIf +.getInterpolatedValues <- function( timestamps, values, requiredTimestamps, default = NA, dbg = FALSE ) { - ## We need at least two non-NA values to interpolate - if (sum(!is.na(values)) > 1 ) { + # We need at least two non-NA values to interpolate + if (sum(!is.na(values)) > 1L) { # Calculate interpolation for this field. approx returns a list with # components x and y of which we use the y component. interpolated <- stats::approx( timestamps, values, xout = requiredTimestamps )$y - } - else { + + } else { # Determine a subsitute value. Either the value itself if there is only # one value or the given default value! substituteValue <- ifelse(length(values) == 1, values, default) - kwb.utils::catIf(dbg, "Not at least two non-NA values available for interpolation!\n", - "Using one value for all timestamps:", substituteValue, "\n") + kwb.utils::catIf( + dbg, "Not at least two non-NA values available for interpolation!\n", + "Using one value for all timestamps:", substituteValue, "\n" + ) interpolated <- rep(substituteValue, length(requiredTimestamps)) } interpolated - ### list with components \emph{x} (requiredTimestamps) and \emph{y} - ### (interpolated values) } # .newColumnNames -------------------------------------------------------------- +#' @importFrom kwb.utils catIf .newColumnNames <- function(columnNames, tsField, interpol, dbg = FALSE) { if (interpol) { - columnNames <- c(paste(columnNames, "orig", sep = "_"), columnNames) + columnNames <- c(paste0(columnNames, "_orig"), columnNames) } columnNames <- c(tsField, columnNames) - kwb.utils::catIf(dbg, sprintf("Column names: %s\n", paste(columnNames, collapse = ", "))) + kwb.utils::catIf( + dbg, sprintf("Column names: %s\n", paste(columnNames, collapse = ", ")) + ) columnNames } @@ -335,19 +335,19 @@ fillup <- function( numberOfColumns, numberOfDataColumns, interpol, includeOrig ) { - ## Reorder columns - columnNumbers <- 1 + # Start vector of column indices, starting with 1 + j <- 1L if (interpol) { - columnNumbers <- c(columnNumbers, (numberOfDataColumns + 2):numberOfColumns) + j <- c(j, (numberOfDataColumns + 2L):numberOfColumns) } - ## If desired, include original columns + # If desired, include original columns if (!interpol || includeOrig) { - columnNumbers <- c(columnNumbers, 2:(numberOfDataColumns + 1)) + j <- c(j, 2:(numberOfDataColumns + 1L)) } - columnNumbers + j } # demo_hsFillUp ---------------------------------------------------------------- @@ -357,7 +357,8 @@ fillup <- function( #' @return demo plot hsFillUp #' @export #' @importFrom stats rnorm -#' @importFrom graphics lines plot +#' @importFrom graphics lines plot +#' @importFrom kwb.datetime hsToPosix demo_hsFillUp <- function() { message( @@ -373,7 +374,8 @@ demo_hsFillUp <- function() ) limits <- data.frame( - from = kwb.datetime::hsToPosix("2010-03-28"), to = kwb.datetime::hsToPosix("2010-03-29") + from = kwb.datetime::hsToPosix("2010-03-28"), + to = kwb.datetime::hsToPosix("2010-03-29") ) df_2 <- hsFillUp(df_1, includeOrig = FALSE, limits = limits) From 4d111086c4c6be83fc572b5cc712019cd8da94c7 Mon Sep 17 00:00:00 2001 From: hsonne Date: Fri, 29 Jul 2022 14:36:30 +0200 Subject: [PATCH 2/2] Simplify dataFrameToXts() --- R/hsLibDateTime.R | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/R/hsLibDateTime.R b/R/hsLibDateTime.R index e3cc0b0..dcdc8b7 100644 --- a/R/hsLibDateTime.R +++ b/R/hsLibDateTime.R @@ -37,14 +37,12 @@ dataFrameToXts <- function( dataFrame, timeColumn = names(dataFrame)[kwb.utils::posixColumnAtPosition(dataFrame)[1]] ) { - numericColumns <- sapply( - names(dataFrame), FUN = function(x) is.numeric(dataFrame[[x]]) - ) + order.by <- dataFrame[[timeColumn]] xts::xts( - x = dataFrame[, numericColumns, drop = FALSE], - order.by = dataFrame[[timeColumn]], - tzone = attr(dataFrame[[timeColumn]], which = "tzone") + x = dataFrame[, sapply(dataFrame, is.numeric), drop = FALSE], + order.by = order.by, + tzone = attr(order.by, "tzone") ) }