From 7745fd3abcfa1c504f51296d8804587f34964e6c Mon Sep 17 00:00:00 2001 From: Patrik Schilter Date: Tue, 12 Aug 2025 01:13:40 +0200 Subject: [PATCH 01/26] clv.time: Accept observation.end when setting periods incl fix tests --- R/class_clv_time.R | 29 +++++--- .../helper_testthat_correctness_clvtime.R | 72 +++++++++++++++---- 2 files changed, 78 insertions(+), 23 deletions(-) diff --git a/R/class_clv_time.R b/R/class_clv_time.R index 6a32a3a5..328a26d5 100644 --- a/R/class_clv_time.R +++ b/R/class_clv_time.R @@ -75,10 +75,24 @@ clv.time.has.holdout <- function(clv.time){ # set.sample.periods ------------------------------------------------------------------------ #' @importFrom lubridate period -clv.time.set.sample.periods <- function(clv.time, tp.first.transaction, tp.last.transaction, user.estimation.end){ +clv.time.set.sample.periods <- function(clv.time, tp.first.transaction, tp.last.transaction, user.estimation.end, user.observation.end){ tp.estimation.start <- tp.first.transaction + if(is.null(user.observation.end)){ + tp.observation.end <- tp.last.transaction + }else{ + tp.observation.end <- clv.time.convert.user.input.to.timepoint( + clv.time=clv.time, + user.timepoint=user.observation.end) + + # Observation end may not be before last transaction + if(tp.observation.end < tp.last.transaction){ + stop("The given observation.end may not be before the last recorded transaction!") + } + } + + if(!is.null(user.estimation.end)){ # specific end @@ -100,22 +114,21 @@ clv.time.set.sample.periods <- function(clv.time, tp.first.transaction, tp.last. # Need to be 2 periods because otherwise for days, holdout can be not on estimation.end but still be of length zero # ie 2 periods to still have 1 as holdout - if(tp.estimation.end > tp.last.transaction-clv.time.number.timeunits.to.timeperiod(clv.time, 2L)) - stop("Parameter estimation.split needs to indicate a point at least 2 periods before the last transaction!", call. = FALSE) + if(tp.estimation.end > tp.observation.end - clv.time.number.timeunits.to.timeperiod(clv.time, 2L)) + stop("Parameter estimation.split needs to indicate a point in time such that it yields a holdout period of at least 2 time.units!", call. = FALSE) # + 1 day is the same for all because most fine-grained change that Date can do tp.holdout.start <- tp.estimation.end + clv.time.epsilon(clv.time=clv.time) - tp.holdout.end <- tp.last.transaction + tp.holdout.end <- tp.observation.end holdout.period.in.tu <- clv.time.interval.in.number.tu(clv.time, interv=interval(start = tp.holdout.start, end = tp.holdout.end)) }else{ - # NULL: no specific end - until end of data (last transaction) - # **TODO: last transaction or full period where last transaction is in? + # NULL: no specific end - until observation end - # tp.holdout.start and .end HAVE to be end of estimation period as this is used elsewhere! + # tp.holdout.start/.end and HAVE to be end of estimation period as this is used elsewhere! # ie to ensure prediction.end (with clv.time.get.prediction.table) finds correct end if user gives NULL - tp.estimation.end <- tp.last.transaction + tp.estimation.end <- tp.observation.end tp.holdout.start <- tp.estimation.end tp.holdout.end <- tp.estimation.end holdout.period.in.tu <- 0 diff --git a/tests/testthat/helper_testthat_correctness_clvtime.R b/tests/testthat/helper_testthat_correctness_clvtime.R index b0d0f1c5..835e02d6 100644 --- a/tests/testthat/helper_testthat_correctness_clvtime.R +++ b/tests/testthat/helper_testthat_correctness_clvtime.R @@ -27,23 +27,35 @@ fct.helper.clv.time.create.test.objects <- function(with.holdout){ pred.tp.last <- as.Date("2008-09-27") if(with.holdout){ expect_silent(clv.t.hours <-clv.time.set.sample.periods(clv.t.hours, user.estimation.end = 37, + user.observation.end = NULL, tp.first.transaction = lubridate::ymd("2005-01-20", tz="UTC"), tp.last.transaction = lubridate::ymd("2008-09-27", tz="UTC"))) expect_silent(clv.t.days <-clv.time.set.sample.periods(clv.t.days, user.estimation.end = 37, + user.observation.end = NULL, tp.first.transaction = pred.tp.first, tp.last.transaction = pred.tp.last)) expect_silent(clv.t.weeks <-clv.time.set.sample.periods(clv.t.weeks, user.estimation.end = 37, + user.observation.end = NULL, tp.first.transaction = pred.tp.first, tp.last.transaction = pred.tp.last)) expect_silent(clv.t.years <-clv.time.set.sample.periods(clv.t.years, user.estimation.end = 1, + user.observation.end = NULL, tp.first.transaction = pred.tp.first, tp.last.transaction = pred.tp.last)) }else{ - expect_silent(clv.t.hours <-clv.time.set.sample.periods(clv.t.hours, user.estimation.end = NULL, + expect_silent(clv.t.hours <-clv.time.set.sample.periods(clv.t.hours, + user.estimation.end = NULL, + user.observation.end = NULL, tp.first.transaction = lubridate::ymd("2005-01-20", tz="UTC"), tp.last.transaction = lubridate::ymd("2008-09-27", tz="UTC"))) - expect_silent(clv.t.days <-clv.time.set.sample.periods(clv.t.days, user.estimation.end = NULL, + expect_silent(clv.t.days <-clv.time.set.sample.periods(clv.t.days, + user.estimation.end = NULL, + user.observation.end = NULL, tp.first.transaction = pred.tp.first, tp.last.transaction = pred.tp.last)) - expect_silent(clv.t.weeks <-clv.time.set.sample.periods(clv.t.weeks, user.estimation.end = NULL, + expect_silent(clv.t.weeks <-clv.time.set.sample.periods(clv.t.weeks, + user.estimation.end = NULL, + user.observation.end = NULL, tp.first.transaction = pred.tp.first, tp.last.transaction = pred.tp.last)) - expect_silent(clv.t.years <-clv.time.set.sample.periods(clv.t.years, user.estimation.end = NULL, + expect_silent(clv.t.years <-clv.time.set.sample.periods(clv.t.years, + user.estimation.end = NULL, + user.observation.end = NULL, tp.first.transaction = pred.tp.first, tp.last.transaction = pred.tp.last)) } @@ -96,7 +108,9 @@ fct.testthat.correctness.clvtime.set.sample.periods.estimation.start <- function tp.first <- fct.helper.clv.time.correct.datetype("2018-01-01", clv.t) tp.last <- fct.helper.clv.time.correct.datetype("2019-06-15", clv.t) - expect_silent(clv.t <- clv.time.set.sample.periods(clv.t, user.estimation.end = NULL, + expect_silent(clv.t <- clv.time.set.sample.periods(clv.t, + user.estimation.end = NULL, + user.observation.end = NULL, tp.first.transaction = tp.first, tp.last.transaction = tp.last)) expect_equal(clv.t@timepoint.estimation.start, tp.first) @@ -112,7 +126,10 @@ fct.testthat.correctness.clvtime.set.sample.periods.no.estimation.end <- functio tp.last <- fct.helper.clv.time.correct.datetype("2019-06-15", clv.t) # Dates - expect_silent(clv.t <- clv.time.set.sample.periods(clv.t, user.estimation.end = NULL, tp.first.transaction = tp.first, + expect_silent(clv.t <- clv.time.set.sample.periods(clv.t, + user.estimation.end = NULL, + user.observation.end = NULL, + tp.first.transaction = tp.first, tp.last.transaction = tp.last)) expect_equal(clv.t@timepoint.estimation.end, tp.last) expect_equal(clv.t@timepoint.holdout.start, tp.last) @@ -135,7 +152,9 @@ fct.testthat.correctness.clvtime.set.sample.periods.numeric.estimation.end <- fu NULL) stopifnot(!is.null(splitting.end)) - expect_silent(clv.t <- clv.time.set.sample.periods(clv.t, user.estimation.end = splitting.end, + expect_silent(clv.t <- clv.time.set.sample.periods(clv.t, + user.estimation.end = splitting.end, + user.observation.end = NULL, tp.first.transaction =tp.first, tp.last.transaction = tp.last)) expect_equal(clv.t@timepoint.estimation.end, tp.first+lubridate::period(splitting.end, period.type)) @@ -158,7 +177,9 @@ fct.testthat.correctness.clvtime.set.sample.periods.warn.partial.period <- funct NULL) stopifnot(!is.null(splitting.end)) - expect_warning(clv.t <- clv.time.set.sample.periods(clv.t, user.estimation.end = splitting.end, + expect_warning(clv.t <- clv.time.set.sample.periods(clv.t, + user.estimation.end = splitting.end, + user.observation.end = NULL, tp.first.transaction =tp.first, tp.last.transaction = tp.last), regexp = "partial periods") @@ -176,22 +197,26 @@ fct.testthat.correctness.clvtime.set.sample.periods.stop.estimation.period.less. # Numeric expect_error(clv.time.set.sample.periods(clv.t, user.estimation.end = 0, + user.observation.end = NULL, tp.first.transaction = tp.first, tp.last.transaction = tp.last), regexp = "1 time.unit after") expect_error(clv.time.set.sample.periods(clv.t, user.estimation.end = -3, + user.observation.end = NULL, tp.first.transaction = tp.first, tp.last.transaction = tp.last), regexp = "1 time.unit after") # Date expect_error(clv.time.set.sample.periods(clv.t, user.estimation.end = tp.first, + user.observation.end = NULL, tp.first.transaction = tp.first, tp.last.transaction = tp.last), regexp = "1 time.unit after") expect_error(clv.time.set.sample.periods(clv.t, user.estimation.end = tp.first-lubridate::days(1), + user.observation.end = NULL, tp.first.transaction = tp.first, tp.last.transaction = tp.last), regexp = "1 time.unit after") @@ -199,21 +224,25 @@ fct.testthat.correctness.clvtime.set.sample.periods.stop.estimation.period.less. expect_error(clv.time.set.sample.periods(clv.time = clv.t.hours, user.estimation.end = "2018-01-01 00:35:49", + user.observation.end = NULL, tp.first.transaction = fct.helper.clv.time.correct.datetype("2018-01-01", clv.t.hours), tp.last.transaction = fct.helper.clv.time.correct.datetype("2025-06-15", clv.t.hours)), regexp = "1 time.unit after") expect_error(clv.time.set.sample.periods(clv.time = clv.t.days, user.estimation.end = "2018-01-01", + user.observation.end = NULL, tp.first.transaction = fct.helper.clv.time.correct.datetype("2018-01-01", clv.t.days), tp.last.transaction = fct.helper.clv.time.correct.datetype("2025-06-15", clv.t.days)), regexp = "1 time.unit after") expect_error(clv.time.set.sample.periods(clv.time = clv.t.weeks, user.estimation.end = "2018-01-03", # Wed + user.observation.end = NULL, tp.first.transaction = fct.helper.clv.time.correct.datetype("2018-01-01", clv.t.weeks), # Mon tp.last.transaction = fct.helper.clv.time.correct.datetype("2025-06-15", clv.t.weeks)), regexp = "1 time.unit after") expect_error(clv.time.set.sample.periods(clv.time = clv.t.years, user.estimation.end = "2018-12-31", + user.observation.end = NULL, tp.first.transaction = fct.helper.clv.time.correct.datetype("2018-01-01", clv.t.years), tp.last.transaction = fct.helper.clv.time.correct.datetype("2025-06-15", clv.t.years)), regexp = "1 time.unit after") @@ -226,32 +255,37 @@ fct.testthat.correctness.clvtime.set.sample.periods.stop.holdout.length.less.2.p tp.first <- fct.helper.clv.time.correct.datetype("2018-01-01", clv.t) tp.last <- fct.helper.clv.time.correct.datetype("2025-06-15", clv.t) expect_error(clv.time.set.sample.periods(clv.t, + user.observation.end = NULL, user.estimation.end = tp.last-lubridate::hours(1), tp.first.transaction = tp.first, tp.last.transaction = tp.last), - regexp = "2 periods before") + regexp = "at least 2 time.units") } expect_error(clv.time.set.sample.periods(clv.time = clv.t.hours, user.estimation.end = "2025-06-14 22:40:11", + user.observation.end = NULL, tp.first.transaction = fct.helper.clv.time.correct.datetype("2018-01-01", clv.t.hours), tp.last.transaction = fct.helper.clv.time.correct.datetype("2025-06-15", clv.t.hours)), - regexp = "2 periods before") + regexp = "at least 2 time.units") expect_error(clv.time.set.sample.periods(clv.time = clv.t.days, user.estimation.end = "2025-06-14", + user.observation.end = NULL, tp.first.transaction = fct.helper.clv.time.correct.datetype("2018-01-01", clv.t.days), tp.last.transaction = fct.helper.clv.time.correct.datetype("2025-06-15", clv.t.days)), - regexp = "2 periods before") + regexp = "at least 2 time.units") expect_error(clv.time.set.sample.periods(clv.time = clv.t.weeks, user.estimation.end = "2025-06-11", # Wed + user.observation.end = NULL, tp.first.transaction = fct.helper.clv.time.correct.datetype("2018-01-01", clv.t.weeks), tp.last.transaction = fct.helper.clv.time.correct.datetype("2025-06-13", clv.t.weeks)),# Fr - regexp = "2 periods before") + regexp = "at least 2 time.units") expect_error(clv.time.set.sample.periods(clv.time = clv.t.years, user.estimation.end = "2025-01-01", + user.observation.end = NULL, tp.first.transaction = fct.helper.clv.time.correct.datetype("2018-01-01", clv.t.years), tp.last.transaction = fct.helper.clv.time.correct.datetype("2025-06-15", clv.t.years)), - regexp = "2 periods before") + regexp = "at least 2 time.units") } @@ -265,13 +299,17 @@ fct.testthat.correctness.clvtime.set.sample.periods.date.estimation.end <- funct if(is(clv.t, "clv.time.datetime")){ # POSIX dates in transactions - but split with Date (ymd by user) - expect_silent(clv.t <- clv.time.set.sample.periods(clv.t, user.estimation.end = tp.split, + expect_silent(clv.t <- clv.time.set.sample.periods(clv.t, + user.estimation.end = tp.split, + user.observation.end = NULL, tp.first.transaction = tp.first, tp.last.transaction = tp.last)) # Split same Date but as posix expect_equal(clv.t@timepoint.estimation.end, as.POSIXct.POSIXlt(as.POSIXlt.Date(tp.split), tz = "UTC")) }else{ - expect_silent(clv.t <- clv.time.set.sample.periods(clv.t, user.estimation.end = tp.split, + expect_silent(clv.t <- clv.time.set.sample.periods(clv.t, + user.estimation.end = tp.split, + user.observation.end = NULL, tp.first.transaction = tp.first, tp.last.transaction = tp.last)) expect_equal(clv.t@timepoint.estimation.end, tp.split) @@ -297,6 +335,7 @@ fct.testthat.correctness.clvtime.set.sample.periods.posixct.estimation.end <- fu if(is(clv.t, "clv.time.datetime")){ expect_silent(clv.t <- clv.time.set.sample.periods(clv.t, user.estimation.end = tp.split, + user.observation.end = NULL, tp.first.transaction = tp.first, tp.last.transaction = tp.last)) @@ -306,6 +345,7 @@ fct.testthat.correctness.clvtime.set.sample.periods.posixct.estimation.end <- fu # Date transactions - but split with POSIXct (given by user) expect_message(clv.t <- clv.time.set.sample.periods(clv.t, user.estimation.end = tp.split, + user.observation.end = NULL, tp.first.transaction = tp.first, tp.last.transaction = tp.last), regexp = "is ignored") @@ -331,6 +371,7 @@ fct.testthat.correctness.clvtime.set.sample.periods.char.estimation.end <- funct tp.split <- "2019-07-19 15:36:19" expect_silent(clv.t <- clv.time.set.sample.periods(clv.t, user.estimation.end = tp.split, + user.observation.end = NULL, tp.first.transaction = tp.first, tp.last.transaction = tp.last)) @@ -341,6 +382,7 @@ fct.testthat.correctness.clvtime.set.sample.periods.char.estimation.end <- funct tp.split <- "2019-07-19" expect_silent(clv.t <- clv.time.set.sample.periods(clv.t, user.estimation.end = tp.split, + user.observation.end = NULL, tp.first.transaction = tp.first, tp.last.transaction = tp.last)) # Split same but as Date From 765d8e5e304fb503ff1af79cc48dacb01e2bf4a8 Mon Sep 17 00:00:00 2001 From: Patrik Schilter Date: Tue, 12 Aug 2025 01:19:39 +0200 Subject: [PATCH 02/26] clv.time: Tests for set.sample.periods(observation.end) --- .../helper_testthat_correctness_clvtime.R | 145 ++++++++++++++++++ tests/testthat/test_correctness_clvtime.R | 7 + 2 files changed, 152 insertions(+) diff --git a/tests/testthat/helper_testthat_correctness_clvtime.R b/tests/testthat/helper_testthat_correctness_clvtime.R index 835e02d6..17b7173c 100644 --- a/tests/testthat/helper_testthat_correctness_clvtime.R +++ b/tests/testthat/helper_testthat_correctness_clvtime.R @@ -397,6 +397,151 @@ fct.testthat.correctness.clvtime.set.sample.periods.char.estimation.end <- funct } +fct.testthat.correctness.clvtime.set.sample.periods.observation.end <- function(){ + + # fct.helper.clv.time.correct.datetype + + test_that("Fail if observation.end is before last transaction",{ + expect_error( + clv.time.set.sample.periods( + clv.time.weeks("ymd"), + tp.first.transaction = ymd("2000-01-01"), + tp.last.transaction = ymd("2000-12-31"), + user.estimation.end = NULL, + user.observation.end = "2000-12-30"), + regexp = "may not be before the last recorded transaction" + ) + + }) + + test_that("Fail if observation.end leads to holdout period < 2 periods", { + expect_error( + clv.time.set.sample.periods( + clv.time.weeks("ymd"), + tp.first.transaction = ymd("2000-01-01"), + tp.last.transaction = ymd("2000-12-31"), + user.estimation.end = "2001-01-20", + user.observation.end = "2001-01-31"), + regexp = "holdout period of at least 2 time.units" + ) + + }) + + test_that("Fail if observation.end leads to estimation period < 1 period", { + expect_error( + clv.time.set.sample.periods( + clv.time.weeks("ymd"), + tp.first.transaction = ymd("2000-01-01"), + tp.last.transaction = ymd("2000-01-02"), + user.estimation.end = "2000-01-05", + user.observation.end = "2000-01-31"), + regexp = "least 1 time.unit" + ) + }) + + test_that("Fail if observation.end is before estimation.split", { + expect_error( + clv.time.set.sample.periods( + clv.time.weeks("ymd"), + tp.first.transaction = ymd("2000-01-01"), + tp.last.transaction = ymd("2000-12-31"), + user.estimation.end = "2001-02-15", + user.observation.end = "2001-01-31"), + regexp = "holdout period of at least 2 time.units" + ) + }) + + test_that("Same object when no observation.end as when observation.end=tp.last.transaction", { + l.args <- list( + clv.time = clv.time.weeks("ymd"), + tp.first.transaction = ymd("2000-01-01"), + tp.last.transaction = ymd("2000-12-31") + ) + + # No holdout + l.args["user.estimation.end"] <- list(NULL) + expect_equal( + do.call(clv.time.set.sample.periods, c(l.args, list(user.observation.end = NULL))), + do.call(clv.time.set.sample.periods, c(l.args, list(user.observation.end = "2000-12-31"))) + ) + + # With holdout + l.args$user.estimation.end <- "2000-06-15" + expect_equal( + do.call(clv.time.set.sample.periods, c(l.args, list(user.observation.end = NULL))), + do.call(clv.time.set.sample.periods, c(l.args, list(user.observation.end = "2000-12-31"))) + ) + }) + + test_that("estimation.split can be after last transaction if observation.end is given", { + expect_silent( + clv.time.set.sample.periods( + clv.time.weeks("ymd"), + tp.first.transaction = ymd("2000-01-01"), + tp.last.transaction = ymd("2000-12-31"), + user.estimation.end = "2001-01-10", + user.observation.end = "2001-01-31") + ) + }) + + test_that("observation.end only moves holdout.end (if estimation.split is before)",{ + clv.t.no.obsend <- clv.time.set.sample.periods( + clv.time = clv.time.weeks("ymd"), + tp.first.transaction = ymd("2000-01-01"), + tp.last.transaction = ymd("2000-12-31"), + user.estimation.end = "2000-06-15", + user.observation.end = NULL) + + clv.t.with.obsend <- clv.time.set.sample.periods( + clv.time = clv.time.weeks("ymd"), + tp.first.transaction = ymd("2000-01-01"), + tp.last.transaction = ymd("2000-12-31"), + user.estimation.end = "2000-06-15", + user.observation.end = "2001-02-28") + + expect_true(all( + clv.t.with.obsend@timepoint.estimation.start == clv.t.no.obsend@timepoint.estimation.start, + clv.t.with.obsend@timepoint.estimation.end == clv.t.no.obsend@timepoint.estimation.end, + clv.t.with.obsend@timepoint.holdout.start == clv.t.no.obsend@timepoint.holdout.start, + clv.t.with.obsend@estimation.period.in.tu == clv.t.no.obsend@estimation.period.in.tu + )) + + expect_true(clv.t.with.obsend@timepoint.holdout.end > clv.t.no.obsend@timepoint.holdout.end) + expect_true(clv.t.with.obsend@holdout.period.in.tu > clv.t.no.obsend@holdout.period.in.tu) + }) + + + test_that("Manually check if yields correct timepoints", { + + # With holdout + split after last transaction + clv.holdout <- clv.time.set.sample.periods( + clv.time = clv.time.weeks("ymd"), + tp.first.transaction = ymd("2000-01-01"), + tp.last.transaction = ymd("2000-05-28"), + user.estimation.end = "2000-06-15", + user.observation.end = "2000-12-31") + + expect_true(clv.holdout@timepoint.estimation.start == "2000-01-01") + expect_true(clv.holdout@timepoint.estimation.end == "2000-06-15") + expect_true(clv.holdout@timepoint.holdout.start == "2000-06-16") + expect_true(clv.holdout@timepoint.holdout.end == "2000-12-31") + + + clv.no.holdout <- clv.time.set.sample.periods( + clv.time = clv.time.weeks("ymd"), + tp.first.transaction = ymd("2000-01-01"), + tp.last.transaction = ymd("2000-12-28"), + user.estimation.end = NULL, + user.observation.end = "2000-12-31") + + expect_true(clv.no.holdout@timepoint.estimation.start == "2000-01-01") + expect_true(clv.no.holdout@timepoint.estimation.end == "2000-12-31") + expect_true(clv.no.holdout@timepoint.holdout.start == "2000-12-31") + expect_true(clv.no.holdout@timepoint.holdout.end == "2000-12-31") + }) +} + + # convert.user.input.to.timepoint ------------------------------------------------------------------------------------- fct.testthat.correctness.clvtime.convert.user.input.chars.to.posixct <- function(clv.t.datetime){ stopifnot(is(clv.t.datetime, "clv.time.datetime")) diff --git a/tests/testthat/test_correctness_clvtime.R b/tests/testthat/test_correctness_clvtime.R index 44d16760..22ff4e75 100644 --- a/tests/testthat/test_correctness_clvtime.R +++ b/tests/testthat/test_correctness_clvtime.R @@ -11,6 +11,7 @@ for(clv.t in c(fct.helper.clv.time.create.test.objects(with.holdout = FALSE), # set.sample.periods -------------------------------------------------------------------------------- +# . no observation.end ----------------------------------------------------------------------------- for(clv.t in list(clv.time.hours(time.format="ymd HMS"), clv.time.days( time.format="ymd"), clv.time.weeks(time.format="ymd"), @@ -33,6 +34,12 @@ fct.testthat.correctness.clvtime.set.sample.periods.stop.holdout.length.less.2.p clv.t.weeks = clv.time.weeks(time.format="ymd"), clv.t.years = clv.time.years(time.format="ymd")) +# . with observation.end -------------------------------------------------------------------------- +fct.testthat.correctness.clvtime.set.sample.periods.observation.end() + + + + # convert.user.input.to.timepoint -------------------------------------------------------------------------------- for(clv.t in c(fct.helper.clv.time.create.test.objects(with.holdout = FALSE), fct.helper.clv.time.create.test.objects(with.holdout = TRUE))){ From 3f9967555e9edbc561aed66247aad737af4f98fb Mon Sep 17 00:00:00 2001 From: Patrik Schilter Date: Wed, 13 Aug 2025 21:36:32 +0200 Subject: [PATCH 03/26] observation.end: Docu --- R/all_generics.R | 4 +++- R/f_interface_clvdata.R | 13 +++++++++++++ man-roxygen/template_params_clvdata.R | 1 + man/as.clv.data.Rd | 3 +++ man/clvdata.Rd | 16 ++++++++++++++++ 5 files changed, 36 insertions(+), 1 deletion(-) diff --git a/R/all_generics.R b/R/all_generics.R index 51eb2e86..ef5ccf3f 100644 --- a/R/all_generics.R +++ b/R/all_generics.R @@ -245,7 +245,9 @@ setGeneric("clv.data.create.bootstrapping.data", def = function(clv.data, ids){ #' #' @export as.clv.data <- function(x, - date.format="ymd", time.unit="weeks", + date.format="ymd", + time.unit="weeks", + observation.end = NULL, estimation.split = NULL, name.id="Id", name.date="Date", name.price="Price", ...){ diff --git a/R/f_interface_clvdata.R b/R/f_interface_clvdata.R index 08d7c6b1..f7f10c44 100644 --- a/R/f_interface_clvdata.R +++ b/R/f_interface_clvdata.R @@ -32,6 +32,12 @@ #' (i.e., "2010-06-17") is indicated with \code{"ymd"}. Other combinations such as \code{"dmy"}, \code{"dym"}, #' \code{"ymd HMS"}, or \code{"HMS dmy"} are possible as well. #' +#' \code{observation.end} The point in time at which the observation period ends. +#' The observation period is the total time frame in which customers were observed and is the combined estimation and holdout periods. +#' Useful when the last recorded transaction does not constitute the end of the observation period. +#' For example, when the last transaction was on "2000-12-29" but customers where actually observed until "2000-12-31". +#' Required to be after the last recorded transaction. +#' #' \code{estimation.split} May be specified as either the number of periods since the first transaction or the timepoint #' (either as character, Date, or POSIXct) at which the estimation period ends. The indicated timepoint itself will be part of the estimation sample. #' If no value is provided or set to \code{NULL}, the whole dataset will used for fitting the model (no holdout sample). @@ -84,6 +90,13 @@ #' time.unit = "w", #' estimation.split = "1997-10-15") #' +#' # Extend observation period until 31th Dec 1998 +#' clv.data.cdnow <- clvdata(data.transactions = cdnow, +#' date.format="ymd", +#' time.unit = "w", +#' observation.end = "1998-12-31", +#' estimation.split = "1997-10-15") +#' #' # summary of the transaction data #' summary(clv.data.cdnow) #' diff --git a/man-roxygen/template_params_clvdata.R b/man-roxygen/template_params_clvdata.R index e3cf6454..f91cb976 100644 --- a/man-roxygen/template_params_clvdata.R +++ b/man-roxygen/template_params_clvdata.R @@ -1,5 +1,6 @@ #' @param date.format Character string that indicates the format of the date variable in the data used. See details. #' @param time.unit What time unit defines a period. May be abbreviated, capitalization is ignored. See details. +#' @param observation.split The end of the observation period, beyond the last recorded transaction in \code{<%=name_param_trans%>}. See details. #' @param estimation.split Indicates the length of the estimation period. See details. #' @param name.id Column name of the customer id in \code{<%=name_param_trans%>}. #' @param name.date Column name of the transaction date in \code{<%=name_param_trans%>}. diff --git a/man/as.clv.data.Rd b/man/as.clv.data.Rd index 297a620f..e46b9f42 100644 --- a/man/as.clv.data.Rd +++ b/man/as.clv.data.Rd @@ -10,6 +10,7 @@ as.clv.data( x, date.format = "ymd", time.unit = "weeks", + observation.end = NULL, estimation.split = NULL, name.id = "Id", name.date = "Date", @@ -55,6 +56,8 @@ as.clv.data( \item{name.price}{Column name of price in \code{x}. NULL if no spending data is present.} \item{...}{Ignored} + +\item{observation.split}{The end of the observation period, beyond the last recorded transaction in \code{x}. See details.} } \description{ Functions to coerce transaction data to a \code{clv.data} object. diff --git a/man/clvdata.Rd b/man/clvdata.Rd index 6987312e..08149f14 100644 --- a/man/clvdata.Rd +++ b/man/clvdata.Rd @@ -9,6 +9,7 @@ clvdata( date.format, time.unit, estimation.split = NULL, + observation.end = NULL, name.id = "Id", name.date = "Date", name.price = "Price" @@ -28,6 +29,8 @@ clvdata( \item{name.date}{Column name of the transaction date in \code{data.transactions}.} \item{name.price}{Column name of price in \code{data.transactions}. NULL if no spending data is present.} + +\item{observation.split}{The end of the observation period, beyond the last recorded transaction in \code{data.transactions}. See details.} } \value{ An object of class \code{clv.data}. @@ -62,6 +65,12 @@ and hence all formats it accepts in argument \code{orders} can be used. For exam (i.e., "2010-06-17") is indicated with \code{"ymd"}. Other combinations such as \code{"dmy"}, \code{"dym"}, \code{"ymd HMS"}, or \code{"HMS dmy"} are possible as well. +\code{observation.end} The point in time at which the observation period ends. +The observation period is the total time frame in which customers were observed and is the combined estimation and holdout periods. +Useful when the last recorded transaction does not constitute the end of the observation period. +For example, when the last transaction was on "2000-12-29" but customers where actually observed until "2000-12-31". +Required to be after the last recorded transaction. + \code{estimation.split} May be specified as either the number of periods since the first transaction or the timepoint (either as character, Date, or POSIXct) at which the estimation period ends. The indicated timepoint itself will be part of the estimation sample. If no value is provided or set to \code{NULL}, the whole dataset will used for fitting the model (no holdout sample). @@ -101,6 +110,13 @@ clv.data.cdnow <- clvdata(data.transactions = cdnow, time.unit = "w", estimation.split = "1997-10-15") +# Extend observation period until 31th Dec 1998 +clv.data.cdnow <- clvdata(data.transactions = cdnow, + date.format="ymd", + time.unit = "w", + observation.end = "1998-12-31", + estimation.split = "1997-10-15") + # summary of the transaction data summary(clv.data.cdnow) From 3bb12e7bb347b5fb3cec1375f32e1a9cc5c5026c Mon Sep 17 00:00:00 2001 From: Patrik Schilter Date: Wed, 13 Aug 2025 22:03:27 +0200 Subject: [PATCH 04/26] summary.clv.data: Print start and end of periods As the end now can be different from the actual transactions --- R/class_clv_data.R | 14 ++++++++++++++ man-roxygen/template_summary_data.R | 2 ++ man/summary.clv.data.Rd | 2 ++ 3 files changed, 18 insertions(+) diff --git a/R/class_clv_data.R b/R/class_clv_data.R index d130740a..79519299 100644 --- a/R/class_clv_data.R +++ b/R/class_clv_data.R @@ -253,7 +253,21 @@ clv.data.make.descriptives <- function(clv.data, ids){ dt.interp <- clv.data.mean.interpurchase.times(clv.data=clv.data, dt.transactions = dt.data) dt.num.trans.by.cust <- dt.data[, .N, by="Id"] + tp.period.start <- switch( + sample.name, + Estimation=clv.time@timepoint.estimation.start, + Holdout=clv.time@timepoint.holdout.start, + Total=clv.time@timepoint.estimation.start) + + tp.period.end <- switch( + sample.name, + Estimation=clv.time@timepoint.estimation.end, + Holdout=clv.time@timepoint.holdout.end, + Total=clv.time@timepoint.holdout.end) + l.desc <- list( + "Period Start" = clv.time.format.timepoint(clv.time=clv.time, timepoint=tp.period.start), + "Period End" = clv.time.format.timepoint(clv.time=clv.time, timepoint=tp.period.end), "Number of customers" = if(sample.name=="Total"){nrow(dt.num.trans.by.cust)}else{"-"}, "First Transaction in period" = clv.time.format.timepoint(clv.time=clv.time, timepoint=dt.data[, min(Date)]), "Last Transaction in period" = clv.time.format.timepoint(clv.time=clv.time, timepoint=dt.data[, max(Date)]), diff --git a/man-roxygen/template_summary_data.R b/man-roxygen/template_summary_data.R index bca429a2..7f5ed15e 100644 --- a/man-roxygen/template_summary_data.R +++ b/man-roxygen/template_summary_data.R @@ -16,6 +16,8 @@ #' be limited to a subset of customers. #' \describe{ #' \item{\code{Number of customers}}{Count of individual customers.} +#' \item{\code{Period Start}}{Start of the indicated period.} +#' \item{\code{Period End}}{End of indicated period.} #' \item{\code{First Transaction in period}}{Time point of the first transaction occurring in the indicated period.} #' \item{\code{Last Transaction in period}}{Time point of the last transaction occurring in the indicated period.} #' \item{\code{Total # Transactions}}{Count of transactions occurring in the indicated period.} diff --git a/man/summary.clv.data.Rd b/man/summary.clv.data.Rd index b5999cfd..e7cb5aa5 100644 --- a/man/summary.clv.data.Rd +++ b/man/summary.clv.data.Rd @@ -60,6 +60,8 @@ for the overall time period (estimation + holdout). By using the \code{ids} argu be limited to a subset of customers. \describe{ \item{\code{Number of customers}}{Count of individual customers.} +\item{\code{Period Start}}{Start of the indicated period.} +\item{\code{Period End}}{End of indicated period.} \item{\code{First Transaction in period}}{Time point of the first transaction occurring in the indicated period.} \item{\code{Last Transaction in period}}{Time point of the last transaction occurring in the indicated period.} \item{\code{Total # Transactions}}{Count of transactions occurring in the indicated period.} From 6caf82582efd4db3004d397dfe95edca0f909839 Mon Sep 17 00:00:00 2001 From: Patrik Schilter Date: Wed, 13 Aug 2025 22:50:46 +0200 Subject: [PATCH 05/26] clv.data: Implement observation.end, incl input checks --- R/f_clvdata_inputchecks.R | 25 +++++++++++++++++++++++++ R/f_interface_clvdata.R | 4 +++- 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/R/f_clvdata_inputchecks.R b/R/f_clvdata_inputchecks.R index eee0d4f4..623a9c3b 100644 --- a/R/f_clvdata_inputchecks.R +++ b/R/f_clvdata_inputchecks.R @@ -210,6 +210,31 @@ check_userinput_datanocov_estimationsplit <- function(estimation.split, date.for return(c()) } +#' @importFrom lubridate is.POSIXt is.Date parse_date_time +check_userinput_datanocov_observationend <- function(observation.end, date.format){ + + # May be NULL + if(is.null(observation.end)) + return(c()) + + if(length(observation.end) != 1) + return("observation.end must contain exactly one single element!") + + if(anyNA(observation.end)) + return("observation.end may not contain any NAs!") + + if(!is.character(observation.end) + & !is.Date(observation.end) + & !is.POSIXt(observation.end)) + return("observation.end needs to either of type character or date-like (Date or POSIXt)") + + if(is.character(observation.end)) + if(anyNA(parse_date_time(x=observation.end, quiet=TRUE, orders=date.format))) + return("Please provide a valid observation.end to that can be converted with the given date.format!") + + return(c()) +} + #' @importFrom lubridate is.POSIXct check_userinput_datanocov_datatransactions <- function(data.transactions.dt, has.spending){ diff --git a/R/f_interface_clvdata.R b/R/f_interface_clvdata.R index f7f10c44..a6995533 100644 --- a/R/f_interface_clvdata.R +++ b/R/f_interface_clvdata.R @@ -125,7 +125,7 @@ #' #' #' @export -clvdata <- function(data.transactions, date.format, time.unit, estimation.split=NULL, name.id="Id", name.date="Date", name.price="Price"){ +clvdata <- function(data.transactions, date.format, time.unit, estimation.split=NULL, observation.end=NULL, name.id="Id", name.date="Date", name.price="Price"){ # silence CRAN notes Date <- Price <- Id <- x <- previous <- date.first.actual.trans <- NULL @@ -149,6 +149,7 @@ clvdata <- function(data.transactions, date.format, time.unit, estimation.split= err.msg <- c(err.msg, .check_userinput_charactervec(char=date.format, var.name = "date.format", n=1)) err.msg <- c(err.msg, check_userinput_datanocov_estimationsplit(estimation.split=estimation.split, date.format=date.format)) + err.msg <- c(err.msg, check_userinput_datanocov_observationend(observation.end=observation.end, date.format=date.format)) check_err_msg(err.msg) @@ -221,6 +222,7 @@ clvdata <- function(data.transactions, date.format, time.unit, estimation.split= clv.t <- clv.time.set.sample.periods(clv.time = clv.t, tp.first.transaction = tp.first.transaction, tp.last.transaction = tp.last.transaction, + user.observation.end = observation.end, user.estimation.end = estimation.split) if(clv.t@timepoint.estimation.end > dt.trans[, max(Date)]) From 375b30111f23b826bddfb719221dc56ac7d566c4 Mon Sep 17 00:00:00 2001 From: Patrik Schilter Date: Wed, 13 Aug 2025 22:57:25 +0200 Subject: [PATCH 06/26] Fix tests: clv.time produces new error message --- tests/testthat/test_inputchecks_clvdata_clvdata.R | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/testthat/test_inputchecks_clvdata_clvdata.R b/tests/testthat/test_inputchecks_clvdata_clvdata.R index d25ed279..0a618ecd 100644 --- a/tests/testthat/test_inputchecks_clvdata_clvdata.R +++ b/tests/testthat/test_inputchecks_clvdata_clvdata.R @@ -194,22 +194,22 @@ test_that("Fails with character in wrong dateformat",{ test_that("Fails with split after last transaction",{ expect_error(clvdata(estimation.split = "2010-01-01",time.unit = "w", data.transactions = cdnow, - date.format="ymd"), regexp = "before the last transaction") + date.format="ymd"), regexp = "a holdout period of at least 2 time.units") expect_error(clvdata(estimation.split = 200,time.unit = "w", data.transactions = cdnow, - date.format="ymd"), regexp = "before the last transaction") + date.format="ymd"), regexp = "a holdout period of at least 2 time.units") expect_error(clvdata(estimation.split = 4,time.unit = "y", data.transactions = cdnow, - date.format="ymd"), regexp = "before the last transaction") + date.format="ymd"), regexp = "a holdout period of at least 2 time.units") }) test_that("Fails with split in 2 periods before last transaction (ie in last period)",{ expect_error(clvdata(estimation.split = "1998-06-30",time.unit = "d", data.transactions = cdnow, - date.format="ymd"), regexp = "before the last transaction") + date.format="ymd"), regexp = "a holdout period of at least 2 time.units") expect_error(clvdata(estimation.split = "1998-06-29",time.unit = "d", data.transactions = cdnow, - date.format="ymd"), regexp = "before the last transaction") + date.format="ymd"), regexp = "a holdout period of at least 2 time.units") expect_error(clvdata(estimation.split = "1998-06-30",time.unit = "w", data.transactions = cdnow, - date.format="ymd"), regexp = "before the last transaction") + date.format="ymd"), regexp = "a holdout period of at least 2 time.units") expect_error(clvdata(estimation.split = "1998-06-21",time.unit = "w", data.transactions = cdnow, - date.format="ymd"), regexp = "before the last transaction") + date.format="ymd"), regexp = "a holdout period of at least 2 time.units") }) test_that("Fails if before all first transactions by customer", { From 4d4e847fde640771df77aa2fbe6cc3358e60b9a3 Mon Sep 17 00:00:00 2001 From: Patrik Schilter Date: Wed, 13 Aug 2025 23:00:57 +0200 Subject: [PATCH 07/26] clvdata(): Remove redundant check Already checked in clv.time.set.sample.periods --- R/f_interface_clvdata.R | 3 --- 1 file changed, 3 deletions(-) diff --git a/R/f_interface_clvdata.R b/R/f_interface_clvdata.R index a6995533..b46273d6 100644 --- a/R/f_interface_clvdata.R +++ b/R/f_interface_clvdata.R @@ -228,9 +228,6 @@ clvdata <- function(data.transactions, date.format, time.unit, estimation.split= if(clv.t@timepoint.estimation.end > dt.trans[, max(Date)]) stop("Parameter estimation.split needs to indicate a point in the data!", call. = FALSE) - if(clv.t@estimation.period.in.tu < 1) - stop("Parameter estimation.split needs to be at least 1 time.unit after the start!", call. = FALSE) - # Check if the estimation.split is valid ---------------------------------------- # - estimation period long enough From a195f7e9ba8331a0aa48b517d97b874f4702a4b5 Mon Sep 17 00:00:00 2001 From: Patrik Schilter Date: Wed, 13 Aug 2025 23:52:09 +0200 Subject: [PATCH 08/26] Move check to clv.time --- R/class_clv_time.R | 6 ++++++ R/f_interface_clvdata.R | 3 --- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/R/class_clv_time.R b/R/class_clv_time.R index 328a26d5..af1ba919 100644 --- a/R/class_clv_time.R +++ b/R/class_clv_time.R @@ -112,6 +112,12 @@ clv.time.set.sample.periods <- function(clv.time, tp.first.transaction, tp.last. user.timepoint=user.estimation.end) } + + # Before the last transaction to ensure there is at least 1 transaction in the holdout period. + # Needed additionally to holdout >=2 periods + if(tp.estimation.end >= tp.last.transaction) + stop("Parameter estimation.split needs to indicate a point before the last transaction!", call. = FALSE) + # Need to be 2 periods because otherwise for days, holdout can be not on estimation.end but still be of length zero # ie 2 periods to still have 1 as holdout if(tp.estimation.end > tp.observation.end - clv.time.number.timeunits.to.timeperiod(clv.time, 2L)) diff --git a/R/f_interface_clvdata.R b/R/f_interface_clvdata.R index b46273d6..cd07bdb0 100644 --- a/R/f_interface_clvdata.R +++ b/R/f_interface_clvdata.R @@ -225,9 +225,6 @@ clvdata <- function(data.transactions, date.format, time.unit, estimation.split= user.observation.end = observation.end, user.estimation.end = estimation.split) - if(clv.t@timepoint.estimation.end > dt.trans[, max(Date)]) - stop("Parameter estimation.split needs to indicate a point in the data!", call. = FALSE) - # Check if the estimation.split is valid ---------------------------------------- # - estimation period long enough From 0da6c7623c99014efa45c187b66c9f1751191fa9 Mon Sep 17 00:00:00 2001 From: Patrik Schilter Date: Wed, 13 Aug 2025 23:58:16 +0200 Subject: [PATCH 09/26] clv.time: Adapt tests --- tests/testthat/test_inputchecks_clvdata_clvdata.R | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/tests/testthat/test_inputchecks_clvdata_clvdata.R b/tests/testthat/test_inputchecks_clvdata_clvdata.R index 0a618ecd..918719f7 100644 --- a/tests/testthat/test_inputchecks_clvdata_clvdata.R +++ b/tests/testthat/test_inputchecks_clvdata_clvdata.R @@ -194,20 +194,16 @@ test_that("Fails with character in wrong dateformat",{ test_that("Fails with split after last transaction",{ expect_error(clvdata(estimation.split = "2010-01-01",time.unit = "w", data.transactions = cdnow, - date.format="ymd"), regexp = "a holdout period of at least 2 time.units") + date.format="ymd"), regexp = "before the last transaction") expect_error(clvdata(estimation.split = 200,time.unit = "w", data.transactions = cdnow, - date.format="ymd"), regexp = "a holdout period of at least 2 time.units") + date.format="ymd"), regexp = "before the last transaction") expect_error(clvdata(estimation.split = 4,time.unit = "y", data.transactions = cdnow, - date.format="ymd"), regexp = "a holdout period of at least 2 time.units") + date.format="ymd"), regexp = "before the last transaction") }) test_that("Fails with split in 2 periods before last transaction (ie in last period)",{ - expect_error(clvdata(estimation.split = "1998-06-30",time.unit = "d", data.transactions = cdnow, - date.format="ymd"), regexp = "a holdout period of at least 2 time.units") expect_error(clvdata(estimation.split = "1998-06-29",time.unit = "d", data.transactions = cdnow, date.format="ymd"), regexp = "a holdout period of at least 2 time.units") - expect_error(clvdata(estimation.split = "1998-06-30",time.unit = "w", data.transactions = cdnow, - date.format="ymd"), regexp = "a holdout period of at least 2 time.units") expect_error(clvdata(estimation.split = "1998-06-21",time.unit = "w", data.transactions = cdnow, date.format="ymd"), regexp = "a holdout period of at least 2 time.units") }) From d687fe0ee487a2a450712880e45b7b6e1d0262f7 Mon Sep 17 00:00:00 2001 From: Patrik Schilter Date: Thu, 14 Aug 2025 00:02:42 +0200 Subject: [PATCH 10/26] clvdata: Update error message --- R/f_interface_clvdata.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/f_interface_clvdata.R b/R/f_interface_clvdata.R index cd07bdb0..ae01c4d7 100644 --- a/R/f_interface_clvdata.R +++ b/R/f_interface_clvdata.R @@ -238,7 +238,7 @@ clvdata <- function(data.transactions, date.format, time.unit, estimation.split= everyones.first.trans <- dt.trans[, list(date.first.actual.trans = min(Date)), by="Id"] date.last.first.trans <- everyones.first.trans[, max(date.first.actual.trans)] if(clv.t@timepoint.estimation.end < date.last.first.trans) - stop("The estimation split is too short! Not all customers of this cohort had their first actual transaction until the specified estimation.split!", call. = F) + stop("The estimation period is too short! Not all customers had their first transaction until the end of the estimation period!", call. = FALSE) From 9ffcd3a47e40e688ca15620f9f530297c9bcce01 Mon Sep 17 00:00:00 2001 From: Patrik Schilter Date: Thu, 14 Aug 2025 00:22:31 +0200 Subject: [PATCH 11/26] Fix docu: parameter name --- man-roxygen/template_params_clvdata.R | 2 +- man/as.clv.data.Rd | 4 ++-- man/clvdata.Rd | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/man-roxygen/template_params_clvdata.R b/man-roxygen/template_params_clvdata.R index f91cb976..5e5d21cc 100644 --- a/man-roxygen/template_params_clvdata.R +++ b/man-roxygen/template_params_clvdata.R @@ -1,6 +1,6 @@ #' @param date.format Character string that indicates the format of the date variable in the data used. See details. #' @param time.unit What time unit defines a period. May be abbreviated, capitalization is ignored. See details. -#' @param observation.split The end of the observation period, beyond the last recorded transaction in \code{<%=name_param_trans%>}. See details. +#' @param observation.end The end of the observation period, beyond the last recorded transaction in \code{<%=name_param_trans%>}. See details. #' @param estimation.split Indicates the length of the estimation period. See details. #' @param name.id Column name of the customer id in \code{<%=name_param_trans%>}. #' @param name.date Column name of the transaction date in \code{<%=name_param_trans%>}. diff --git a/man/as.clv.data.Rd b/man/as.clv.data.Rd index e46b9f42..61d6075d 100644 --- a/man/as.clv.data.Rd +++ b/man/as.clv.data.Rd @@ -47,6 +47,8 @@ as.clv.data( \item{time.unit}{What time unit defines a period. May be abbreviated, capitalization is ignored. See details.} +\item{observation.end}{The end of the observation period, beyond the last recorded transaction in \code{x}. See details.} + \item{estimation.split}{Indicates the length of the estimation period. See details.} \item{name.id}{Column name of the customer id in \code{x}.} @@ -56,8 +58,6 @@ as.clv.data( \item{name.price}{Column name of price in \code{x}. NULL if no spending data is present.} \item{...}{Ignored} - -\item{observation.split}{The end of the observation period, beyond the last recorded transaction in \code{x}. See details.} } \description{ Functions to coerce transaction data to a \code{clv.data} object. diff --git a/man/clvdata.Rd b/man/clvdata.Rd index 08149f14..d1626acf 100644 --- a/man/clvdata.Rd +++ b/man/clvdata.Rd @@ -24,13 +24,13 @@ clvdata( \item{estimation.split}{Indicates the length of the estimation period. See details.} +\item{observation.end}{The end of the observation period, beyond the last recorded transaction in \code{data.transactions}. See details.} + \item{name.id}{Column name of the customer id in \code{data.transactions}.} \item{name.date}{Column name of the transaction date in \code{data.transactions}.} \item{name.price}{Column name of price in \code{data.transactions}. NULL if no spending data is present.} - -\item{observation.split}{The end of the observation period, beyond the last recorded transaction in \code{data.transactions}. See details.} } \value{ An object of class \code{clv.data}. From ad4248fbd720fec7712cc679f127f24105121c12 Mon Sep 17 00:00:00 2001 From: Patrik Schilter Date: Thu, 14 Aug 2025 10:58:30 +0200 Subject: [PATCH 12/26] clvdata s3: Add observation.end Consistent order --- R/all_generics.R | 2 +- R/f_s3generics_clvdata.R | 8 ++++++-- man/as.clv.data.Rd | 8 +++++--- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/R/all_generics.R b/R/all_generics.R index ef5ccf3f..a939cebd 100644 --- a/R/all_generics.R +++ b/R/all_generics.R @@ -247,8 +247,8 @@ setGeneric("clv.data.create.bootstrapping.data", def = function(clv.data, ids){ as.clv.data <- function(x, date.format="ymd", time.unit="weeks", - observation.end = NULL, estimation.split = NULL, + observation.end = NULL, name.id="Id", name.date="Date", name.price="Price", ...){ UseMethod("as.clv.data", x) diff --git a/R/f_s3generics_clvdata.R b/R/f_s3generics_clvdata.R index 198ccdfd..91b05041 100644 --- a/R/f_s3generics_clvdata.R +++ b/R/f_s3generics_clvdata.R @@ -291,8 +291,10 @@ subset.clv.data <- function(x, #' @rdname as.clv.data #' @export as.clv.data.data.frame <- function(x, - date.format="ymd", time.unit="weeks", + date.format="ymd", + time.unit="weeks", estimation.split = NULL, + observation.end = NULL, name.id="Id", name.date="Date", name.price="Price", ...){ return(clvdata(data.transactions = x, @@ -304,8 +306,10 @@ as.clv.data.data.frame <- function(x, #' @rdname as.clv.data #' @export as.clv.data.data.table <- function(x, - date.format="ymd", time.unit="weeks", + date.format="ymd", + time.unit="weeks", estimation.split = NULL, + observation.end = NULL, name.id="Id", name.date="Date", name.price="Price", ...){ return(clvdata(data.transactions = x, diff --git a/man/as.clv.data.Rd b/man/as.clv.data.Rd index 61d6075d..a573feb4 100644 --- a/man/as.clv.data.Rd +++ b/man/as.clv.data.Rd @@ -10,8 +10,8 @@ as.clv.data( x, date.format = "ymd", time.unit = "weeks", - observation.end = NULL, estimation.split = NULL, + observation.end = NULL, name.id = "Id", name.date = "Date", name.price = "Price", @@ -23,6 +23,7 @@ as.clv.data( date.format = "ymd", time.unit = "weeks", estimation.split = NULL, + observation.end = NULL, name.id = "Id", name.date = "Date", name.price = "Price", @@ -34,6 +35,7 @@ as.clv.data( date.format = "ymd", time.unit = "weeks", estimation.split = NULL, + observation.end = NULL, name.id = "Id", name.date = "Date", name.price = "Price", @@ -47,10 +49,10 @@ as.clv.data( \item{time.unit}{What time unit defines a period. May be abbreviated, capitalization is ignored. See details.} -\item{observation.end}{The end of the observation period, beyond the last recorded transaction in \code{x}. See details.} - \item{estimation.split}{Indicates the length of the estimation period. See details.} +\item{observation.end}{The end of the observation period, beyond the last recorded transaction in \code{x}. See details.} + \item{name.id}{Column name of the customer id in \code{x}.} \item{name.date}{Column name of the transaction date in \code{x}.} From 2b953a240a09dbd1b2825f5408da93dfeb6a1de0 Mon Sep 17 00:00:00 2001 From: Patrik Schilter Date: Thu, 14 Aug 2025 11:02:24 +0200 Subject: [PATCH 13/26] Improve clvdata docu --- R/f_interface_clvdata.R | 6 ++++-- man/clvdata.Rd | 6 ++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/R/f_interface_clvdata.R b/R/f_interface_clvdata.R index ae01c4d7..88f57d7c 100644 --- a/R/f_interface_clvdata.R +++ b/R/f_interface_clvdata.R @@ -34,12 +34,14 @@ #' #' \code{observation.end} The point in time at which the observation period ends. #' The observation period is the total time frame in which customers were observed and is the combined estimation and holdout periods. -#' Useful when the last recorded transaction does not constitute the end of the observation period. +#' Useful when the last transaction does not constitute the end of the observation period. #' For example, when the last transaction was on "2000-12-29" but customers where actually observed until "2000-12-31". #' Required to be after the last recorded transaction. #' #' \code{estimation.split} May be specified as either the number of periods since the first transaction or the timepoint -#' (either as character, Date, or POSIXct) at which the estimation period ends. The indicated timepoint itself will be part of the estimation sample. +#' (either as character, Date, or POSIXct) at which the estimation period ends. +#' Required to be before the last transaction. +#' The indicated timepoint itself will be part of the estimation sample. #' If no value is provided or set to \code{NULL}, the whole dataset will used for fitting the model (no holdout sample). #' #' @details ## Aggregation of Transactions diff --git a/man/clvdata.Rd b/man/clvdata.Rd index d1626acf..95b8ce87 100644 --- a/man/clvdata.Rd +++ b/man/clvdata.Rd @@ -67,12 +67,14 @@ and hence all formats it accepts in argument \code{orders} can be used. For exam \code{observation.end} The point in time at which the observation period ends. The observation period is the total time frame in which customers were observed and is the combined estimation and holdout periods. -Useful when the last recorded transaction does not constitute the end of the observation period. +Useful when the last transaction does not constitute the end of the observation period. For example, when the last transaction was on "2000-12-29" but customers where actually observed until "2000-12-31". Required to be after the last recorded transaction. \code{estimation.split} May be specified as either the number of periods since the first transaction or the timepoint -(either as character, Date, or POSIXct) at which the estimation period ends. The indicated timepoint itself will be part of the estimation sample. +(either as character, Date, or POSIXct) at which the estimation period ends. +Required to be before the last transaction. +The indicated timepoint itself will be part of the estimation sample. If no value is provided or set to \code{NULL}, the whole dataset will used for fitting the model (no holdout sample). \subsection{Aggregation of Transactions}{ From c71e79520698089bdf7946b535866262ad749ed4 Mon Sep 17 00:00:00 2001 From: Patrik Schilter Date: Thu, 14 Aug 2025 11:24:34 +0200 Subject: [PATCH 14/26] Fix tests * ymd() * estimation.split can no longer be > last transaction --- .../helper_testthat_correctness_clvtime.R | 141 ++++++++++-------- 1 file changed, 77 insertions(+), 64 deletions(-) diff --git a/tests/testthat/helper_testthat_correctness_clvtime.R b/tests/testthat/helper_testthat_correctness_clvtime.R index 17b7173c..9708d3d6 100644 --- a/tests/testthat/helper_testthat_correctness_clvtime.R +++ b/tests/testthat/helper_testthat_correctness_clvtime.R @@ -399,14 +399,15 @@ fct.testthat.correctness.clvtime.set.sample.periods.char.estimation.end <- funct fct.testthat.correctness.clvtime.set.sample.periods.observation.end <- function(){ - # fct.helper.clv.time.correct.datetype + # + clv.t <- clv.time.weeks("ymd") test_that("Fail if observation.end is before last transaction",{ expect_error( clv.time.set.sample.periods( - clv.time.weeks("ymd"), - tp.first.transaction = ymd("2000-01-01"), - tp.last.transaction = ymd("2000-12-31"), + clv.t, + tp.first.transaction = fct.helper.clv.time.correct.datetype("2000-01-01", clv.t = clv.t), + tp.last.transaction = fct.helper.clv.time.correct.datetype("2000-12-31", clv.t = clv.t), user.estimation.end = NULL, user.observation.end = "2000-12-30"), regexp = "may not be before the last recorded transaction" @@ -417,45 +418,50 @@ fct.testthat.correctness.clvtime.set.sample.periods.observation.end <- function( test_that("Fail if observation.end leads to holdout period < 2 periods", { expect_error( clv.time.set.sample.periods( - clv.time.weeks("ymd"), - tp.first.transaction = ymd("2000-01-01"), - tp.last.transaction = ymd("2000-12-31"), - user.estimation.end = "2001-01-20", - user.observation.end = "2001-01-31"), + clv.t, + tp.first.transaction = fct.helper.clv.time.correct.datetype("2000-01-01", clv.t = clv.t), + tp.last.transaction = fct.helper.clv.time.correct.datetype("2000-12-31", clv.t = clv.t), + user.estimation.end = "2000-12-28", + user.observation.end = "2001-01-03"), regexp = "holdout period of at least 2 time.units" ) }) - test_that("Fail if observation.end leads to estimation period < 1 period", { - expect_error( - clv.time.set.sample.periods( - clv.time.weeks("ymd"), - tp.first.transaction = ymd("2000-01-01"), - tp.last.transaction = ymd("2000-01-02"), - user.estimation.end = "2000-01-05", - user.observation.end = "2000-01-31"), - regexp = "least 1 time.unit" - ) - }) - test_that("Fail if observation.end is before estimation.split", { - expect_error( - clv.time.set.sample.periods( - clv.time.weeks("ymd"), - tp.first.transaction = ymd("2000-01-01"), - tp.last.transaction = ymd("2000-12-31"), - user.estimation.end = "2001-02-15", - user.observation.end = "2001-01-31"), - regexp = "holdout period of at least 2 time.units" - ) - }) + # Not possible anymore since requiring estimation.split to be before last + # transaction and observation.end after last transaction + # test_that("Fail if observation.end leads to estimation period < 1 period", { + # expect_error( + # clv.time.set.sample.periods( + # clv.t, + # tp.first.transaction = fct.helper.clv.time.correct.datetype("2000-01-01", clv.t = clv.t), + # tp.last.transaction = fct.helper.clv.time.correct.datetype("2000-01-10", clv.t = clv.t), + # user.estimation.end = "2000-01-05", + # user.observation.end = "2000-01-31"), + # regexp = "least 1 time.unit" + # ) + # }) + + # Not possible anymore since requiring estimation.split to be before last + # transaction and observation.end after last transaction + # test_that("Fail if observation.end is before estimation.split", { + # expect_error( + # clv.time.set.sample.periods( + # clv.t, + # tp.first.transaction = fct.helper.clv.time.correct.datetype("2000-01-01", clv.t = clv.t), + # tp.last.transaction = fct.helper.clv.time.correct.datetype("2000-12-31", clv.t = clv.t), + # user.estimation.end = "2001-02-15", + # user.observation.end = "2001-01-31"), + # regexp = "holdout period of at least 2 time.units" + # ) + # }) test_that("Same object when no observation.end as when observation.end=tp.last.transaction", { l.args <- list( - clv.time = clv.time.weeks("ymd"), - tp.first.transaction = ymd("2000-01-01"), - tp.last.transaction = ymd("2000-12-31") + clv.time = clv.t, + tp.first.transaction = fct.helper.clv.time.correct.datetype("2000-01-01", clv.t = clv.t), + tp.last.transaction = fct.helper.clv.time.correct.datetype("2000-12-31", clv.t = clv.t) ) # No holdout @@ -473,29 +479,32 @@ fct.testthat.correctness.clvtime.set.sample.periods.observation.end <- function( ) }) - test_that("estimation.split can be after last transaction if observation.end is given", { - expect_silent( - clv.time.set.sample.periods( - clv.time.weeks("ymd"), - tp.first.transaction = ymd("2000-01-01"), - tp.last.transaction = ymd("2000-12-31"), - user.estimation.end = "2001-01-10", - user.observation.end = "2001-01-31") - ) - }) + + # Not possible anymore since requiring estimation.split to be before last + # transaction and observation.end after last transaction + # test_that("estimation.split can be after last transaction if observation.end is given", { + # expect_silent( + # clv.time.set.sample.periods( + # clv.t, + # tp.first.transaction = fct.helper.clv.time.correct.datetype("2000-01-01", clv.t=clv.t), + # tp.last.transaction = fct.helper.clv.time.correct.datetype("2000-12-31", clv.t = clv.t), + # user.estimation.end = "2001-01-10", + # user.observation.end = "2001-01-31") + # ) + # }) test_that("observation.end only moves holdout.end (if estimation.split is before)",{ clv.t.no.obsend <- clv.time.set.sample.periods( - clv.time = clv.time.weeks("ymd"), - tp.first.transaction = ymd("2000-01-01"), - tp.last.transaction = ymd("2000-12-31"), + clv.time = clv.t, + tp.first.transaction = fct.helper.clv.time.correct.datetype("2000-01-01", clv.t = clv.t), + tp.last.transaction = fct.helper.clv.time.correct.datetype("2000-12-31", clv.t = clv.t), user.estimation.end = "2000-06-15", user.observation.end = NULL) clv.t.with.obsend <- clv.time.set.sample.periods( - clv.time = clv.time.weeks("ymd"), - tp.first.transaction = ymd("2000-01-01"), - tp.last.transaction = ymd("2000-12-31"), + clv.time = clv.t, + tp.first.transaction = fct.helper.clv.time.correct.datetype("2000-01-01", clv.t=clv.t), + tp.last.transaction = fct.helper.clv.time.correct.datetype("2000-12-31", clv.t=clv.t), user.estimation.end = "2000-06-15", user.observation.end = "2001-02-28") @@ -511,26 +520,30 @@ fct.testthat.correctness.clvtime.set.sample.periods.observation.end <- function( }) + test_that("Manually check if yields correct timepoints", { - # With holdout + split after last transaction - clv.holdout <- clv.time.set.sample.periods( - clv.time = clv.time.weeks("ymd"), - tp.first.transaction = ymd("2000-01-01"), - tp.last.transaction = ymd("2000-05-28"), - user.estimation.end = "2000-06-15", - user.observation.end = "2000-12-31") - expect_true(clv.holdout@timepoint.estimation.start == "2000-01-01") - expect_true(clv.holdout@timepoint.estimation.end == "2000-06-15") - expect_true(clv.holdout@timepoint.holdout.start == "2000-06-16") - expect_true(clv.holdout@timepoint.holdout.end == "2000-12-31") + # Not possible anymore since requiring estimation.split to be before last + # transaction and observation.end after last transaction + # # With holdout + split after last transaction + # clv.holdout <- clv.time.set.sample.periods( + # clv.time = clv.t, + # tp.first.transaction = fct.helper.clv.time.correct.datetype("2000-01-01", clv.t = clv.t), + # tp.last.transaction = fct.helper.clv.time.correct.datetype("2000-05-28", clv.t = clv.t), + # user.estimation.end = "2000-06-15", + # user.observation.end = "2000-12-31") + # + # expect_true(clv.holdout@timepoint.estimation.start == "2000-01-01") + # expect_true(clv.holdout@timepoint.estimation.end == "2000-06-15") + # expect_true(clv.holdout@timepoint.holdout.start == "2000-06-16") + # expect_true(clv.holdout@timepoint.holdout.end == "2000-12-31") clv.no.holdout <- clv.time.set.sample.periods( - clv.time = clv.time.weeks("ymd"), - tp.first.transaction = ymd("2000-01-01"), - tp.last.transaction = ymd("2000-12-28"), + clv.time = clv.t, + tp.first.transaction = fct.helper.clv.time.correct.datetype("2000-01-01", clv.t = clv.t), + tp.last.transaction = fct.helper.clv.time.correct.datetype("2000-12-28", clv.t=clv.t), user.estimation.end = NULL, user.observation.end = "2000-12-31") From 5e41a7892ef951a125466f0146b0d345d31ae79c Mon Sep 17 00:00:00 2001 From: Patrik Schilter Date: Fri, 15 Aug 2025 00:05:29 +0200 Subject: [PATCH 15/26] clv.time: Add back test --- .../helper_testthat_correctness_clvtime.R | 24 +++++++++---------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/tests/testthat/helper_testthat_correctness_clvtime.R b/tests/testthat/helper_testthat_correctness_clvtime.R index 9708d3d6..7b536471 100644 --- a/tests/testthat/helper_testthat_correctness_clvtime.R +++ b/tests/testthat/helper_testthat_correctness_clvtime.R @@ -524,20 +524,18 @@ fct.testthat.correctness.clvtime.set.sample.periods.observation.end <- function( test_that("Manually check if yields correct timepoints", { - # Not possible anymore since requiring estimation.split to be before last - # transaction and observation.end after last transaction # # With holdout + split after last transaction - # clv.holdout <- clv.time.set.sample.periods( - # clv.time = clv.t, - # tp.first.transaction = fct.helper.clv.time.correct.datetype("2000-01-01", clv.t = clv.t), - # tp.last.transaction = fct.helper.clv.time.correct.datetype("2000-05-28", clv.t = clv.t), - # user.estimation.end = "2000-06-15", - # user.observation.end = "2000-12-31") - # - # expect_true(clv.holdout@timepoint.estimation.start == "2000-01-01") - # expect_true(clv.holdout@timepoint.estimation.end == "2000-06-15") - # expect_true(clv.holdout@timepoint.holdout.start == "2000-06-16") - # expect_true(clv.holdout@timepoint.holdout.end == "2000-12-31") + clv.holdout <- clv.time.set.sample.periods( + clv.time = clv.t, + tp.first.transaction = fct.helper.clv.time.correct.datetype("2000-01-01", clv.t = clv.t), + tp.last.transaction = fct.helper.clv.time.correct.datetype("2000-06-15", clv.t = clv.t), + user.estimation.end = "2000-04-04", + user.observation.end = "2000-12-31") + + expect_true(clv.holdout@timepoint.estimation.start == "2000-01-01") + expect_true(clv.holdout@timepoint.estimation.end == "2000-04-04") + expect_true(clv.holdout@timepoint.holdout.start == "2000-04-05") + expect_true(clv.holdout@timepoint.holdout.end == "2000-12-31") clv.no.holdout <- clv.time.set.sample.periods( From 8ad3befd74606d2b539135b806cd128093e3b52f Mon Sep 17 00:00:00 2001 From: Patrik Schilter Date: Fri, 15 Aug 2025 00:05:58 +0200 Subject: [PATCH 16/26] Tests: clvdata(), clv.data S3 --- tests/testthat/helper_arrange.R | 10 +++- .../test_runability_clvdata_clvdata.R | 37 ++++++++++++++ tests/testthat/test_runability_clvdata_s3.R | 49 ++++++++++--------- 3 files changed, 72 insertions(+), 24 deletions(-) diff --git a/tests/testthat/helper_arrange.R b/tests/testthat/helper_arrange.R index 23c2b2a7..7fb7e4c3 100644 --- a/tests/testthat/helper_arrange.R +++ b/tests/testthat/helper_arrange.R @@ -35,6 +35,7 @@ fct.helper.create.clvdata.cdnow <- function(data.cdnow = NULL, estimation.split fct.helper.create.clvdata.apparel.nocov <- function( data.apparelTrans = NULL, + observation.end = NULL, estimation.split = 104) { if (is.null(data.apparelTrans)) { @@ -45,6 +46,7 @@ fct.helper.create.clvdata.apparel.nocov <- function( data.transactions = data.apparelTrans, date.format = "ymd", time.unit = "W", + observation.end = observation.end, estimation.split = estimation.split )) } @@ -52,6 +54,7 @@ fct.helper.create.clvdata.apparel.nocov <- function( fct.helper.create.clvdata.apparel.staticcov <- function( data.apparelTrans = NULL, data.apparelStaticCov = NULL, + observation.end = NULL, estimation.split = 104, names.cov.life = c("Gender", "Channel"), names.cov.trans = c("Gender", "Channel")) { @@ -64,7 +67,10 @@ fct.helper.create.clvdata.apparel.staticcov <- function( return(SetStaticCovariates( clvdata( - data.transactions = data.apparelTrans, date.format = "ymd", time.unit = "W", + data.transactions = data.apparelTrans, + date.format = "ymd", + time.unit = "W", + observation.end = observation.end, estimation.split = estimation.split ), data.cov.life = data.apparelStaticCov, @@ -77,6 +83,7 @@ fct.helper.create.clvdata.apparel.staticcov <- function( fct.helper.create.clvdata.apparel.dyncov <- function( data.apparelTrans = NULL, data.apparelDynCov = NULL, + observation.end = NULL, estimation.split = 104, names.cov.life = c("High.Season", "Gender", "Channel"), names.cov.trans = c("High.Season", "Gender", "Channel")) { @@ -92,6 +99,7 @@ fct.helper.create.clvdata.apparel.dyncov <- function( data = data.apparelTrans, date.format = "ymd", time.unit = "w", + observation.end = observation.end, estimation.split = estimation.split )) diff --git a/tests/testthat/test_runability_clvdata_clvdata.R b/tests/testthat/test_runability_clvdata_clvdata.R index 2750b080..0fd8a0a4 100644 --- a/tests/testthat/test_runability_clvdata_clvdata.R +++ b/tests/testthat/test_runability_clvdata_clvdata.R @@ -249,3 +249,40 @@ test_that("Works when called from as.clv.data()", { expect_silent(as.clv.data(as.data.frame(cdnow))) expect_silent(as.clv.data(as.data.table(cdnow))) }) + +# observation.end ---------------------------------------------------------------- + +test_that("Works with observation.end with and without holdout",{ + skip_on_cran() + l.args <- list( + observation.end = "2000-01-01", + data.transactions = cdnow, + time.unit = "w", + date.format = "ymd" + ) + + l.args["estimation.split"] <- list(NULL) + expect_silent(do.call(clvdata, l.args)) + + l.args$estimation.split <- 37 + expect_silent(do.call(clvdata, l.args)) +}) + +test_that("Works with observation.end and time.units hours, days, years", { + l.args <- list( + data.transactions = cdnow, + observation.end = "2000-01-01", + estimation.split = NULL, + date.format = "ymd" + ) + + l.args$time.unit <- "days" + expect_silent(do.call(clvdata, l.args)) + + l.args$time.unit <- "year" + expect_silent(do.call(clvdata, l.args)) + + l.args$time.unit <- "hours" + expect_silent(do.call(clvdata, l.args)) +}) + diff --git a/tests/testthat/test_runability_clvdata_s3.R b/tests/testthat/test_runability_clvdata_s3.R index a4259260..2f9bb12d 100644 --- a/tests/testthat/test_runability_clvdata_s3.R +++ b/tests/testthat/test_runability_clvdata_s3.R @@ -25,9 +25,11 @@ fct.helper.test.runability.clv.data.summary <- function(clv.data){ # warning if inexistent ids expect_warning(summary(clv.data, ids=c(ids, "abczxy")), regexp = "Not all given ids were found") - # id with trans in holdout - expect_silent(id.with.holdout <- clv.data@data.transactions[Date>=clv.data@clv.time@timepoint.holdout.start, head(Id,n=1)]) - expect_silent(summary(clv.data, ids=id.with.holdout)) + if(clv.data.has.holdout(clv.data)){ + # id with trans in holdout + expect_silent(id.with.holdout <- clv.data@data.transactions[Date>=clv.data@clv.time@timepoint.holdout.start, head(Id,n=1)]) + expect_silent(summary(clv.data, ids=id.with.holdout)) + } # id without trans in holdout # any zero-repeater @@ -270,20 +272,6 @@ fct.helper.test.runability.clv.data.others3 <- function(clv.data){ } - - -# Create all combos: {w/, w/o} holdout, {w/, w/o} {static, dynamic} covs - -apparel.holdout <- fct.helper.create.clvdata.apparel.nocov() -apparel.no.holdout <- fct.helper.create.clvdata.apparel.nocov(estimation.split = NULL) - -apparel.holdout.static.cov <- fct.helper.create.clvdata.apparel.staticcov() -apparel.no.holdout.static.cov <- fct.helper.create.clvdata.apparel.staticcov(estimation.split = NULL) - -apparel.holdout.dyn.cov <- fct.helper.create.clvdata.apparel.dyncov() -apparel.no.holdout.dyn.cov <- fct.helper.create.clvdata.apparel.dyncov(estimation.split = NULL) - - fct.helper.test.runability.clv.data.runall <- function(clv.data){ fct.helper.test.runability.clv.data.trackingplot(clv.data) fct.helper.test.runability.clv.data.plotfrequency(clv.data) @@ -295,10 +283,25 @@ fct.helper.test.runability.clv.data.runall <- function(clv.data){ } -fct.helper.test.runability.clv.data.runall(apparel.holdout) -fct.helper.test.runability.clv.data.runall(apparel.no.holdout) -fct.helper.test.runability.clv.data.runall(apparel.holdout.static.cov) -fct.helper.test.runability.clv.data.runall(apparel.no.holdout.static.cov) -fct.helper.test.runability.clv.data.runall(apparel.holdout.dyn.cov) -fct.helper.test.runability.clv.data.runall(apparel.no.holdout.dyn.cov) + +for(fn in list( + fct.helper.create.clvdata.apparel.nocov, + fct.helper.create.clvdata.apparel.staticcov +)){ + # With holdout + fct.helper.test.runability.clv.data.runall(fn()) + # . with observation.end + fct.helper.test.runability.clv.data.runall(fn(observation.end="2011-01-31")) + + # Without holdout + fct.helper.test.runability.clv.data.runall(fn(estimation.split=NULL)) + # . with observation.end + fct.helper.test.runability.clv.data.runall(fn(estimation.split=NULL, observation.end="2011-01-31")) + +} + +# observation.end would require to prepare transaction data (cut before cov end) +# But dyncovs are not used for any of the s3 methods, therefore skip testing with observation.end for dyncov +fct.helper.test.runability.clv.data.runall(fct.helper.create.clvdata.apparel.dyncov()) +fct.helper.test.runability.clv.data.runall(fct.helper.create.clvdata.apparel.dyncov(estimation.split = NULL)) From 39d7f7c95778835a64f24bce41126d63b409800e Mon Sep 17 00:00:00 2001 From: Patrik Schilter Date: Fri, 15 Aug 2025 00:33:48 +0200 Subject: [PATCH 17/26] Tests: Fix setdyncov tests --- .../test_inputchecks_clvdata_setdynamiccov.R | 26 +++++++++++++------ 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/tests/testthat/test_inputchecks_clvdata_setdynamiccov.R b/tests/testthat/test_inputchecks_clvdata_setdynamiccov.R index 7736d58b..008ebbd2 100644 --- a/tests/testthat/test_inputchecks_clvdata_setdynamiccov.R +++ b/tests/testthat/test_inputchecks_clvdata_setdynamiccov.R @@ -13,22 +13,32 @@ fct.expect.error.setdyncov <- function( names.cov.trans=c("High.Season", "Gender", "Channel"), name.id = "Id", name.date = "Cov.Date", + make.missing = NULL, regexp=NULL){ + if(!is.null(make.missing)){ + e <- environment() + # e[[make.missing]] <- NULL + rm(list=make.missing, envir = e) + } + expect_error(SetDynamicCovariates( clv.data = clv.data, data.cov.life = data.cov.life, names.cov.life = names.cov.life, data.cov.trans = data.cov.trans, - names.cov.trans = names.cov.trans - )) + names.cov.trans = names.cov.trans, + name.id = name.id, + name.date = name.date + ), + regexp = regexp) } # Parameter clv.data --------------------------------------------------------------------------------------- test_that("Fails if not clv.data input", { # missing/NA/NULL - fct.expect.error.setdyncov(clv.data = ) + fct.expect.error.setdyncov(make.missing = "clv.data") fct.expect.error.setdyncov(clv.data = NULL) fct.expect.error.setdyncov(clv.data = NA_real_) @@ -52,14 +62,14 @@ test_that("Fails if already has covariates", { test_that("Fails if is wrong type ", { # data.cov.life - fct.expect.error.setdyncov(data.cov.life = , regexp = "missing") + fct.expect.error.setdyncov(make.missing="data.cov.life", regexp = "not found") fct.expect.error.setdyncov(data.cov.life = NULL, regexp = "type data.frame or data.table") fct.expect.error.setdyncov(data.cov.life = NA, regexp = "type data.frame or data.table") fct.expect.error.setdyncov(data.cov.life = as.list(apparelDynCov), regexp = "type data.frame or data.table") # data.cov.trans - fct.expect.error.setdyncov(data.cov.trans = , regexp = "missing") + fct.expect.error.setdyncov(make.missing = "data.cov.trans", regexp = "not found") fct.expect.error.setdyncov(data.cov.trans = NULL, regexp = "type data.frame or data.table") fct.expect.error.setdyncov(data.cov.trans = NA, regexp = "type data.frame or data.table") fct.expect.error.setdyncov(data.cov.trans = as.list(apparelDynCov), regexp = "type data.frame or data.table") @@ -214,13 +224,13 @@ test_that("Fails for variable with single category", { test_that("Fails if missing/NULL/NA/empty",{ # names.cov.life - fct.expect.error.setdyncov(names.cov.life = , regexp = "missing") + fct.expect.error.setdyncov(make.missing = "names.cov.life", regexp = "not found") fct.expect.error.setdyncov(names.cov.life = NULL, regexp = "may not be NULL") fct.expect.error.setdyncov(names.cov.life = NA_character_, regexp = "any NA") fct.expect.error.setdyncov(names.cov.life = "", regexp = "could not be found") # names.cov.trans - fct.expect.error.setdyncov(names.cov.trans = , regexp = "missing") + fct.expect.error.setdyncov(make.missing = "names.cov.trans", regexp = "not found") fct.expect.error.setdyncov(names.cov.trans = NULL, regexp = "may not be NULL") fct.expect.error.setdyncov(names.cov.trans = NA_character_, regexp = "any NA") fct.expect.error.setdyncov(names.cov.trans = "", regexp = "could not be found") @@ -313,7 +323,7 @@ test_that("Has default argument Id",{ # Parameter name.date --------------------------------------------------------------------------------------- test_that("Fails if NA/NULL", { - fct.expect.error.setdyncov(name.date = "id", regexp = "NULL") + fct.expect.error.setdyncov(name.date = "id", regexp = "could not be found") fct.expect.error.setdyncov(name.date = NA_character_, regexp = "any NA") fct.expect.error.setdyncov(name.date = character(0), regexp = "exactly 1 element") }) From d946c9dc37c21514108a7e5acac2711850c059b0 Mon Sep 17 00:00:00 2001 From: Patrik Schilter Date: Fri, 15 Aug 2025 00:34:08 +0200 Subject: [PATCH 18/26] Tests: SetDynCov with observation.end --- .../test_inputchecks_clvdata_setdynamiccov.R | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tests/testthat/test_inputchecks_clvdata_setdynamiccov.R b/tests/testthat/test_inputchecks_clvdata_setdynamiccov.R index 008ebbd2..cb0e17eb 100644 --- a/tests/testthat/test_inputchecks_clvdata_setdynamiccov.R +++ b/tests/testthat/test_inputchecks_clvdata_setdynamiccov.R @@ -100,6 +100,18 @@ test_that("Fails if covariate data is to short for all customers",{ fct.expect.error.setdyncov(data.cov.trans = apparelDynCov.tooshort, regexp = "covariate data exactly from") }) +test_that("Fails if covariate data ends before observation.end", { + + clv.data.apparel.obsE <- fct.helper.create.clvdata.apparel.nocov( + estimation.split = NULL, + observation.end = "2012-12-31") + + fct.expect.error.setdyncov( + clv.data = clv.data.apparel.obsE, + regexp = "There need to be weekly covariate data exactly") + +}) + test_that("Fails if there are Ids in the covariates that are not in the transaction data", { dt.cov.1additional <- data.table::copy(apparelDynCov[Id == "1"]) dt.cov.1additional[, Id := "ABC"] From 9b6b62c45218f38bee5a78df3d7d6c405dff8272 Mon Sep 17 00:00:00 2001 From: Patrik Schilter Date: Sat, 16 Aug 2025 23:05:28 +0200 Subject: [PATCH 19/26] Tests: Dyncov runability with observation.end --- tests/testthat/helper_arrange.R | 2 + .../test_runability_pnbd_dynamiccov.R | 62 +++++++++++++++++++ 2 files changed, 64 insertions(+) diff --git a/tests/testthat/helper_arrange.R b/tests/testthat/helper_arrange.R index 7fb7e4c3..190562a3 100644 --- a/tests/testthat/helper_arrange.R +++ b/tests/testthat/helper_arrange.R @@ -203,6 +203,7 @@ fit.apparel.static <- function( fit.apparel.dyncov <- function( data.apparelTrans = NULL, data.apparelDynCov = NULL, + observation.end = NULL, estimation.split = 104, names.cov.life = c("High.Season", "Gender", "Channel"), names.cov.trans = c("High.Season", "Gender", "Channel"), @@ -214,6 +215,7 @@ fit.apparel.dyncov <- function( data.apparelTrans = data.apparelTrans, data.apparelDynCov = data.apparelDynCov, estimation.split = estimation.split, + observation.end = observation.end, names.cov.life = names.cov.life, names.cov.trans = names.cov.trans ) diff --git a/tests/testthat/test_runability_pnbd_dynamiccov.R b/tests/testthat/test_runability_pnbd_dynamiccov.R index d5277fc7..0dee6761 100644 --- a/tests/testthat/test_runability_pnbd_dynamiccov.R +++ b/tests/testthat/test_runability_pnbd_dynamiccov.R @@ -93,3 +93,65 @@ test_that("Dyncov works with additional model specifications", { names.cov.constr = "Gender", reg.lambda = c(trans=10, life=10)) }) + + +# With observation.end --------------------------------------------------------- + +test_that("Fit, plot, predict work with partially empty estimation/holdout period", { + + covs.life <- c("High.Season", "Gender") + covs.trans <- c("High.Season", "Gender", "Channel") + + apparelTrans.cut.obsE <- apparelTrans[Date < "2010-12-01"] + date.original.trans.max <- apparelTrans[, max(Date)] + + + # fct.helper.runability.dyncov.all.downstream requires holdout + + names.params <- c( + 'r', 'alpha', 's', 'beta', + paste0('life.', covs.life), + paste0('trans.', covs.trans)) + + + # Create object with no transactions in December 2010 but with the observation + # period as originally (2010-12-20) + + # No holdout + expect_silent(fitted.dyncov.noholdout.obsE <- fit.apparel.dyncov( + data.apparelTrans = apparelTrans.cut.obsE, + estimation.split = NULL, + observation.end = date.original.trans.max, + names.cov.life = covs.life, + names.cov.trans = covs.trans, + optimx.args=fct.helper.dyncov.get.optimxargs.quickfit(hessian=TRUE) + )) + + .fct.helper.clvfitted.all.s3.except.plot.and.predict( + clv.fitted=fitted.dyncov.noholdout.obsE, + full.names=names.params + ) + + expect_silent(predict(fitted.dyncov.noholdout.obsE, prediction.end=1, verbose=FALSE)) + expect_silent(plot(fitted.dyncov.noholdout.obsE, prediction.end = NULL, verbose=FALSE)) + + + # With holdout + expect_silent(fitted.dyncov.holdout.obsE <- fit.apparel.dyncov( + data.apparelTrans = apparelTrans.cut.obsE, + estimation.split = 104, + observation.end = date.original.trans.max, + names.cov.life = covs.life, + names.cov.trans = covs.trans, + optimx.args=fct.helper.dyncov.get.optimxargs.quickfit(hessian=TRUE) + )) + + .fct.helper.clvfitted.all.s3.except.plot.and.predict( + clv.fitted=fitted.dyncov.holdout.obsE, + full.names=names.params + ) + + expect_silent(predict(fitted.dyncov.holdout.obsE, prediction.end=10, verbose=FALSE)) + expect_silent(plot(fitted.dyncov.holdout.obsE, prediction.end = 10, verbose=FALSE)) + +}) From c4df0adf09cb13d88c56a991d9e1a53373ff9fed Mon Sep 17 00:00:00 2001 From: Patrik Schilter Date: Wed, 27 Aug 2025 23:41:44 +0200 Subject: [PATCH 20/26] Tests: observation.end moves prediction period * add observation.end to missing arrange methods --- tests/testthat/helper_arrange.R | 14 ++++++++++- ...helper_testthat_correctness_transactions.R | 24 +++++++++++++++++++ 2 files changed, 37 insertions(+), 1 deletion(-) diff --git a/tests/testthat/helper_arrange.R b/tests/testthat/helper_arrange.R index 190562a3..dbfbe91d 100644 --- a/tests/testthat/helper_arrange.R +++ b/tests/testthat/helper_arrange.R @@ -19,7 +19,12 @@ fct.helper.load.apparelDynCov <- function(){.load.data.locally("apparelDynCov")} -fct.helper.create.clvdata.cdnow <- function(data.cdnow = NULL, estimation.split = 37, name.price = "Price") { +fct.helper.create.clvdata.cdnow <- function( + data.cdnow = NULL, + observation.end = NULL, + estimation.split = 37, + name.price = "Price") +{ if (is.null(data.cdnow)) { data.cdnow <- fct.helper.load.cdnow() } @@ -27,6 +32,7 @@ fct.helper.create.clvdata.cdnow <- function(data.cdnow = NULL, estimation.split data.transactions = data.cdnow, date.format = "ymd", time.unit = "w", + observation.end = observation.end, estimation.split = estimation.split, name.price = name.price ) @@ -118,6 +124,7 @@ fct.helper.create.clvdata.apparel.dyncov <- function( fit.cdnow <- function( data.cdnow = NULL, + observation.end = NULL, estimation.split = 37, name.price = 'Price', model = pnbd, @@ -127,6 +134,7 @@ fit.cdnow <- function( clv.cdnow <- fct.helper.create.clvdata.cdnow( data.cdnow = data.cdnow, + observation.end = observation.end, estimation.split = estimation.split, name.price=name.price ) @@ -145,6 +153,7 @@ fit.cdnow <- function( fit.apparel.nocov <- function( data.apparelTrans = NULL, + observation.end = NULL, estimation.split = 104, model = pnbd, verbose=FALSE, @@ -155,6 +164,7 @@ fit.apparel.nocov <- function( clv.data.apparel <- fct.helper.create.clvdata.apparel.nocov( data.apparelTrans = data.apparelTrans, + observation.end = observation.end, estimation.split = estimation.split ) @@ -171,6 +181,7 @@ fit.apparel.nocov <- function( fit.apparel.static <- function( data.apparelTrans = NULL, data.apparelStaticCov = NULL, + observation.end = NULL, estimation.split = 104, names.cov.life = c("Gender", "Channel"), names.cov.trans = c("Gender", "Channel"), @@ -184,6 +195,7 @@ fit.apparel.static <- function( clv.data.apparel.cov <- fct.helper.create.clvdata.apparel.staticcov( data.apparelTrans = data.apparelTrans, data.apparelStaticCov = data.apparelStaticCov, + observation.end = observation.end, estimation.split = estimation.split, names.cov.life = names.cov.life, names.cov.trans = names.cov.trans diff --git a/tests/testthat/helper_testthat_correctness_transactions.R b/tests/testthat/helper_testthat_correctness_transactions.R index 0d0a32f1..b98613e8 100644 --- a/tests/testthat/helper_testthat_correctness_transactions.R +++ b/tests/testthat/helper_testthat_correctness_transactions.R @@ -296,6 +296,28 @@ fct.testthat.correctness.clvfittedtransactions.staticcov.predict.newcustomer.0.f }) } +fct.testthat.correctness.clvfittedtransactions.observation.end.moves.prediction.period <- function(method){ + + test_that("Using observation.end moves prediction period",{ + # Only valid for no holdout data + skip_on_cran() + + fitted <- fit.cdnow( + model = method, + estimation.split = NULL, + observation.end = "1998-07-15" + ) + + dt.pred <- predict(fitted, prediction.end = "1998-07-30") + + # Prediction period starts first `eps` after observation.end + expect_true(dt.pred[1, "period.first"] == "1998-07-16") + # Nothing else changed + expect_true(dt.pred[1, "period.last"] == "1998-07-30") + }) + +} + fct.testthat.correctness.clvfittedtransactions <- function(name.model, method, correct.start.params.model, correct.params.nocov.coef, correct.LL.nocov, kkt2.true){ @@ -326,6 +348,8 @@ fct.testthat.correctness.clvfittedtransactions <- function(name.model, method, # predict(newdata=newcustomer): no cov fct.testthat.correctness.clvfittedtransactions.nocov.predict.newcustomer.0.for.num.periods.eq.0(obj.fitted) + # observation.end moves start of prediction period + fct.testthat.correctness.clvfittedtransactions.observation.end.moves.prediction.period(method = method) # Static cov data -------------------------------------------------------------------------------------------- # why 100 and not 104??????? From f39295f693bc7d690c455ad0006f3964c45f751e Mon Sep 17 00:00:00 2001 From: Patrik Schilter Date: Thu, 28 Aug 2025 14:48:50 +0200 Subject: [PATCH 21/26] Fix test --- tests/testthat/test_runability_pnbd_dynamiccov.R | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test_runability_pnbd_dynamiccov.R b/tests/testthat/test_runability_pnbd_dynamiccov.R index 0dee6761..fa9e8675 100644 --- a/tests/testthat/test_runability_pnbd_dynamiccov.R +++ b/tests/testthat/test_runability_pnbd_dynamiccov.R @@ -151,7 +151,10 @@ test_that("Fit, plot, predict work with partially empty estimation/holdout perio full.names=names.params ) - expect_silent(predict(fitted.dyncov.holdout.obsE, prediction.end=10, verbose=FALSE)) - expect_silent(plot(fitted.dyncov.holdout.obsE, prediction.end = 10, verbose=FALSE)) + expect_silent(predict(fitted.dyncov.holdout.obsE, prediction.end=5, verbose=FALSE)) + expect_warning( + plot(fitted.dyncov.holdout.obsE, prediction.end = 5, verbose=FALSE), + regexp = "Not plotting full holdout period" + ) }) From ab7c7cc8db10d56664076862cd1819ef276ca769 Mon Sep 17 00:00:00 2001 From: Patrik Schilter Date: Thu, 28 Aug 2025 15:12:11 +0200 Subject: [PATCH 22/26] Fix docu --- R/f_interface_clvdata.R | 2 +- man/clvdata.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/f_interface_clvdata.R b/R/f_interface_clvdata.R index 88f57d7c..5d71c4c1 100644 --- a/R/f_interface_clvdata.R +++ b/R/f_interface_clvdata.R @@ -35,7 +35,7 @@ #' \code{observation.end} The point in time at which the observation period ends. #' The observation period is the total time frame in which customers were observed and is the combined estimation and holdout periods. #' Useful when the last transaction does not constitute the end of the observation period. -#' For example, when the last transaction was on "2000-12-29" but customers where actually observed until "2000-12-31". +#' For example, when the last transaction was on "2000-12-29" but customers were actually observed until "2000-12-31". #' Required to be after the last recorded transaction. #' #' \code{estimation.split} May be specified as either the number of periods since the first transaction or the timepoint diff --git a/man/clvdata.Rd b/man/clvdata.Rd index 95b8ce87..c027f57d 100644 --- a/man/clvdata.Rd +++ b/man/clvdata.Rd @@ -68,7 +68,7 @@ and hence all formats it accepts in argument \code{orders} can be used. For exam \code{observation.end} The point in time at which the observation period ends. The observation period is the total time frame in which customers were observed and is the combined estimation and holdout periods. Useful when the last transaction does not constitute the end of the observation period. -For example, when the last transaction was on "2000-12-29" but customers where actually observed until "2000-12-31". +For example, when the last transaction was on "2000-12-29" but customers were actually observed until "2000-12-31". Required to be after the last recorded transaction. \code{estimation.split} May be specified as either the number of periods since the first transaction or the timepoint From 0bcb260a39081596e63aefa06bd59be5d0ce7d2d Mon Sep 17 00:00:00 2001 From: Patrik Schilter Date: Thu, 28 Aug 2025 15:42:19 +0200 Subject: [PATCH 23/26] Rename `observation.end` -> `data.end` --- R/all_generics.R | 2 +- R/class_clv_time.R | 24 ++-- R/f_clvdata_inputchecks.R | 26 ++--- R/f_interface_clvdata.R | 17 +-- R/f_s3generics_clvdata.R | 4 +- man-roxygen/template_params_clvdata.R | 2 +- man/as.clv.data.Rd | 8 +- man/clvdata.Rd | 15 +-- tests/testthat/helper_arrange.R | 32 ++--- .../helper_testthat_correctness_clvtime.R | 110 +++++++++--------- ...helper_testthat_correctness_transactions.R | 12 +- tests/testthat/test_correctness_clvtime.R | 6 +- .../test_inputchecks_clvdata_setdynamiccov.R | 4 +- .../test_runability_clvdata_clvdata.R | 10 +- tests/testthat/test_runability_clvdata_s3.R | 12 +- .../test_runability_pnbd_dynamiccov.R | 6 +- 16 files changed, 146 insertions(+), 144 deletions(-) diff --git a/R/all_generics.R b/R/all_generics.R index a939cebd..ed8c56c9 100644 --- a/R/all_generics.R +++ b/R/all_generics.R @@ -248,7 +248,7 @@ as.clv.data <- function(x, date.format="ymd", time.unit="weeks", estimation.split = NULL, - observation.end = NULL, + data.end = NULL, name.id="Id", name.date="Date", name.price="Price", ...){ UseMethod("as.clv.data", x) diff --git a/R/class_clv_time.R b/R/class_clv_time.R index af1ba919..204be747 100644 --- a/R/class_clv_time.R +++ b/R/class_clv_time.R @@ -75,20 +75,20 @@ clv.time.has.holdout <- function(clv.time){ # set.sample.periods ------------------------------------------------------------------------ #' @importFrom lubridate period -clv.time.set.sample.periods <- function(clv.time, tp.first.transaction, tp.last.transaction, user.estimation.end, user.observation.end){ +clv.time.set.sample.periods <- function(clv.time, tp.first.transaction, tp.last.transaction, user.estimation.end, user.data.end){ tp.estimation.start <- tp.first.transaction - if(is.null(user.observation.end)){ - tp.observation.end <- tp.last.transaction + if(is.null(user.data.end)){ + tp.data.end <- tp.last.transaction }else{ - tp.observation.end <- clv.time.convert.user.input.to.timepoint( + tp.data.end <- clv.time.convert.user.input.to.timepoint( clv.time=clv.time, - user.timepoint=user.observation.end) + user.timepoint=user.data.end) - # Observation end may not be before last transaction - if(tp.observation.end < tp.last.transaction){ - stop("The given observation.end may not be before the last recorded transaction!") + # Data end may not be before last transaction + if(tp.data.end < tp.last.transaction){ + stop("The given data.end may not be before the last recorded transaction!") } } @@ -120,21 +120,21 @@ clv.time.set.sample.periods <- function(clv.time, tp.first.transaction, tp.last. # Need to be 2 periods because otherwise for days, holdout can be not on estimation.end but still be of length zero # ie 2 periods to still have 1 as holdout - if(tp.estimation.end > tp.observation.end - clv.time.number.timeunits.to.timeperiod(clv.time, 2L)) + if(tp.estimation.end > tp.data.end - clv.time.number.timeunits.to.timeperiod(clv.time, 2L)) stop("Parameter estimation.split needs to indicate a point in time such that it yields a holdout period of at least 2 time.units!", call. = FALSE) # + 1 day is the same for all because most fine-grained change that Date can do tp.holdout.start <- tp.estimation.end + clv.time.epsilon(clv.time=clv.time) - tp.holdout.end <- tp.observation.end + tp.holdout.end <- tp.data.end holdout.period.in.tu <- clv.time.interval.in.number.tu(clv.time, interv=interval(start = tp.holdout.start, end = tp.holdout.end)) }else{ - # NULL: no specific end - until observation end + # NULL: no specific end - until data end # tp.holdout.start/.end and HAVE to be end of estimation period as this is used elsewhere! # ie to ensure prediction.end (with clv.time.get.prediction.table) finds correct end if user gives NULL - tp.estimation.end <- tp.observation.end + tp.estimation.end <- tp.data.end tp.holdout.start <- tp.estimation.end tp.holdout.end <- tp.estimation.end holdout.period.in.tu <- 0 diff --git a/R/f_clvdata_inputchecks.R b/R/f_clvdata_inputchecks.R index 623a9c3b..42be999f 100644 --- a/R/f_clvdata_inputchecks.R +++ b/R/f_clvdata_inputchecks.R @@ -211,26 +211,26 @@ check_userinput_datanocov_estimationsplit <- function(estimation.split, date.for } #' @importFrom lubridate is.POSIXt is.Date parse_date_time -check_userinput_datanocov_observationend <- function(observation.end, date.format){ +check_userinput_datanocov_dataend <- function(data.end, date.format){ # May be NULL - if(is.null(observation.end)) + if(is.null(data.end)) return(c()) - if(length(observation.end) != 1) - return("observation.end must contain exactly one single element!") + if(length(data.end) != 1) + return("data.end must contain exactly one single element!") - if(anyNA(observation.end)) - return("observation.end may not contain any NAs!") + if(anyNA(data.end)) + return("data.end may not contain any NAs!") - if(!is.character(observation.end) - & !is.Date(observation.end) - & !is.POSIXt(observation.end)) - return("observation.end needs to either of type character or date-like (Date or POSIXt)") + if(!is.character(data.end) + & !is.Date(data.end) + & !is.POSIXt(data.end)) + return("data.end needs to either of type character or date-like (Date or POSIXt)") - if(is.character(observation.end)) - if(anyNA(parse_date_time(x=observation.end, quiet=TRUE, orders=date.format))) - return("Please provide a valid observation.end to that can be converted with the given date.format!") + if(is.character(data.end)) + if(anyNA(parse_date_time(x=data.end, quiet=TRUE, orders=date.format))) + return("Please provide a valid data.end to that can be converted with the given date.format!") return(c()) } diff --git a/R/f_interface_clvdata.R b/R/f_interface_clvdata.R index 5d71c4c1..78807aa5 100644 --- a/R/f_interface_clvdata.R +++ b/R/f_interface_clvdata.R @@ -32,10 +32,11 @@ #' (i.e., "2010-06-17") is indicated with \code{"ymd"}. Other combinations such as \code{"dmy"}, \code{"dym"}, #' \code{"ymd HMS"}, or \code{"HMS dmy"} are possible as well. #' -#' \code{observation.end} The point in time at which the observation period ends. -#' The observation period is the total time frame in which customers were observed and is the combined estimation and holdout periods. -#' Useful when the last transaction does not constitute the end of the observation period. -#' For example, when the last transaction was on "2000-12-29" but customers were actually observed until "2000-12-31". +#' \code{data.end} A point in time beyond the last purchase at which the data should fictionally end. +#' It defines the total time frame in which customers could be observed: The combined estimation and holdout periods. +#' For example, when the last recorded transaction was on "2000-12-29" but customers were actually observed until "2000-12-31". +#' Using \code{data.end="2000-12-31"} without holdout period, +#' the estimation period will be until "2000-12-31" and the prediction period will start on "2001-01-01". #' Required to be after the last recorded transaction. #' #' \code{estimation.split} May be specified as either the number of periods since the first transaction or the timepoint @@ -96,7 +97,7 @@ #' clv.data.cdnow <- clvdata(data.transactions = cdnow, #' date.format="ymd", #' time.unit = "w", -#' observation.end = "1998-12-31", +#' data.end = "1998-12-31", #' estimation.split = "1997-10-15") #' #' # summary of the transaction data @@ -127,7 +128,7 @@ #' #' #' @export -clvdata <- function(data.transactions, date.format, time.unit, estimation.split=NULL, observation.end=NULL, name.id="Id", name.date="Date", name.price="Price"){ +clvdata <- function(data.transactions, date.format, time.unit, estimation.split=NULL, data.end=NULL, name.id="Id", name.date="Date", name.price="Price"){ # silence CRAN notes Date <- Price <- Id <- x <- previous <- date.first.actual.trans <- NULL @@ -151,7 +152,7 @@ clvdata <- function(data.transactions, date.format, time.unit, estimation.split= err.msg <- c(err.msg, .check_userinput_charactervec(char=date.format, var.name = "date.format", n=1)) err.msg <- c(err.msg, check_userinput_datanocov_estimationsplit(estimation.split=estimation.split, date.format=date.format)) - err.msg <- c(err.msg, check_userinput_datanocov_observationend(observation.end=observation.end, date.format=date.format)) + err.msg <- c(err.msg, check_userinput_datanocov_dataend(data.end=data.end, date.format=date.format)) check_err_msg(err.msg) @@ -224,7 +225,7 @@ clvdata <- function(data.transactions, date.format, time.unit, estimation.split= clv.t <- clv.time.set.sample.periods(clv.time = clv.t, tp.first.transaction = tp.first.transaction, tp.last.transaction = tp.last.transaction, - user.observation.end = observation.end, + user.data.end = data.end, user.estimation.end = estimation.split) diff --git a/R/f_s3generics_clvdata.R b/R/f_s3generics_clvdata.R index 91b05041..e12f6402 100644 --- a/R/f_s3generics_clvdata.R +++ b/R/f_s3generics_clvdata.R @@ -294,7 +294,7 @@ as.clv.data.data.frame <- function(x, date.format="ymd", time.unit="weeks", estimation.split = NULL, - observation.end = NULL, + data.end = NULL, name.id="Id", name.date="Date", name.price="Price", ...){ return(clvdata(data.transactions = x, @@ -309,7 +309,7 @@ as.clv.data.data.table <- function(x, date.format="ymd", time.unit="weeks", estimation.split = NULL, - observation.end = NULL, + data.end = NULL, name.id="Id", name.date="Date", name.price="Price", ...){ return(clvdata(data.transactions = x, diff --git a/man-roxygen/template_params_clvdata.R b/man-roxygen/template_params_clvdata.R index 5e5d21cc..7e184d4b 100644 --- a/man-roxygen/template_params_clvdata.R +++ b/man-roxygen/template_params_clvdata.R @@ -1,6 +1,6 @@ #' @param date.format Character string that indicates the format of the date variable in the data used. See details. #' @param time.unit What time unit defines a period. May be abbreviated, capitalization is ignored. See details. -#' @param observation.end The end of the observation period, beyond the last recorded transaction in \code{<%=name_param_trans%>}. See details. +#' @param data.end The fictional end of the data, after the last recorded transaction in \code{<%=name_param_trans%>}. See details. #' @param estimation.split Indicates the length of the estimation period. See details. #' @param name.id Column name of the customer id in \code{<%=name_param_trans%>}. #' @param name.date Column name of the transaction date in \code{<%=name_param_trans%>}. diff --git a/man/as.clv.data.Rd b/man/as.clv.data.Rd index a573feb4..652068f4 100644 --- a/man/as.clv.data.Rd +++ b/man/as.clv.data.Rd @@ -11,7 +11,7 @@ as.clv.data( date.format = "ymd", time.unit = "weeks", estimation.split = NULL, - observation.end = NULL, + data.end = NULL, name.id = "Id", name.date = "Date", name.price = "Price", @@ -23,7 +23,7 @@ as.clv.data( date.format = "ymd", time.unit = "weeks", estimation.split = NULL, - observation.end = NULL, + data.end = NULL, name.id = "Id", name.date = "Date", name.price = "Price", @@ -35,7 +35,7 @@ as.clv.data( date.format = "ymd", time.unit = "weeks", estimation.split = NULL, - observation.end = NULL, + data.end = NULL, name.id = "Id", name.date = "Date", name.price = "Price", @@ -51,7 +51,7 @@ as.clv.data( \item{estimation.split}{Indicates the length of the estimation period. See details.} -\item{observation.end}{The end of the observation period, beyond the last recorded transaction in \code{x}. See details.} +\item{data.end}{The fictional end of the data, after the last recorded transaction in \code{x}. See details.} \item{name.id}{Column name of the customer id in \code{x}.} diff --git a/man/clvdata.Rd b/man/clvdata.Rd index c027f57d..a0694ee1 100644 --- a/man/clvdata.Rd +++ b/man/clvdata.Rd @@ -9,7 +9,7 @@ clvdata( date.format, time.unit, estimation.split = NULL, - observation.end = NULL, + data.end = NULL, name.id = "Id", name.date = "Date", name.price = "Price" @@ -24,7 +24,7 @@ clvdata( \item{estimation.split}{Indicates the length of the estimation period. See details.} -\item{observation.end}{The end of the observation period, beyond the last recorded transaction in \code{data.transactions}. See details.} +\item{data.end}{The fictional end of the data, after the last recorded transaction in \code{data.transactions}. See details.} \item{name.id}{Column name of the customer id in \code{data.transactions}.} @@ -65,10 +65,11 @@ and hence all formats it accepts in argument \code{orders} can be used. For exam (i.e., "2010-06-17") is indicated with \code{"ymd"}. Other combinations such as \code{"dmy"}, \code{"dym"}, \code{"ymd HMS"}, or \code{"HMS dmy"} are possible as well. -\code{observation.end} The point in time at which the observation period ends. -The observation period is the total time frame in which customers were observed and is the combined estimation and holdout periods. -Useful when the last transaction does not constitute the end of the observation period. -For example, when the last transaction was on "2000-12-29" but customers were actually observed until "2000-12-31". +\code{data.end} A point in time beyond the last purchase at which the data should fictionally end. +It defines the total time frame in which customers could be observed: The combined estimation and holdout periods. +For example, when the last recorded transaction was on "2000-12-29" but customers were actually observed until "2000-12-31". +Using \code{data.end="2000-12-31"} without holdout period, +the estimation period will be until "2000-12-31" and the prediction period will start on "2001-01-01". Required to be after the last recorded transaction. \code{estimation.split} May be specified as either the number of periods since the first transaction or the timepoint @@ -116,7 +117,7 @@ clv.data.cdnow <- clvdata(data.transactions = cdnow, clv.data.cdnow <- clvdata(data.transactions = cdnow, date.format="ymd", time.unit = "w", - observation.end = "1998-12-31", + data.end = "1998-12-31", estimation.split = "1997-10-15") # summary of the transaction data diff --git a/tests/testthat/helper_arrange.R b/tests/testthat/helper_arrange.R index dbfbe91d..3ea9a1a5 100644 --- a/tests/testthat/helper_arrange.R +++ b/tests/testthat/helper_arrange.R @@ -21,7 +21,7 @@ fct.helper.load.apparelDynCov <- function(){.load.data.locally("apparelDynCov")} fct.helper.create.clvdata.cdnow <- function( data.cdnow = NULL, - observation.end = NULL, + data.end = NULL, estimation.split = 37, name.price = "Price") { @@ -32,7 +32,7 @@ fct.helper.create.clvdata.cdnow <- function( data.transactions = data.cdnow, date.format = "ymd", time.unit = "w", - observation.end = observation.end, + data.end = data.end, estimation.split = estimation.split, name.price = name.price ) @@ -41,7 +41,7 @@ fct.helper.create.clvdata.cdnow <- function( fct.helper.create.clvdata.apparel.nocov <- function( data.apparelTrans = NULL, - observation.end = NULL, + data.end = NULL, estimation.split = 104) { if (is.null(data.apparelTrans)) { @@ -52,7 +52,7 @@ fct.helper.create.clvdata.apparel.nocov <- function( data.transactions = data.apparelTrans, date.format = "ymd", time.unit = "W", - observation.end = observation.end, + data.end = data.end, estimation.split = estimation.split )) } @@ -60,7 +60,7 @@ fct.helper.create.clvdata.apparel.nocov <- function( fct.helper.create.clvdata.apparel.staticcov <- function( data.apparelTrans = NULL, data.apparelStaticCov = NULL, - observation.end = NULL, + data.end = NULL, estimation.split = 104, names.cov.life = c("Gender", "Channel"), names.cov.trans = c("Gender", "Channel")) { @@ -76,7 +76,7 @@ fct.helper.create.clvdata.apparel.staticcov <- function( data.transactions = data.apparelTrans, date.format = "ymd", time.unit = "W", - observation.end = observation.end, + data.end = data.end, estimation.split = estimation.split ), data.cov.life = data.apparelStaticCov, @@ -89,7 +89,7 @@ fct.helper.create.clvdata.apparel.staticcov <- function( fct.helper.create.clvdata.apparel.dyncov <- function( data.apparelTrans = NULL, data.apparelDynCov = NULL, - observation.end = NULL, + data.end = NULL, estimation.split = 104, names.cov.life = c("High.Season", "Gender", "Channel"), names.cov.trans = c("High.Season", "Gender", "Channel")) { @@ -105,7 +105,7 @@ fct.helper.create.clvdata.apparel.dyncov <- function( data = data.apparelTrans, date.format = "ymd", time.unit = "w", - observation.end = observation.end, + data.end = data.end, estimation.split = estimation.split )) @@ -124,7 +124,7 @@ fct.helper.create.clvdata.apparel.dyncov <- function( fit.cdnow <- function( data.cdnow = NULL, - observation.end = NULL, + data.end = NULL, estimation.split = 37, name.price = 'Price', model = pnbd, @@ -134,7 +134,7 @@ fit.cdnow <- function( clv.cdnow <- fct.helper.create.clvdata.cdnow( data.cdnow = data.cdnow, - observation.end = observation.end, + data.end = data.end, estimation.split = estimation.split, name.price=name.price ) @@ -153,7 +153,7 @@ fit.cdnow <- function( fit.apparel.nocov <- function( data.apparelTrans = NULL, - observation.end = NULL, + data.end = NULL, estimation.split = 104, model = pnbd, verbose=FALSE, @@ -164,7 +164,7 @@ fit.apparel.nocov <- function( clv.data.apparel <- fct.helper.create.clvdata.apparel.nocov( data.apparelTrans = data.apparelTrans, - observation.end = observation.end, + data.end = data.end, estimation.split = estimation.split ) @@ -181,7 +181,7 @@ fit.apparel.nocov <- function( fit.apparel.static <- function( data.apparelTrans = NULL, data.apparelStaticCov = NULL, - observation.end = NULL, + data.end = NULL, estimation.split = 104, names.cov.life = c("Gender", "Channel"), names.cov.trans = c("Gender", "Channel"), @@ -195,7 +195,7 @@ fit.apparel.static <- function( clv.data.apparel.cov <- fct.helper.create.clvdata.apparel.staticcov( data.apparelTrans = data.apparelTrans, data.apparelStaticCov = data.apparelStaticCov, - observation.end = observation.end, + data.end = data.end, estimation.split = estimation.split, names.cov.life = names.cov.life, names.cov.trans = names.cov.trans @@ -215,7 +215,7 @@ fit.apparel.static <- function( fit.apparel.dyncov <- function( data.apparelTrans = NULL, data.apparelDynCov = NULL, - observation.end = NULL, + data.end = NULL, estimation.split = 104, names.cov.life = c("High.Season", "Gender", "Channel"), names.cov.trans = c("High.Season", "Gender", "Channel"), @@ -227,7 +227,7 @@ fit.apparel.dyncov <- function( data.apparelTrans = data.apparelTrans, data.apparelDynCov = data.apparelDynCov, estimation.split = estimation.split, - observation.end = observation.end, + data.end = data.end, names.cov.life = names.cov.life, names.cov.trans = names.cov.trans ) diff --git a/tests/testthat/helper_testthat_correctness_clvtime.R b/tests/testthat/helper_testthat_correctness_clvtime.R index 7b536471..ee848177 100644 --- a/tests/testthat/helper_testthat_correctness_clvtime.R +++ b/tests/testthat/helper_testthat_correctness_clvtime.R @@ -27,35 +27,35 @@ fct.helper.clv.time.create.test.objects <- function(with.holdout){ pred.tp.last <- as.Date("2008-09-27") if(with.holdout){ expect_silent(clv.t.hours <-clv.time.set.sample.periods(clv.t.hours, user.estimation.end = 37, - user.observation.end = NULL, + user.data.end = NULL, tp.first.transaction = lubridate::ymd("2005-01-20", tz="UTC"), tp.last.transaction = lubridate::ymd("2008-09-27", tz="UTC"))) expect_silent(clv.t.days <-clv.time.set.sample.periods(clv.t.days, user.estimation.end = 37, - user.observation.end = NULL, + user.data.end = NULL, tp.first.transaction = pred.tp.first, tp.last.transaction = pred.tp.last)) expect_silent(clv.t.weeks <-clv.time.set.sample.periods(clv.t.weeks, user.estimation.end = 37, - user.observation.end = NULL, + user.data.end = NULL, tp.first.transaction = pred.tp.first, tp.last.transaction = pred.tp.last)) expect_silent(clv.t.years <-clv.time.set.sample.periods(clv.t.years, user.estimation.end = 1, - user.observation.end = NULL, + user.data.end = NULL, tp.first.transaction = pred.tp.first, tp.last.transaction = pred.tp.last)) }else{ expect_silent(clv.t.hours <-clv.time.set.sample.periods(clv.t.hours, user.estimation.end = NULL, - user.observation.end = NULL, + user.data.end = NULL, tp.first.transaction = lubridate::ymd("2005-01-20", tz="UTC"), tp.last.transaction = lubridate::ymd("2008-09-27", tz="UTC"))) expect_silent(clv.t.days <-clv.time.set.sample.periods(clv.t.days, user.estimation.end = NULL, - user.observation.end = NULL, + user.data.end = NULL, tp.first.transaction = pred.tp.first, tp.last.transaction = pred.tp.last)) expect_silent(clv.t.weeks <-clv.time.set.sample.periods(clv.t.weeks, user.estimation.end = NULL, - user.observation.end = NULL, + user.data.end = NULL, tp.first.transaction = pred.tp.first, tp.last.transaction = pred.tp.last)) expect_silent(clv.t.years <-clv.time.set.sample.periods(clv.t.years, user.estimation.end = NULL, - user.observation.end = NULL, + user.data.end = NULL, tp.first.transaction = pred.tp.first, tp.last.transaction = pred.tp.last)) } @@ -110,7 +110,7 @@ fct.testthat.correctness.clvtime.set.sample.periods.estimation.start <- function expect_silent(clv.t <- clv.time.set.sample.periods(clv.t, user.estimation.end = NULL, - user.observation.end = NULL, + user.data.end = NULL, tp.first.transaction = tp.first, tp.last.transaction = tp.last)) expect_equal(clv.t@timepoint.estimation.start, tp.first) @@ -128,7 +128,7 @@ fct.testthat.correctness.clvtime.set.sample.periods.no.estimation.end <- functio # Dates expect_silent(clv.t <- clv.time.set.sample.periods(clv.t, user.estimation.end = NULL, - user.observation.end = NULL, + user.data.end = NULL, tp.first.transaction = tp.first, tp.last.transaction = tp.last)) expect_equal(clv.t@timepoint.estimation.end, tp.last) @@ -154,7 +154,7 @@ fct.testthat.correctness.clvtime.set.sample.periods.numeric.estimation.end <- fu expect_silent(clv.t <- clv.time.set.sample.periods(clv.t, user.estimation.end = splitting.end, - user.observation.end = NULL, + user.data.end = NULL, tp.first.transaction =tp.first, tp.last.transaction = tp.last)) expect_equal(clv.t@timepoint.estimation.end, tp.first+lubridate::period(splitting.end, period.type)) @@ -179,7 +179,7 @@ fct.testthat.correctness.clvtime.set.sample.periods.warn.partial.period <- funct expect_warning(clv.t <- clv.time.set.sample.periods(clv.t, user.estimation.end = splitting.end, - user.observation.end = NULL, + user.data.end = NULL, tp.first.transaction =tp.first, tp.last.transaction = tp.last), regexp = "partial periods") @@ -197,26 +197,26 @@ fct.testthat.correctness.clvtime.set.sample.periods.stop.estimation.period.less. # Numeric expect_error(clv.time.set.sample.periods(clv.t, user.estimation.end = 0, - user.observation.end = NULL, + user.data.end = NULL, tp.first.transaction = tp.first, tp.last.transaction = tp.last), regexp = "1 time.unit after") expect_error(clv.time.set.sample.periods(clv.t, user.estimation.end = -3, - user.observation.end = NULL, + user.data.end = NULL, tp.first.transaction = tp.first, tp.last.transaction = tp.last), regexp = "1 time.unit after") # Date expect_error(clv.time.set.sample.periods(clv.t, user.estimation.end = tp.first, - user.observation.end = NULL, + user.data.end = NULL, tp.first.transaction = tp.first, tp.last.transaction = tp.last), regexp = "1 time.unit after") expect_error(clv.time.set.sample.periods(clv.t, user.estimation.end = tp.first-lubridate::days(1), - user.observation.end = NULL, + user.data.end = NULL, tp.first.transaction = tp.first, tp.last.transaction = tp.last), regexp = "1 time.unit after") @@ -224,25 +224,25 @@ fct.testthat.correctness.clvtime.set.sample.periods.stop.estimation.period.less. expect_error(clv.time.set.sample.periods(clv.time = clv.t.hours, user.estimation.end = "2018-01-01 00:35:49", - user.observation.end = NULL, + user.data.end = NULL, tp.first.transaction = fct.helper.clv.time.correct.datetype("2018-01-01", clv.t.hours), tp.last.transaction = fct.helper.clv.time.correct.datetype("2025-06-15", clv.t.hours)), regexp = "1 time.unit after") expect_error(clv.time.set.sample.periods(clv.time = clv.t.days, user.estimation.end = "2018-01-01", - user.observation.end = NULL, + user.data.end = NULL, tp.first.transaction = fct.helper.clv.time.correct.datetype("2018-01-01", clv.t.days), tp.last.transaction = fct.helper.clv.time.correct.datetype("2025-06-15", clv.t.days)), regexp = "1 time.unit after") expect_error(clv.time.set.sample.periods(clv.time = clv.t.weeks, user.estimation.end = "2018-01-03", # Wed - user.observation.end = NULL, + user.data.end = NULL, tp.first.transaction = fct.helper.clv.time.correct.datetype("2018-01-01", clv.t.weeks), # Mon tp.last.transaction = fct.helper.clv.time.correct.datetype("2025-06-15", clv.t.weeks)), regexp = "1 time.unit after") expect_error(clv.time.set.sample.periods(clv.time = clv.t.years, user.estimation.end = "2018-12-31", - user.observation.end = NULL, + user.data.end = NULL, tp.first.transaction = fct.helper.clv.time.correct.datetype("2018-01-01", clv.t.years), tp.last.transaction = fct.helper.clv.time.correct.datetype("2025-06-15", clv.t.years)), regexp = "1 time.unit after") @@ -255,7 +255,7 @@ fct.testthat.correctness.clvtime.set.sample.periods.stop.holdout.length.less.2.p tp.first <- fct.helper.clv.time.correct.datetype("2018-01-01", clv.t) tp.last <- fct.helper.clv.time.correct.datetype("2025-06-15", clv.t) expect_error(clv.time.set.sample.periods(clv.t, - user.observation.end = NULL, + user.data.end = NULL, user.estimation.end = tp.last-lubridate::hours(1), tp.first.transaction = tp.first, tp.last.transaction = tp.last), @@ -264,25 +264,25 @@ fct.testthat.correctness.clvtime.set.sample.periods.stop.holdout.length.less.2.p expect_error(clv.time.set.sample.periods(clv.time = clv.t.hours, user.estimation.end = "2025-06-14 22:40:11", - user.observation.end = NULL, + user.data.end = NULL, tp.first.transaction = fct.helper.clv.time.correct.datetype("2018-01-01", clv.t.hours), tp.last.transaction = fct.helper.clv.time.correct.datetype("2025-06-15", clv.t.hours)), regexp = "at least 2 time.units") expect_error(clv.time.set.sample.periods(clv.time = clv.t.days, user.estimation.end = "2025-06-14", - user.observation.end = NULL, + user.data.end = NULL, tp.first.transaction = fct.helper.clv.time.correct.datetype("2018-01-01", clv.t.days), tp.last.transaction = fct.helper.clv.time.correct.datetype("2025-06-15", clv.t.days)), regexp = "at least 2 time.units") expect_error(clv.time.set.sample.periods(clv.time = clv.t.weeks, user.estimation.end = "2025-06-11", # Wed - user.observation.end = NULL, + user.data.end = NULL, tp.first.transaction = fct.helper.clv.time.correct.datetype("2018-01-01", clv.t.weeks), tp.last.transaction = fct.helper.clv.time.correct.datetype("2025-06-13", clv.t.weeks)),# Fr regexp = "at least 2 time.units") expect_error(clv.time.set.sample.periods(clv.time = clv.t.years, user.estimation.end = "2025-01-01", - user.observation.end = NULL, + user.data.end = NULL, tp.first.transaction = fct.helper.clv.time.correct.datetype("2018-01-01", clv.t.years), tp.last.transaction = fct.helper.clv.time.correct.datetype("2025-06-15", clv.t.years)), regexp = "at least 2 time.units") @@ -301,7 +301,7 @@ fct.testthat.correctness.clvtime.set.sample.periods.date.estimation.end <- funct # POSIX dates in transactions - but split with Date (ymd by user) expect_silent(clv.t <- clv.time.set.sample.periods(clv.t, user.estimation.end = tp.split, - user.observation.end = NULL, + user.data.end = NULL, tp.first.transaction = tp.first, tp.last.transaction = tp.last)) # Split same Date but as posix @@ -309,7 +309,7 @@ fct.testthat.correctness.clvtime.set.sample.periods.date.estimation.end <- funct }else{ expect_silent(clv.t <- clv.time.set.sample.periods(clv.t, user.estimation.end = tp.split, - user.observation.end = NULL, + user.data.end = NULL, tp.first.transaction = tp.first, tp.last.transaction = tp.last)) expect_equal(clv.t@timepoint.estimation.end, tp.split) @@ -335,7 +335,7 @@ fct.testthat.correctness.clvtime.set.sample.periods.posixct.estimation.end <- fu if(is(clv.t, "clv.time.datetime")){ expect_silent(clv.t <- clv.time.set.sample.periods(clv.t, user.estimation.end = tp.split, - user.observation.end = NULL, + user.data.end = NULL, tp.first.transaction = tp.first, tp.last.transaction = tp.last)) @@ -345,7 +345,7 @@ fct.testthat.correctness.clvtime.set.sample.periods.posixct.estimation.end <- fu # Date transactions - but split with POSIXct (given by user) expect_message(clv.t <- clv.time.set.sample.periods(clv.t, user.estimation.end = tp.split, - user.observation.end = NULL, + user.data.end = NULL, tp.first.transaction = tp.first, tp.last.transaction = tp.last), regexp = "is ignored") @@ -371,7 +371,7 @@ fct.testthat.correctness.clvtime.set.sample.periods.char.estimation.end <- funct tp.split <- "2019-07-19 15:36:19" expect_silent(clv.t <- clv.time.set.sample.periods(clv.t, user.estimation.end = tp.split, - user.observation.end = NULL, + user.data.end = NULL, tp.first.transaction = tp.first, tp.last.transaction = tp.last)) @@ -382,7 +382,7 @@ fct.testthat.correctness.clvtime.set.sample.periods.char.estimation.end <- funct tp.split <- "2019-07-19" expect_silent(clv.t <- clv.time.set.sample.periods(clv.t, user.estimation.end = tp.split, - user.observation.end = NULL, + user.data.end = NULL, tp.first.transaction = tp.first, tp.last.transaction = tp.last)) # Split same but as Date @@ -397,32 +397,32 @@ fct.testthat.correctness.clvtime.set.sample.periods.char.estimation.end <- funct } -fct.testthat.correctness.clvtime.set.sample.periods.observation.end <- function(){ +fct.testthat.correctness.clvtime.set.sample.periods.data.end <- function(){ # clv.t <- clv.time.weeks("ymd") - test_that("Fail if observation.end is before last transaction",{ + test_that("Fail if data.end is before last transaction",{ expect_error( clv.time.set.sample.periods( clv.t, tp.first.transaction = fct.helper.clv.time.correct.datetype("2000-01-01", clv.t = clv.t), tp.last.transaction = fct.helper.clv.time.correct.datetype("2000-12-31", clv.t = clv.t), user.estimation.end = NULL, - user.observation.end = "2000-12-30"), + user.data.end = "2000-12-30"), regexp = "may not be before the last recorded transaction" ) }) - test_that("Fail if observation.end leads to holdout period < 2 periods", { + test_that("Fail if data.end leads to holdout period < 2 periods", { expect_error( clv.time.set.sample.periods( clv.t, tp.first.transaction = fct.helper.clv.time.correct.datetype("2000-01-01", clv.t = clv.t), tp.last.transaction = fct.helper.clv.time.correct.datetype("2000-12-31", clv.t = clv.t), user.estimation.end = "2000-12-28", - user.observation.end = "2001-01-03"), + user.data.end = "2001-01-03"), regexp = "holdout period of at least 2 time.units" ) @@ -430,34 +430,34 @@ fct.testthat.correctness.clvtime.set.sample.periods.observation.end <- function( # Not possible anymore since requiring estimation.split to be before last - # transaction and observation.end after last transaction - # test_that("Fail if observation.end leads to estimation period < 1 period", { + # transaction and data.end after last transaction + # test_that("Fail if data.end leads to estimation period < 1 period", { # expect_error( # clv.time.set.sample.periods( # clv.t, # tp.first.transaction = fct.helper.clv.time.correct.datetype("2000-01-01", clv.t = clv.t), # tp.last.transaction = fct.helper.clv.time.correct.datetype("2000-01-10", clv.t = clv.t), # user.estimation.end = "2000-01-05", - # user.observation.end = "2000-01-31"), + # user.data.end = "2000-01-31"), # regexp = "least 1 time.unit" # ) # }) # Not possible anymore since requiring estimation.split to be before last - # transaction and observation.end after last transaction - # test_that("Fail if observation.end is before estimation.split", { + # transaction and data.end after last transaction + # test_that("Fail if data.end is before estimation.split", { # expect_error( # clv.time.set.sample.periods( # clv.t, # tp.first.transaction = fct.helper.clv.time.correct.datetype("2000-01-01", clv.t = clv.t), # tp.last.transaction = fct.helper.clv.time.correct.datetype("2000-12-31", clv.t = clv.t), # user.estimation.end = "2001-02-15", - # user.observation.end = "2001-01-31"), + # user.data.end = "2001-01-31"), # regexp = "holdout period of at least 2 time.units" # ) # }) - test_that("Same object when no observation.end as when observation.end=tp.last.transaction", { + test_that("Same object when no data.end as when data.end=tp.last.transaction", { l.args <- list( clv.time = clv.t, tp.first.transaction = fct.helper.clv.time.correct.datetype("2000-01-01", clv.t = clv.t), @@ -467,46 +467,46 @@ fct.testthat.correctness.clvtime.set.sample.periods.observation.end <- function( # No holdout l.args["user.estimation.end"] <- list(NULL) expect_equal( - do.call(clv.time.set.sample.periods, c(l.args, list(user.observation.end = NULL))), - do.call(clv.time.set.sample.periods, c(l.args, list(user.observation.end = "2000-12-31"))) + do.call(clv.time.set.sample.periods, c(l.args, list(user.data.end = NULL))), + do.call(clv.time.set.sample.periods, c(l.args, list(user.data.end = "2000-12-31"))) ) # With holdout l.args$user.estimation.end <- "2000-06-15" expect_equal( - do.call(clv.time.set.sample.periods, c(l.args, list(user.observation.end = NULL))), - do.call(clv.time.set.sample.periods, c(l.args, list(user.observation.end = "2000-12-31"))) + do.call(clv.time.set.sample.periods, c(l.args, list(user.data.end = NULL))), + do.call(clv.time.set.sample.periods, c(l.args, list(user.data.end = "2000-12-31"))) ) }) # Not possible anymore since requiring estimation.split to be before last - # transaction and observation.end after last transaction - # test_that("estimation.split can be after last transaction if observation.end is given", { + # transaction and data.end after last transaction + # test_that("estimation.split can be after last transaction if data.end is given", { # expect_silent( # clv.time.set.sample.periods( # clv.t, # tp.first.transaction = fct.helper.clv.time.correct.datetype("2000-01-01", clv.t=clv.t), # tp.last.transaction = fct.helper.clv.time.correct.datetype("2000-12-31", clv.t = clv.t), # user.estimation.end = "2001-01-10", - # user.observation.end = "2001-01-31") + # user.data.end = "2001-01-31") # ) # }) - test_that("observation.end only moves holdout.end (if estimation.split is before)",{ + test_that("data.end only moves holdout.end (if estimation.split is before)",{ clv.t.no.obsend <- clv.time.set.sample.periods( clv.time = clv.t, tp.first.transaction = fct.helper.clv.time.correct.datetype("2000-01-01", clv.t = clv.t), tp.last.transaction = fct.helper.clv.time.correct.datetype("2000-12-31", clv.t = clv.t), user.estimation.end = "2000-06-15", - user.observation.end = NULL) + user.data.end = NULL) clv.t.with.obsend <- clv.time.set.sample.periods( clv.time = clv.t, tp.first.transaction = fct.helper.clv.time.correct.datetype("2000-01-01", clv.t=clv.t), tp.last.transaction = fct.helper.clv.time.correct.datetype("2000-12-31", clv.t=clv.t), user.estimation.end = "2000-06-15", - user.observation.end = "2001-02-28") + user.data.end = "2001-02-28") expect_true(all( clv.t.with.obsend@timepoint.estimation.start == clv.t.no.obsend@timepoint.estimation.start, @@ -530,7 +530,7 @@ fct.testthat.correctness.clvtime.set.sample.periods.observation.end <- function( tp.first.transaction = fct.helper.clv.time.correct.datetype("2000-01-01", clv.t = clv.t), tp.last.transaction = fct.helper.clv.time.correct.datetype("2000-06-15", clv.t = clv.t), user.estimation.end = "2000-04-04", - user.observation.end = "2000-12-31") + user.data.end = "2000-12-31") expect_true(clv.holdout@timepoint.estimation.start == "2000-01-01") expect_true(clv.holdout@timepoint.estimation.end == "2000-04-04") @@ -543,7 +543,7 @@ fct.testthat.correctness.clvtime.set.sample.periods.observation.end <- function( tp.first.transaction = fct.helper.clv.time.correct.datetype("2000-01-01", clv.t = clv.t), tp.last.transaction = fct.helper.clv.time.correct.datetype("2000-12-28", clv.t=clv.t), user.estimation.end = NULL, - user.observation.end = "2000-12-31") + user.data.end = "2000-12-31") expect_true(clv.no.holdout@timepoint.estimation.start == "2000-01-01") expect_true(clv.no.holdout@timepoint.estimation.end == "2000-12-31") diff --git a/tests/testthat/helper_testthat_correctness_transactions.R b/tests/testthat/helper_testthat_correctness_transactions.R index b98613e8..ea667b7d 100644 --- a/tests/testthat/helper_testthat_correctness_transactions.R +++ b/tests/testthat/helper_testthat_correctness_transactions.R @@ -296,21 +296,21 @@ fct.testthat.correctness.clvfittedtransactions.staticcov.predict.newcustomer.0.f }) } -fct.testthat.correctness.clvfittedtransactions.observation.end.moves.prediction.period <- function(method){ +fct.testthat.correctness.clvfittedtransactions.data.end.moves.prediction.period <- function(method){ - test_that("Using observation.end moves prediction period",{ + test_that("Using data.end moves prediction period",{ # Only valid for no holdout data skip_on_cran() fitted <- fit.cdnow( model = method, estimation.split = NULL, - observation.end = "1998-07-15" + data.end = "1998-07-15" ) dt.pred <- predict(fitted, prediction.end = "1998-07-30") - # Prediction period starts first `eps` after observation.end + # Prediction period starts first `eps` after data.end expect_true(dt.pred[1, "period.first"] == "1998-07-16") # Nothing else changed expect_true(dt.pred[1, "period.last"] == "1998-07-30") @@ -348,8 +348,8 @@ fct.testthat.correctness.clvfittedtransactions <- function(name.model, method, # predict(newdata=newcustomer): no cov fct.testthat.correctness.clvfittedtransactions.nocov.predict.newcustomer.0.for.num.periods.eq.0(obj.fitted) - # observation.end moves start of prediction period - fct.testthat.correctness.clvfittedtransactions.observation.end.moves.prediction.period(method = method) + # data.end moves start of prediction period + fct.testthat.correctness.clvfittedtransactions.data.end.moves.prediction.period(method = method) # Static cov data -------------------------------------------------------------------------------------------- # why 100 and not 104??????? diff --git a/tests/testthat/test_correctness_clvtime.R b/tests/testthat/test_correctness_clvtime.R index 22ff4e75..ccfdf7c3 100644 --- a/tests/testthat/test_correctness_clvtime.R +++ b/tests/testthat/test_correctness_clvtime.R @@ -11,7 +11,7 @@ for(clv.t in c(fct.helper.clv.time.create.test.objects(with.holdout = FALSE), # set.sample.periods -------------------------------------------------------------------------------- -# . no observation.end ----------------------------------------------------------------------------- +# . no data.end ----------------------------------------------------------------------------- for(clv.t in list(clv.time.hours(time.format="ymd HMS"), clv.time.days( time.format="ymd"), clv.time.weeks(time.format="ymd"), @@ -34,8 +34,8 @@ fct.testthat.correctness.clvtime.set.sample.periods.stop.holdout.length.less.2.p clv.t.weeks = clv.time.weeks(time.format="ymd"), clv.t.years = clv.time.years(time.format="ymd")) -# . with observation.end -------------------------------------------------------------------------- -fct.testthat.correctness.clvtime.set.sample.periods.observation.end() +# . with data.end -------------------------------------------------------------------------- +fct.testthat.correctness.clvtime.set.sample.periods.data.end() diff --git a/tests/testthat/test_inputchecks_clvdata_setdynamiccov.R b/tests/testthat/test_inputchecks_clvdata_setdynamiccov.R index cb0e17eb..17fdf0e4 100644 --- a/tests/testthat/test_inputchecks_clvdata_setdynamiccov.R +++ b/tests/testthat/test_inputchecks_clvdata_setdynamiccov.R @@ -100,11 +100,11 @@ test_that("Fails if covariate data is to short for all customers",{ fct.expect.error.setdyncov(data.cov.trans = apparelDynCov.tooshort, regexp = "covariate data exactly from") }) -test_that("Fails if covariate data ends before observation.end", { +test_that("Fails if covariate data ends before data.end", { clv.data.apparel.obsE <- fct.helper.create.clvdata.apparel.nocov( estimation.split = NULL, - observation.end = "2012-12-31") + data.end = "2012-12-31") fct.expect.error.setdyncov( clv.data = clv.data.apparel.obsE, diff --git a/tests/testthat/test_runability_clvdata_clvdata.R b/tests/testthat/test_runability_clvdata_clvdata.R index 0fd8a0a4..887a5d88 100644 --- a/tests/testthat/test_runability_clvdata_clvdata.R +++ b/tests/testthat/test_runability_clvdata_clvdata.R @@ -250,12 +250,12 @@ test_that("Works when called from as.clv.data()", { expect_silent(as.clv.data(as.data.table(cdnow))) }) -# observation.end ---------------------------------------------------------------- +# data.end ---------------------------------------------------------------- -test_that("Works with observation.end with and without holdout",{ +test_that("Works with data.end with and without holdout",{ skip_on_cran() l.args <- list( - observation.end = "2000-01-01", + data.end = "2000-01-01", data.transactions = cdnow, time.unit = "w", date.format = "ymd" @@ -268,10 +268,10 @@ test_that("Works with observation.end with and without holdout",{ expect_silent(do.call(clvdata, l.args)) }) -test_that("Works with observation.end and time.units hours, days, years", { +test_that("Works with data.end and time.units hours, days, years", { l.args <- list( data.transactions = cdnow, - observation.end = "2000-01-01", + data.end = "2000-01-01", estimation.split = NULL, date.format = "ymd" ) diff --git a/tests/testthat/test_runability_clvdata_s3.R b/tests/testthat/test_runability_clvdata_s3.R index 2f9bb12d..0422d72e 100644 --- a/tests/testthat/test_runability_clvdata_s3.R +++ b/tests/testthat/test_runability_clvdata_s3.R @@ -290,18 +290,18 @@ for(fn in list( )){ # With holdout fct.helper.test.runability.clv.data.runall(fn()) - # . with observation.end - fct.helper.test.runability.clv.data.runall(fn(observation.end="2011-01-31")) + # . with data.end + fct.helper.test.runability.clv.data.runall(fn(data.end="2011-01-31")) # Without holdout fct.helper.test.runability.clv.data.runall(fn(estimation.split=NULL)) - # . with observation.end - fct.helper.test.runability.clv.data.runall(fn(estimation.split=NULL, observation.end="2011-01-31")) + # . with data.end + fct.helper.test.runability.clv.data.runall(fn(estimation.split=NULL, data.end="2011-01-31")) } -# observation.end would require to prepare transaction data (cut before cov end) -# But dyncovs are not used for any of the s3 methods, therefore skip testing with observation.end for dyncov +# data.end would require to prepare transaction data (cut before cov end) +# But dyncovs are not used for any of the s3 methods, therefore skip testing with data.end for dyncov fct.helper.test.runability.clv.data.runall(fct.helper.create.clvdata.apparel.dyncov()) fct.helper.test.runability.clv.data.runall(fct.helper.create.clvdata.apparel.dyncov(estimation.split = NULL)) diff --git a/tests/testthat/test_runability_pnbd_dynamiccov.R b/tests/testthat/test_runability_pnbd_dynamiccov.R index fa9e8675..acc99163 100644 --- a/tests/testthat/test_runability_pnbd_dynamiccov.R +++ b/tests/testthat/test_runability_pnbd_dynamiccov.R @@ -95,7 +95,7 @@ test_that("Dyncov works with additional model specifications", { }) -# With observation.end --------------------------------------------------------- +# With data.end --------------------------------------------------------- test_that("Fit, plot, predict work with partially empty estimation/holdout period", { @@ -121,7 +121,7 @@ test_that("Fit, plot, predict work with partially empty estimation/holdout perio expect_silent(fitted.dyncov.noholdout.obsE <- fit.apparel.dyncov( data.apparelTrans = apparelTrans.cut.obsE, estimation.split = NULL, - observation.end = date.original.trans.max, + data.end = date.original.trans.max, names.cov.life = covs.life, names.cov.trans = covs.trans, optimx.args=fct.helper.dyncov.get.optimxargs.quickfit(hessian=TRUE) @@ -140,7 +140,7 @@ test_that("Fit, plot, predict work with partially empty estimation/holdout perio expect_silent(fitted.dyncov.holdout.obsE <- fit.apparel.dyncov( data.apparelTrans = apparelTrans.cut.obsE, estimation.split = 104, - observation.end = date.original.trans.max, + data.end = date.original.trans.max, names.cov.life = covs.life, names.cov.trans = covs.trans, optimx.args=fct.helper.dyncov.get.optimxargs.quickfit(hessian=TRUE) From 9e7c5a6f207a0949f7ee3698607ac6bd82d8b218 Mon Sep 17 00:00:00 2001 From: Patrik Schilter Date: Thu, 28 Aug 2025 15:45:31 +0200 Subject: [PATCH 24/26] Improve docu --- R/f_interface_clvdata.R | 4 +++- man/clvdata.Rd | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/R/f_interface_clvdata.R b/R/f_interface_clvdata.R index 78807aa5..cb5e2446 100644 --- a/R/f_interface_clvdata.R +++ b/R/f_interface_clvdata.R @@ -93,7 +93,9 @@ #' time.unit = "w", #' estimation.split = "1997-10-15") #' -#' # Extend observation period until 31th Dec 1998 +#' # Extend data fictionally until 31th Dec 1998 +#' # In this case, this only moves the holdout period and has no effect on the +#' # estimation. #' clv.data.cdnow <- clvdata(data.transactions = cdnow, #' date.format="ymd", #' time.unit = "w", diff --git a/man/clvdata.Rd b/man/clvdata.Rd index a0694ee1..1eeae7d3 100644 --- a/man/clvdata.Rd +++ b/man/clvdata.Rd @@ -113,7 +113,9 @@ clv.data.cdnow <- clvdata(data.transactions = cdnow, time.unit = "w", estimation.split = "1997-10-15") -# Extend observation period until 31th Dec 1998 +# Extend data fictionally until 31th Dec 1998 +# In this case, this only moves the holdout period and has no effect on the +# estimation. clv.data.cdnow <- clvdata(data.transactions = cdnow, date.format="ymd", time.unit = "w", From cfd3a228198611962c11dd5ec0e614991f91757a Mon Sep 17 00:00:00 2001 From: Patrik Schilter Date: Sat, 30 Aug 2025 23:35:34 +0200 Subject: [PATCH 25/26] tracking plot: Plot empty dates * Leave NAs in the data to know how far to plot (also return with NA) * Dont drop NAs anymore but omit during plotting * Tests: Correctness clvdata + plot clv.fitted.transactions * Docu: Returned plot data may now contain NAs --- R/f_s3generics_clvdata_plot.R | 26 ++++++++--- R/f_s3generics_clvfittedtransactions_plot.R | 12 ++++-- man/plot.clv.data.Rd | 2 +- man/plot.clv.fitted.transactions.Rd | 2 +- .../helper_testthat_correctness_clvfitted.R | 3 +- ...helper_testthat_correctness_transactions.R | 43 +++++++++++++++++++ tests/testthat/test_correctness_clvdata_s3.R | 30 +++++++++++++ 7 files changed, 107 insertions(+), 11 deletions(-) diff --git a/R/f_s3generics_clvdata_plot.R b/R/f_s3generics_clvdata_plot.R index 054ecdf7..c6acac54 100644 --- a/R/f_s3generics_clvdata_plot.R +++ b/R/f_s3generics_clvdata_plot.R @@ -95,7 +95,7 @@ #' \item{variable}{"tracking": The number of actual repeat transactions in the period that ends at \code{period.until}.\cr #' "timings": Coordinate (x or y) for which to use the value in this row for.} #' \item{value}{"timings": Date or numeric (stored as string) \cr -#' "tracking": numeric} +#' "tracking": numeric, may be \code{NA} if no repeat-transactions were recorded in this period} #' #' #' @examples @@ -310,10 +310,26 @@ clv.data.plot.tracking <- function(x, prediction.end, cumulative, plot, verbose, dt.dates.expectation[dt.repeat.trans, (label.transactions) := get(label.transactions), on="period.until"] dt.plot <- melt(dt.dates.expectation, id.vars="period.until") - # last period often has NA as it marks the full span of the period - dt.plot <- dt.plot[!is.na(value)] - - # data.table does not print when returned because it is returned directly after last [:=] + # The last period usually is set to NA because the data does not reach to the end of it. + # The last period has to be a full period because of the expectation plot. + # At the same time, the transaction data often ends before the last period (is only a partial period). + # This leads to a much lower number of transactions recorded in the last period + # and a noticeable, hard-to-explain drop at the end. + # Periods for which there are no transactions contain 0 not NA. Only the last + # period may contain NA. + # We remove it to not have it in the data and not raise a warning when plotting. + # + # Since introducing `data.end`, we no loner remove NAs as now there can be many + # periods without transactions and these should be shown (plotted) and known (returned data). + # Instead `geom_line(na.rm=T)` is used to remove them during plotting. + # Returning them helps users who want to create their own plots to plot the + # correct range (total time span of data). + # Alternative: Drop NA but set x-axis scale until holdout.end using + # `+ xlim(c(x@clv.time@timepoint.estimation.start, x@clv.time@timepoint.holdout.end))` + # + # dt.plot <- dt.plot[!is.na(value)] + + # # data.table does not print when returned because it is returned directly after last [:=] # " if a := is used inside a function with no DT[] before the end of the function, then the next # time DT or print(DT) is typed at the prompt, nothing will be printed. A repeated DT or print(DT) # will print. To avoid this: include a DT[] after the last := in your function." diff --git a/R/f_s3generics_clvfittedtransactions_plot.R b/R/f_s3generics_clvfittedtransactions_plot.R index 7217629e..d2b1f454 100644 --- a/R/f_s3generics_clvfittedtransactions_plot.R +++ b/R/f_s3generics_clvfittedtransactions_plot.R @@ -78,7 +78,7 @@ #' \item{period.until}{The timepoint that marks the end (up until and including) of the period to which the data in this row refers.} #' \item{variable}{Type of variable that 'value' refers to. Either "model name" or "Actual" (if \code{transactions=TRUE}).} #' \item{value}{Depending on variable either (Actual) the actual number of repeat transactions in the period that ends at \code{period.until}, -#' or the unconditional expectation for the period that ends on \code{period.until} ("model name").} +#' or the unconditional expectation for the period that ends on \code{period.until} ("model name"). Actuals may be \code{NA} if no transaction was recorded.} #' #' For the PMF plot: #' \item{num.transactions}{The number of repeat transactions in the estimation period (as ordered factor).} @@ -241,7 +241,7 @@ clv.controlflow.plot.tracking.base <- function(dt.plot, clv.data, color.mapping, # Plotting order dt.plot[, variable := factor(variable, levels=names(color.mapping), ordered = TRUE)] - p <- ggplot(data = dt.plot, aes(x=period.until, y=value, colour=variable)) + geom_line() + p <- ggplot(data = dt.plot, aes(x=period.until, y=value, colour=variable)) + geom_line(na.rm = TRUE) # Add holdout line if there is a holdout period if(clv.data.has.holdout(clv.data)){ @@ -382,7 +382,13 @@ clv.fitted.transactions.plot.tracking.get.data <- function(x, prediction.end, cu dt.plot <- melt(dt.dates.expectation, id.vars='period.until') # last period often has NA as it marks the full span of the period - dt.plot <- dt.plot[!is.na(value)] + # The last period usually was NA because of explanations it was a partial + # period. See explanations in `clv.data.plot.tracking`. + # dt.plot <- dt.plot[!is.na(value)] + # Since introducing `data.end`, many periods can be NA. The NAs are now removed + # during plotting (`geom_line(na.rm=T)`). For consistency with plot(clvdata), + # the returned data also keeps the NA. + return(dt.plot) } diff --git a/man/plot.clv.data.Rd b/man/plot.clv.data.Rd index bac706d1..b871fedd 100644 --- a/man/plot.clv.data.Rd +++ b/man/plot.clv.data.Rd @@ -81,7 +81,7 @@ excluding customers with no repeat-transactions.} \item{variable}{"tracking": The number of actual repeat transactions in the period that ends at \code{period.until}.\cr "timings": Coordinate (x or y) for which to use the value in this row for.} \item{value}{"timings": Date or numeric (stored as string) \cr - "tracking": numeric} + "tracking": numeric, may be \code{NA} if no repeat-transactions were recorded in this period} } \description{ Depending on the value of parameter \code{which}, one of the following plots will be produced. diff --git a/man/plot.clv.fitted.transactions.Rd b/man/plot.clv.fitted.transactions.Rd index 93ac58ad..236374e6 100644 --- a/man/plot.clv.fitted.transactions.Rd +++ b/man/plot.clv.fitted.transactions.Rd @@ -82,7 +82,7 @@ For the Tracking plot: \item{period.until}{The timepoint that marks the end (up until and including) of the period to which the data in this row refers.} \item{variable}{Type of variable that 'value' refers to. Either "model name" or "Actual" (if \code{transactions=TRUE}).} \item{value}{Depending on variable either (Actual) the actual number of repeat transactions in the period that ends at \code{period.until}, -or the unconditional expectation for the period that ends on \code{period.until} ("model name").} +or the unconditional expectation for the period that ends on \code{period.until} ("model name"). Actuals may be \code{NA} if no transaction was recorded.} For the PMF plot: \item{num.transactions}{The number of repeat transactions in the estimation period (as ordered factor).} diff --git a/tests/testthat/helper_testthat_correctness_clvfitted.R b/tests/testthat/helper_testthat_correctness_clvfitted.R index 33db3404..86cefbcc 100644 --- a/tests/testthat/helper_testthat_correctness_clvfitted.R +++ b/tests/testthat/helper_testthat_correctness_clvfitted.R @@ -14,7 +14,8 @@ fct.testthat.correctness.clvfitted.flawless.results.out.of.the.box <- function(m expect_false(fct.DT.any.non.finite(predict(fitted, verbose = FALSE))) if(is(fitted, "clv.fitted.transactions")){ - expect_false(fct.DT.any.non.finite(plot(fitted, plot = FALSE, verbose = FALSE))) + # No NAs, except last period may have (partial period that is set to NA on purpose) + expect_false(fct.DT.any.non.finite(plot(fitted, plot = FALSE, verbose = FALSE)[period.until != max(period.until)])) } # KKTs both true expect_true(res.sum$kkt1) diff --git a/tests/testthat/helper_testthat_correctness_transactions.R b/tests/testthat/helper_testthat_correctness_transactions.R index ea667b7d..5138b5d6 100644 --- a/tests/testthat/helper_testthat_correctness_transactions.R +++ b/tests/testthat/helper_testthat_correctness_transactions.R @@ -318,6 +318,47 @@ fct.testthat.correctness.clvfittedtransactions.data.end.moves.prediction.period } +fct.testthat.correctness.clvfittedtransactions.nocov.plot.until.data.end <- function(method){ + test_that("Plotting until data.end",{ + + expect_warning(fitted <- fit.cdnow( + model = method, + estimation.split = NULL, + data.end = "1998-12-31", + # PNBD requires NM + optimx.args = list(method="Nelder-Mead", hessian=FALSE, control=list(kkt=FALSE))), + regexp = "Hessian could not be derived") + clv.time <- fitted@clv.data@clv.time + + # Data + expect_silent(dt.plot <- plot(fitted, verbose=FALSE, plot=FALSE)) + dt.after <- dt.plot[period.until > "1998-06-30"] + + # Actuals and expectation are for the same dates (none lost for either (mostly Actual)) + expect_true(all( + dt.plot[variable == "Actual", "period.until"] == dt.plot[variable != "Actual", "period.until"])) + + # Actuals are NA after last transaction but model not + expect_true(dt.after[variable == "Actual", all(is.na(value))]) + expect_false(dt.after[variable != "Actual", any(is.na(value))]) + + # Actuals and expectation are until data.end + expect_true(dt.after[, max(period.until)] >= clv.time@timepoint.holdout.end) + + + # Plotting + # Plots without warnings + expect_silent(p <- plot(fitted, verbose = FALSE, plot = TRUE)) + + # Plot has x-axis limits until data.end + # $x.range: What is really rendered + # $limits: The user set limits + p.xlim <- ggplot2::ggplot_build(p)$layout$panel_params[[1]]$x$limits + expect_true(min(p.xlim) <= clv.time@timepoint.estimation.start) + expect_true(max(p.xlim) >= clv.time@timepoint.holdout.end) + }) +} + fct.testthat.correctness.clvfittedtransactions <- function(name.model, method, correct.start.params.model, correct.params.nocov.coef, correct.LL.nocov, kkt2.true){ @@ -339,6 +380,8 @@ fct.testthat.correctness.clvfittedtransactions <- function(name.model, method, fct.testthat.correctness.clvfittedtransactions.nocov.newdata.fitting.sample.predicting.full.data.equal(method = method, clv.cdnow = clv.cdnow) fct.testhat.correctness.clvfittedtransactions.same.spending.as.independent.spending.model(method = method, clv.data = clv.cdnow) + fct.testthat.correctness.clvfittedtransactions.nocov.plot.until.data.end(method=method) + if(fct.helper.has.pmf(obj.fitted)){ fct.testthat.correctness.clvfittedtransactions.pmf.more.x.more.p(clv.fitted = obj.fitted) fct.testthat.correctness.clvfittedtransactions.pmf.valid.values(clv.fitted = obj.fitted) diff --git a/tests/testthat/test_correctness_clvdata_s3.R b/tests/testthat/test_correctness_clvdata_s3.R index cdd7262b..3e09d1c8 100644 --- a/tests/testthat/test_correctness_clvdata_s3.R +++ b/tests/testthat/test_correctness_clvdata_s3.R @@ -268,6 +268,36 @@ test_that("Always returns a copy of the data", { # plot --------------------------------------------------------------------- +# . tracking --------------------------------------------------------------- +test_that("tracking plot - without data.end: Last period is NA and plots without warnings", { + skip_on_cran() + clv.cdnow <- fct.helper.create.clvdata.cdnow() + + # Last period is NA in cdnow (on purpose because its a partial period and no longer dropped) + dt.plot <- plot(clv.cdnow, which = "tracking", verbose = FALSE, plot=FALSE) + expect_true(dt.plot[period.until==max(period.until), is.na(value)]) + + # Plots without warnings although data contains NA + expect_silent(plot(clv.cdnow, which="tracking", verbose=FALSE, plot=TRUE)) +}) + +test_that("tracking plot - with data.end: NA after last transaction until data.end and plots w/o warning", { + skip_on_cran() + clv.cdnow <- fct.helper.create.clvdata.cdnow(data.end="1998-12-31") + + # All periods after last transaction are NA + dt.plot <- plot(clv.cdnow, which = "tracking", verbose = FALSE, plot=FALSE) + dt.plot.empty <- dt.plot[period.until > "1998-06-30"] + # Data until at least data.end + expect_true(dt.plot.empty[, max(period.until)] >= "1998-12-31") + # They are all NA + expect_true(dt.plot.empty[, all(is.na(value))]) + + # Plots without warnings although data contains NA + expect_silent(plot(clv.cdnow, which="tracking", verbose=FALSE, plot=TRUE)) +}) + + # . frequency --------------------------------------------------------------- test_that("frequency plot - actual trans has no 0", { From e260abb76b51ee031493c4c6a06edbf2678bba74 Mon Sep 17 00:00:00 2001 From: Patrik Schilter Date: Sun, 31 Aug 2025 00:20:17 +0200 Subject: [PATCH 26/26] Fix tests: Plot data can be NA now --- tests/testthat/helper_s3_fitted_plot.R | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/testthat/helper_s3_fitted_plot.R b/tests/testthat/helper_s3_fitted_plot.R index b9f4005c..936a004b 100644 --- a/tests/testthat/helper_s3_fitted_plot.R +++ b/tests/testthat/helper_s3_fitted_plot.R @@ -120,10 +120,12 @@ fct.testthat.runability.clvfittedtransactions.plot.tracking <- function(clv.fitt test_that("Works with newdata", { skip_on_cran() expect_silent(dt.plot <- plot(clv.fitted, newdata = clv.newdata.nohold, prediction.end=3, plot=FALSE, verbose=FALSE)) - expect_false(anyNA(dt.plot)) + # Since introducing data.end: Actuals after last transaction are kept and contain NA. Only check model + expect_false(anyNA(dt.plot[variable != "Actual"])) expect_warning(dt.plot <- plot(clv.fitted, newdata = clv.newdata.withhold, prediction.end=3, plot=FALSE, verbose=FALSE), regexp = "full holdout") - expect_false(anyNA(dt.plot)) + # Since introducing data.end: Actuals after last transaction are kept and contain NA. Only check model + expect_false(anyNA(dt.plot[variable != "Actual"])) }) test_that("Works for prediction.end in different formats, after holdout", {