From acf98f6c59280a10cd00fa96361fe087b957266f Mon Sep 17 00:00:00 2001 From: Chelsea Troy Date: Thu, 21 Sep 2023 11:31:45 -0500 Subject: [PATCH 1/2] Hello from files --- .../search_term_data_validation_v2/data_validation.py | 2 ++ .../search_term_data_validation_v2/main.py | 1 + 2 files changed, 3 insertions(+) diff --git a/jobs/search-term-data-validation-v2/search_term_data_validation_v2/data_validation.py b/jobs/search-term-data-validation-v2/search_term_data_validation_v2/data_validation.py index e94fda26..1674c470 100644 --- a/jobs/search-term-data-validation-v2/search_term_data_validation_v2/data_validation.py +++ b/jobs/search-term-data-validation-v2/search_term_data_validation_v2/data_validation.py @@ -1,3 +1,5 @@ +print("Hello from search_term_data_validation_v2/data_validation.py!") + from google.cloud import bigquery from datetime import date, timedelta, datetime from collections import namedtuple diff --git a/jobs/search-term-data-validation-v2/search_term_data_validation_v2/main.py b/jobs/search-term-data-validation-v2/search_term_data_validation_v2/main.py index 533800b1..fb86aed1 100644 --- a/jobs/search-term-data-validation-v2/search_term_data_validation_v2/main.py +++ b/jobs/search-term-data-validation-v2/search_term_data_validation_v2/main.py @@ -1,3 +1,4 @@ +print("Hello from search_term_data_validation_v2/main.py!") print("Look at me! I'm running a Python command! WOW!") import argparse From 91b1f4e74dff0abf5a7e9736c76aa6193c56c5fd Mon Sep 17 00:00:00 2001 From: Chelsea Troy Date: Thu, 21 Sep 2023 11:37:49 -0500 Subject: [PATCH 2/2] Add a LOT of printing to the range check --- .../search_term_data_validation_v2/data_validation.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/jobs/search-term-data-validation-v2/search_term_data_validation_v2/data_validation.py b/jobs/search-term-data-validation-v2/search_term_data_validation_v2/data_validation.py index 1674c470..4fef072d 100644 --- a/jobs/search-term-data-validation-v2/search_term_data_validation_v2/data_validation.py +++ b/jobs/search-term-data-validation-v2/search_term_data_validation_v2/data_validation.py @@ -199,27 +199,31 @@ def range_check( if metric not in validation_data.columns.values: raise Exception(f'dataframe does not include target metric "{metric}"') + print("Calculating dates for running range check...") today = date.today() latest_finished_at = max(validation_data["finished_at"]) - test_earliest_date = today - timedelta(days=test_window) - comparison_earliest_date = test_earliest_date - timedelta(days=full_lookback_window) + print("Calculating comparison values...") comparison_values = validation_data["finished_at"].apply( lambda m: comparison_earliest_date < m.date() <= test_earliest_date ) + print("Calculating test values...") test_values = validation_data["finished_at"].apply( lambda m: test_earliest_date < m.date() <= today ) + print("Calculating comparison range and test range...") comparison_range = validation_data.loc[comparison_values] test_range = validation_data.loc[test_values] + print("Calculating upper and lower range limits...") range_lower, range_upper = comparison_range[metric].quantile( q=[range_lower_bound, range_upper_bound] ) + print("Recording whether the range check fails for the metric...") should_trigger = len(test_range[metric]) != 0 and ( all(test_range[metric] > range_upper) or all(test_range[metric] < range_lower) )