From f722c3904a8300aad2bf5642ea744b5cb9348b4c Mon Sep 17 00:00:00 2001 From: howsoRes <144272317+howsoRes@users.noreply.github.com> Date: Mon, 9 Jun 2025 15:59:23 -0400 Subject: [PATCH 1/4] 23864: Changes robust accuracy contributions to be computed via weighted robust residuals --- howso/react_aggregate.amlg | 6 +-- howso/residuals.amlg | 94 ++++++++++++++++++++++++++------------ 2 files changed, 67 insertions(+), 33 deletions(-) diff --git a/howso/react_aggregate.amlg b/howso/react_aggregate.amlg index ce480ce94..3e9cbbb3e 100644 --- a/howso/react_aggregate.amlg +++ b/howso/react_aggregate.amlg @@ -97,7 +97,7 @@ num_robust_prediction_contributions_samples_per_case (null) ;{type "number" min 0} ;Total sample size of model to use (using sampling with replacement) for feature_robust_accuracy_contributions. - ; Defaults to the smaller of 10000 or (num cases * 2^number of features) + ; Defaults to the smaller of 2000 or (num cases * 2^number of features) num_robust_accuracy_contributions_samples (null) ;{type "number" min 0} ;Total sample size of model to use (using sampling with replacement) for feature_robust_accuracy_contributions_permutation. @@ -557,8 +557,8 @@ (if num_robust_accuracy_contributions_samples num_robust_accuracy_contributions_samples - ;if the model is small, use the smaller of 10k or (num_cases * 2^f) because that's the amount of all possible combinations - (min 10000 (* num_training_cases (pow 2 (size context_features))) ) + ;if the model is small, use the smaller of 2k or (num_cases * 2^f) because that's the amount of all possible combinations + (min 2000 (* num_training_cases (pow 2 (size context_features))) ) ) case_weight_feature (if valid_weight_feature weight_feature) query_conditions action_condition_filter_query diff --git a/howso/residuals.amlg b/howso/residuals.amlg index 3f123d6e6..53ffc028b 100644 --- a/howso/residuals.amlg +++ b/howso/residuals.amlg @@ -747,37 +747,41 @@ (declare (assoc local_cases_map - ;if empty context set, use global expected values for all features, set local_cases_map to null - (if (= 0 (size react_context_features)) + (if output_raw_mda (null) - ;else compute the local model around the case using the robust set of react_context_features - (compute_on_contained_entities (append - (if focal_case - (query_not_in_entity_list (list (replace case_id) (replace focal_case))) - (query_not_in_entity_list (list (replace case_id))) - ) - (if (size context_condition_filter_query) - context_condition_filter_query - (list) - ) - time_series_filter_query - (query_nearest_generalized_distance - (replace k_parameter) - (replace react_context_features) - (replace (unzip case_values_map react_context_features)) - (replace feature_weights) - (replace !queryDistanceTypeMap) - (replace query_feature_attributes_map) - (replace feature_deviations) - (replace p_parameter) - (replace dt_parameter) - (if valid_weight_feature (replace weight_feature) (null)) - (replace tie_break_random_seed) - (null) ;radius - (replace !numericalPrecision) - ) - )) + ;if empty context set, use global expected values for all features, set local_cases_map to null + (if (= 0 (size react_context_features)) + (null) + + ;else compute the local model around the case using the robust set of react_context_features + (compute_on_contained_entities (append + (if focal_case + (query_not_in_entity_list (list (replace case_id) (replace focal_case))) + (query_not_in_entity_list (list (replace case_id))) + ) + (if (size context_condition_filter_query) + context_condition_filter_query + (list) + ) + time_series_filter_query + (query_nearest_generalized_distance + (replace k_parameter) + (replace react_context_features) + (replace (unzip case_values_map react_context_features)) + (replace feature_weights) + (replace !queryDistanceTypeMap) + (replace query_feature_attributes_map) + (replace feature_deviations) + (replace p_parameter) + (replace dt_parameter) + (if valid_weight_feature (replace weight_feature) (null)) + (replace tie_break_random_seed) + (null) ;radius + (replace !numericalPrecision) + ) + )) + ) ) )) @@ -836,10 +840,40 @@ (list (indices local_cases_map) (values local_cases_map) - (map (lambda (retrieve_from_entity (current_value) feature)) (indices local_cases_map)) ) ) + + ;else if we're using weighted residuals for mda, use the feature-specific weights here + output_raw_mda + (compute_on_contained_entities (append + (if focal_case + (query_not_in_entity_list (list (replace case_id) (replace focal_case))) + (query_not_in_entity_list (list (replace case_id))) + ) + (if (size context_condition_filter_query) + context_condition_filter_query + (list) + ) + time_series_filter_query + (query_nearest_generalized_distance + (replace k_parameter) + (replace react_context_features) + (replace (unzip case_values_map react_context_features)) + (get hyperparam_map ["featureMdaMap" feature]) + (replace !queryDistanceTypeMap) + (replace query_feature_attributes_map) + (replace feature_deviations) + (replace p_parameter) + (replace dt_parameter) + (if valid_weight_feature (replace weight_feature) (null)) + (replace tie_break_random_seed) + (null) ;radius + (replace !numericalPrecision) + feature + ) + )) + ) )) From ed8c33371dd598881b75f1f5bf6cdc6c50f2a2e6 Mon Sep 17 00:00:00 2001 From: howsoRes <144272317+howsoRes@users.noreply.github.com> Date: Tue, 10 Jun 2025 14:24:45 -0400 Subject: [PATCH 2/4] added to analyze --- howso/analysis_weights.amlg | 7 ++-- howso/feature_residuals.amlg | 2 + howso/mda_weight.amlg | 80 +++++++++++++++++++++++++++++++++++- howso/residuals.amlg | 4 +- 4 files changed, 86 insertions(+), 7 deletions(-) diff --git a/howso/analysis_weights.amlg b/howso/analysis_weights.amlg index 5b7527de2..9f20bf8f4 100644 --- a/howso/analysis_weights.amlg +++ b/howso/analysis_weights.amlg @@ -420,11 +420,11 @@ ;Second pass uses these decent residuals to finds even better hyperparmeters and improved residuals. ;Last pass finds usable hyperparams using the improved residuals and then calculates usable residuals for weights. ;At this point the hyperparameters and residuals and weights are stable enough for use. - (while (< iteration num_iterations) + (while (<= iteration num_iterations) ;compute mda on last iteration (assign (assoc - computing_mda (= iteration (- num_iterations 1)) + computing_mda (> iteration (- num_iterations 2)) )) (assign (assoc @@ -449,12 +449,13 @@ robust_residuals (if computing_mda "robust_mda" "deviations") num_samples (if computing_mda - num_samples_mda + (if (= iteration 4) 2000 num_samples_mda) ;the deviations query uses all features "superfull", there's no need to sample more ;than a default amount of 1000 residual_num_samples ) + weighted_predictions (= iteration 4) custom_hyperparam_map baseline_hyperparameter_map ;must compute confusion matrix to use sparse deviation matrix, but not when computing mda compute_all_statistics (and use_sdm (not computing_mda)) diff --git a/howso/feature_residuals.amlg b/howso/feature_residuals.amlg index be4d6abfa..88fa8d58f 100644 --- a/howso/feature_residuals.amlg +++ b/howso/feature_residuals.amlg @@ -313,6 +313,7 @@ robust_residuals (call !RunRobustResiduals (assoc leave_nulls_in_results (= "robust_mda" robust_residuals) + weighted_predictions (or output_raw_mda weighted_predictions) )) ;else @@ -339,6 +340,7 @@ ) output_raw_mda output_raw_mda mda_action_feature mda_action_feature + weighted_predictions weighted_predictions )) ) ) diff --git a/howso/mda_weight.amlg b/howso/mda_weight.amlg index 2d355e873..45e025bff 100644 --- a/howso/mda_weight.amlg +++ b/howso/mda_weight.amlg @@ -520,15 +520,86 @@ ) ) + ; (declare (assoc + ; mda_matrix_map + ; (zip + ; features + ; ;transpose the matrix, so every feature has its own (column) MDA values + ; (map + ; (lambda (let + ; (assoc feature_index (current_index 1)) + ; (zip + ; features + ; (map + ; (lambda (get (current_value) feature_index)) + ; mda_matrix + ; ) + ; ) + ; )) + ; mda_matrix + ; ) + ; ) + ; )) + + ;normalize the mda matrix as feature probabilities + ; (conclude + ; (map + ; (lambda (let + ; (assoc + ; total_weight (apply "+" (values (current_value 1))) + ; feature (current_index 1) + ; fixed_prob (/ 1 (- (size features) 1)) + ; ) + ; (if (= 0 total_weight) + ; (map + ; (lambda + ; (if (= feature (current_index)) 0 fixed_prob) + ; ) + ; (current_value) + ; ) + + ; (map + ; (lambda (/ (current_value) total_weight) ) + ; (current_value) + ; ) + ; ) + ; )) + ; mda_matrix_map + ; ) + ; ) + + ;set smallest possible probability assuming the Jeffreys prior approach (declare (assoc smallest_probability (/ 1 (+ 0.5 num_training_cases)) ;baseline isn't specified, declare it on the stack here baseline_hyperparameter_map hyperparam_map + + ; robust_residuals_map + ; (if weighted_predictions + ; (zip + ; features + ; (map + ; (lambda (if + ; (size (current_value)) + ; ;compute average by dividing by the number of values + ; (/ (apply "+" (filter (current_value))) (size (filter (current_value))) ) + + ; ;else if there were no values for this feature, return global feature residual + ; ;or simply null for inactive features + ; (if (contains_index !inactiveFeaturesMap (get features (current_index))) + ; (null) + ; (get hyperparam_map (list "featureResiduals" (get features (current_index 1)))) + ; ) + ; )) + ; feature_residuals_lists + ; ) + ; ) + ; ) )) - ;overwrite featureResiduals if custom ones are provided + ;overwrite featureResiduals if custom ones are providedic (if custom_residuals_map (set baseline_hyperparameter_map "featureResiduals" custom_residuals_map) ) @@ -541,7 +612,12 @@ (assoc target_feature (current_value 1) feature_index (current_index 1) - target_feature_residual (get baseline_hyperparameter_map ["featureResiduals" (current_value 2)]) + target_feature_residual + ;(if weighted_predictions + ; (get robust_residuals_map (current_value 1)) + + (get baseline_hyperparameter_map ["featureResiduals" (current_value 2)]) + ;) ) (declare (assoc diff --git a/howso/residuals.amlg b/howso/residuals.amlg index 53ffc028b..7010ebda0 100644 --- a/howso/residuals.amlg +++ b/howso/residuals.amlg @@ -747,7 +747,7 @@ (declare (assoc local_cases_map - (if output_raw_mda + (if weighted_predictions (null) ;if empty context set, use global expected values for all features, set local_cases_map to null @@ -845,7 +845,7 @@ ) ;else if we're using weighted residuals for mda, use the feature-specific weights here - output_raw_mda + weighted_predictions (compute_on_contained_entities (append (if focal_case (query_not_in_entity_list (list (replace case_id) (replace focal_case))) From 6d30d9de4291641965a5b2e0766c38e729380396 Mon Sep 17 00:00:00 2001 From: howsoRes <144272317+howsoRes@users.noreply.github.com> Date: Tue, 10 Jun 2025 15:42:18 -0400 Subject: [PATCH 3/4] undo --- howso/analysis_weights.amlg | 7 ++-- howso/feature_residuals.amlg | 2 - howso/mda_weight.amlg | 78 +----------------------------------- howso/residuals.amlg | 4 +- 4 files changed, 6 insertions(+), 85 deletions(-) diff --git a/howso/analysis_weights.amlg b/howso/analysis_weights.amlg index 9f20bf8f4..5b7527de2 100644 --- a/howso/analysis_weights.amlg +++ b/howso/analysis_weights.amlg @@ -420,11 +420,11 @@ ;Second pass uses these decent residuals to finds even better hyperparmeters and improved residuals. ;Last pass finds usable hyperparams using the improved residuals and then calculates usable residuals for weights. ;At this point the hyperparameters and residuals and weights are stable enough for use. - (while (<= iteration num_iterations) + (while (< iteration num_iterations) ;compute mda on last iteration (assign (assoc - computing_mda (> iteration (- num_iterations 2)) + computing_mda (= iteration (- num_iterations 1)) )) (assign (assoc @@ -449,13 +449,12 @@ robust_residuals (if computing_mda "robust_mda" "deviations") num_samples (if computing_mda - (if (= iteration 4) 2000 num_samples_mda) + num_samples_mda ;the deviations query uses all features "superfull", there's no need to sample more ;than a default amount of 1000 residual_num_samples ) - weighted_predictions (= iteration 4) custom_hyperparam_map baseline_hyperparameter_map ;must compute confusion matrix to use sparse deviation matrix, but not when computing mda compute_all_statistics (and use_sdm (not computing_mda)) diff --git a/howso/feature_residuals.amlg b/howso/feature_residuals.amlg index 88fa8d58f..be4d6abfa 100644 --- a/howso/feature_residuals.amlg +++ b/howso/feature_residuals.amlg @@ -313,7 +313,6 @@ robust_residuals (call !RunRobustResiduals (assoc leave_nulls_in_results (= "robust_mda" robust_residuals) - weighted_predictions (or output_raw_mda weighted_predictions) )) ;else @@ -340,7 +339,6 @@ ) output_raw_mda output_raw_mda mda_action_feature mda_action_feature - weighted_predictions weighted_predictions )) ) ) diff --git a/howso/mda_weight.amlg b/howso/mda_weight.amlg index 45e025bff..25239d0af 100644 --- a/howso/mda_weight.amlg +++ b/howso/mda_weight.amlg @@ -520,83 +520,12 @@ ) ) - ; (declare (assoc - ; mda_matrix_map - ; (zip - ; features - ; ;transpose the matrix, so every feature has its own (column) MDA values - ; (map - ; (lambda (let - ; (assoc feature_index (current_index 1)) - ; (zip - ; features - ; (map - ; (lambda (get (current_value) feature_index)) - ; mda_matrix - ; ) - ; ) - ; )) - ; mda_matrix - ; ) - ; ) - ; )) - - ;normalize the mda matrix as feature probabilities - ; (conclude - ; (map - ; (lambda (let - ; (assoc - ; total_weight (apply "+" (values (current_value 1))) - ; feature (current_index 1) - ; fixed_prob (/ 1 (- (size features) 1)) - ; ) - ; (if (= 0 total_weight) - ; (map - ; (lambda - ; (if (= feature (current_index)) 0 fixed_prob) - ; ) - ; (current_value) - ; ) - - ; (map - ; (lambda (/ (current_value) total_weight) ) - ; (current_value) - ; ) - ; ) - ; )) - ; mda_matrix_map - ; ) - ; ) - - ;set smallest possible probability assuming the Jeffreys prior approach (declare (assoc smallest_probability (/ 1 (+ 0.5 num_training_cases)) ;baseline isn't specified, declare it on the stack here baseline_hyperparameter_map hyperparam_map - - ; robust_residuals_map - ; (if weighted_predictions - ; (zip - ; features - ; (map - ; (lambda (if - ; (size (current_value)) - ; ;compute average by dividing by the number of values - ; (/ (apply "+" (filter (current_value))) (size (filter (current_value))) ) - - ; ;else if there were no values for this feature, return global feature residual - ; ;or simply null for inactive features - ; (if (contains_index !inactiveFeaturesMap (get features (current_index))) - ; (null) - ; (get hyperparam_map (list "featureResiduals" (get features (current_index 1)))) - ; ) - ; )) - ; feature_residuals_lists - ; ) - ; ) - ; ) )) ;overwrite featureResiduals if custom ones are providedic @@ -612,12 +541,7 @@ (assoc target_feature (current_value 1) feature_index (current_index 1) - target_feature_residual - ;(if weighted_predictions - ; (get robust_residuals_map (current_value 1)) - - (get baseline_hyperparameter_map ["featureResiduals" (current_value 2)]) - ;) + target_feature_residual (get baseline_hyperparameter_map ["featureResiduals" (current_value 2)]) ) (declare (assoc diff --git a/howso/residuals.amlg b/howso/residuals.amlg index 7010ebda0..53ffc028b 100644 --- a/howso/residuals.amlg +++ b/howso/residuals.amlg @@ -747,7 +747,7 @@ (declare (assoc local_cases_map - (if weighted_predictions + (if output_raw_mda (null) ;if empty context set, use global expected values for all features, set local_cases_map to null @@ -845,7 +845,7 @@ ) ;else if we're using weighted residuals for mda, use the feature-specific weights here - weighted_predictions + output_raw_mda (compute_on_contained_entities (append (if focal_case (query_not_in_entity_list (list (replace case_id) (replace focal_case))) From 1bb11daa939df97c9fd2149ed5b37655a4a3d451 Mon Sep 17 00:00:00 2001 From: howsoRes <144272317+howsoRes@users.noreply.github.com> Date: Tue, 10 Jun 2025 15:43:23 -0400 Subject: [PATCH 4/4] cleanup --- howso/mda_weight.amlg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/howso/mda_weight.amlg b/howso/mda_weight.amlg index 25239d0af..2d355e873 100644 --- a/howso/mda_weight.amlg +++ b/howso/mda_weight.amlg @@ -528,7 +528,7 @@ baseline_hyperparameter_map hyperparam_map )) - ;overwrite featureResiduals if custom ones are providedic + ;overwrite featureResiduals if custom ones are provided (if custom_residuals_map (set baseline_hyperparameter_map "featureResiduals" custom_residuals_map) )