From e0417be81062c63d4d337e438521f9c0799d782c Mon Sep 17 00:00:00 2001 From: Gaurav Date: Sun, 15 Mar 2026 12:09:40 +0530 Subject: [PATCH] DataProfiler - data auto-discovery and summarization layer - Column-type detection (continuous, categorical, datetime) - Normality: Shapiro-Wilk (n<=5000) or D'Agostino-Pearson - Variance homogeneity: Levene's test (optional, via group_col) - IQR-based outlier detection per numeric column - Structured agent_hints JSON: parametric/nonparametric/welch routing - Ground-truth simulated dataset generator for eval harness - 8-test pytest suite covering shape, normality, outliers, hints, variance Foundation for Phase 2: LangGraph agent test-selection harness. --- .gitignore | 35 ++++ astats/__init__.py | 0 astats/profiler/__init__.py | 0 astats/profiler/data_profiler.py | 183 +++++++++++++++++++ examples/data/generate_sample.py | 34 ++++ examples/data/sample_dataset.csv | 301 +++++++++++++++++++++++++++++++ requirements.txt | Bin 0 -> 474 bytes tests/test_profiler.py | 66 +++++++ 8 files changed, 619 insertions(+) create mode 100644 .gitignore create mode 100644 astats/__init__.py create mode 100644 astats/profiler/__init__.py create mode 100644 astats/profiler/data_profiler.py create mode 100644 examples/data/generate_sample.py create mode 100644 examples/data/sample_dataset.csv create mode 100644 requirements.txt create mode 100644 tests/test_profiler.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ef98b2f --- /dev/null +++ b/.gitignore @@ -0,0 +1,35 @@ +# Virtual environment — never commit this +venv/ +env/ +.env/ +.venv/ + +# Python cache +__pycache__/ +*.py[cod] +*.pyo +*.pyd +.Python + +# Pytest cache +.pytest_cache/ +.cache/ + +# Distribution / packaging +dist/ +build/ +*.egg-info/ + +# Jupyter notebooks checkpoints +.ipynb_checkpoints/ + +# OS files +.DS_Store +Thumbs.db + +# VSCode settings (optional — remove if you want to share these) +.vscode/ + +# Environment variable files +.env +*.env \ No newline at end of file diff --git a/astats/__init__.py b/astats/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/astats/profiler/__init__.py b/astats/profiler/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/astats/profiler/data_profiler.py b/astats/profiler/data_profiler.py new file mode 100644 index 0000000..3daa1a8 --- /dev/null +++ b/astats/profiler/data_profiler.py @@ -0,0 +1,183 @@ +""" +AStats DataProfiler- +Auto-discovers dataset structure and statistical properties. +Outputs a structured JSON profile with agent_hints for the AStats agent. +""" +# imports +import json +import sys +from typing import Any + +import numpy as np +import pandas as pd +from scipy import stats + +# functions +def _normality_test(series: pd.Series) -> dict: + """Shapiro-Wilk for n<=5000, D'Agostino-Pearson for larger samples.""" + n = len(series) + if n < 3: + return {"test": "none", "reason": "insufficient_data"} + if n <= 5000: + stat, p = stats.shapiro(series) + test_name = "shapiro-wilk" + else: + stat, p = stats.normaltest(series) + test_name = "dagostino-pearson" + return { + "test": test_name, + "statistic": round(float(stat), 4), + "p_value": round(float(p), 4), + "is_normal": bool(p > 0.05), + "note": "p>0.05 suggests normality (fail to reject H0)", + } + +def _variance_homogeneity(df: pd.DataFrame, numeric_col: str, group_col: str) -> dict: + """Levene's test for equal variances across groups.""" + groups = [ + grp[numeric_col].dropna().values + for _, grp in df.groupby(group_col) + if grp[numeric_col].dropna().shape[0] >= 2 + ] + if len(groups) < 2: + return {"test": "none", "reason": "need_at_least_2_groups"} + stat, p = stats.levene(*groups) + return { + "test": "levene", + "statistic": round(float(stat), 4), + "p_value": round(float(p), 4), + "equal_variance": bool(p > 0.05), + "note": "p>0.05 suggests equal variances", + } + +def _outlier_summary(series: pd.Series) -> dict: + """IQR-based outlier detection.""" + q1, q3 = series.quantile(0.25), series.quantile(0.75) + iqr = q3 - q1 + lower, upper = q1 - 1.5 * iqr, q3 + 1.5 * iqr + outliers = series[(series < lower) | (series > upper)] + return { + "method": "IQR", + "lower_fence": round(float(lower), 4), + "upper_fence": round(float(upper), 4), + "outlier_count": int(len(outliers)), + "outlier_pct": round(float(len(outliers) / len(series) * 100), 2), + } + +def profile_dataset(df: pd.DataFrame, group_col: str | None = None) -> dict[str, Any]: + """ + Full statistical profile of a DataFrame. + + Parameters + -- + df : pd.DataFrame + Input dataset. + group_col : str, optional + If provided, variance homogeneity is tested between groups + for all numeric columns. + + Returns + -- + dict + Structured JSON-serialisable profile with agent_hints. + """ + profile: dict[str, Any] = { + "shape": {"rows": int(df.shape[0]), "columns": int(df.shape[1])}, + "columns": {}, + } + + for col in df.columns: + series = df[col].dropna() + col_info: dict[str, Any] = { + "dtype": str(df[col].dtype), + "missing": int(df[col].isna().sum()), + "missing_pct": round(float(df[col].isna().mean()) * 100, 2), + "unique": int(df[col].nunique()), + } + + if pd.api.types.is_numeric_dtype(df[col]): + col_info["role"] = "continuous" + col_info["descriptive"] = { + "mean": round(float(series.mean()), 4), + "median": round(float(series.median()), 4), + "std": round(float(series.std()), 4), + "min": round(float(series.min()), 4), + "max": round(float(series.max()), 4), + "skewness": round(float(stats.skew(series)), 4), + "kurtosis": round(float(stats.kurtosis(series)), 4), + } + col_info["normality"] = _normality_test(series) + col_info["outliers"] = _outlier_summary(series) + + if group_col and group_col in df.columns: + col_info["variance_homogeneity"] = _variance_homogeneity( + df, col, group_col + ) + + elif isinstance(df[col].dtype, pd.CategoricalDtype) or df[col].dtype == object or pd.api.types.is_string_dtype(df[col]): + col_info["role"] = "categorical" + col_info["top_values"] = df[col].value_counts().head(5).to_dict() + + elif pd.api.types.is_datetime64_any_dtype(df[col]): + col_info["role"] = "datetime" + col_info["range"] = {"min": str(series.min()), "max": str(series.max())} + + else: + col_info["role"] = "unknown" + + profile["columns"][col] = col_info + + # agent hints- + normal_cols = [ + c for c, v in profile["columns"].items() + if v.get("normality", {}).get("is_normal", False) + ] + non_normal_cols = [ + c for c, v in profile["columns"].items() + if "normality" in v and not v["normality"]["is_normal"] + ] + high_outlier_cols = [ + c for c, v in profile["columns"].items() + if v.get("outliers", {}).get("outlier_pct", 0) > 5 + ] + equal_variance_cols = [ + c for c, v in profile["columns"].items() + if v.get("variance_homogeneity", {}).get("equal_variance", None) is True + ] + unequal_variance_cols = [ + c for c, v in profile["columns"].items() + if v.get("variance_homogeneity", {}).get("equal_variance", None) is False + ] + + profile["agent_hints"] = { + "normal_columns": normal_cols, + "non_normal_columns": non_normal_cols, + "high_outlier_columns": high_outlier_cols, + "equal_variance_columns": equal_variance_cols, + "unequal_variance_columns": unequal_variance_cols, + "test_routing": { + "parametric_candidates": normal_cols, + "nonparametric_candidates": non_normal_cols, + "welch_candidates": unequal_variance_cols, + }, + "summary": ( + f"{len(normal_cols)} normal column(s), " + f"{len(non_normal_cols)} non-normal column(s), " + f"{len(high_outlier_cols)} column(s) with >5% outliers." + ), + } + + return profile + +def profile_csv(filepath: str, group_col: str | None = None) -> dict[str, Any]: + """Convenience wrapper: load a CSV and profile it.""" + df = pd.read_csv(filepath) + return profile_dataset(df, group_col=group_col) + +if __name__ == "__main__": + if len(sys.argv) < 2: + print("Usage: python data_profiler.py [group_col]") + sys.exit(1) + group = sys.argv[2] if len(sys.argv) > 2 else None + result = profile_csv(sys.argv[1], group_col=group) + print(json.dumps(result, indent=2)) \ No newline at end of file diff --git a/examples/data/generate_sample.py b/examples/data/generate_sample.py new file mode 100644 index 0000000..b8f6ce9 --- /dev/null +++ b/examples/data/generate_sample.py @@ -0,0 +1,34 @@ +""" +Generates simulated datasets with KNOWN statistical properties. +Used as ground-truth inputs for evaluating the AStats agent. +""" +import numpy as np +import pandas as pd + +np.random.seed(42) +N = 300 + +df = pd.DataFrame({ + # Normal: should pass normality test → parametric tests valid + "score_normal": np.random.normal(loc=50, scale=10, size=N), + # Skewed: should fail normality → non-parametric tests needed + "score_skewed": np.random.exponential(scale=5, size=N), + # Group column for between-group comparisons + "group": np.random.choice(["control", "treatment"], size=N), + # Categorical variable + "category": np.random.choice(["A", "B", "C"], size=N), + # Age with some missing values + "age": np.random.randint(18, 70, size=N).astype(float), +}) + +# Inject missing values +df.loc[np.random.choice(df.index, 15), "age"] = np.nan + +df.to_csv("sample_dataset.csv", index=False) + +print("sample_dataset.csv created.") +print("\nKnown ground-truth properties:") +print(" score_normal → NORMAL → use t-test / ANOVA") +print(" score_skewed → NON-NORMAL → use Mann-Whitney / Kruskal-Wallis") +print(" group → categorical (2 levels) → suitable grouping variable") +print(" age → continuous with 15 missing values") \ No newline at end of file diff --git a/examples/data/sample_dataset.csv b/examples/data/sample_dataset.csv new file mode 100644 index 0000000..c3b12da --- /dev/null +++ b/examples/data/sample_dataset.csv @@ -0,0 +1,301 @@ +score_normal,score_skewed,group,category,age +54.96714153011233,1.6566693115336928,treatment,A,52.0 +48.61735698828815,0.9766664802022349,treatment,A,35.0 +56.47688538100692,6.943781980027124,control,A,59.0 +65.23029856408026,8.221045916217097,control,C,56.0 +47.658466252766644,23.28502445070115,treatment,C,34.0 +47.6586304305082,2.660396770763025,control,B,31.0 +65.79212815507391,2.3262195598086635,treatment,B,48.0 +57.67434729152909,7.489772516717935,treatment,B,41.0 +45.30525614065048,2.0836683546537658,treatment,A,52.0 +55.42560043585965,13.350689621996633,treatment,A,61.0 +45.365823071875376,9.774195785831246,control,C,62.0 +45.34270246429743,2.8017780472107985,treatment,A,51.0 +52.41962271566034,6.948923582477305,control,A,20.0 +30.86719755342202,7.023164934275085,treatment,A,54.0 +32.75082167486967,0.5441875940335901,control,B,60.0 +44.37712470759027,11.642228404338159,treatment,B,57.0 +39.87168879665576,3.5185374487837717,control,C,43.0 +53.142473325952736,8.75666278761665,control,A,40.0 +40.91975924478789,1.9286771308225414,treatment,A,61.0 +35.87696298664709,11.29395257272963,treatment,C,56.0 +64.65648768921554,2.464942270300856,treatment,A, +47.74223699513465,0.05448403307587739,treatment,A, +50.67528204687924,11.789536484812269,control,B,21.0 +35.75251813786544,0.4786280527423763,treatment,C,46.0 +44.556172754748175,1.9232681669251943,control,C,39.0 +51.10922589709866,14.984861915941428,control,C,42.0 +38.490064225776976,15.039747700519948,treatment,C, +53.75698018345672,4.259986455276319,control,B,35.0 +43.99361310081195,4.996150401952623,control,B,50.0 +47.083062502067236,2.975073318114288,control,A,33.0 +43.98293387770603,1.7351138937309851,control,A,62.0 +68.52278184508938,1.9924316727519487,treatment,A,59.0 +49.86502775262066,5.581617899642532,treatment,A,61.0 +39.422890710440996,6.979189368315386,treatment,B,19.0 +58.225449121031886,7.840977081105743,treatment,C,52.0 +37.791563500289776,7.794155159243709,control,B,59.0 +52.08863595004755,0.4781847327015972,control,A,51.0 +30.403298761202244,3.41024798286678,treatment,B,47.0 +36.71813951101569,0.29640853224368896,treatment,C,30.0 +51.96861235869123,3.9873065784769643,treatment,C,30.0 +57.38466579995411,2.912776374936418,control,B,35.0 +51.7136828118997,10.933093321104007,treatment,B,49.0 +48.843517176117594,2.1609580973394253,treatment,B,49.0 +46.98896304410711,0.6225298877622544,treatment,A,52.0 +35.214780096325725,0.7715382727105968,control,B,56.0 +42.801557916052914,7.167152735395469,control,B,63.0 +45.393612290402125,4.814528398740175,control,B,69.0 +60.57122226218915,0.5330435612269272,control,C,46.0 +53.43618289568462,0.4392776083430254,control,C,66.0 +32.36959844637266,6.036042358265499,control,A,46.0 +53.24083969394795,0.37773044762898567,control,A,61.0 +46.149177195836835,8.625929269042906,control,C,33.0 +43.23077999694041,6.124998765650025,treatment,C,61.0 +56.11676288840868,0.4242437529015603,treatment,C,64.0 +60.30999522495951,0.44326933888903347,control,B,27.0 +59.31280119116199,21.57729281029046,control,B,47.0 +41.607824767773614,2.344187912761448,treatment,C,42.0 +46.907876241487855,2.3152763023765623,control,B,56.0 +53.31263431403564,8.377877016413386,control,B,37.0 +59.75545127122359,14.710822689816219,control,A,22.0 +45.2082576215471,21.34386969800992,treatment,C,18.0 +48.14341023336183,6.999496150581709,treatment,C,47.0 +38.93665025993972,2.360104998095628,treatment,B,66.0 +38.037933759193294,0.4359699684897863,treatment,C,20.0 +58.125258223941984,7.506212702273128,treatment,A,62.0 +63.56240028570823,4.086802037807465,control,A,31.0 +49.27989878419666,2.7601656265248686,treatment,A,47.0 +60.03532897892024,11.841188377390452,control,C,67.0 +53.61636025047634,0.5894010398594012,control,C,21.0 +43.54880245394876,3.3925255468647713,treatment,C,35.0 +53.61395605508414,0.057092947162708345,treatment,C,54.0 +65.38036566465969,3.1617718479146313,control,C,42.0 +49.64173960890049,0.2897521553296289,control,A,65.0 +65.64643655814007,0.6324549796558561,control,B,18.0 +23.802548959102555,0.6251311600372813,treatment,A, +58.21902504375224,5.237841932034858,treatment,C,32.0 +50.870470682381715,6.852988587982364,treatment,A,26.0 +47.009926495341325,4.377768886014342,treatment,C,32.0 +50.91760776535502,16.373601045511887,treatment,C,66.0 +30.124310853991073,2.348982889601336,control,A,41.0 +47.80328112162488,1.6823457871027936,treatment,C,21.0 +53.57112571511746,10.147512690851148,control,B,29.0 +64.77894044741517,1.265410338659267,control,C,54.0 +44.81729781726352,16.514350531259552,control,B, +41.91506397106812,0.061144721794846535,treatment,B,60.0 +44.982429564154636,17.51263461439741,treatment,C,23.0 +59.15402117702074,0.22059499309250946,treatment,A,56.0 +53.287511096596845,11.088606145800044,control,B,63.0 +44.70239796232961,3.750716251746952,control,A,41.0 +55.13267433113356,24.784143039630166,control,B,28.0 +50.9707754934804,0.3833068797722747,control,C,30.0 +59.68644990532889,4.035548318666744,control,C, +42.979469061226474,17.417876106280026,control,B,42.0 +46.723378534022316,3.70221966590522,treatment,C,24.0 +46.078918468678424,4.963141451905937,treatment,B,69.0 +35.364850518678814,5.949506190583871,treatment,C,21.0 +52.96120277064576,3.0306387783847546,treatment,A,25.0 +52.610552721798896,4.938370865040111,control,C,37.0 +50.05113456642461,4.389129309028778,control,A,46.0 +47.65412866624853,11.571163847413237,treatment,C,68.0 +35.84629257949586,0.23255730871740488,control,A,62.0 +45.79354677234641,1.6492136294937922,treatment,C,63.0 +46.572854834732304,15.019980029304655,treatment,A,27.0 +41.97722730778381,11.04837914226743,treatment,B,60.0 +48.387142883339905,3.0408763097981,control,B,51.0 +54.040508568145384,4.839665143694886,treatment,A,43.0 +68.8618590121053,1.6243670994756507,treatment,C,58.0 +51.74577812831839,1.0420208071803225,treatment,A,66.0 +52.57550390722764,3.1152929940204936,control,C,20.0 +49.25554084233833,2.179767673083667,control,B,23.0 +30.812287847009586,4.381218530033215,treatment,C,22.0 +49.73486124550783,0.40461142216592394,control,B,22.0 +50.60230209941027,18.324800612612837,control,C,64.0 +74.63242112485287,21.419327877762694,control,C,40.0 +48.076390352188774,5.989319409563947,control,C,66.0 +53.015473423336125,3.8403921720604495,control,A,26.0 +49.65288230294757,1.8518965077718,control,A,52.0 +38.31321962380468,8.404535837439983,control,B,37.0 +61.42822814515021,5.771647918244078,control,B,66.0 +57.519330326867745,0.8873682705024419,control,A,59.0 +57.910319470430466,12.091505459995638,control,B,52.0 +40.906125452052606,8.644972556836358,treatment,A,67.0 +64.02794310936099,14.958692625167181,treatment,C,35.0 +35.98148937207719,6.4680200209806715,control,B,34.0 +55.868570938002705,4.752020097956864,treatment,B,41.0 +71.90455625809979,2.7085125329655666,treatment,A,20.0 +40.09463674869312,13.49509180773559,control,B,49.0 +44.33702270397228,10.0519619002346,control,B,52.0 +50.99651365087641,0.23136469326561393,control,C,55.0 +44.96524345883801,0.13360408433255683,treatment,B, +34.493365689338674,2.361738807916314,treatment,C,64.0 +50.68562974806027,8.31823861498043,control,A,42.0 +39.37696286273895,21.82137734110084,control,B,26.0 +54.73592430635182,0.8150475496723985,control,C,21.0 +40.80575765766197,4.508620651078679,treatment,A,31.0 +65.4993440501754,2.3973684987904806,treatment,B,54.0 +42.16746707663763,17.51854277187981,treatment,A,51.0 +46.77938483794324,9.229566037168173,treatment,B,28.0 +58.135172173696695,9.110950232349285,control,A,58.0 +37.69135683566045,3.162077855698868,control,A,20.0 +52.27459934604129,2.679174681636745,control,C,45.0 +63.07142754282428,1.596944460750306,control,A,46.0 +33.92516765438772,0.29013481922418105,treatment,B,26.0 +51.84633858532304,10.00213070001655,treatment,B,20.0 +52.598827942484235,8.380587196249614,control,C,42.0 +57.8182287177731,40.862227989090414,control,B,51.0 +37.63049289121918,28.474367003721667,treatment,B,29.0 +36.795433869157236,4.05325796320697,control,B,59.0 +55.21941565616898,7.326415451031304,treatment,A,26.0 +52.96984673233186,14.480858414372179,control,A,32.0 +52.504928503458764,9.473860073546229,control,B,57.0 +53.46448209496975,1.4207622229057326,control,C,42.0 +43.19975278421509,2.9941341369993224,control,A,22.0 +52.322536971610035,0.6914817215654433,control,C,28.0 +52.93072473298681,15.401118932208544,control,B,51.0 +42.85648581973632,4.659238512943091,control,B,41.0 +68.65774511144757,1.2980186270924186,treatment,A, +54.73832920911788,5.569147695866251,treatment,B,55.0 +38.08696502797351,4.813352173715359,treatment,A,25.0 +56.5655360863383,2.2171023114855304,treatment,B,22.0 +40.253183297726785,0.6026956069923464,treatment,B,65.0 +57.87084603742452,5.567206417595511,control,A,43.0 +61.58595579007404,3.6730521209301887,control,A,35.0 +41.7931768164829,7.399035404802575,treatment,A,23.0 +59.63376129244322,3.6715493021175756,control,B,54.0 +54.12780926936498,9.55885055654242,treatment,B,65.0 +58.2206015999449,4.013770595107438,treatment,C,30.0 +68.96792982653947,4.115572904900736,control,A,45.0 +47.54611883997129,10.463793216148858,control,B,22.0 +42.4626383564251,2.5832365699405853,control,B,30.0 +41.10485570374477,0.7194397769514947,treatment,A,34.0 +41.841897150345616,0.14602510716047343,control,C,67.0 +49.228982905858956,7.035287263312928,treatment,A,57.0 +53.41151974816644,4.841994835303398,treatment,C,62.0 +52.766907993300194,6.088326738989382,treatment,B, +58.27183249036024,1.1974074674672173,control,C,46.0 +50.13001891877907,0.7330627561413933,control,B,68.0 +64.53534077157317,0.07325738132834392,treatment,A,20.0 +47.35343166762044,2.158436307575346,control,A,57.0 +77.20169166589619,4.456986878324723,control,A,51.0 +56.25667347765006,2.4899093378057535,control,C,57.0 +41.42842443583717,2.876597814142156,treatment,B,44.0 +39.29107501938888,11.725307614907209,treatment,A,30.0 +54.824724152431855,2.1405130721255587,control,A,62.0 +47.76537214674149,3.6076251403616055,treatment,B,50.0 +57.14000494092092,7.654358688598508,control,A,23.0 +54.73237624573545,2.525400656495899,control,B,41.0 +49.271710873431275,4.865452376765427,control,B,36.0 +41.532062819315954,9.915703218421534,control,B,33.0 +34.851527753141355,14.93095207687334,treatment,B,57.0 +45.53485047932979,0.7954093965741799,treatment,B,45.0 +58.56398794323472,13.058313815210393,treatment,C,50.0 +52.14093744130204,3.387513904977653,control,A,65.0 +37.542612212880115,1.4936772717569788,control,C,58.0 +51.73180925851182,3.0729348368534875,control,A, +53.85317379728837,19.56826548790296,treatment,C,60.0 +41.16142563798867,3.392456463336799,control,B,62.0 +51.53725105945528,1.9930801605024988,treatment,A,42.0 +50.58208718446,5.017431369788298,treatment,A,59.0 +38.57029702169377,1.3731423385780148,treatment,C,53.0 +53.577873603482836,0.3944765248811752,treatment,C,44.0 +55.60784526368234,0.6898760985620793,treatment,A,55.0 +60.830512431752766,0.6850921203350356,treatment,C,24.0 +60.53802052034903,0.8237995077833649,control,C,22.0 +36.22330632042909,0.7473003303375827,control,C,60.0 +40.62174960084877,5.120420255379033,treatment,B,62.0 +55.1503526720866,1.0037317851724081,treatment,B,68.0 +55.13785950912209,2.120696578974026,control,C,48.0 +55.150476863060476,11.354870625641789,treatment,C,40.0 +88.52731490654722,3.211905708232968,treatment,B,59.0 +55.70890510693167,5.506445418261903,control,B,27.0 +61.35565640180599,0.9456425855660824,treatment,A,49.0 +59.54001763493203,1.0677549049754753,treatment,A,60.0 +56.51391251305798,0.20863606358782652,treatment,B,47.0 +46.84730755359654,0.9252367203279044,control,C,56.0 +57.589692204932675,1.6327405931557277,control,C,48.0 +42.27174785462428,0.9740590874155397,control,C,35.0 +47.63181393259991,0.46442953930183783,treatment,C,53.0 +45.14636452170897,0.6427810674667627,treatment,A,19.0 +50.81874139386323,3.0881467168268957,control,B,31.0 +73.14658566673509,1.155461031725483,treatment,C,30.0 +31.32734807408252,2.2649055775545928,treatment,A,46.0 +56.86260190374514,3.500025923571652,treatment,C,29.0 +33.87284128810349,5.862287170062711,treatment,B,56.0 +45.280681342105666,0.20052865050007712,treatment,A,19.0 +60.889505969673664,8.032471219130597,control,B,49.0 +50.64280019095463,4.942968451836482,treatment,B,30.0 +39.22255222070694,0.4264771518816713,control,A,63.0 +42.84696290740032,10.340673492440454,treatment,A,44.0 +56.79597748934676,12.68346773189464,control,A,66.0 +42.69633368282864,0.31511413780081704,treatment,A,58.0 +52.16458589581975,1.6208842156432541,control,A,34.0 +50.45571839903814,8.204675916133212,treatment,A,57.0 +43.483996523941826,6.896786201071542,control,A,48.0 +71.43944089325325,1.0198981603090025,treatment,C,51.0 +56.33919022318011,1.1744951559076662,control,B, +29.74857413342393,2.3139255501218488,treatment,A,62.0 +51.86454314769428,3.3133128233430718,control,C,52.0 +43.38213535231612,4.81500917029276,control,C,65.0 +58.52433334796224,2.301562815001756,treatment,C,23.0 +42.07479261567299,3.104455546104588,treatment,B,36.0 +48.85263558533101,6.8811447019492125,control,A,47.0 +55.049872789804574,0.18686476160739232,treatment,C,24.0 +58.657551941701215,1.454683109411986,control,B,27.0 +37.997035929442234,6.247459374780129,control,B,58.0 +46.654987641590516,11.278833771653067,control,B,49.0 +45.25054688839044,3.583895561042004,treatment,C,51.0 +43.46670767426288,3.7976475117653448,control,A,45.0 +67.65454240281096,0.5668066927033169,control,C,32.0 +54.049817109609556,2.9657162304177542,treatment,C,47.0 +37.391160456649544,3.80303399527645,treatment,A,54.0 +59.17861947054776,1.3884640163032667,treatment,A,46.0 +71.22156197012633,1.5683730593927192,treatment,A,39.0 +60.32465260551147,2.36832492401236,control,B,21.0 +34.80630034045987,0.10137680375157257,control,A,37.0 +45.15765927133749,1.9436238062806788,control,C,49.0 +62.669111491866225,1.1877846781181494,control,A,53.0 +42.92330534381219,1.9837461511599133,treatment,C,67.0 +54.43819428146229,0.6378155164239063,control,B,21.0 +57.74634053429337,11.060399499715123,control,C,44.0 +40.730695284219166,4.501994069397799,treatment,C,34.0 +49.404746439382,5.683164789465158,control,A,29.0 +17.58732659930927,7.783545161888956,control,A,39.0 +39.756123586657104,3.450182109244426,control,C,34.0 +47.4743184860684,0.45466047258727926,control,A,27.0 +37.52216818035151,3.851291816474113,control,B,18.0 +66.32411303931636,4.419615289356868,control,B,39.0 +35.69858622039367,6.841083244042046,treatment,A,43.0 +45.599555133030165,2.8251732469149102,treatment,B,35.0 +51.30740577286091,0.6824233336465612,control,A,67.0 +64.41273289066116,1.6688109013085195,control,C, +35.6413784882056,2.255574127373117,treatment,C,64.0 +61.63163752154959,5.191123058109309,control,B,46.0 +50.10233061019587,4.2289086060070975,control,A,29.0 +40.18491348952049,2.2010337982590307,control,A,53.0 +54.62103474263271,21.53097886084906,treatment,B,52.0 +51.9905969557347,4.6541650423360466,control,C,69.0 +43.99783122841205,1.3539726437839996,treatment,B,29.0 +50.69802084990019,0.5367150231244343,control,C,24.0 +46.1468640313824,0.8294414629491293,treatment,A,46.0 +51.13517345251248,1.4115342469757048,treatment,C,28.0 +56.62130674521047,0.8758243747714701,control,B,50.0 +65.86016816145352,1.032458727272902,treatment,A,23.0 +37.62184501173151,1.6780292410772326,control,B,64.0 +71.33033374656267,0.952012168314435,treatment,B, +30.479122004774982,11.353757246779576,control,C,49.0 +48.482149049644164,0.4181785629600008,control,B,68.0 +55.883172064845766,3.7170617505635133,treatment,C, +52.809918677350325,2.6415277824465115,control,A,40.0 +43.77300480179406,20.1932108300796,control,C,43.0 +47.918777496427246,0.5941367284870245,treatment,A,51.0 +45.06999065341167,2.5362899687121523,control,C,63.0 +44.106352430557884,17.445298320633576,control,B,51.0 +58.496020970210246,10.031220309840773,treatment,C,46.0 +53.57015485965047,8.493315170180898,treatment,C,21.0 +43.070904047393455,1.4913754176675231,control,A,34.0 +58.995998754332504,0.9369976639324378,control,B,52.0 +53.072995208766095,5.5227979934089175,treatment,C,30.0 +58.1286211883896,13.251925474799776,treatment,B,58.0 +56.29628841923612,4.068252109523711,control,C,56.0 diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..5a070cd2f5937de8f5fa47dc8f6a165aab404a6b GIT binary patch literal 474 zcmYjOTMmLS5S(ulk0PN(F@AUluV4U?2uP$P;^EcVg%;B6rs+KP(cd@26E)+7D;ma` zCq{%lBgGaQRA?E(YjNYPRE{N{Fk8U;cLmh3c8Df%qD#soS41CNUR|06c${&zYYCz)~k8<|6*1@