From 18002c57f2c5f0a35e7bd800a131645056b15f5e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Baccar?= Date: Mon, 14 Jan 2019 09:41:29 +0100 Subject: [PATCH 1/3] class ts_extractio --- transplant/features/dynamic_features.py | 55 +++++++++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 transplant/features/dynamic_features.py diff --git a/transplant/features/dynamic_features.py b/transplant/features/dynamic_features.py new file mode 100644 index 0000000..7588823 --- /dev/null +++ b/transplant/features/dynamic_features.py @@ -0,0 +1,55 @@ +import pandas as pd +import json +import os +import pandas as pd +import seaborn as sns +import numpy as np +import matplotlib.pyplot as plt + + +class ts_extraction: + """ + Compute features on TS for the dynamic dataset + + Input : + - df, dynamic Dataset + Output : + - df, with new features (prefix _tresh) + + """ + def __init__(self, df): + self.df = df + + def f_treshold(self, treshold_json): + + for i in treshold_json.keys(): + + tjson = treshold_json[i] + name_new_feature = i + serie = self.df[i] + cond1 = (serie >= tjson["etendue"][0] ) & (serie < tjson["normalite"][0]) + cond2 = (serie <= tjson["etendue"][1] ) & (serie > tjson["normalite"][1]) + + self.df[i + "_abnormal_tresh"] = np.where(cond1 | cond2, 1, 0) + self.df[i + "_clean_tresh"] = np.where((serie >= tjson["etendue"][0]) & (serie <= tjson["etendue"][1] ), 1, 0) + self.df[i + "_normal_tresh"] = np.where((serie >= tjson["normalite"][0]) & (serie <= tjson["normalite"][1] ), 1, 0) + self.df[i + "_dirty_tresh"] = np.where((serie <= tjson["etendue"][0]) & ( serie >= tjson["etendue"][1] ), 1, 0) + + def plot_tresh(self, treshold_json ,id_patient = 100, var = "Pmean", title = '') : + + + serie = self.df[self.df.id_patient == id_patient ][var] + + y = serie.values + x = serie.reset_index().index + + fig, ax, = plt.subplots(1, 1, sharex=True) + ax.plot(x, y , color='black') + + cond1 = (y <= treshold_json[var]["normalite"][0]) + cond2 = (y >= treshold_json[var]["normalite"][1]) + + ax.fill_between(x, y, treshold_json[var]["normalite"][0], where = cond1, facecolor='red', interpolate=True) + ax.fill_between(x, y, treshold_json[var]["normalite"][1], where = cond2, facecolor='red', interpolate=True) + ax.set_title(title) + plt.show() From 4717eeabd248ddb2209fb345906f94cbb22eb195 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Baccar?= Date: Sat, 19 Jan 2019 12:06:23 +0100 Subject: [PATCH 2/3] nouvelle version de dynamic feature treshold extraction, TRESHOLD_JSON est dans config.py --- transplant/config.py | 20 ++++ transplant/features/dynamic_features.py | 59 ++++------ transplant/visualization/__init__.py | 1 + transplant/visualization/graph.py | 141 +++++++++++++++--------- 4 files changed, 133 insertions(+), 88 deletions(-) create mode 100644 transplant/visualization/__init__.py diff --git a/transplant/config.py b/transplant/config.py index 184f8b3..6dc156d 100644 --- a/transplant/config.py +++ b/transplant/config.py @@ -164,3 +164,23 @@ 'Tabagisme_donor', 'Aspirations_donor', 'RX_donor', 'PF_donor', 'oto_score'] } + + +TRESHOLD_JSON_TS = { + "B.I.S": { + "etendue": [0, 100], + "normalite": [40, 60] + }, + "PAPsys": { + "etendue": [0, 4], + "normalite": [0, 1] + }, + "DC": { + "etendue": [0, 100], + "normalite": [0, 100] + }, + "Pmean": { + "etendue": [0, 100], + "normalite": [5, 10] + } +} diff --git a/transplant/features/dynamic_features.py b/transplant/features/dynamic_features.py index 7588823..a555be8 100644 --- a/transplant/features/dynamic_features.py +++ b/transplant/features/dynamic_features.py @@ -5,51 +5,34 @@ import seaborn as sns import numpy as np import matplotlib.pyplot as plt +from transplant.config import (TRESHOLD_JSON_TS) -class ts_extraction: - """ - Compute features on TS for the dynamic dataset +class time_serie_fX: + """ + Compute features on TS for the dynamic dataset + Input : + - df [pandasdataframe]: dynamic Dataset + Output : + - df [pandas dataframe], with new features (with prefix) - Input : - - df, dynamic Dataset - Output : - - df, with new features (prefix _tresh) + """ - """ - def __init__(self, df): - self.df = df + def __init__(self, df): + self.df = df - def f_treshold(self, treshold_json): + def treshold_fX(self, treshold_json=TRESHOLD_JSON_TS): - for i in treshold_json.keys(): + for i in treshold_json.keys(): - tjson = treshold_json[i] - name_new_feature = i - serie = self.df[i] - cond1 = (serie >= tjson["etendue"][0] ) & (serie < tjson["normalite"][0]) - cond2 = (serie <= tjson["etendue"][1] ) & (serie > tjson["normalite"][1]) + tjson = treshold_json[i] + name_new_feature = i + serie = self.df[i] + cond1 = ((serie >= tjson["etendue"][0]) & (serie < tjson["normalite"][0])) - self.df[i + "_abnormal_tresh"] = np.where(cond1 | cond2, 1, 0) - self.df[i + "_clean_tresh"] = np.where((serie >= tjson["etendue"][0]) & (serie <= tjson["etendue"][1] ), 1, 0) - self.df[i + "_normal_tresh"] = np.where((serie >= tjson["normalite"][0]) & (serie <= tjson["normalite"][1] ), 1, 0) - self.df[i + "_dirty_tresh"] = np.where((serie <= tjson["etendue"][0]) & ( serie >= tjson["etendue"][1] ), 1, 0) + cond2 = (serie <= tjson["etendue"][1]) & (serie > tjson["normalite"][1]) - def plot_tresh(self, treshold_json ,id_patient = 100, var = "Pmean", title = '') : + self.df[i + "_abnormal_treshFx"] = np.where(cond1 | cond2, 1, 0) - - serie = self.df[self.df.id_patient == id_patient ][var] - - y = serie.values - x = serie.reset_index().index - - fig, ax, = plt.subplots(1, 1, sharex=True) - ax.plot(x, y , color='black') - - cond1 = (y <= treshold_json[var]["normalite"][0]) - cond2 = (y >= treshold_json[var]["normalite"][1]) - - ax.fill_between(x, y, treshold_json[var]["normalite"][0], where = cond1, facecolor='red', interpolate=True) - ax.fill_between(x, y, treshold_json[var]["normalite"][1], where = cond2, facecolor='red', interpolate=True) - ax.set_title(title) - plt.show() + self.df[i + "_clean_treshFx"] = np.where((serie >= tjson["etendue"][0]) + & (serie <= tjson["etendue"][1]), 1, 0) diff --git a/transplant/visualization/__init__.py b/transplant/visualization/__init__.py new file mode 100644 index 0000000..d087122 --- /dev/null +++ b/transplant/visualization/__init__.py @@ -0,0 +1 @@ +from .graph import plot_dynamic_features, plot_compare_patient, plot_tresh diff --git a/transplant/visualization/graph.py b/transplant/visualization/graph.py index 6ec936a..4ca0b6b 100644 --- a/transplant/visualization/graph.py +++ b/transplant/visualization/graph.py @@ -4,7 +4,7 @@ import plotly.graph_objs as go from plotly.offline import init_notebook_mode, iplot - +import matplotlib.pyplot as plt def plot_dynamic_features(df, id_patient, features_list): @@ -12,65 +12,64 @@ def plot_dynamic_features(df, id_patient, features_list): Plot a dynamic graph of patient from medical measuring instrument. Work only in notebook. Display only numerical features - - Input : + + Input : - df [DataFrame] : Muse have features [['id_patient', 'time']] - id_patient [int] : Id patient - features_list [list] : list of features you want to analyse - - Ouput : + + Ouput : - Display a plotly graph """ - + init_notebook_mode(connected=True) - + # Check params - + if not isinstance(features_list, list): - raise Exception("""'features_list' must be a list. \n Example features_list=['FC', 'FIN2O'] + raise Exception("""'features_list' must be a list. \n Example features_list=['FC', 'FIN2O'] ou \n features_list=DYNAMIC_CATEGORIES['neurology'])""") - + if not isinstance(id_patient, int): raise Exception("""'id_patient' must be a int. \n Example id_patient=314""") - + data = df[df['id_patient'] == id_patient] - - if len(data) ==0: + + if len(data) == 0: raise Exception("""No data for this id_patient""") - - + try: time = data['time'] except: raise Exception("""DataFrame need to have time feature ('time')""") - + if len(features_list) != 0: - + # Init list of trace data_graph = [] - + # Remove nonusefull features features_list = [feature for feature in features_list if feature not in(['time', 'id_patient'])] - + # Take only numerical features features_list_clean = [] for feature in features_list: if is_numeric_dtype(df[feature]): features_list_clean.append(feature) - + if len(features_list_clean) == 0: raise Exception("""No numeric data is this DataFrame""") - + # Create trace for data_graph for feature in features_list_clean: trace = go.Scatter(x=time, y=data[feature].values, name = feature, yaxis='y1') - data_graph.append(trace) + data_graph.append(trace) else: raise Exception("""No data for this id_patient""") - + # Design graph layout = dict( title='Analyse du patient ' + str(id_patient), @@ -88,51 +87,52 @@ def plot_dynamic_features(df, id_patient, features_list): fig = dict(data=data_graph, layout=layout) iplot(fig) + def plot_compare_patient(df, feature_to_analyse, patient_list): """ Plot a dynamic graph to compare patient on One feature Work only in notebook. Display only numerical features - - Input : + + Input : - df [DataFrame] : Muse have features [['id_patient']] - feature_to_analyse [string] : Name of numerical feature - patient_list [list] : list of patient you want to compare - - Ouput : + + Ouput : - Display a plotly graph """ - + init_notebook_mode(connected=True) - + # Check params - + if not isinstance(feature_to_analyse, str): raise Exception("""feature_to_analyse muse be a string - Example 'ETCO2' """) - + if feature_to_analyse not in df.columns: raise Exception("""feature_to_analyse is not in the DataFrame""") - + if not is_numeric_dtype(df[feature_to_analyse]): raise Exception("""feature_to_analyse must be numeric""") - + if not isinstance(patient_list, list): raise Exception("""'patient_list' must be a list - Example [304, 305, 405]""") - - + + data = df[df['id_patient'].isin(patient_list)] - + if len(data) ==0: raise Exception("""No data for these patients""") - + # Init list of trace data_graph = [] - + for patient in patient_list: data_temp = data[data['id_patient'] == patient].copy() if len(data_temp) == 0: pass - + trace = go.Scatter(x=data_temp.reset_index(drop=True).index, y=data_temp[feature_to_analyse].values, name = str(patient), @@ -154,26 +154,27 @@ def plot_compare_patient(df, feature_to_analyse, patient_list): fig = dict(data=data_graph, layout=layout) iplot(fig) + def plot_analyse_factory(df, pca, hue=False, kmean=None): """ Plotting result from tools.analyse_factory.analyse_factory() - Input : + Input : - df [DataFrame] : Muse have features [['id_patient']] - pca [sklearn.decomposition.PCA] : PCA already fit - hue [Bool] : Using cluster to plot differents colors - kmean [sklearn.kmean] : Kmean already fit. hue must be True - - Ouput : + + Ouput : - Display a plotly graph """ - + init_notebook_mode(connected=True) color_list = ['#6ac1a5', '#fa8d67', '#8ea1c9'] - + pca_expl = round(pca.explained_variance_ratio_[0:2].sum(), 2) - + if hue == False: # Create a trace trace = go.Scatter( @@ -195,7 +196,7 @@ def plot_analyse_factory(df, pca, hue=False, kmean=None): name = 'cluster_'+str(cluster), text=df_temp["id_patient"].tolist() ) - data.append(trace) + data.append(trace) if kmean: @@ -203,7 +204,7 @@ def plot_analyse_factory(df, pca, hue=False, kmean=None): center = go.Scatter(x=centroids[:, 0], y=centroids[:, 1], showlegend=False, - mode='markers', + mode='markers', text='centroid', marker=dict( size=10, @@ -211,7 +212,7 @@ def plot_analyse_factory(df, pca, hue=False, kmean=None): symbol=17, color='black')) data.append(center) - + # Design graph layout = dict( title='Analyse de feature PCA explain (first 2 components) : ' + str(pca_expl), @@ -224,6 +225,46 @@ def plot_analyse_factory(df, pca, hue=False, kmean=None): title='pca_2' ) ) - + fig = dict(data=data, layout=layout) - iplot(fig) \ No newline at end of file + iplot(fig) + + +def plot_tresh(df, treshold_json, id_patient=100, var="Pmean", title=''): + + """ + Plotting time serie with treshold + + Input : + - df [DataFrame] : Muse have features [['id_patient']] + - treshold_json [Json]: : must have the following structure + {[colum_name]: + {"etendue": [value_max, value_min], + "normalite" : [val] + } + } + - id_patient [int,str] : patient time serie to plot + - var [str] : serie to plot + - title [str]: title for the chart + + Ouput : + - Display a matplotlib graph + """ + + serie = df[df.id_patient == id_patient][var] + + y = serie.values + x = serie.reset_index().index + + fig, ax, = plt.subplots(1, 1, sharex=True) + ax.plot(x, y, color='black') + + cond1 = (y <= treshold_json[var]["normalite"][0]) + cond2 = (y >= treshold_json[var]["normalite"][1]) + + ax.fill_between(x, y, treshold_json[var]["normalite"][0], where=cond1, + facecolor='red', interpolate=True) + ax.fill_between(x, y, treshold_json[var]["normalite"][1], where=cond2, + facecolor='red', interpolate=True) + ax.set_title(title) + plt.show() From d727f80e6bdb0eaae052efbb26447ecd54602031 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Baccar?= Date: Sat, 19 Jan 2019 15:27:59 +0100 Subject: [PATCH 3/3] update des seuils dans le fichier de config, config.py --- transplant/config.py | 42 ++++++++++++++++++++++++------------------ 1 file changed, 24 insertions(+), 18 deletions(-) diff --git a/transplant/config.py b/transplant/config.py index 6dc156d..82dc311 100644 --- a/transplant/config.py +++ b/transplant/config.py @@ -166,21 +166,27 @@ } -TRESHOLD_JSON_TS = { - "B.I.S": { - "etendue": [0, 100], - "normalite": [40, 60] - }, - "PAPsys": { - "etendue": [0, 4], - "normalite": [0, 1] - }, - "DC": { - "etendue": [0, 100], - "normalite": [0, 100] - }, - "Pmean": { - "etendue": [0, 100], - "normalite": [5, 10] - } -} +TRESHOLD_JSON_TS = {'Temp': {'etendue': [0, 50], 'normalite': [0, 50]}, + 'B.I.S': {'etendue': [0, 100], 'normalite': [0, 100]}, + 'BIS SR': {'etendue': [0, 100], 'normalite': [0, 100]}, + 'ETCO2': {'etendue': [0, 120], 'normalite': [0, 120]}, + 'FiO2': {'etendue': [21, 100], 'normalite': [21, 100]}, + 'FR': {'etendue': [0, 60], 'normalite': [0, 60]}, + 'PEEPtotal': {'etendue': [0, 20], 'normalite': [0, 20]}, + 'Pmax': {'etendue': [0, 65], 'normalite': [0, 65]}, + 'Pmean': {'etendue': [0, 40], 'normalite': [0, 40]}, + 'SpO2': {'etendue': [0, 100], 'normalite': [0, 100]}, + 'SvO2 (m)': {'etendue': [0, 100], 'normalite': [0, 100]}, + 'VT': {'etendue': [0, 900], 'normalite': [0, 900]}, + 'DC': {'etendue': [0, 5, 10], 'normalite': [0, 5, 10]}, + 'FC': {'etendue': [0, 220], 'normalite': [0, 220]}, + 'PAPdia': {'etendue': [0, 65], 'normalite': [0, 65]}, + 'PAPmoy': {'etendue': [0, 65], 'normalite': [0, 65]}, + 'PAPsys': {'etendue': [0, 120], 'normalite': [0, 120]}, + 'PASd': {'etendue': [0, 80], 'normalite': [0, 80]}, + 'PASm': {'etendue': [0, 150], 'normalite': [0, 150]}, + 'PASs': {'etendue': [0, 320], 'normalite': [0, 320]}, + 'PNId': {'etendue': [0, 80], 'normalite': [0, 80]}, + 'PNIm': {'etendue': [0, 150], 'normalite': [0, 150]}, + 'PNIs': {'etendue': [0, 320], 'normalite': [0, 320]} + }