def format_pvalues(table, pval, pval_adjust, pval_threshold, pval_digits=3):
    """
    Formats p-values to a fixed number of decimal places and optionally adds
    significance markers based on a threshold.

    Exactly one column is formatted: 'P-Value (adjusted)' when both `pval`
    and `pval_adjust` are set, otherwise 'P-Value' when only `pval` is set.
    The chosen column is overwritten with strings on the table passed in.

    Parameters
    ----------
    table : pd.DataFrame
        Table holding the p-value column to format.
    pval : bool
        Whether p-values are displayed. If falsy, the table is returned
        untouched.
    pval_adjust : str, optional
        Truthy when adjusted p-values are displayed; selects the
        'P-Value (adjusted)' column instead of 'P-Value'.
    pval_threshold : float, optional
        If set, values strictly below the threshold get a trailing '*'.
    pval_digits : int, default=3
        Number of decimal places. Values below 10**(-pval_digits) are shown
        as '<' followed by that bound (e.g. '<0.001').

    Returns
    -------
    pd.DataFrame
        The same `table` object, with the selected column formatted.

    Raises
    ------
    ValueError
        If `pval_digits` is negative.
    """
    # Adjusted p-values are only shown alongside p-values, so require both
    # flags before touching the adjusted column.
    if pval and pval_adjust:
        col = 'P-Value (adjusted)'
    elif pval:
        col = 'P-Value'
    else:
        return table

    if pval_digits < 0:
        raise ValueError(
            f"pval_digits must be a non-negative integer, got {pval_digits!r}"
        )

    # Smallest value that can be displayed at this precision; anything
    # below it would otherwise render as all zeros.
    floor = 10 ** (-pval_digits)
    floor_label = f"<{floor:.{pval_digits}f}"

    def _format(p):
        if pd.isnull(p):
            return ""
        try:
            fval = float(p)
        except (TypeError, ValueError, OverflowError):
            # Not a number: keep whatever text was there.
            return str(p)
        if fval < floor:
            return floor_label
        return f"{fval:.{pval_digits}f}"

    raw = table[col]
    formatted = raw.apply(_format).astype(str)

    if pval_threshold:
        # Compare on the numeric values, not the formatted strings. Coercion
        # turns blanks and other non-numeric cells into NaN, which never
        # compares below the threshold and therefore never gets a marker.
        significant = pd.to_numeric(raw, errors='coerce') < pval_threshold
        formatted = formatted.where(~significant, formatted + "*")

    table[col] = formatted
    return table
def test_pval_digits_custom_formatting():
    """
    Check that `pval_digits` sets the number of decimals shown for p-values,
    both on its own and together with the `pval_threshold` asterisk marker.
    """
    data = pd.DataFrame({
        'group': ['A', 'A', 'B', 'B', 'B'],
        'x': [1, 5, 1, 2, 2],
        'y': [1, 2, 5, 5, 6]
    })

    # (extra TableOne options, expected text in the second P-Value row)
    scenarios = [
        (dict(columns=['x'], categorical=['x'], pval_digits=5),
         '0.23262'),
        (dict(columns=['x'], categorical=['x'], pval_digits=3,
              pval_threshold=0.3),
         '0.233*'),
        (dict(columns=['y'], continuous=['y'], pval_digits=1,
              pval_threshold=0.3),
         '<0.1*'),
    ]

    for options, expected in scenarios:
        summary = TableOne(data, groupby='group', pval=True, **options)
        shown = summary.tableone['Grouped by group']['P-Value'].iloc[1]
        assert shown == expected