Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 27 additions & 20 deletions tableone/formatting.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,32 +35,39 @@ def set_display_options(max_rows=None,
warnings.warn(msg)


def format_pvalues(table, pval, pval_adjust, pval_threshold, pval_digits=3):
    """
    Formats p-values to a fixed number of decimal places and optionally adds
    significance markers based on a threshold.

    Exactly one column is formatted: 'P-Value (adjusted)' when both `pval`
    and `pval_adjust` are set, otherwise 'P-Value' when only `pval` is set.
    When `pval` is falsy the table is returned untouched.

    Parameters
    ----------
    table : pd.DataFrame
        Table holding the p-value column; the column is replaced in place.
    pval : bool
        Whether p-values are displayed at all.
    pval_adjust : str or None
        Multiple-testing adjustment setting; any truthy value selects the
        adjusted column.
    pval_threshold : float or None
        Values strictly below this threshold get a trailing asterisk.
        `None` disables the marker.
    pval_digits : int, default=3
        Number of decimal places. Values smaller than 10**(-pval_digits)
        are rendered as '<' followed by that cutoff (e.g. '<0.001').

    Returns
    -------
    pd.DataFrame
        The same `table` object, with the selected column converted to
        strings.
    """
    if not pval:
        return table

    col = 'P-Value (adjusted)' if pval_adjust else 'P-Value'

    # Smallest value that can still be shown with the requested precision;
    # computed once because it does not depend on the individual p-value.
    cutoff = 10 ** (-pval_digits)
    cutoff_label = f"<{cutoff:.{pval_digits}f}"

    def _format(p):
        if pd.isnull(p):
            return ""
        try:
            fval = float(p)
        except (TypeError, ValueError):
            # Leave cells that are not numbers as their plain string form.
            return str(p)
        if fval < cutoff:
            return cutoff_label
        return f"{fval:.{pval_digits}f}"

    formatted = table[col].apply(_format).astype(str)

    if pval_threshold is not None:
        # Compare on the numeric values, before they are turned into text.
        # Non-numeric and missing cells coerce to NaN and are never marked.
        significant = pd.to_numeric(table[col], errors='coerce') < pval_threshold
        formatted = formatted.where(~significant, formatted + "*")

    table[col] = formatted
    return table

Expand Down
17 changes: 12 additions & 5 deletions tableone/tableone.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,12 @@ class TableOne:
`holm-sidak` : step down method using Sidak adjustments
`simes-hochberg` : step-up method (independent)
`hommel` : closed method based on Simes tests (non-negative)

pval_digits : int, default=3
Number of decimal places to display for p-values.
pval_threshold : float, optional
Threshold below which p-values are marked with an asterisk (*).
For example, if set to 0.05, all p-values less than 0.05 will be
displayed with a trailing asterisk (e.g., '0.012*').
htest_name : bool, optional
Display a column with the names of hypothesis tests (default: False).
htest : dict, optional
Expand Down Expand Up @@ -219,7 +224,8 @@ def __init__(self, data: pd.DataFrame,
dip_test: bool = False, normal_test: bool = False,
tukey_test: bool = False,
pval_threshold: Optional[float] = None,
include_null: Optional[bool] = True) -> None:
include_null: Optional[bool] = True,
pval_digits: int = 3) -> None:

# Warn about deprecated parameters
handle_deprecated_parameters(labels, isnull, pval_test_name, remarks)
Expand All @@ -234,7 +240,7 @@ def __init__(self, data: pd.DataFrame,
htest, missing, ddof, rename, sort, limit, order,
label_suffix, decimals, smd, overall, row_percent,
dip_test, normal_test, tukey_test, pval_threshold,
include_null)
include_null, pval_digits)

# Initialize intermediate tables
self.initialize_intermediate_tables()
Expand Down Expand Up @@ -276,7 +282,7 @@ def initialize_core_attributes(self, data, columns, categorical, continuous, gro
htest, missing, ddof, rename, sort, limit, order,
label_suffix, decimals, smd, overall, row_percent,
dip_test, normal_test, tukey_test, pval_threshold,
include_null):
include_null, pval_digits):
"""
Initialize attributes.
"""
Expand Down Expand Up @@ -305,6 +311,7 @@ def initialize_core_attributes(self, data, columns, categorical, continuous, gro
self._pval_adjust = pval_adjust
self._pval_test_name = htest_name
self._pval_threshold = pval_threshold
self._pval_digits = pval_digits
self._reserved_columns = ['Missing', 'P-Value', 'Test', 'P-Value (adjusted)', 'SMD', 'Overall']
self._row_percent = row_percent
self._smd = smd
Expand Down Expand Up @@ -654,7 +661,7 @@ def _create_tableone(self, data):
table.columns = table.columns.values.astype(str)

table = sort_and_reindex(table, self._smd, self.smd_table, self._sort, self._columns)
table = format_pvalues(table, self._pval, self._pval_adjust, self._pval_threshold)
table = format_pvalues(table, self._pval, self._pval_adjust, self._pval_threshold, self._pval_digits)
table = format_smd_columns(table, self._smd, self.smd_table)
table = apply_order(table, self._order, self._groupby)
table = apply_limits(table, data, self._limit, self._categorical, self._order)
Expand Down
23 changes: 23 additions & 0 deletions tests/unit/test_tableone.py
Original file line number Diff line number Diff line change
Expand Up @@ -1393,3 +1393,26 @@ def test_handle_categorical_nulls_does_not_affect_continuous():
result = handle_categorical_nulls(df, categorical=['cat'])
assert result['cont'].dtype == float
assert result['cat'].iloc[2] == 'None'


def test_pval_digits_custom_formatting():
    """
    Check how the `pval_digits` and `pval_threshold` options shape the
    p-value string shown in the grouped table.
    """
    frame = pd.DataFrame({
        'group': ['A', 'A', 'B', 'B', 'B'],
        'x': [1, 5, 1, 2, 2],
        'y': [1, 2, 5, 5, 6]
    })

    def displayed_pvalue(**options):
        # Build a grouped table and read the 'P-Value' cell at position 1.
        built = TableOne(frame, groupby='group', pval=True, **options)
        return built.tableone['Grouped by group']['P-Value'].iloc[1]

    # Five decimal places, no significance marker requested.
    five_digits = displayed_pvalue(columns=['x'], categorical=['x'], pval_digits=5)
    assert five_digits == '0.23262'

    # Three decimal places, value below the threshold gets an asterisk.
    three_digits = displayed_pvalue(columns=['x'], categorical=['x'], pval_digits=3,
                                    pval_threshold=0.3)
    assert three_digits == '0.233*'

    # One decimal place: a value under 0.1 is shown as a '<' bound.
    one_digit = displayed_pvalue(columns=['y'], continuous=['y'], pval_digits=1,
                                 pval_threshold=0.3)
    assert one_digit == '<0.1*'