Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .github/workflows/build-docker-images.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,6 @@ jobs:
runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v3
- name: Build the full Docker image
run: docker build . --file Dockerfile --tag streamlitapp:latest
- uses: actions/checkout@v3
- name: Build the full Docker image
run: docker build --file Dockerfile --tag streamlitapp:latest --output type=tar,dest=/dev/null .
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ SHELL ["mamba", "run", "-n", "streamlit-env", "/bin/bash", "-c"]

# Install up-to-date cmake via mamba and packages for pyOpenMS build.
RUN mamba install cmake
RUN pip install --upgrade pip && python -m pip install -U setuptools nose Cython autowrap pandas 'numpy==1.26.4' pytest
RUN pip install --upgrade pip && python -m pip install -U setuptools nose 'Cython<3.1' autowrap pandas 'numpy==1.26.4' pytest

# Clone OpenMS branch and the associated contrib+thirdparties+pyOpenMS-doc submodules.
RUN git clone --recursive --depth=1 -b ${OPENMS_BRANCH} --single-branch ${OPENMS_REPO} && cd /OpenMS
Expand Down
8 changes: 7 additions & 1 deletion content/FLASHTnT/FLASHTnTLayoutManager.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,20 @@
'Internal fragment map (Protein table needed)',
'Tag table (Protein table needed)',
'Sequence tag view (Tag table needed)',
'Score Distribution Plot',
'MS1 raw heatmap',
'MS1 deconvolved heatmap'
]

COMPONENT_NAMES=[
'protein_table',
'sequence_view',
'internal_fragment_map',
'tag_table',
'combined_spectrum'
'combined_spectrum',
'id_fdr_plot',
Comment on lines +25 to +26
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

💡 Verification agent

🧩 Analysis chain

Verify component mapping between options and names.

The component name 'combined_spectrum' at line 25 appears to map to 'Sequence tag view (Tag table needed)' in the options list based on position. Please verify this mapping is correct, as there seems to be a potential mismatch.


🏁 Script executed:

#!/bin/bash
# Check for potential mismatch between COMPONENT_OPTIONS and COMPONENT_NAMES

echo "Checking component mappings in FLASHTnTLayoutManager.py..."
echo "Component 'combined_spectrum' should map to 'Sequence tag view (Tag table needed)'"

# Verify the option at index 4 in both lists
grep -A 7 "COMPONENT_OPTIONS=\[" content/FLASHTnT/FLASHTnTLayoutManager.py | tail -n 7 | head -n 1
grep -A 6 "COMPONENT_NAMES=\[" content/FLASHTnT/FLASHTnTLayoutManager.py | tail -n 6 | head -n 1

Length of output: 556


🏁 Script executed:

#!/bin/bash
# Extract full COMPONENT_OPTIONS and COMPONENT_NAMES lists for mapping review
echo "=== COMPONENT_OPTIONS ==="
grep -n "COMPONENT_OPTIONS" content/FLASHTnT/FLASHTnTLayoutManager.py -A 20

echo
echo "=== COMPONENT_NAMES ==="
grep -n "COMPONENT_NAMES" content/FLASHTnT/FLASHTnTLayoutManager.py -A 20

Length of output: 12478


Fix misaligned COMPONENT_OPTIONS/COMPONENT_NAMES entries

The 5th and 6th elements in your arrays are currently swapped/misnamed:

  • COMPONENT_OPTIONS[4] = 'Sequence tag view (Tag table needed)' ↔ COMPONENT_NAMES[4] = 'combined_spectrum'
  • COMPONENT_OPTIONS[5] = 'Score Distribution Plot' ↔ COMPONENT_NAMES[5] = 'id_fdr_plot'

These mappings don’t align semantically. Please update one or both lists so that:

  • 'Sequence tag view (Tag table needed)' maps to a name like 'sequence_tag_view'
  • 'Score Distribution Plot' maps to something like 'score_distribution_plot'
  • 'combined_spectrum' pairs with its intended option label (e.g. 'Combined spectrum')

Locations:

  • content/FLASHTnT/FLASHTnTLayoutManager.py lines 10–18 (COMPONENT_OPTIONS)
  • content/FLASHTnT/FLASHTnTLayoutManager.py lines 20–28 (COMPONENT_NAMES)

Suggested diff sketch:

 COMPONENT_OPTIONS = [
     …,
-    'Sequence tag view (Tag table needed)',
+    'Combined spectrum',                  # new or corrected label
+    'Sequence tag view (Tag table needed)',
     'Score Distribution Plot',
     …
 ]

 COMPONENT_NAMES = [
     …,
-    'combined_spectrum',
-    'id_fdr_plot',
+    'combined_spectrum',                  # match new/renamed option above
+    'sequence_tag_view',                  # maps to Sequence tag view
+    'score_distribution_plot',            # maps to Score Distribution Plot
     …
 ]

Committable suggestion skipped: line range outside the PR's diff.

'ms1_raw_heatmap',
'ms1_deconv_heat_map'
]

# Setup cache access
Expand Down
30 changes: 29 additions & 1 deletion src/parse/tnt.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

from io import StringIO
from pyopenms import AASequence
from scipy.stats import gaussian_kde

from src.parse.masstable import parseFLASHDeconvOutput, parseFLASHTaggerOutput
from src.render.sequence import (
Expand Down Expand Up @@ -157,4 +158,31 @@ def parseTnT(file_manager, dataset_id, deconv_mzML, anno_mzML, tag_tsv, protein_
}
file_manager.store_data(
dataset_id, 'settings', settings
)
)

density_target, density_decoy = fdr_density_distribution(protein_df, logger=logger)
file_manager.store_data(dataset_id, 'density_id_target', density_target)
file_manager.store_data(dataset_id, 'density_id_decoy', density_decoy)


def fdr_density_distribution(df, logger=None, n_points=200):
    """Estimate q-value density curves for target and decoy hits.

    Rows whose ``ProteoformLevelQvalue`` is not strictly positive (including
    NaN) are discarded. The remaining rows are split on whether ``accession``
    starts with ``'DECOY_'`` and a Gaussian kernel density estimate is
    evaluated for each group on an evenly spaced grid spanning that group's
    minimum and maximum value.

    Args:
        df: DataFrame with ``accession`` and ``ProteoformLevelQvalue``
            columns.
        logger: Optional object used to report a skipped KDE. Its interface
            is not visible from this function, so ``.warning`` is tried
            first and ``.log`` second; anything else is ignored.
        n_points: Number of grid points per curve (default 200).

    Returns:
        Tuple ``(density_target, density_decoy)`` of DataFrames with columns
        ``x`` and ``y``. A group yields an empty DataFrame when it has fewer
        than two distinct values or when the KDE fails numerically.
    """
    df = df[df['ProteoformLevelQvalue'] > 0]

    # na=False keeps the mask strictly boolean so that `~` cannot fail on
    # rows with a missing accession; such rows are counted as targets.
    is_decoy = df['accession'].str.startswith('DECOY_', na=False)
    qvalues = df['ProteoformLevelQvalue']

    density_target = _kde_density(
        qvalues[~is_decoy].dropna(), 'target', logger, n_points
    )
    density_decoy = _kde_density(
        qvalues[is_decoy].dropna(), 'decoy', logger, n_points
    )
    return density_target, density_decoy


def _kde_density(scores, label, logger, n_points):
    """Return an x/y density DataFrame for ``scores``, or an empty one."""
    # gaussian_kde needs a non-degenerate sample: a single point or
    # all-identical values give a singular covariance and raise.
    if scores.nunique() < 2:
        if len(scores) > 0:
            _report(
                logger,
                f"Skipping KDE for {label} distribution: "
                "fewer than two distinct values",
            )
        return pd.DataFrame(columns=['x', 'y'])

    grid = np.linspace(scores.min(), scores.max(), n_points)
    try:
        density = gaussian_kde(scores)(grid)
    except (ValueError, np.linalg.LinAlgError) as err:
        # Near-constant data can still be numerically singular.
        _report(
            logger, f"Failed to compute KDE for {label} distribution: {err}"
        )
        return pd.DataFrame(columns=['x', 'y'])
    return pd.DataFrame({'x': grid, 'y': density})


def _report(logger, message):
    """Send ``message`` to ``logger`` if it offers a usable method."""
    if logger is None:
        return
    # NOTE(review): the logger type passed by parseTnT is not visible here;
    # confirm which method it exposes and simplify accordingly.
    emit = getattr(logger, 'warning', None) or getattr(logger, 'log', None)
    if callable(emit):
        emit(message)
Comment on lines +168 to +188
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Implement error handling for the KDE computation.

While the function correctly computes the kernel density estimates for target and decoy distributions, it could benefit from additional error handling. The gaussian_kde function may raise exceptions if the input data has specific characteristics (e.g., constant values).

def fdr_density_distribution(df, logger=None):
    df = df[df['ProteoformLevelQvalue'] > 0]
    # Find density targets
    target_qscores = df[~df['accession'].str.startswith('DECOY_')]['ProteoformLevelQvalue'].dropna()
    if len(target_qscores) > 0:
+       try:
            x_target = np.linspace(target_qscores.min(), target_qscores.max(), 200)
            kde_target = gaussian_kde(target_qscores)
            density_target = pd.DataFrame({'x': x_target, 'y': kde_target(x_target)})
+       except Exception as e:
+           if logger:
+               logger.warning(f"Failed to compute KDE for target distribution: {str(e)}")
+           density_target = pd.DataFrame(columns=['x', 'y'])
    else:
        density_target = pd.DataFrame(columns=['x', 'y'])

    # Find density decoys (if present)
    decoy_qscores = df[df['accession'].str.startswith('DECOY_')]['ProteoformLevelQvalue'].dropna()
    if len(decoy_qscores) > 0:
+       try:
            x_decoy = np.linspace(decoy_qscores.min(), decoy_qscores.max(), 200)
            kde_decoy = gaussian_kde(decoy_qscores)
            density_decoy = pd.DataFrame({'x': x_decoy, 'y': kde_decoy(x_decoy)})
+       except Exception as e:
+           if logger:
+               logger.warning(f"Failed to compute KDE for decoy distribution: {str(e)}")
+           density_decoy = pd.DataFrame(columns=['x', 'y'])
    else:
        density_decoy = pd.DataFrame(columns=['x', 'y'])

    return density_target, density_decoy
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
def fdr_density_distribution(df, logger=None):
df = df[df['ProteoformLevelQvalue'] > 0]
# Find density targets
target_qscores = df[~df['accession'].str.startswith('DECOY_')]['ProteoformLevelQvalue'].dropna()
if len(target_qscores) > 0:
x_target = np.linspace(target_qscores.min(), target_qscores.max(), 200)
kde_target = gaussian_kde(target_qscores)
density_target = pd.DataFrame({'x': x_target, 'y': kde_target(x_target)})
else:
density_target = pd.DataFrame(columns=['x', 'y'])
# Find density decoys (if present)
decoy_qscores = df[df['accession'].str.startswith('DECOY_')]['ProteoformLevelQvalue'].dropna()
if len(decoy_qscores) > 0:
x_decoy = np.linspace(decoy_qscores.min(), decoy_qscores.max(), 200)
kde_decoy = gaussian_kde(decoy_qscores)
density_decoy = pd.DataFrame({'x': x_decoy, 'y': kde_decoy(x_decoy)})
else:
density_decoy = pd.DataFrame(columns=['x', 'y'])
return density_target, density_decoy
def fdr_density_distribution(df, logger=None):
df = df[df['ProteoformLevelQvalue'] > 0]
# Find density targets
target_qscores = df[~df['accession'].str.startswith('DECOY_')]['ProteoformLevelQvalue'].dropna()
if len(target_qscores) > 0:
try:
x_target = np.linspace(target_qscores.min(), target_qscores.max(), 200)
kde_target = gaussian_kde(target_qscores)
density_target = pd.DataFrame({'x': x_target, 'y': kde_target(x_target)})
except Exception as e:
if logger:
logger.warning(f"Failed to compute KDE for target distribution: {e}")
density_target = pd.DataFrame(columns=['x', 'y'])
else:
density_target = pd.DataFrame(columns=['x', 'y'])
# Find density decoys (if present)
decoy_qscores = df[df['accession'].str.startswith('DECOY_')]['ProteoformLevelQvalue'].dropna()
if len(decoy_qscores) > 0:
try:
x_decoy = np.linspace(decoy_qscores.min(), decoy_qscores.max(), 200)
kde_decoy = gaussian_kde(decoy_qscores)
density_decoy = pd.DataFrame({'x': x_decoy, 'y': kde_decoy(x_decoy)})
except Exception as e:
if logger:
logger.warning(f"Failed to compute KDE for decoy distribution: {e}")
density_decoy = pd.DataFrame(columns=['x', 'y'])
else:
density_decoy = pd.DataFrame(columns=['x', 'y'])
return density_target, density_decoy

6 changes: 6 additions & 0 deletions src/render/initialize.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,12 @@ def initialize_data(comp_name, selected_data, file_manager, tool):
data = file_manager.get_results(selected_data, ['density_decoy'])
data_to_send['density_decoy'] = data['density_decoy']
component_arguments = FDRPlotly(title="FDR Plot")
elif comp_name == 'id_fdr_plot':
data = file_manager.get_results(selected_data, ['density_id_target'])
data_to_send['density_target'] = data['density_id_target']
data = file_manager.get_results(selected_data, ['density_id_decoy'])
data_to_send['density_decoy'] = data['density_id_decoy']
component_arguments = FDRPlotly(title="FDR Plot")
Comment on lines +107 to +112
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Consider adding error handling for missing density datasets.

The code doesn't have explicit error handling for cases where density_id_target or density_id_decoy datasets might be missing. Consider adding a try-except block or using the partial=True parameter in the get_results call to handle such cases gracefully.

elif comp_name == 'id_fdr_plot':
+   try:
        data = file_manager.get_results(selected_data,  ['density_id_target'])
        data_to_send['density_target'] = data['density_id_target']
        data = file_manager.get_results(selected_data,  ['density_id_decoy'])
        data_to_send['density_decoy'] = data['density_id_decoy']
        component_arguments = FDRPlotly(title="FDR Plot")
+   except KeyError:
+       # Handle missing datasets
+       data_to_send['density_target'] = pd.DataFrame(columns=['x', 'y'])
+       data_to_send['density_decoy'] = pd.DataFrame(columns=['x', 'y'])
+       component_arguments = FDRPlotly(title="FDR Plot (No Data Available)")

Alternatively, using the partial=True parameter:

elif comp_name == 'id_fdr_plot':
-   data = file_manager.get_results(selected_data,  ['density_id_target'])
-   data_to_send['density_target'] = data['density_id_target']
-   data = file_manager.get_results(selected_data,  ['density_id_decoy'])
-   data_to_send['density_decoy'] = data['density_id_decoy']
+   data = file_manager.get_results(selected_data,  ['density_id_target', 'density_id_decoy'], partial=True)
+   data_to_send['density_target'] = data.get('density_id_target', pd.DataFrame(columns=['x', 'y']))
+   data_to_send['density_decoy'] = data.get('density_id_decoy', pd.DataFrame(columns=['x', 'y']))
    component_arguments = FDRPlotly(title="FDR Plot")
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
elif comp_name == 'id_fdr_plot':
data = file_manager.get_results(selected_data, ['density_id_target'])
data_to_send['density_target'] = data['density_id_target']
data = file_manager.get_results(selected_data, ['density_id_decoy'])
data_to_send['density_decoy'] = data['density_id_decoy']
component_arguments = FDRPlotly(title="FDR Plot")
elif comp_name == 'id_fdr_plot':
# load both densities at once, allowing missing keys
data = file_manager.get_results(
selected_data,
['density_id_target', 'density_id_decoy'],
partial=True
)
data_to_send['density_target'] = data.get(
'density_id_target',
pd.DataFrame(columns=['x', 'y'])
)
data_to_send['density_decoy'] = data.get(
'density_id_decoy',
pd.DataFrame(columns=['x', 'y'])
)
component_arguments = FDRPlotly(title="FDR Plot")

elif comp_name == 'protein_table':
# TODO: Unify lookup or remove in vue
data = file_manager.get_results(selected_data, ['scan_table'])
Expand Down