Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions bin/format_kegg_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
from glob import glob
import logging
import subprocess
from skbio import write as write_sequence, read as read_sequence
from collections import defaultdict
import gzip
import argparse
Expand Down Expand Up @@ -64,6 +63,7 @@ def process_kegg(
download_date = get_iso_date()
if gene_ko_link_loc is not None and Path(gene_ko_link_loc).exists():
# add KOs to end of header where KO is not already there
from skbio import write as write_sequence
kegg_mod_loc = path.join(output_dir, "kegg.mod.fa")
write_sequence(
generate_modified_kegg_fasta(kegg_loc, gene_ko_link_loc),
Expand All @@ -73,7 +73,7 @@ def process_kegg(
else:
kegg_mod_loc = kegg_loc
# make mmseqsdb from modified kegg fasta
kegg_mmseqs_db = path.join(output_dir, "kegg.%s.mmsdb" % download_date)
kegg_mmseqs_db = path.join(output_dir, "kegg.mmsdb")
create_mmseqs(
kegg_mod_loc,
kegg_mmseqs_db,
Expand Down Expand Up @@ -114,6 +114,7 @@ def generate_modified_kegg_fasta(kegg_fasta, gene_ko_link_loc=None):
Takes kegg fasta file and gene ko link file, adds kos not already in headers to headers
Wish I knew about this, oh well I may split this out.
"""
from skbio import write as write_sequence, read as read_sequence
genes_ko_dict = defaultdict(list)
if gene_ko_link_loc is not None:
if gene_ko_link_loc.endswith(".gz"):
Expand Down Expand Up @@ -150,9 +151,8 @@ def main():
)
parser.add_argument(
"--skip_gene_ko_link",
type=bool,
action="store_true",
help="Skip gene KO link processing. If not passed in, `--gene_ko_link_loc` is required",
default=False,
)
parser.add_argument(
"--output_dir", type=str, help="Path to the output directory", default="kegg"
Expand Down
4 changes: 2 additions & 2 deletions modules/local/database/format_kegg_db.nf
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ process FORMAT_KEGG_DB {
errorStrategy 'finish'

conda "${moduleDir}/environment.yml"
container "community.wave.seqera.io/library/python_scikit-bio_scipy:0f89a100e990daf2"
container "community.wave.seqera.io/library/python_pandas_hmmer_mmseqs2_pruned:d2c88b719ab1322c"

tag { ch_kegg_pep }

Expand All @@ -19,7 +19,7 @@ process FORMAT_KEGG_DB {

script:
"""
if [ ${skip_gene_ko_link} ]; then
if [ "${skip_gene_ko_link}" = "true" ]; then
echo "No Gene KO Link file provided. Running KEGG DB formatting without"
format_kegg_database.py --kegg_loc ${ch_kegg_pep} --download_date ${kegg_download_date} --threads ${params.threads} --output_dir kegg --skip_gene_ko_link
else
Expand Down
2 changes: 1 addition & 1 deletion workflows/dram.nf
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,7 @@ workflow DRAM {

gene_ko_link_f = params.gene_ko_link_loc && file(params.gene_ko_link_loc).exists() ? file(params.gene_ko_link_loc) : default_sheet
kegg_download_date = params.kegg_download_date ? params.kegg_download_date : "''"
skip_gene_ko_link = params.skip_gene_ko_link ? 1 : 0
skip_gene_ko_link = params.skip_gene_ko_link ? "true" : "false"
FORMAT_KEGG_DB( kegg_pep_f, gene_ko_link_f, kegg_download_date, skip_gene_ko_link )

} else if (params.merge_annotations){
Expand Down