From 328566412a2d24983e57b2760270a5282684d687 Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Wed, 3 Sep 2025 17:01:40 -0400 Subject: [PATCH 01/37] init demo --- sprocket-tests/custom/quickcheck.sh | 10 ++++ .../data_structures/flag_filter.toml | 49 +++++++++++++++++++ sprocket-tests/tools/picard.toml | 11 +++++ sprocket-tests/tools/samtools.toml | 31 ++++++++++++ 4 files changed, 101 insertions(+) create mode 100644 sprocket-tests/custom/quickcheck.sh create mode 100644 sprocket-tests/data_structures/flag_filter.toml create mode 100644 sprocket-tests/tools/picard.toml create mode 100644 sprocket-tests/tools/samtools.toml diff --git a/sprocket-tests/custom/quickcheck.sh b/sprocket-tests/custom/quickcheck.sh new file mode 100644 index 000000000..8c270cc8e --- /dev/null +++ b/sprocket-tests/custom/quickcheck.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +set -euo pipefail + +out_json=$1 + +out_bam=$(jq -r .bam "$out_json") + +samtools quickcheck "$out_bam" + diff --git a/sprocket-tests/data_structures/flag_filter.toml b/sprocket-tests/data_structures/flag_filter.toml new file mode 100644 index 000000000..549004e1f --- /dev/null +++ b/sprocket-tests/data_structures/flag_filter.toml @@ -0,0 +1,49 @@ +[[validate_string_is_12bit_int]] +name = "decimal_passes" # each test must have a unique identifier +[validate_string_is_12bit_int.inputs] +number = "5" +# without any tests explicitly configured, Sprocket will consider the task executing with a 0 exit code to be a "pass" and any non-zero exit code as a "fail" + +[[validate_string_is_12bit_int]] +name = "hexadecimal_passes" +[validate_string_is_12bit_int.inputs] +number = "0x900" +[validate_string_is_12bit_int.tests] +stdout.contains = "Input number (0x900) is valid" # builtin test for checking STDOUT logs + +[[validate_string_is_12bit_int]] +name = "too_big_hexadecimal_fails" +[validate_string_is_12bit_int.inputs] +number = "0x1000" +[validate_string_is_12bit_int.tests] +exit_code = 42 # the task should fail for this test +stderr.contains = "Input number (0x1000) is invalid" # similar to the stdout test + +[[validate_string_is_12bit_int]] +name = "too_big_decimal_fails" +[validate_string_is_12bit_int.inputs] +number = "4096" +[validate_string_is_12bit_int.tests] +exit_code = 42 +stderr.contains = [ + "Input number (4096) interpreted as decimal", + "But number must be less than 4096!", +] # `contains` test can also be an array of strings + +[[validate_flag_filter]] # a workflow test +name = "valid_FlagFilter_passes" +[validate_flag_filter.inputs.flags] +include_if_all = "3" # decimal +exclude_if_any = "0xF04" # hexadecimal +include_if_any = "03" # octal +exclude_if_all = "4095" # decimal + +[[validate_flag_filter]] +name = "invalid_FlagFilter_fails" +[validate_flag_filter.inputs.flags] +include_if_all = "" # empty string +exclude_if_any = "this is not a number" +include_if_any = "000000000011" # binary interpreted as octal. Too many digits for octal +exclude_if_all = "4095" # this is fine +[validate_flag_filter.tests] +should_fail = true diff --git a/sprocket-tests/tools/picard.toml b/sprocket-tests/tools/picard.toml new file mode 100644 index 000000000..b1723646b --- /dev/null +++ b/sprocket-tests/tools/picard.toml @@ -0,0 +1,11 @@ +[[merge_sam_files]] +name = "Merge works" +[merge_sam_files.inputs] +bams = [ + "$FIXTURES/test1.bam", + "$FIXTURES/test2.bam", +] +prefix = "test.merged" +[merge_sam_files.tests] +custom = "quickcheck.sh" + diff --git a/sprocket-tests/tools/samtools.toml b/sprocket-tests/tools/samtools.toml new file mode 100644 index 000000000..070d9c0bc --- /dev/null +++ b/sprocket-tests/tools/samtools.toml @@ -0,0 +1,31 @@ +[[bam_to_fastq]] +name = "kitchen_sink" +[[bam_to_fastq.matrix]] +bam = [ + "$FIXTURES/test1.bam", + "$FIXTURES/test2.bam", + "$FIXTURES/test3.bam", +] +bam_index = [ + "$FIXTURES/test1.bam.bai", + "$FIXTURES/test2.bam.bai", + "$FIXTURES/test3.bam.bai", +] +[[bam_to_fastq.matrix]] +bitwise_filter = [ + { include_if_all = "0x0", exclude_if_any = "0x900", include_if_any = "0x0", exclude_if_all = "0x0" }, + { include_if_all = "00", exclude_if_any = "0x904", include_if_any = "3", exclude_if_all = "0" }, +] +[[bam_to_fastq.matrix]] +paired_end = [true, false] +[[bam_to_fastq.matrix]] +retain_collated_bam = [true, false] +[[bam_to_fastq.matrix]] +append_read_number = [true, false] +[[bam_to_fastq.matrix]] +output_singletons = [true, false] +[bam_to_fastq.inputs] +prefix = "kitchen_sink_test" # the `prefix` input will be shared by _all_ permutations of the test matrix +# this test is to ensure all the options (and combinations thereof) are valid +# so no tests beyond a `0` exit code are needed here + From 7bc1e3a9c85e19c47dfb1f87c78930fb9b8b367e Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Tue, 9 Sep 2025 08:16:55 -0400 Subject: [PATCH 02/37] revise: mv TOML out of tests dir and into main workspace --- .../data_structures => data_structures}/flag_filter.toml | 0 {sprocket-tests/tools => tools}/picard.toml | 0 {sprocket-tests/tools => tools}/samtools.toml | 0 3 files changed, 0 insertions(+), 0 deletions(-) rename {sprocket-tests/data_structures => data_structures}/flag_filter.toml (100%) rename {sprocket-tests/tools => tools}/picard.toml (100%) rename {sprocket-tests/tools => tools}/samtools.toml (100%) diff --git a/sprocket-tests/data_structures/flag_filter.toml b/data_structures/flag_filter.toml similarity index 100% rename from sprocket-tests/data_structures/flag_filter.toml rename to data_structures/flag_filter.toml diff --git a/sprocket-tests/tools/picard.toml b/tools/picard.toml similarity index 100% rename from sprocket-tests/tools/picard.toml rename to tools/picard.toml diff --git a/sprocket-tests/tools/samtools.toml b/tools/samtools.toml similarity index 100% rename from sprocket-tests/tools/samtools.toml rename to tools/samtools.toml From 6438780b5ab75c551365755080ea1567cdd4ef86 Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Tue, 18 Nov 2025 09:18:08 -0500 Subject: [PATCH 03/37] switch from TOML to YAML --- data_structures/flag_filter.toml | 49 -------------------------------- data_structures/flag_filter.yaml | 43 ++++++++++++++++++++++++++++ tools/picard.toml | 11 ------- tools/picard.yaml | 9 ++++++ tools/samtools.toml | 31 -------------------- tools/samtools.yaml | 34 ++++++++++++++++++++++ 6 files changed, 86 insertions(+), 91 deletions(-) delete mode 100644 data_structures/flag_filter.toml create mode 100644 data_structures/flag_filter.yaml delete mode 100644 tools/picard.toml create mode 100644 tools/picard.yaml delete mode 100644 tools/samtools.toml create mode 100644 tools/samtools.yaml diff --git a/data_structures/flag_filter.toml b/data_structures/flag_filter.toml deleted file mode 100644 index 549004e1f..000000000 --- a/data_structures/flag_filter.toml +++ /dev/null @@ -1,49 +0,0 @@ -[[validate_string_is_12bit_int]] -name = "decimal_passes" # each test must have a unique identifier -[validate_string_is_12bit_int.inputs] -number = "5" -# without any tests explicitly configured, Sprocket will consider the task executing with a 0 exit code to be a "pass" and any non-zero exit code as a "fail" - -[[validate_string_is_12bit_int]] -name = "hexadecimal_passes" -[validate_string_is_12bit_int.inputs] -number = "0x900" -[validate_string_is_12bit_int.tests] -stdout.contains = "Input number (0x900) is valid" # builtin test for checking STDOUT logs - -[[validate_string_is_12bit_int]] -name = "too_big_hexadecimal_fails" -[validate_string_is_12bit_int.inputs] -number = "0x1000" -[validate_string_is_12bit_int.tests] -exit_code = 42 # the task should fail for this test -stderr.contains = "Input number (0x1000) is invalid" # similar to the stdout test - -[[validate_string_is_12bit_int]] -name = "too_big_decimal_fails" -[validate_string_is_12bit_int.inputs] -number = "4096" -[validate_string_is_12bit_int.tests] -exit_code = 42 -stderr.contains = [ - "Input number (4096) interpreted as decimal", - "But number must be less than 4096!", -] # `contains` test can also be an array of strings - -[[validate_flag_filter]] # a workflow test -name = "valid_FlagFilter_passes" -[validate_flag_filter.inputs.flags] -include_if_all = "3" # decimal -exclude_if_any = "0xF04" # hexadecimal -include_if_any = "03" # octal -exclude_if_all = "4095" # decimal - -[[validate_flag_filter]] -name = "invalid_FlagFilter_fails" -[validate_flag_filter.inputs.flags] -include_if_all = "" # empty string -exclude_if_any = "this is not a number" -include_if_any = "000000000011" # binary interpreted as octal. Too many digits for octal -exclude_if_all = "4095" # this is fine -[validate_flag_filter.tests] -should_fail = true diff --git a/data_structures/flag_filter.yaml b/data_structures/flag_filter.yaml new file mode 100644 index 000000000..9888ec27b --- /dev/null +++ b/data_structures/flag_filter.yaml @@ -0,0 +1,43 @@ +validate_string_is_12bit_int: + - name: decimal_passes + inputs: + - number: "5" + - name: hexadecimal_passes + inputs: + - number: "0x900" + tests: + stdout: + contains: Input number (0x900) is valid + - name: too_big_hexadecimal_fails + inputs: + - number: "0x1000" + tests: + exit_code: 42 + stderr: + contains: Input number (0x1000) is invalid + - name: too_big_decimal_fails + inputs: + - number: "4096" + tests: + exit_code: 42 + stderr: + contains: + - Input number (4096) interpreted as decimal + - But number must be less than 4096! +validate_flag_filter: + - name: valid_FlagFilter_passes + inputs: + - flags: + include_if_all: "3" + exclude_if_any: "0xF04" + include_if_any: "03" + exclude_if_all: "4095" + - name: invalid_FlagFilter_fails + inputs: + - flags: + include_if_all: "" + exclude_if_any: this is not a number + include_if_any: "000000000011" + exclude_if_all: "4095" + tests: + should_fail: true diff --git a/tools/picard.toml b/tools/picard.toml deleted file mode 100644 index b1723646b..000000000 --- a/tools/picard.toml +++ /dev/null @@ -1,11 +0,0 @@ -[[merge_sam_files]] -name = "Merge works" -[merge_sam_files.inputs] -bams = [ - "$FIXTURES/test1.bam", - "$FIXTURES/test2.bam", -] -prefix = "test.merged" -[merge_sam_files.tests] -custom = "quickcheck.sh" - diff --git a/tools/picard.yaml b/tools/picard.yaml new file mode 100644 index 000000000..075d6c794 --- /dev/null +++ b/tools/picard.yaml @@ -0,0 +1,9 @@ +merge_sam_files: + - name: Merge works + inputs: + - bams: + - $FIXTURES/test1.bam + - $FIXTURES/test2.bam + - prefix: test.merged + tests: + custom: quickcheck.sh diff --git a/tools/samtools.toml b/tools/samtools.toml deleted file mode 100644 index 070d9c0bc..000000000 --- a/tools/samtools.toml +++ /dev/null @@ -1,31 +0,0 @@ -[[bam_to_fastq]] -name = "kitchen_sink" -[[bam_to_fastq.matrix]] -bam = [ - "$FIXTURES/test1.bam", - "$FIXTURES/test2.bam", - "$FIXTURES/test3.bam", -] -bam_index = [ - "$FIXTURES/test1.bam.bai", - "$FIXTURES/test2.bam.bai", - "$FIXTURES/test3.bam.bai", -] -[[bam_to_fastq.matrix]] -bitwise_filter = [ - { include_if_all = "0x0", exclude_if_any = "0x900", include_if_any = "0x0", exclude_if_all = "0x0" }, - { include_if_all = "00", exclude_if_any = "0x904", include_if_any = "3", exclude_if_all = "0" }, -] -[[bam_to_fastq.matrix]] -paired_end = [true, false] -[[bam_to_fastq.matrix]] -retain_collated_bam = [true, false] -[[bam_to_fastq.matrix]] -append_read_number = [true, false] -[[bam_to_fastq.matrix]] -output_singletons = [true, false] -[bam_to_fastq.inputs] -prefix = "kitchen_sink_test" # the `prefix` input will be shared by _all_ permutations of the test matrix -# this test is to ensure all the options (and combinations thereof) are valid -# so no tests beyond a `0` exit code are needed here - diff --git a/tools/samtools.yaml b/tools/samtools.yaml new file mode 100644 index 000000000..0e09fe9f4 --- /dev/null +++ b/tools/samtools.yaml @@ -0,0 +1,34 @@ +bam_to_fastq: + - name: kitchen_sink + inputs: + - bam: + - $FIXTURES/test1.bam + - $FIXTURES/test2.bam + - $FIXTURES/test3.bam + bam_index: + - $FIXTURES/test1.bam.bai + - $FIXTURES/test2.bam.bai + - $FIXTURES/test3.bam.bai + - bitwise_filter: + - include_if_all: "0x0" + exclude_if_any: "0x900" + include_if_any: "0x0" + exclude_if_all: "0x0" + - include_if_all: "00" + exclude_if_any: "0x904" + include_if_any: "3" + exclude_if_all: "0" + - paired_end: + - true + - false + - retain_collated_bam: + - true + - false + - append_read_number: + - true + - false + - output_singletons: + - true + - false + - prefix: + - kitchen_sink_test From 62744f295be4186311f62b44606ec2ab2adc34fc Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Wed, 19 Nov 2025 14:54:00 -0500 Subject: [PATCH 04/37] WIP --- data_structures/flag_filter.yaml | 16 ++++++++++------ tools/picard.yaml | 6 +++--- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/data_structures/flag_filter.yaml b/data_structures/flag_filter.yaml index 9888ec27b..793edb8f4 100644 --- a/data_structures/flag_filter.yaml +++ b/data_structures/flag_filter.yaml @@ -1,23 +1,27 @@ validate_string_is_12bit_int: - name: decimal_passes inputs: - - number: "5" + - number: + - "5" - name: hexadecimal_passes inputs: - - number: "0x900" + - number: + - "0x900" tests: stdout: contains: Input number (0x900) is valid - name: too_big_hexadecimal_fails inputs: - - number: "0x1000" + - number: + - "0x1000" tests: exit_code: 42 stderr: contains: Input number (0x1000) is invalid - name: too_big_decimal_fails inputs: - - number: "4096" + - number: + - "4096" tests: exit_code: 42 stderr: @@ -28,14 +32,14 @@ validate_flag_filter: - name: valid_FlagFilter_passes inputs: - flags: - include_if_all: "3" + - include_if_all: "3" exclude_if_any: "0xF04" include_if_any: "03" exclude_if_all: "4095" - name: invalid_FlagFilter_fails inputs: - flags: - include_if_all: "" + - include_if_all: "" exclude_if_any: this is not a number include_if_any: "000000000011" exclude_if_all: "4095" diff --git a/tools/picard.yaml b/tools/picard.yaml index 075d6c794..502cd7ea1 100644 --- a/tools/picard.yaml +++ b/tools/picard.yaml @@ -2,8 +2,8 @@ merge_sam_files: - name: Merge works inputs: - bams: - - $FIXTURES/test1.bam - - $FIXTURES/test2.bam - - prefix: test.merged + - [$FIXTURES/test1.bam, $FIXTURES/test2.bam] + - prefix: + - test.merged tests: custom: quickcheck.sh From a9cc715c65c4b8242f4e679e54bcc2cb10dc57e8 Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Wed, 19 Nov 2025 14:57:20 -0500 Subject: [PATCH 05/37] fix: tests -> assertions --- data_structures/flag_filter.yaml | 6 +++--- tools/picard.yaml | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/data_structures/flag_filter.yaml b/data_structures/flag_filter.yaml index 793edb8f4..d503db985 100644 --- a/data_structures/flag_filter.yaml +++ b/data_structures/flag_filter.yaml @@ -14,7 +14,7 @@ validate_string_is_12bit_int: inputs: - number: - "0x1000" - tests: + assertions: exit_code: 42 stderr: contains: Input number (0x1000) is invalid @@ -22,7 +22,7 @@ validate_string_is_12bit_int: inputs: - number: - "4096" - tests: + assertions: exit_code: 42 stderr: contains: @@ -43,5 +43,5 @@ validate_flag_filter: exclude_if_any: this is not a number include_if_any: "000000000011" exclude_if_all: "4095" - tests: + assertions: should_fail: true diff --git a/tools/picard.yaml b/tools/picard.yaml index 502cd7ea1..a6a7be331 100644 --- a/tools/picard.yaml +++ b/tools/picard.yaml @@ -5,5 +5,5 @@ merge_sam_files: - [$FIXTURES/test1.bam, $FIXTURES/test2.bam] - prefix: - test.merged - tests: + assertions: custom: quickcheck.sh From e09621cfa16f756c3f1dae9f9adf57b82588a726 Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Wed, 19 Nov 2025 16:09:23 -0500 Subject: [PATCH 06/37] Update flag_filter.yaml --- data_structures/flag_filter.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data_structures/flag_filter.yaml b/data_structures/flag_filter.yaml index d503db985..a9abf5b16 100644 --- a/data_structures/flag_filter.yaml +++ b/data_structures/flag_filter.yaml @@ -7,7 +7,7 @@ validate_string_is_12bit_int: inputs: - number: - "0x900" - tests: + assertions: stdout: contains: Input number (0x900) is valid - name: too_big_hexadecimal_fails From 0be911adff50fd00e911ae69b47b701c5551e49e Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Sat, 29 Nov 2025 19:58:16 -0500 Subject: [PATCH 07/37] feat: use Peter's top level mapping representation --- data_structures/flag_filter.yaml | 20 +++---- data_structures/read_group.yml | 13 +++++ tools/picard.yaml | 4 +- tools/samtools.yaml | 89 ++++++++++++++++++++++---------- 4 files changed, 88 insertions(+), 38 deletions(-) create mode 100644 data_structures/read_group.yml diff --git a/data_structures/flag_filter.yaml b/data_structures/flag_filter.yaml index a9abf5b16..1a667700a 100644 --- a/data_structures/flag_filter.yaml +++ b/data_structures/flag_filter.yaml @@ -1,27 +1,27 @@ validate_string_is_12bit_int: - name: decimal_passes inputs: - - number: - - "5" + number: + - "5" - name: hexadecimal_passes inputs: - - number: - - "0x900" + number: + - "0x900" assertions: stdout: contains: Input number (0x900) is valid - name: too_big_hexadecimal_fails inputs: - - number: - - "0x1000" + number: + - "0x1000" assertions: exit_code: 42 stderr: contains: Input number (0x1000) is invalid - name: too_big_decimal_fails inputs: - - number: - - "4096" + number: + - "4096" assertions: exit_code: 42 stderr: @@ -31,14 +31,14 @@ validate_string_is_12bit_int: validate_flag_filter: - name: valid_FlagFilter_passes inputs: - - flags: + flags: - include_if_all: "3" exclude_if_any: "0xF04" include_if_any: "03" exclude_if_all: "4095" - name: invalid_FlagFilter_fails inputs: - - flags: + flags: - include_if_all: "" exclude_if_any: this is not a number include_if_any: "000000000011" diff --git a/data_structures/read_group.yml b/data_structures/read_group.yml new file mode 100644 index 000000000..c92e5c2b0 --- /dev/null +++ b/data_structures/read_group.yml @@ -0,0 +1,13 @@ +# Note this file has the extension `.yml` while other tests end with `.yaml`. +# This is an intentional test that both extensions work. +read_group_to_string: + - name: bad_id + inputs: + read_group: + - ID: id, + SM: sample_a + LB: library + assertions: + exit_code: 1 + stdout: + contains: ID (id) must not match pattern diff --git a/tools/picard.yaml b/tools/picard.yaml index a6a7be331..f2b547bb4 100644 --- a/tools/picard.yaml +++ b/tools/picard.yaml @@ -1,9 +1,9 @@ merge_sam_files: - name: Merge works inputs: - - bams: + bams: - [$FIXTURES/test1.bam, $FIXTURES/test2.bam] - - prefix: + prefix: - test.merged assertions: custom: quickcheck.sh diff --git a/tools/samtools.yaml b/tools/samtools.yaml index 0e09fe9f4..8f6e5276f 100644 --- a/tools/samtools.yaml +++ b/tools/samtools.yaml @@ -1,34 +1,71 @@ bam_to_fastq: - name: kitchen_sink inputs: - - bam: + $files: + bam: + - $FIXTURES/test1.bam + - $FIXTURES/test2.bam + - $FIXTURES/test3.bam + bam_index: + - $FIXTURES/test1.bam.bai + - $FIXTURES/test2.bam.bai + - $FIXTURES/test3.bam.bai + bitwise_filter: + - include_if_all: "0x0" + exclude_if_any: "0x900" + include_if_any: "0x0" + exclude_if_all: "0x0" + - include_if_all: "00" + exclude_if_any: "0x904" + include_if_any: "3" + exclude_if_all: "0" + paired_end: + - true + - false + retain_collated_bam: + - true + - false + append_read_number: + - true + - false + output_singletons: + - true + - false + prefix: + - kitchen_sink_test + - name: simpler + inputs: + output_singletons: + - true + - false + $files: + bam: - $FIXTURES/test1.bam - $FIXTURES/test2.bam - - $FIXTURES/test3.bam bam_index: - $FIXTURES/test1.bam.bai - $FIXTURES/test2.bam.bai - - $FIXTURES/test3.bam.bai - - bitwise_filter: - - include_if_all: "0x0" - exclude_if_any: "0x900" - include_if_any: "0x0" - exclude_if_all: "0x0" - - include_if_all: "00" - exclude_if_any: "0x904" - include_if_any: "3" - exclude_if_all: "0" - - paired_end: - - true - - false - - retain_collated_bam: - - true - - false - - append_read_number: - - true - - false - - output_singletons: - - true - - false - - prefix: - - kitchen_sink_test + - name: not as simple + inputs: + output_singletons: + - true + - false + $files: + bam: + - $FIXTURES/test1.bam + - $FIXTURES/test2.bam + - $FIXTURES/test3.bam + bam_index: + - $FIXTURES/test1.bam.bai + - $FIXTURES/test2.bam.bai + - $FIXTURES/test3.bam.bai + $ref: + ref_fasta: + - hg19.fasta + - GRCh38.fasta + ref_fasta_index: + - hg19.fa.fai + - GRCh38.fa.fai + prefix: + - not_as_simple + From 374392a61921c0c688c087da0652b6f46cf3bfb8 Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Tue, 9 Dec 2025 13:56:33 -0500 Subject: [PATCH 08/37] set up sprocket test infra --- sprocket-tests/custom/quickcheck.sh | 10 ---------- test/bin/quickcheck.sh | 0 .../fixtures}/1scattered.interval_list | 0 ...001_R01C01.beta_swan_norm_unfiltered.genomic.csv | 0 ...001_R03C01.beta_swan_norm_unfiltered.genomic.csv | 0 .../fixtures}/201533520001_R03C01_Grn.idat | Bin .../fixtures}/201533520001_R03C01_Red.idat | Bin .../fixtures}/Aligned.sortedByCoord.chr9_chr22.bam | 0 .../Aligned.sortedByCoord.chr9_chr22.bam.bai | 0 .../input => test/fixtures}/GRCh38.chr1_chr19.dict | 0 {tests/input => test/fixtures}/GRCh38.chr1_chr19.fa | 0 .../fixtures}/GRCh38.chr1_chr19.fa.fai | 0 .../input => test/fixtures}/GRCh38.chr9_chr22.fa.gz | 0 .../fixtures}/GRCh38.chrY_chrM.bwa_db.tar.gz | 0 .../input => test/fixtures}/GRCh38.chrY_chrM.dict | 0 {tests/input => test/fixtures}/GRCh38.chrY_chrM.fa | 0 .../input => test/fixtures}/GRCh38.chrY_chrM.fa.fai | 0 .../Homo_sapiens_assembly38.dbsnp138.top5000.vcf | 0 ...Homo_sapiens_assembly38.dbsnp138.top5000.vcf.idx | Bin ...Mills_and_1000G_gold_standard.indels.hg38.vcf.gz | 0 ...s_and_1000G_gold_standard.indels.hg38.vcf.gz.tbi | Bin {tests/input => test/fixtures}/README.md | 0 .../fixtures}/chr1_chr19.interval_list | 0 {tests/input => test/fixtures}/combined_beta.csv | 0 {tests/input => test/fixtures}/filtered_beta.csv | 0 {tests/input => test/fixtures}/fusions.BCR_ABL1.tsv | 0 .../fixtures}/gencode.v31.chr9_chr22.gtf.gz | 0 .../fixtures}/gencode.v31.chrY_chrM.gene.bed | 0 .../fixtures}/gencode.v31.chrY_chrM.genelengths.txt | 0 .../fixtures}/gencode.v31.chrY_chrM.gtf.gz | 0 .../fixtures}/kraken2_C_elegans_library.tar.gz | 0 .../input => test/fixtures}/kraken2_db.mini.tar.gz | 0 .../input => test/fixtures}/kraken2_taxonomy.tar.gz | 0 {tests/input => test/fixtures}/random10k.r1.fq.gz | 0 {tests/input => test/fixtures}/random10k.r2.fq.gz | 0 .../fixtures}/star_db.chrY_chrM.tar.gz | 0 .../fixtures}/test.PE.2_RGs.Aligned.out.sorted.bam | 0 {tests/input => test/fixtures}/test.bam | 0 {tests/input => test/fixtures}/test.bam.bai | 0 .../fixtures}/test.bwa_aln_pe.chrY_chrM.TPM.txt | 0 .../fixtures}/test.bwa_aln_pe.chrY_chrM.bam | 0 .../fixtures}/test.bwa_aln_pe.chrY_chrM.bam.bai | 0 .../test.bwa_aln_pe.chrY_chrM.feature-counts.txt | 0 .../test.bwa_aln_pe.chrY_chrM.readlength.txt | 0 {tests/input => test/fixtures}/test.extra_RG.bam | 0 {tests/input => test/fixtures}/test.fa | 0 {tests/input => test/fixtures}/test.tar.gz | 0 .../fixtures}/test.unaccounted_read.bam | 0 {tests/input => test/fixtures}/test1.vcf.gz | 0 {tests/input => test/fixtures}/test1.vcf.gz.tbi | Bin {tests/input => test/fixtures}/test2.bam | 0 {tests/input => test/fixtures}/test2.vcf.gz | 0 {tests/input => test/fixtures}/test2.vcf.gz.tbi | Bin {tests/input => test/fixtures}/test_R1.fq.gz | 0 {tests/input => test/fixtures}/test_R2.fq.gz | 0 .../input => test/fixtures}/test_rnaseq_variant.bam | 0 .../fixtures}/test_rnaseq_variant.bam.bai | 0 .../fixtures}/test_rnaseq_variant.recal.txt | 0 {tests/input => test/fixtures}/umap.csv | 0 .../wgs_calling_regions.hg38.interval_list | 0 60 files changed, 10 deletions(-) delete mode 100644 sprocket-tests/custom/quickcheck.sh create mode 100644 test/bin/quickcheck.sh rename {tests/input => test/fixtures}/1scattered.interval_list (100%) rename {tests/input => test/fixtures}/201533520001_R01C01.beta_swan_norm_unfiltered.genomic.csv (100%) rename {tests/input => test/fixtures}/201533520001_R03C01.beta_swan_norm_unfiltered.genomic.csv (100%) rename {tests/input => test/fixtures}/201533520001_R03C01_Grn.idat (100%) rename {tests/input => test/fixtures}/201533520001_R03C01_Red.idat (100%) rename {tests/input => test/fixtures}/Aligned.sortedByCoord.chr9_chr22.bam (100%) rename {tests/input => test/fixtures}/Aligned.sortedByCoord.chr9_chr22.bam.bai (100%) rename {tests/input => test/fixtures}/GRCh38.chr1_chr19.dict (100%) rename {tests/input => test/fixtures}/GRCh38.chr1_chr19.fa (100%) rename {tests/input => test/fixtures}/GRCh38.chr1_chr19.fa.fai (100%) rename {tests/input => test/fixtures}/GRCh38.chr9_chr22.fa.gz (100%) rename {tests/input => test/fixtures}/GRCh38.chrY_chrM.bwa_db.tar.gz (100%) rename {tests/input => test/fixtures}/GRCh38.chrY_chrM.dict (100%) rename {tests/input => test/fixtures}/GRCh38.chrY_chrM.fa (100%) rename {tests/input => test/fixtures}/GRCh38.chrY_chrM.fa.fai (100%) rename {tests/input => test/fixtures}/Homo_sapiens_assembly38.dbsnp138.top5000.vcf (100%) rename {tests/input => test/fixtures}/Homo_sapiens_assembly38.dbsnp138.top5000.vcf.idx (100%) rename {tests/input => test/fixtures}/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz (100%) rename {tests/input => test/fixtures}/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz.tbi (100%) rename {tests/input => test/fixtures}/README.md (100%) rename {tests/input => test/fixtures}/chr1_chr19.interval_list (100%) rename {tests/input => test/fixtures}/combined_beta.csv (100%) rename {tests/input => test/fixtures}/filtered_beta.csv (100%) rename {tests/input => test/fixtures}/fusions.BCR_ABL1.tsv (100%) rename {tests/input => test/fixtures}/gencode.v31.chr9_chr22.gtf.gz (100%) rename {tests/input => test/fixtures}/gencode.v31.chrY_chrM.gene.bed (100%) rename {tests/input => test/fixtures}/gencode.v31.chrY_chrM.genelengths.txt (100%) rename {tests/input => test/fixtures}/gencode.v31.chrY_chrM.gtf.gz (100%) rename {tests/input => test/fixtures}/kraken2_C_elegans_library.tar.gz (100%) rename {tests/input => test/fixtures}/kraken2_db.mini.tar.gz (100%) rename {tests/input => test/fixtures}/kraken2_taxonomy.tar.gz (100%) rename {tests/input => test/fixtures}/random10k.r1.fq.gz (100%) rename {tests/input => test/fixtures}/random10k.r2.fq.gz (100%) rename {tests/input => test/fixtures}/star_db.chrY_chrM.tar.gz (100%) rename {tests/input => test/fixtures}/test.PE.2_RGs.Aligned.out.sorted.bam (100%) rename {tests/input => test/fixtures}/test.bam (100%) rename {tests/input => test/fixtures}/test.bam.bai (100%) rename {tests/input => test/fixtures}/test.bwa_aln_pe.chrY_chrM.TPM.txt (100%) rename {tests/input => test/fixtures}/test.bwa_aln_pe.chrY_chrM.bam (100%) rename {tests/input => test/fixtures}/test.bwa_aln_pe.chrY_chrM.bam.bai (100%) rename {tests/input => test/fixtures}/test.bwa_aln_pe.chrY_chrM.feature-counts.txt (100%) rename {tests/input => test/fixtures}/test.bwa_aln_pe.chrY_chrM.readlength.txt (100%) rename {tests/input => test/fixtures}/test.extra_RG.bam (100%) rename {tests/input => test/fixtures}/test.fa (100%) rename {tests/input => test/fixtures}/test.tar.gz (100%) rename {tests/input => test/fixtures}/test.unaccounted_read.bam (100%) rename {tests/input => test/fixtures}/test1.vcf.gz (100%) rename {tests/input => test/fixtures}/test1.vcf.gz.tbi (100%) rename {tests/input => test/fixtures}/test2.bam (100%) rename {tests/input => test/fixtures}/test2.vcf.gz (100%) rename {tests/input => test/fixtures}/test2.vcf.gz.tbi (100%) rename {tests/input => test/fixtures}/test_R1.fq.gz (100%) rename {tests/input => test/fixtures}/test_R2.fq.gz (100%) rename {tests/input => test/fixtures}/test_rnaseq_variant.bam (100%) rename {tests/input => test/fixtures}/test_rnaseq_variant.bam.bai (100%) rename {tests/input => test/fixtures}/test_rnaseq_variant.recal.txt (100%) rename {tests/input => test/fixtures}/umap.csv (100%) rename {tests/input => test/fixtures}/wgs_calling_regions.hg38.interval_list (100%) diff --git a/sprocket-tests/custom/quickcheck.sh b/sprocket-tests/custom/quickcheck.sh deleted file mode 100644 index 8c270cc8e..000000000 --- a/sprocket-tests/custom/quickcheck.sh +++ /dev/null @@ -1,10 +0,0 @@ -#!/bin/bash - -set -euo pipefail - -out_json=$1 - -out_bam=$(jq -r .bam "$out_json") - -samtools quickcheck "$out_bam" - diff --git a/test/bin/quickcheck.sh b/test/bin/quickcheck.sh new file mode 100644 index 000000000..e69de29bb diff --git a/tests/input/1scattered.interval_list b/test/fixtures/1scattered.interval_list similarity index 100% rename from tests/input/1scattered.interval_list rename to test/fixtures/1scattered.interval_list diff --git a/tests/input/201533520001_R01C01.beta_swan_norm_unfiltered.genomic.csv b/test/fixtures/201533520001_R01C01.beta_swan_norm_unfiltered.genomic.csv similarity index 100% rename from tests/input/201533520001_R01C01.beta_swan_norm_unfiltered.genomic.csv rename to test/fixtures/201533520001_R01C01.beta_swan_norm_unfiltered.genomic.csv diff --git a/tests/input/201533520001_R03C01.beta_swan_norm_unfiltered.genomic.csv b/test/fixtures/201533520001_R03C01.beta_swan_norm_unfiltered.genomic.csv similarity index 100% rename from tests/input/201533520001_R03C01.beta_swan_norm_unfiltered.genomic.csv rename to test/fixtures/201533520001_R03C01.beta_swan_norm_unfiltered.genomic.csv diff --git a/tests/input/201533520001_R03C01_Grn.idat b/test/fixtures/201533520001_R03C01_Grn.idat similarity index 100% rename from tests/input/201533520001_R03C01_Grn.idat rename to test/fixtures/201533520001_R03C01_Grn.idat diff --git a/tests/input/201533520001_R03C01_Red.idat b/test/fixtures/201533520001_R03C01_Red.idat similarity index 100% rename from tests/input/201533520001_R03C01_Red.idat rename to test/fixtures/201533520001_R03C01_Red.idat diff --git a/tests/input/Aligned.sortedByCoord.chr9_chr22.bam b/test/fixtures/Aligned.sortedByCoord.chr9_chr22.bam similarity index 100% rename from tests/input/Aligned.sortedByCoord.chr9_chr22.bam rename to test/fixtures/Aligned.sortedByCoord.chr9_chr22.bam diff --git a/tests/input/Aligned.sortedByCoord.chr9_chr22.bam.bai b/test/fixtures/Aligned.sortedByCoord.chr9_chr22.bam.bai similarity index 100% rename from tests/input/Aligned.sortedByCoord.chr9_chr22.bam.bai rename to test/fixtures/Aligned.sortedByCoord.chr9_chr22.bam.bai diff --git a/tests/input/GRCh38.chr1_chr19.dict b/test/fixtures/GRCh38.chr1_chr19.dict similarity index 100% rename from tests/input/GRCh38.chr1_chr19.dict rename to test/fixtures/GRCh38.chr1_chr19.dict diff --git a/tests/input/GRCh38.chr1_chr19.fa b/test/fixtures/GRCh38.chr1_chr19.fa similarity index 100% rename from tests/input/GRCh38.chr1_chr19.fa rename to test/fixtures/GRCh38.chr1_chr19.fa diff --git a/tests/input/GRCh38.chr1_chr19.fa.fai b/test/fixtures/GRCh38.chr1_chr19.fa.fai similarity index 100% rename from tests/input/GRCh38.chr1_chr19.fa.fai rename to test/fixtures/GRCh38.chr1_chr19.fa.fai diff --git a/tests/input/GRCh38.chr9_chr22.fa.gz b/test/fixtures/GRCh38.chr9_chr22.fa.gz similarity index 100% rename from tests/input/GRCh38.chr9_chr22.fa.gz rename to test/fixtures/GRCh38.chr9_chr22.fa.gz diff --git a/tests/input/GRCh38.chrY_chrM.bwa_db.tar.gz b/test/fixtures/GRCh38.chrY_chrM.bwa_db.tar.gz similarity index 100% rename from tests/input/GRCh38.chrY_chrM.bwa_db.tar.gz rename to test/fixtures/GRCh38.chrY_chrM.bwa_db.tar.gz diff --git a/tests/input/GRCh38.chrY_chrM.dict b/test/fixtures/GRCh38.chrY_chrM.dict similarity index 100% rename from tests/input/GRCh38.chrY_chrM.dict rename to test/fixtures/GRCh38.chrY_chrM.dict diff --git a/tests/input/GRCh38.chrY_chrM.fa b/test/fixtures/GRCh38.chrY_chrM.fa similarity index 100% rename from tests/input/GRCh38.chrY_chrM.fa rename to test/fixtures/GRCh38.chrY_chrM.fa diff --git a/tests/input/GRCh38.chrY_chrM.fa.fai b/test/fixtures/GRCh38.chrY_chrM.fa.fai similarity index 100% rename from tests/input/GRCh38.chrY_chrM.fa.fai rename to test/fixtures/GRCh38.chrY_chrM.fa.fai diff --git a/tests/input/Homo_sapiens_assembly38.dbsnp138.top5000.vcf b/test/fixtures/Homo_sapiens_assembly38.dbsnp138.top5000.vcf similarity index 100% rename from tests/input/Homo_sapiens_assembly38.dbsnp138.top5000.vcf rename to test/fixtures/Homo_sapiens_assembly38.dbsnp138.top5000.vcf diff --git a/tests/input/Homo_sapiens_assembly38.dbsnp138.top5000.vcf.idx b/test/fixtures/Homo_sapiens_assembly38.dbsnp138.top5000.vcf.idx similarity index 100% rename from tests/input/Homo_sapiens_assembly38.dbsnp138.top5000.vcf.idx rename to test/fixtures/Homo_sapiens_assembly38.dbsnp138.top5000.vcf.idx diff --git a/tests/input/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz b/test/fixtures/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz similarity index 100% rename from tests/input/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz rename to test/fixtures/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz diff --git a/tests/input/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz.tbi b/test/fixtures/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz.tbi similarity index 100% rename from tests/input/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz.tbi rename to test/fixtures/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz.tbi diff --git a/tests/input/README.md b/test/fixtures/README.md similarity index 100% rename from tests/input/README.md rename to test/fixtures/README.md diff --git a/tests/input/chr1_chr19.interval_list b/test/fixtures/chr1_chr19.interval_list similarity index 100% rename from tests/input/chr1_chr19.interval_list rename to test/fixtures/chr1_chr19.interval_list diff --git a/tests/input/combined_beta.csv b/test/fixtures/combined_beta.csv similarity index 100% rename from tests/input/combined_beta.csv rename to test/fixtures/combined_beta.csv diff --git a/tests/input/filtered_beta.csv b/test/fixtures/filtered_beta.csv similarity index 100% rename from tests/input/filtered_beta.csv rename to test/fixtures/filtered_beta.csv diff --git a/tests/input/fusions.BCR_ABL1.tsv b/test/fixtures/fusions.BCR_ABL1.tsv similarity index 100% rename from tests/input/fusions.BCR_ABL1.tsv rename to test/fixtures/fusions.BCR_ABL1.tsv diff --git a/tests/input/gencode.v31.chr9_chr22.gtf.gz b/test/fixtures/gencode.v31.chr9_chr22.gtf.gz similarity index 100% rename from tests/input/gencode.v31.chr9_chr22.gtf.gz rename to test/fixtures/gencode.v31.chr9_chr22.gtf.gz diff --git a/tests/input/gencode.v31.chrY_chrM.gene.bed b/test/fixtures/gencode.v31.chrY_chrM.gene.bed similarity index 100% rename from tests/input/gencode.v31.chrY_chrM.gene.bed rename to test/fixtures/gencode.v31.chrY_chrM.gene.bed diff --git a/tests/input/gencode.v31.chrY_chrM.genelengths.txt b/test/fixtures/gencode.v31.chrY_chrM.genelengths.txt similarity index 100% rename from tests/input/gencode.v31.chrY_chrM.genelengths.txt rename to test/fixtures/gencode.v31.chrY_chrM.genelengths.txt diff --git a/tests/input/gencode.v31.chrY_chrM.gtf.gz b/test/fixtures/gencode.v31.chrY_chrM.gtf.gz similarity index 100% rename from tests/input/gencode.v31.chrY_chrM.gtf.gz rename to test/fixtures/gencode.v31.chrY_chrM.gtf.gz diff --git a/tests/input/kraken2_C_elegans_library.tar.gz b/test/fixtures/kraken2_C_elegans_library.tar.gz similarity index 100% rename from tests/input/kraken2_C_elegans_library.tar.gz rename to test/fixtures/kraken2_C_elegans_library.tar.gz diff --git a/tests/input/kraken2_db.mini.tar.gz b/test/fixtures/kraken2_db.mini.tar.gz similarity index 100% rename from tests/input/kraken2_db.mini.tar.gz rename to test/fixtures/kraken2_db.mini.tar.gz diff --git a/tests/input/kraken2_taxonomy.tar.gz b/test/fixtures/kraken2_taxonomy.tar.gz similarity index 100% rename from tests/input/kraken2_taxonomy.tar.gz rename to test/fixtures/kraken2_taxonomy.tar.gz diff --git a/tests/input/random10k.r1.fq.gz b/test/fixtures/random10k.r1.fq.gz similarity index 100% rename from tests/input/random10k.r1.fq.gz rename to test/fixtures/random10k.r1.fq.gz diff --git a/tests/input/random10k.r2.fq.gz b/test/fixtures/random10k.r2.fq.gz similarity index 100% rename from tests/input/random10k.r2.fq.gz rename to test/fixtures/random10k.r2.fq.gz diff --git a/tests/input/star_db.chrY_chrM.tar.gz b/test/fixtures/star_db.chrY_chrM.tar.gz similarity index 100% rename from tests/input/star_db.chrY_chrM.tar.gz rename to test/fixtures/star_db.chrY_chrM.tar.gz diff --git a/tests/input/test.PE.2_RGs.Aligned.out.sorted.bam b/test/fixtures/test.PE.2_RGs.Aligned.out.sorted.bam similarity index 100% rename from tests/input/test.PE.2_RGs.Aligned.out.sorted.bam rename to test/fixtures/test.PE.2_RGs.Aligned.out.sorted.bam diff --git a/tests/input/test.bam b/test/fixtures/test.bam similarity index 100% rename from tests/input/test.bam rename to test/fixtures/test.bam diff --git a/tests/input/test.bam.bai b/test/fixtures/test.bam.bai similarity index 100% rename from tests/input/test.bam.bai rename to test/fixtures/test.bam.bai diff --git a/tests/input/test.bwa_aln_pe.chrY_chrM.TPM.txt b/test/fixtures/test.bwa_aln_pe.chrY_chrM.TPM.txt similarity index 100% rename from tests/input/test.bwa_aln_pe.chrY_chrM.TPM.txt rename to test/fixtures/test.bwa_aln_pe.chrY_chrM.TPM.txt diff --git a/tests/input/test.bwa_aln_pe.chrY_chrM.bam b/test/fixtures/test.bwa_aln_pe.chrY_chrM.bam similarity index 100% rename from tests/input/test.bwa_aln_pe.chrY_chrM.bam rename to test/fixtures/test.bwa_aln_pe.chrY_chrM.bam diff --git a/tests/input/test.bwa_aln_pe.chrY_chrM.bam.bai b/test/fixtures/test.bwa_aln_pe.chrY_chrM.bam.bai similarity index 100% rename from tests/input/test.bwa_aln_pe.chrY_chrM.bam.bai rename to test/fixtures/test.bwa_aln_pe.chrY_chrM.bam.bai diff --git a/tests/input/test.bwa_aln_pe.chrY_chrM.feature-counts.txt b/test/fixtures/test.bwa_aln_pe.chrY_chrM.feature-counts.txt similarity index 100% rename from tests/input/test.bwa_aln_pe.chrY_chrM.feature-counts.txt rename to test/fixtures/test.bwa_aln_pe.chrY_chrM.feature-counts.txt diff --git a/tests/input/test.bwa_aln_pe.chrY_chrM.readlength.txt b/test/fixtures/test.bwa_aln_pe.chrY_chrM.readlength.txt similarity index 100% rename from tests/input/test.bwa_aln_pe.chrY_chrM.readlength.txt rename to test/fixtures/test.bwa_aln_pe.chrY_chrM.readlength.txt diff --git a/tests/input/test.extra_RG.bam b/test/fixtures/test.extra_RG.bam similarity index 100% rename from tests/input/test.extra_RG.bam rename to test/fixtures/test.extra_RG.bam diff --git a/tests/input/test.fa b/test/fixtures/test.fa similarity index 100% rename from tests/input/test.fa rename to test/fixtures/test.fa diff --git a/tests/input/test.tar.gz b/test/fixtures/test.tar.gz similarity index 100% rename from tests/input/test.tar.gz rename to test/fixtures/test.tar.gz diff --git a/tests/input/test.unaccounted_read.bam b/test/fixtures/test.unaccounted_read.bam similarity index 100% rename from tests/input/test.unaccounted_read.bam rename to test/fixtures/test.unaccounted_read.bam diff --git a/tests/input/test1.vcf.gz b/test/fixtures/test1.vcf.gz similarity index 100% rename from tests/input/test1.vcf.gz rename to test/fixtures/test1.vcf.gz diff --git a/tests/input/test1.vcf.gz.tbi b/test/fixtures/test1.vcf.gz.tbi similarity index 100% rename from tests/input/test1.vcf.gz.tbi rename to test/fixtures/test1.vcf.gz.tbi diff --git a/tests/input/test2.bam b/test/fixtures/test2.bam similarity index 100% rename from tests/input/test2.bam rename to test/fixtures/test2.bam diff --git a/tests/input/test2.vcf.gz b/test/fixtures/test2.vcf.gz similarity index 100% rename from tests/input/test2.vcf.gz rename to test/fixtures/test2.vcf.gz diff --git a/tests/input/test2.vcf.gz.tbi b/test/fixtures/test2.vcf.gz.tbi similarity index 100% rename from tests/input/test2.vcf.gz.tbi rename to test/fixtures/test2.vcf.gz.tbi diff --git a/tests/input/test_R1.fq.gz b/test/fixtures/test_R1.fq.gz similarity index 100% rename from tests/input/test_R1.fq.gz rename to test/fixtures/test_R1.fq.gz diff --git a/tests/input/test_R2.fq.gz b/test/fixtures/test_R2.fq.gz similarity index 100% rename from tests/input/test_R2.fq.gz rename to test/fixtures/test_R2.fq.gz diff --git a/tests/input/test_rnaseq_variant.bam b/test/fixtures/test_rnaseq_variant.bam similarity index 100% rename from tests/input/test_rnaseq_variant.bam rename to test/fixtures/test_rnaseq_variant.bam diff --git a/tests/input/test_rnaseq_variant.bam.bai b/test/fixtures/test_rnaseq_variant.bam.bai similarity index 100% rename from tests/input/test_rnaseq_variant.bam.bai rename to test/fixtures/test_rnaseq_variant.bam.bai diff --git a/tests/input/test_rnaseq_variant.recal.txt b/test/fixtures/test_rnaseq_variant.recal.txt similarity index 100% rename from tests/input/test_rnaseq_variant.recal.txt rename to test/fixtures/test_rnaseq_variant.recal.txt diff --git a/tests/input/umap.csv b/test/fixtures/umap.csv similarity index 100% rename from tests/input/umap.csv rename to test/fixtures/umap.csv diff --git a/tests/input/wgs_calling_regions.hg38.interval_list b/test/fixtures/wgs_calling_regions.hg38.interval_list similarity index 100% rename from tests/input/wgs_calling_regions.hg38.interval_list rename to test/fixtures/wgs_calling_regions.hg38.interval_list From d89fd65ae5e2de848b77e0f2aebc6385c17e9284 Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Thu, 11 Dec 2025 09:51:05 -0500 Subject: [PATCH 09/37] picard: lower merge mem for testing --- tools/picard.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/picard.wdl b/tools/picard.wdl index 07324ebe2..33927919b 100755 --- a/tools/picard.wdl +++ b/tools/picard.wdl @@ -385,7 +385,7 @@ task merge_sam_files { String sort_order = "coordinate" String validation_stringency = "SILENT" Boolean threading = true - Int memory_gb = 40 + Int memory_gb = 10 Int modify_disk_size_gb = 0 } From 81d9608417005ebdf8fda57c69b9ba3f8af56cd3 Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Thu, 11 Dec 2025 09:51:19 -0500 Subject: [PATCH 10/37] point to new fixtures --- tools/picard.yaml | 2 +- tools/samtools.yaml | 50 +++++++++------------------------------------ 2 files changed, 11 insertions(+), 41 deletions(-) diff --git a/tools/picard.yaml b/tools/picard.yaml index f2b547bb4..7d031e5dd 100644 --- a/tools/picard.yaml +++ b/tools/picard.yaml @@ -2,7 +2,7 @@ merge_sam_files: - name: Merge works inputs: bams: - - [$FIXTURES/test1.bam, $FIXTURES/test2.bam] + - [test.bwa_aln_pe.chrY_chrM.bam, test.PE.2_RGs.Aligned.out.sorted.bam] prefix: - test.merged assertions: diff --git a/tools/samtools.yaml b/tools/samtools.yaml index 8f6e5276f..2f75fdb52 100644 --- a/tools/samtools.yaml +++ b/tools/samtools.yaml @@ -1,15 +1,11 @@ bam_to_fastq: - name: kitchen_sink inputs: - $files: - bam: - - $FIXTURES/test1.bam - - $FIXTURES/test2.bam - - $FIXTURES/test3.bam - bam_index: - - $FIXTURES/test1.bam.bai - - $FIXTURES/test2.bam.bai - - $FIXTURES/test3.bam.bai + bam: + - Aligned.sortedByCoord.chr9_chr22.bam + - test.bam + - test_rnaseq_variant.bam + - test.bwa_aln_pe.chrY_chrM.bam bitwise_filter: - include_if_all: "0x0" exclude_if_any: "0x900" @@ -38,34 +34,8 @@ bam_to_fastq: output_singletons: - true - false - $files: - bam: - - $FIXTURES/test1.bam - - $FIXTURES/test2.bam - bam_index: - - $FIXTURES/test1.bam.bai - - $FIXTURES/test2.bam.bai - - name: not as simple - inputs: - output_singletons: - - true - - false - $files: - bam: - - $FIXTURES/test1.bam - - $FIXTURES/test2.bam - - $FIXTURES/test3.bam - bam_index: - - $FIXTURES/test1.bam.bai - - $FIXTURES/test2.bam.bai - - $FIXTURES/test3.bam.bai - $ref: - ref_fasta: - - hg19.fasta - - GRCh38.fasta - ref_fasta_index: - - hg19.fa.fai - - GRCh38.fa.fai - prefix: - - not_as_simple - + bam: + - Aligned.sortedByCoord.chr9_chr22.bam + - test.bam + - test_rnaseq_variant.bam + - test.bwa_aln_pe.chrY_chrM.bam From 5d97f28c6fb2cf7d8e1bd1b6fde83d5a5ac05d82 Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Mon, 15 Dec 2025 07:36:47 -0500 Subject: [PATCH 11/37] Revert "picard: lower merge mem for testing" This reverts commit d89fd65ae5e2de848b77e0f2aebc6385c17e9284. --- tools/picard.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/picard.wdl b/tools/picard.wdl index 33927919b..07324ebe2 100755 --- a/tools/picard.wdl +++ b/tools/picard.wdl @@ -385,7 +385,7 @@ task merge_sam_files { String sort_order = "coordinate" String validation_stringency = "SILENT" Boolean threading = true - Int memory_gb = 10 + Int memory_gb = 40 Int modify_disk_size_gb = 0 } From 1b7235dd45737f977edabb1fc3aa1b66b3ebb9dd Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Sat, 27 Dec 2025 16:43:34 -0500 Subject: [PATCH 12/37] WIP --- data_structures/read_group.yml | 13 ------------- data_structures/{ => test}/flag_filter.yaml | 11 +++++------ data_structures/test/read_group.yaml | 11 +++++++++++ tools/{ => test}/picard.yaml | 0 tools/{ => test}/samtools.yaml | 2 -- 5 files changed, 16 insertions(+), 21 deletions(-) delete mode 100644 data_structures/read_group.yml rename data_structures/{ => test}/flag_filter.yaml (79%) create mode 100644 data_structures/test/read_group.yaml rename tools/{ => test}/picard.yaml (100%) rename tools/{ => test}/samtools.yaml (96%) diff --git a/data_structures/read_group.yml b/data_structures/read_group.yml deleted file mode 100644 index c92e5c2b0..000000000 --- a/data_structures/read_group.yml +++ /dev/null @@ -1,13 +0,0 @@ -# Note this file has the extension `.yml` while other tests end with `.yaml`. -# This is an intentional test that both extensions work. -read_group_to_string: - - name: bad_id - inputs: - read_group: - - ID: id, - SM: sample_a - LB: library - assertions: - exit_code: 1 - stdout: - contains: ID (id) must not match pattern diff --git a/data_structures/flag_filter.yaml b/data_structures/test/flag_filter.yaml similarity index 79% rename from data_structures/flag_filter.yaml rename to data_structures/test/flag_filter.yaml index 1a667700a..4da14bcdd 100644 --- a/data_structures/flag_filter.yaml +++ b/data_structures/test/flag_filter.yaml @@ -8,8 +8,8 @@ validate_string_is_12bit_int: number: - "0x900" assertions: - stdout: - contains: Input number (0x900) is valid + stderr: + - Input number \(.*\) is valid - name: too_big_hexadecimal_fails inputs: number: @@ -17,7 +17,7 @@ validate_string_is_12bit_int: assertions: exit_code: 42 stderr: - contains: Input number (0x1000) is invalid + - Input number \(.*\) is invalid - name: too_big_decimal_fails inputs: number: @@ -25,9 +25,8 @@ validate_string_is_12bit_int: assertions: exit_code: 42 stderr: - contains: - - Input number (4096) interpreted as decimal - - But number must be less than 4096! + - Input number \(.*\) interpreted as decimal + - But number must be less than 4096! validate_flag_filter: - name: valid_FlagFilter_passes inputs: diff --git a/data_structures/test/read_group.yaml b/data_structures/test/read_group.yaml new file mode 100644 index 000000000..a4367d8af --- /dev/null +++ b/data_structures/test/read_group.yaml @@ -0,0 +1,11 @@ +read_group_to_string: + - name: bad_id + inputs: + read_group: + - ID: id, + SM: sample_a + LB: library + assertions: + exit_code: 1 + stdout: + - ID (.*) must not match pattern diff --git a/tools/picard.yaml b/tools/test/picard.yaml similarity index 100% rename from tools/picard.yaml rename to tools/test/picard.yaml diff --git a/tools/samtools.yaml b/tools/test/samtools.yaml similarity index 96% rename from tools/samtools.yaml rename to tools/test/samtools.yaml index 2f75fdb52..feca44f2d 100644 --- a/tools/samtools.yaml +++ b/tools/test/samtools.yaml @@ -3,7 +3,6 @@ bam_to_fastq: inputs: bam: - Aligned.sortedByCoord.chr9_chr22.bam - - test.bam - test_rnaseq_variant.bam - test.bwa_aln_pe.chrY_chrM.bam bitwise_filter: @@ -36,6 +35,5 @@ bam_to_fastq: - false bam: - Aligned.sortedByCoord.chr9_chr22.bam - - test.bam - test_rnaseq_variant.bam - test.bwa_aln_pe.chrY_chrM.bam From 881fa7aa7a53eaf0c99062337d6244fe93f5d5c9 Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Sun, 28 Dec 2025 08:46:42 -0500 Subject: [PATCH 13/37] delete pytest folder --- .../input_json/get_read_groups.json | 3 - .../input_json/read_group_bad_id.json | 18 -- .../input_json/read_group_bad_sample.json | 18 -- .../input_json/read_group_good.json | 18 -- .../input_json/read_group_missing_sample.json | 8 - .../read_group_sample_with_space.json | 18 -- tests/data_structures/test_flag_filter.yaml | 74 -------- tests/data_structures/test_read_group.yaml | 51 ------ tests/tools/input_json/arriba.json | 9 - tests/tools/input_json/bwa_aln.json | 5 - tests/tools/input_json/bwa_aln_pe.json | 6 - tests/tools/input_json/bwa_mem.json | 5 - tests/tools/input_json/calc_tpm.json | 4 - tests/tools/input_json/gatk4_apply_bqsr.json | 5 - .../input_json/gatk4_base_recalibrator.json | 15 -- .../input_json/gatk4_haplotype_caller.json | 10 -- .../input_json/gatk4_split_n_cigar_reads.json | 8 - .../input_json/gatk4_variant_filtration.json | 7 - tests/tools/input_json/kraken_build_db.json | 6 - .../kraken_create_library_from_fastas.json | 5 - tests/tools/input_json/multiqc.json | 6 - tests/tools/input_json/multiqc_empty.json | 4 - .../tools/input_json/ngsderive_encoding.json | 6 - .../input_json/picard_merge_sam_files.json | 7 - tests/tools/input_json/picard_merge_vcfs.json | 11 -- tests/tools/input_json/sambamba_merge.json | 7 - tests/tools/input_json/samtools_merge.json | 7 - .../tools/input_json/samtools_subsample.json | 4 - .../input_json/star_alignment_PE_1_pair.json | 13 -- .../input_json/star_alignment_PE_2_pairs.json | 16 -- tests/tools/input_json/star_alignment_SE.json | 10 -- .../input_json/util_add_to_bam_header.json | 4 - tests/tools/input_json/util_download.json | 6 - tests/tools/input_json/util_split_string.json | 4 - tests/tools/test_arriba.yaml | 40 ----- tests/tools/test_bwa.yaml | 40 ----- tests/tools/test_deeptools.yaml | 9 - tests/tools/test_fastp.yaml | 12 -- tests/tools/test_fastqc.yaml | 10 -- tests/tools/test_fq.yaml | 27 --- tests/tools/test_gatk4.yaml | 65 ------- tests/tools/test_htseq.yaml | 19 -- tests/tools/test_kraken2.yaml | 55 ------ tests/tools/test_librarian.yaml | 10 -- tests/tools/test_md5sum.yaml | 11 -- tests/tools/test_mosdepth.yaml | 10 -- tests/tools/test_multiqc.yaml | 20 --- tests/tools/test_ngsderive.yaml | 68 -------- tests/tools/test_picard.yaml | 163 ------------------ tests/tools/test_qualimap.yaml | 11 -- tests/tools/test_sambamba.yaml | 51 ------ tests/tools/test_samtools.py | 53 ------ tests/tools/test_samtools.yaml | 125 -------------- tests/tools/test_star.yaml | 43 ----- tests/tools/test_util.yaml | 94 ---------- .../_test_methylation-preprocess.yaml | 15 -- tests/workflows/input_json/combine_data.json | 7 - .../input_json/dnaseq-standard-fastq.json | 18 -- .../workflows/input_json/dnaseq-standard.json | 4 - tests/workflows/input_json/filter_probes.json | 4 - tests/workflows/input_json/generate_umap.json | 3 - tests/workflows/input_json/plot_umap.json | 3 - .../input_json/process_raw_idats.json | 6 - tests/workflows/input_json/qc-standard.json | 10 -- .../input_json/rnaseq-standard-fastq.json | 21 --- .../workflows/input_json/rnaseq-standard.json | 6 - .../input_json/rnaseq-variant-calling.json | 16 -- tests/workflows/test_methylation-cohort.yaml | 40 ----- 68 files changed, 1487 deletions(-) delete mode 100644 tests/data_structures/input_json/get_read_groups.json delete mode 100644 tests/data_structures/input_json/read_group_bad_id.json delete mode 100644 tests/data_structures/input_json/read_group_bad_sample.json delete mode 100644 tests/data_structures/input_json/read_group_good.json delete mode 100644 tests/data_structures/input_json/read_group_missing_sample.json delete mode 100644 tests/data_structures/input_json/read_group_sample_with_space.json delete mode 100644 tests/data_structures/test_flag_filter.yaml delete mode 100644 tests/data_structures/test_read_group.yaml delete mode 100644 tests/tools/input_json/arriba.json delete mode 100644 tests/tools/input_json/bwa_aln.json delete mode 100644 tests/tools/input_json/bwa_aln_pe.json delete mode 100644 tests/tools/input_json/bwa_mem.json delete mode 100644 tests/tools/input_json/calc_tpm.json delete mode 100644 tests/tools/input_json/gatk4_apply_bqsr.json delete mode 100644 tests/tools/input_json/gatk4_base_recalibrator.json delete mode 100644 tests/tools/input_json/gatk4_haplotype_caller.json delete mode 100644 tests/tools/input_json/gatk4_split_n_cigar_reads.json delete mode 100644 tests/tools/input_json/gatk4_variant_filtration.json delete mode 100644 tests/tools/input_json/kraken_build_db.json delete mode 100644 tests/tools/input_json/kraken_create_library_from_fastas.json delete mode 100644 tests/tools/input_json/multiqc.json delete mode 100644 tests/tools/input_json/multiqc_empty.json delete mode 100644 tests/tools/input_json/ngsderive_encoding.json delete mode 100644 tests/tools/input_json/picard_merge_sam_files.json delete mode 100644 tests/tools/input_json/picard_merge_vcfs.json delete mode 100644 tests/tools/input_json/sambamba_merge.json delete mode 100644 tests/tools/input_json/samtools_merge.json delete mode 100644 tests/tools/input_json/samtools_subsample.json delete mode 100644 tests/tools/input_json/star_alignment_PE_1_pair.json delete mode 100644 tests/tools/input_json/star_alignment_PE_2_pairs.json delete mode 100644 tests/tools/input_json/star_alignment_SE.json delete mode 100644 tests/tools/input_json/util_add_to_bam_header.json delete mode 100644 tests/tools/input_json/util_download.json delete mode 100644 tests/tools/input_json/util_split_string.json delete mode 100644 tests/tools/test_arriba.yaml delete mode 100644 tests/tools/test_bwa.yaml delete mode 100644 tests/tools/test_deeptools.yaml delete mode 100644 tests/tools/test_fastp.yaml delete mode 100644 tests/tools/test_fastqc.yaml delete mode 100644 tests/tools/test_fq.yaml delete mode 100644 tests/tools/test_gatk4.yaml delete mode 100644 tests/tools/test_htseq.yaml delete mode 100644 tests/tools/test_kraken2.yaml delete mode 100644 tests/tools/test_librarian.yaml delete mode 100644 tests/tools/test_md5sum.yaml delete mode 100644 tests/tools/test_mosdepth.yaml delete mode 100644 tests/tools/test_multiqc.yaml delete mode 100644 tests/tools/test_ngsderive.yaml delete mode 100644 tests/tools/test_picard.yaml delete mode 100644 tests/tools/test_qualimap.yaml delete mode 100644 tests/tools/test_sambamba.yaml delete mode 100644 tests/tools/test_samtools.py delete mode 100644 tests/tools/test_samtools.yaml delete mode 100644 tests/tools/test_star.yaml delete mode 100644 tests/tools/test_util.yaml delete mode 100644 tests/workflows/_test_methylation-preprocess.yaml delete mode 100644 tests/workflows/input_json/combine_data.json delete mode 100644 tests/workflows/input_json/dnaseq-standard-fastq.json delete mode 100644 tests/workflows/input_json/dnaseq-standard.json delete mode 100644 tests/workflows/input_json/filter_probes.json delete mode 100644 tests/workflows/input_json/generate_umap.json delete mode 100644 tests/workflows/input_json/plot_umap.json delete mode 100644 tests/workflows/input_json/process_raw_idats.json delete mode 100644 tests/workflows/input_json/qc-standard.json delete mode 100644 tests/workflows/input_json/rnaseq-standard-fastq.json delete mode 100644 tests/workflows/input_json/rnaseq-standard.json delete mode 100644 tests/workflows/input_json/rnaseq-variant-calling.json delete mode 100644 tests/workflows/test_methylation-cohort.yaml diff --git a/tests/data_structures/input_json/get_read_groups.json b/tests/data_structures/input_json/get_read_groups.json deleted file mode 100644 index 9b3f1bbc7..000000000 --- a/tests/data_structures/input_json/get_read_groups.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "get_read_groups.bam": "https://github.com/stjude/CICERO/raw/master/test/data/input/test.bam" -} \ No newline at end of file diff --git a/tests/data_structures/input_json/read_group_bad_id.json b/tests/data_structures/input_json/read_group_bad_id.json deleted file mode 100644 index 41d05152c..000000000 --- a/tests/data_structures/input_json/read_group_bad_id.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "read_group_to_string.read_group":{ - "ID": "id", - "SM": "sample_a", - "LB": "library", - "BC": "barcode", - "PU": "platform_unit", - "PL": "ILLUMINA", - "CN": "center_name", - "DT": "date", - "DS": "description", - "PI": 1, - "PG": "program_group", - "PM": "platform_model", - "FO": "ACMG", - "KS": "key_sequence" - } -} \ No newline at end of file diff --git a/tests/data_structures/input_json/read_group_bad_sample.json b/tests/data_structures/input_json/read_group_bad_sample.json deleted file mode 100644 index 452d17b8d..000000000 --- a/tests/data_structures/input_json/read_group_bad_sample.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "read_group_to_string.read_group":{ - "ID": "R123", - "SM": "sample1", - "LB": "library", - "BC": "barcode", - "PU": "platform_unit", - "PL": "ILLUMINA", - "CN": "center_name", - "DT": "date", - "DS": "description", - "PI": 1, - "PG": "program_group", - "PM": "platform_model", - "FO": "ACMG", - "KS": "key_sequence" - } -} \ No newline at end of file diff --git a/tests/data_structures/input_json/read_group_good.json b/tests/data_structures/input_json/read_group_good.json deleted file mode 100644 index 81753f65d..000000000 --- a/tests/data_structures/input_json/read_group_good.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "read_group_to_string.read_group":{ - "ID": "R123", - "SM": "IPSC-1", - "LB": "library", - "BC": "barcode", - "PU": "platform_unit", - "PL": "ILLUMINA", - "CN": "center_name", - "DT": "date", - "DS": "description", - "PI": 1, - "PG": "program_group", - "PM": "platform_model", - "FO": "ACMG", - "KS": "key_sequence" - } -} \ No newline at end of file diff --git a/tests/data_structures/input_json/read_group_missing_sample.json b/tests/data_structures/input_json/read_group_missing_sample.json deleted file mode 100644 index 8a23be66a..000000000 --- a/tests/data_structures/input_json/read_group_missing_sample.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "read_group_to_string.read_group":{ - "ID": "id" - }, - "read_group_to_string.required_fields": [ - "SM" - ] -} \ No newline at end of file diff --git a/tests/data_structures/input_json/read_group_sample_with_space.json b/tests/data_structures/input_json/read_group_sample_with_space.json deleted file mode 100644 index f0c7d88d0..000000000 --- a/tests/data_structures/input_json/read_group_sample_with_space.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "read_group_to_string.read_group": { - "ID": "myID", - "SM": "sample a", - "LB": "library", - "BC": "barcode", - "PU": "platform_unit", - "PL": "ILLUMINA", - "CN": "center_name", - "DT": "date", - "DS": "description", - "PI": 1, - "PG": "program_group", - "PM": "platform_model", - "FO": "ACMG", - "KS": "key_sequence" - } -} \ No newline at end of file diff --git a/tests/data_structures/test_flag_filter.yaml b/tests/data_structures/test_flag_filter.yaml deleted file mode 100644 index 20a1fb4d5..000000000 --- a/tests/data_structures/test_flag_filter.yaml +++ /dev/null @@ -1,74 +0,0 @@ -- name: flag_filter_0x900 - tags: - - flag_filter - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t validate_string_is_12bit_int data_structures/flag_filter.wdl number="0x900" - -- name: flag_filter_5 - tags: - - flag_filter - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t validate_string_is_12bit_int data_structures/flag_filter.wdl number="5" - -- name: flag_filter_01 - tags: - - flag_filter - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t validate_string_is_12bit_int data_structures/flag_filter.wdl number="01" - -- name: flag_filter_0x1000 - tags: - - flag_filter - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t validate_string_is_12bit_int data_structures/flag_filter.wdl number="0x1000" - exit_code: 1 - stderr: - contains: - - "Input number (0x1000) is invalid" - -- name: flag_filter_neg1 - tags: - - flag_filter - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t validate_string_is_12bit_int data_structures/flag_filter.wdl number="-1" - exit_code: 1 - stderr: - contains: - - "Input number (-1) is invalid" - -- name: flag_filter_4096 - tags: - - flag_filter - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t validate_string_is_12bit_int data_structures/flag_filter.wdl number="4096" - exit_code: 1 - stderr: - contains: - - "Input number (4096) interpreted as decimal" - - "But number must be less than 4096!" - -- name: flag_filter_4095 - tags: - - flag_filter - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t validate_string_is_12bit_int data_structures/flag_filter.wdl number="4095" - -- name: flag_filter_string - tags: - - flag_filter - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t validate_string_is_12bit_int data_structures/flag_filter.wdl number="string" - exit_code: 1 - stderr: - contains: - - "Input number (string) is invalid" - -- name: flag_filter_empty_string - tags: - - flag_filter - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t validate_string_is_12bit_int data_structures/flag_filter.wdl number="" - exit_code: 1 - stderr: - contains: - - "Input number () is invalid" diff --git a/tests/data_structures/test_read_group.yaml b/tests/data_structures/test_read_group.yaml deleted file mode 100644 index 41d09e543..000000000 --- a/tests/data_structures/test_read_group.yaml +++ /dev/null @@ -1,51 +0,0 @@ -- name: read_group_bad_id - tags: - - read_group - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -w read_group_to_string -i tests/data_structures/input_json/read_group_bad_id.json data_structures/read_group.wdl - exit_code: 1 - stderr: - contains: - - "ID (id) must not match pattern" - -- name: read_group_bad_sample - tags: - - read_group - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -w read_group_to_string -i tests/data_structures/input_json/read_group_bad_sample.json data_structures/read_group.wdl - exit_code: 1 - stderr: - contains: - - "SM must not match pattern" - -- name: read_group_good - tags: - - read_group - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -w read_group_to_string -i tests/data_structures/input_json/read_group_good.json data_structures/read_group.wdl - -- name: read_group_missing_sample - tags: - - read_group - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -w read_group_to_string -i tests/data_structures/input_json/read_group_missing_sample.json data_structures/read_group.wdl - exit_code: 1 - stderr: - contains: - - "SM is required" - -- name: read_group_sample_with_space - tags: - - read_group - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -w read_group_to_string -i tests/data_structures/input_json/read_group_sample_with_space.json data_structures/read_group.wdl - exit_code: 1 - stderr: - contains: - - "SM must not match pattern" - -- name: get_read_groups - tags: - - read_group - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t get_read_groups -i tests/data_structures/input_json/get_read_groups.json data_structures/read_group.wdl \ No newline at end of file diff --git a/tests/tools/input_json/arriba.json b/tests/tools/input_json/arriba.json deleted file mode 100644 index 0721bbd93..000000000 --- a/tests/tools/input_json/arriba.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "arriba.bam": "../../input/Aligned.sortedByCoord.chr9_chr22.bam", - "arriba.gtf": "../../input/gencode.v31.chr9_chr22.gtf.gz", - "arriba.reference_fasta_gz": "../../input/GRCh38.chr9_chr22.fa.gz", - "arriba.disable_filters": [ - "blacklist" - ], - "arriba.prefix": "fusions" -} \ No newline at end of file diff --git a/tests/tools/input_json/bwa_aln.json b/tests/tools/input_json/bwa_aln.json deleted file mode 100644 index 3b59db584..000000000 --- a/tests/tools/input_json/bwa_aln.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "bwa_aln.fastq": "../../input/test_R1.fq.gz", - "bwa_aln.read_group": "@RG\\tID:test\\tSM:test", - "bwa_aln.bwa_db_tar_gz": "../../input/GRCh38.chrY_chrM.bwa_db.tar.gz" -} \ No newline at end of file diff --git a/tests/tools/input_json/bwa_aln_pe.json b/tests/tools/input_json/bwa_aln_pe.json deleted file mode 100644 index f878cb118..000000000 --- a/tests/tools/input_json/bwa_aln_pe.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "bwa_aln_pe.read_one_fastq_gz": "../../input/test_R1.fq.gz", - "bwa_aln_pe.read_two_fastq_gz": "../../input/test_R2.fq.gz", - "bwa_aln_pe.read_group": "@RG\\tID:test\\tSM:test", - "bwa_aln_pe.bwa_db_tar_gz": "../../input/GRCh38.chrY_chrM.bwa_db.tar.gz" -} \ No newline at end of file diff --git a/tests/tools/input_json/bwa_mem.json b/tests/tools/input_json/bwa_mem.json deleted file mode 100644 index 4563e2b8b..000000000 --- a/tests/tools/input_json/bwa_mem.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "bwa_mem.read_one_fastq_gz": "../../input/test_R1.fq.gz", - "bwa_mem.read_group": "@RG\\tID:test\\tSM:test", - "bwa_mem.bwa_db_tar_gz": "../../input/GRCh38.chrY_chrM.bwa_db.tar.gz" -} \ No newline at end of file diff --git a/tests/tools/input_json/calc_tpm.json b/tests/tools/input_json/calc_tpm.json deleted file mode 100644 index d4c9fc03f..000000000 --- a/tests/tools/input_json/calc_tpm.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "calc_tpm.counts": "../../input/test.bwa_aln_pe.chrY_chrM.feature-counts.txt", - "calc_tpm.feature_lengths": "../../input/gencode.v31.chrY_chrM.genelengths.txt" -} \ No newline at end of file diff --git a/tests/tools/input_json/gatk4_apply_bqsr.json b/tests/tools/input_json/gatk4_apply_bqsr.json deleted file mode 100644 index d73c2a3eb..000000000 --- a/tests/tools/input_json/gatk4_apply_bqsr.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "apply_bqsr.bam": "../../input/test_rnaseq_variant.bam", - "apply_bqsr.bam_index": "../../input/test_rnaseq_variant.bam.bai", - "apply_bqsr.recalibration_report": "../../input/test_rnaseq_variant.recal.txt" -} \ No newline at end of file diff --git a/tests/tools/input_json/gatk4_base_recalibrator.json b/tests/tools/input_json/gatk4_base_recalibrator.json deleted file mode 100644 index e63e309b2..000000000 --- a/tests/tools/input_json/gatk4_base_recalibrator.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "base_recalibrator.bam": "../../input/test_rnaseq_variant.bam", - "base_recalibrator.bam_index": "../../input/test_rnaseq_variant.bam.bai", - "base_recalibrator.fasta": "../../input/GRCh38.chr1_chr19.fa", - "base_recalibrator.fasta_index": "../../input/GRCh38.chr1_chr19.fa.fai", - "base_recalibrator.dict": "../../input/GRCh38.chr1_chr19.dict", - "base_recalibrator.dbSNP_vcf": "../../input/Homo_sapiens_assembly38.dbsnp138.top5000.vcf", - "base_recalibrator.dbSNP_vcf_index": "../../input/Homo_sapiens_assembly38.dbsnp138.top5000.vcf.idx", - "base_recalibrator.known_indels_sites_vcfs": [ - "../../input/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz" - ], - "base_recalibrator.known_indels_sites_indices": [ - "../../input/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz.tbi" - ] -} \ No newline at end of file diff --git a/tests/tools/input_json/gatk4_haplotype_caller.json b/tests/tools/input_json/gatk4_haplotype_caller.json deleted file mode 100644 index 653fb5714..000000000 --- a/tests/tools/input_json/gatk4_haplotype_caller.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "haplotype_caller.bam": "../../input/test_rnaseq_variant.bam", - "haplotype_caller.bam_index": "../../input/test_rnaseq_variant.bam.bai", - "haplotype_caller.fasta": "../../input/GRCh38.chr1_chr19.fa", - "haplotype_caller.fasta_index": "../../input/GRCh38.chr1_chr19.fa.fai", - "haplotype_caller.dict": "../../input/GRCh38.chr1_chr19.dict", - "haplotype_caller.dbSNP_vcf": "../../input/Homo_sapiens_assembly38.dbsnp138.top5000.vcf", - "haplotype_caller.dbSNP_vcf_index": "../../input/Homo_sapiens_assembly38.dbsnp138.top5000.vcf.idx", - "haplotype_caller.interval_list": "../../input/chr1_chr19.interval_list" -} \ No newline at end of file diff --git a/tests/tools/input_json/gatk4_split_n_cigar_reads.json b/tests/tools/input_json/gatk4_split_n_cigar_reads.json deleted file mode 100644 index e547714cf..000000000 --- a/tests/tools/input_json/gatk4_split_n_cigar_reads.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "split_n_cigar_reads.bam": "../../input/test.bam", - "split_n_cigar_reads.bam_index": "../../input/test.bam.bai", - "split_n_cigar_reads.fasta": "../../input/GRCh38.chr1_chr19.fa", - "split_n_cigar_reads.fasta_index": "../../input/GRCh38.chr1_chr19.fa.fai", - "split_n_cigar_reads.dict": "../../input/GRCh38.chr1_chr19.dict", - "split_n_cigar_reads.prefix": "split" -} \ No newline at end of file diff --git a/tests/tools/input_json/gatk4_variant_filtration.json b/tests/tools/input_json/gatk4_variant_filtration.json deleted file mode 100644 index ba6d698da..000000000 --- a/tests/tools/input_json/gatk4_variant_filtration.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "variant_filtration.vcf": "../../input/test1.vcf.gz", - "variant_filtration.vcf_index": "../../input/test1.vcf.gz.tbi", - "variant_filtration.fasta": "../../input/GRCh38.chr1_chr19.fa", - "variant_filtration.fasta_index": "../../input/GRCh38.chr1_chr19.fa.fai", - "variant_filtration.dict": "../../input/GRCh38.chr1_chr19.dict" -} \ No newline at end of file diff --git a/tests/tools/input_json/kraken_build_db.json b/tests/tools/input_json/kraken_build_db.json deleted file mode 100644 index d3251ef77..000000000 --- a/tests/tools/input_json/kraken_build_db.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "build_db.tarballs": [ - "../../input/kraken2_taxonomy.tar.gz", - "../../input/kraken2_C_elegans_library.tar.gz" - ] -} \ No newline at end of file diff --git a/tests/tools/input_json/kraken_create_library_from_fastas.json b/tests/tools/input_json/kraken_create_library_from_fastas.json deleted file mode 100644 index 1d15046d6..000000000 --- a/tests/tools/input_json/kraken_create_library_from_fastas.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "create_library_from_fastas.fastas_gz": [ - "https://ftp.ncbi.nlm.nih.gov/genomes/refseq/invertebrate/Caenorhabditis_elegans/reference/GCF_000002985.6_WBcel235/GCF_000002985.6_WBcel235_genomic.fna.gz" - ] -} \ No newline at end of file diff --git a/tests/tools/input_json/multiqc.json b/tests/tools/input_json/multiqc.json deleted file mode 100644 index 556d7111f..000000000 --- a/tests/tools/input_json/multiqc.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "multiqc.files": [ - "../../input/test.bwa_aln_pe.chrY_chrM.readlength.txt" - ], - "multiqc.report_name": "test.bwa_aln_pe.chrY_chrM.multiqc" -} \ No newline at end of file diff --git a/tests/tools/input_json/multiqc_empty.json b/tests/tools/input_json/multiqc_empty.json deleted file mode 100644 index f2fbc2d6c..000000000 --- a/tests/tools/input_json/multiqc_empty.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "multiqc.files": [], - "multiqc.report_name": "empty" -} \ No newline at end of file diff --git a/tests/tools/input_json/ngsderive_encoding.json b/tests/tools/input_json/ngsderive_encoding.json deleted file mode 100644 index 3b4d25fd5..000000000 --- a/tests/tools/input_json/ngsderive_encoding.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "encoding.ngs_files": [ - "../../input/test.bwa_aln_pe.chrY_chrM.bam" - ], - "encoding.outfile_name": "test.bwa_aln_pe.chrY_chrM.encoding.tsv" -} \ No newline at end of file diff --git a/tests/tools/input_json/picard_merge_sam_files.json b/tests/tools/input_json/picard_merge_sam_files.json deleted file mode 100644 index 59448d62d..000000000 --- a/tests/tools/input_json/picard_merge_sam_files.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "merge_sam_files.bams": [ - "../../input/test.bwa_aln_pe.chrY_chrM.bam", - "../../input/test.PE.2_RGs.Aligned.out.sorted.bam" - ], - "merge_sam_files.prefix": "test.merged" -} \ No newline at end of file diff --git a/tests/tools/input_json/picard_merge_vcfs.json b/tests/tools/input_json/picard_merge_vcfs.json deleted file mode 100644 index 729faa020..000000000 --- a/tests/tools/input_json/picard_merge_vcfs.json +++ /dev/null @@ -1,11 +0,0 @@ -{ - "merge_vcfs.vcfs": [ - "../../input/test1.vcf.gz", - "../../input/test2.vcf.gz" - ], - "merge_vcfs.vcfs_indexes": [ - "../../input/test1.vcf.gz.tbi", - "../../input/test2.vcf.gz.tbi" - ], - "merge_vcfs.output_vcf_name": "test.vcf.gz" -} \ No newline at end of file diff --git a/tests/tools/input_json/sambamba_merge.json b/tests/tools/input_json/sambamba_merge.json deleted file mode 100644 index 2b8c66b13..000000000 --- a/tests/tools/input_json/sambamba_merge.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "merge.bams": [ - "../../input/test.bwa_aln_pe.chrY_chrM.bam", - "../../input/test.PE.2_RGs.Aligned.out.sorted.bam" - ], - "merge.prefix": "test.merged" -} \ No newline at end of file diff --git a/tests/tools/input_json/samtools_merge.json b/tests/tools/input_json/samtools_merge.json deleted file mode 100644 index 2b8c66b13..000000000 --- a/tests/tools/input_json/samtools_merge.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "merge.bams": [ - "../../input/test.bwa_aln_pe.chrY_chrM.bam", - "../../input/test.PE.2_RGs.Aligned.out.sorted.bam" - ], - "merge.prefix": "test.merged" -} \ No newline at end of file diff --git a/tests/tools/input_json/samtools_subsample.json b/tests/tools/input_json/samtools_subsample.json deleted file mode 100644 index 05e5fe4b5..000000000 --- a/tests/tools/input_json/samtools_subsample.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "subsample.bam": "https://github.com/stjude/CICERO/raw/master/test/data/input/test.bam", - "subsample.desired_reads": 100 -} \ No newline at end of file diff --git a/tests/tools/input_json/star_alignment_PE_1_pair.json b/tests/tools/input_json/star_alignment_PE_1_pair.json deleted file mode 100644 index befd948bd..000000000 --- a/tests/tools/input_json/star_alignment_PE_1_pair.json +++ /dev/null @@ -1,13 +0,0 @@ -{ - "alignment.read_one_fastqs_gz": [ - "../../input/test_R1.fq.gz" - ], - "alignment.read_two_fastqs_gz": [ - "../../input/test_R2.fq.gz" - ], - "alignment.star_db_tar_gz": "../../input/star_db.chrY_chrM.tar.gz", - "alignment.prefix": "test", - "alignment.read_groups": [ - "ID:test SM:test PL:ILLUMINA" - ] -} \ No newline at end of file diff --git a/tests/tools/input_json/star_alignment_PE_2_pairs.json b/tests/tools/input_json/star_alignment_PE_2_pairs.json deleted file mode 100644 index 45e479445..000000000 --- a/tests/tools/input_json/star_alignment_PE_2_pairs.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - "alignment.read_one_fastqs_gz": [ - "../../input/test_R1.fq.gz", - "../../input/random10k.r1.fq.gz" - ], - "alignment.read_two_fastqs_gz": [ - "../../input/test_R2.fq.gz", - "../../input/random10k.r2.fq.gz" - ], - "alignment.star_db_tar_gz": "../../input/star_db.chrY_chrM.tar.gz", - "alignment.prefix": "test", - "alignment.read_groups": [ - "ID:test SM:test PL:ILLUMINA", - "ID:random" - ] -} \ No newline at end of file diff --git a/tests/tools/input_json/star_alignment_SE.json b/tests/tools/input_json/star_alignment_SE.json deleted file mode 100644 index c30d29281..000000000 --- a/tests/tools/input_json/star_alignment_SE.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "alignment.read_one_fastqs_gz": [ - "../../input/test_R1.fq.gz" - ], - "alignment.prefix": "test", - "alignment.read_groups": [ - "ID:foo" - ], - "alignment.star_db_tar_gz": "../../input/star_db.chrY_chrM.tar.gz" -} \ No newline at end of file diff --git a/tests/tools/input_json/util_add_to_bam_header.json b/tests/tools/input_json/util_add_to_bam_header.json deleted file mode 100644 index 71f5e7960..000000000 --- a/tests/tools/input_json/util_add_to_bam_header.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "add_to_bam_header.bam": "../../input/test.bwa_aln_pe.chrY_chrM.bam", - "add_to_bam_header.additional_header": "@RG\tID:3" -} \ No newline at end of file diff --git a/tests/tools/input_json/util_download.json b/tests/tools/input_json/util_download.json deleted file mode 100644 index 16dde86b1..000000000 --- a/tests/tools/input_json/util_download.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "download.url": "https://raw.githubusercontent.com/stjudecloud/workflows/main/LICENSE.md", - "download.outfile_name": "license.txt", - "download.md5sum": "cf3575bd84ab3151c7e9700b5f1a9746", - "download.disk_size_gb": 1 -} \ No newline at end of file diff --git a/tests/tools/input_json/util_split_string.json b/tests/tools/input_json/util_split_string.json deleted file mode 100644 index 68e3d245b..000000000 --- a/tests/tools/input_json/util_split_string.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "split_string.string": "rg1 , rg2", - "split_string.delimiter": " , " -} \ No newline at end of file diff --git a/tests/tools/test_arriba.yaml b/tests/tools/test_arriba.yaml deleted file mode 100644 index 883a9a9c7..000000000 --- a/tests/tools/test_arriba.yaml +++ /dev/null @@ -1,40 +0,0 @@ -- name: arriba - tags: - - arriba - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t arriba -i tests/tools/input_json/arriba.json tools/arriba.wdl - files: - - path: output/outputs.json - contains: - - fusions.tsv - - fusions.discarded.tsv - -- name: arriba_tsv_to_vcf - tags: - - arriba - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t arriba_tsv_to_vcf tools/arriba.wdl fusions="tests/input/fusions.BCR_ABL1.tsv" reference_fasta="tests/input/GRCh38.chr9_chr22.fa.gz" prefix="fusions" - files: - - path: output/outputs.json - contains: - - fusions.vcf - -- name: arriba_extract_fusion_supporting_alignments - tags: - - arriba - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t arriba_extract_fusion_supporting_alignments tools/arriba.wdl fusions="tests/input/fusions.BCR_ABL1.tsv" bam="tests/input/Aligned.sortedByCoord.chr9_chr22.bam" bam_index="tests/input/Aligned.sortedByCoord.chr9_chr22.bam.bai" prefix="fusions" - files: - - path: output/outputs.json - contains: - - fusions_1.bam - -- name: arriba_annotate_exon_numbers - tags: - - arriba - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t arriba_annotate_exon_numbers tools/arriba.wdl fusions="tests/input/fusions.BCR_ABL1.tsv" gtf="tests/input/gencode.v31.chr9_chr22.gtf.gz" prefix="fusions" - files: - - path: output/outputs.json - contains: - - fusions.annotated.tsv diff --git a/tests/tools/test_bwa.yaml b/tests/tools/test_bwa.yaml deleted file mode 100644 index 6c38821ef..000000000 --- a/tests/tools/test_bwa.yaml +++ /dev/null @@ -1,40 +0,0 @@ -- name: bwa_aln - tags: - - bwa - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t bwa_aln -i tests/tools/input_json/bwa_aln.json tools/bwa.wdl - files: - - path: output/outputs.json - contains: - - test.bam - -- name: bwa_aln_pe - tags: - - bwa - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t bwa_aln_pe -i tests/tools/input_json/bwa_aln_pe.json tools/bwa.wdl - files: - - path: output/outputs.json - contains: - - test.bam - -- name: bwa_mem - tags: - - bwa - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t bwa_mem -i tests/tools/input_json/bwa_mem.json tools/bwa.wdl - files: - - path: output/outputs.json - contains: - - test.bam - -- name: build_bwa_db - tags: - - bwa - - reference - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t build_bwa_db tools/bwa.wdl reference_fasta="tests/input/GRCh38.chrY_chrM.fa" - files: - - path: output/outputs.json - contains: - - bwa_db.tar.gz diff --git a/tests/tools/test_deeptools.yaml b/tests/tools/test_deeptools.yaml deleted file mode 100644 index 64c671edf..000000000 --- a/tests/tools/test_deeptools.yaml +++ /dev/null @@ -1,9 +0,0 @@ -- name: deeptools_bam_coverage - tags: - - deeptools - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t bam_coverage tools/deeptools.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" bam_index="tests/input/test.bwa_aln_pe.chrY_chrM.bam.bai" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.bw diff --git a/tests/tools/test_fastp.yaml b/tests/tools/test_fastp.yaml deleted file mode 100644 index 006db1087..000000000 --- a/tests/tools/test_fastp.yaml +++ /dev/null @@ -1,12 +0,0 @@ -- name: fastp - tags: - - fastp - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t fastp tools/fastp.wdl read_one_fastq="tests/input/test_R1.fq.gz" read_two_fastq="tests/input/test_R2.fq.gz" - files: - - path: output/outputs.json - contains: - - test.trimmed.R1.fastq.gz - - test.trimmed.R2.fastq.gz - - test.trimmed.fastp.html - - test.trimmed.fastp.json \ No newline at end of file diff --git a/tests/tools/test_fastqc.yaml b/tests/tools/test_fastqc.yaml deleted file mode 100644 index 2be254308..000000000 --- a/tests/tools/test_fastqc.yaml +++ /dev/null @@ -1,10 +0,0 @@ -- name: fastqc - tags: - - fastqc - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t fastqc tools/fastqc.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM_fastqc.zip - - test.bwa_aln_pe.chrY_chrM.fastqc_results.tar.gz diff --git a/tests/tools/test_fq.yaml b/tests/tools/test_fq.yaml deleted file mode 100644 index dae76f1bc..000000000 --- a/tests/tools/test_fq.yaml +++ /dev/null @@ -1,27 +0,0 @@ -- name: fqlint - tags: - - fq - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t fqlint tools/fq.wdl read_one_fastq=tests/input/test_R1.fq.gz read_two_fastq=tests/input/test_R2.fq.gz - -- name: subsample_fq - tags: - - fq - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t subsample tools/fq.wdl read_one_fastq=tests/input/test_R1.fq.gz read_two_fastq=tests/input/test_R2.fq.gz record_count=1000 - files: - - path: output/outputs.json - contains: - - test.R1.subsampled.fastq.gz - - test.R2.subsampled.fastq.gz - -- name: subsample_fq_percentage - tags: - - fq - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t subsample tools/fq.wdl read_one_fastq=tests/input/test_R1.fq.gz read_two_fastq=tests/input/test_R2.fq.gz probability=0.01 - files: - - path: output/outputs.json - contains: - - test.R1.subsampled.fastq.gz - - test.R2.subsampled.fastq.gz diff --git a/tests/tools/test_gatk4.yaml b/tests/tools/test_gatk4.yaml deleted file mode 100644 index 200a7f33e..000000000 --- a/tests/tools/test_gatk4.yaml +++ /dev/null @@ -1,65 +0,0 @@ -- name: gatk4_split_n_cigar_reads - tags: - - gatk4 - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t split_n_cigar_reads -i tests/tools/input_json/gatk4_split_n_cigar_reads.json tools/gatk4.wdl - files: - - path: output/outputs.json - contains: - - split.bam - - split.bam.bai - - split.bam.md5 - -- name: gatk4_base_recalibrator - tags: - - gatk4 - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t base_recalibrator -i tests/tools/input_json/gatk4_base_recalibrator.json tools/gatk4.wdl - files: - - path: output/outputs.json - contains: - - test_rnaseq_variant.recal.txt - -- name: gatk4_apply_bqsr - tags: - - gatk4 - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t apply_bqsr -i tests/tools/input_json/gatk4_apply_bqsr.json tools/gatk4.wdl - files: - - path: output/outputs.json - contains: - - test_rnaseq_variant.bqsr.bam - - test_rnaseq_variant.bqsr.bam.bai - -- name: gatk4_haplotype_caller - tags: - - gatk4 - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t haplotype_caller -i tests/tools/input_json/gatk4_haplotype_caller.json tools/gatk4.wdl - files: - - path: output/outputs.json - contains: - - test_rnaseq_variant.vcf.gz - -- name: gatk4_variant_filtration - tags: - - gatk4 - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t variant_filtration -i tests/tools/input_json/gatk4_variant_filtration.json tools/gatk4.wdl - files: - - path: output/outputs.json - contains: - - test1.filtered.vcf.gz - - test1.filtered.vcf.gz.tbi - -- name: gatk4_mark_duplicates_spark - tags: - - gatk4 - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t mark_duplicates_spark tools/gatk4.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.MarkDuplicates.bam - - test.bwa_aln_pe.chrY_chrM.MarkDuplicates.bam.bai - - test.bwa_aln_pe.chrY_chrM.MarkDuplicates.metrics.txt \ No newline at end of file diff --git a/tests/tools/test_htseq.yaml b/tests/tools/test_htseq.yaml deleted file mode 100644 index f35b7adbd..000000000 --- a/tests/tools/test_htseq.yaml +++ /dev/null @@ -1,19 +0,0 @@ -- name: htseq_count - tags: - - htseq - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t count tools/htseq.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" gtf="tests/input/gencode.v31.chrY_chrM.gtf.gz" strandedness="no" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.feature-counts.txt - -- name: calc_tpm - tags: - - htseq - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t calc_tpm -i tests/tools/input_json/calc_tpm.json tools/htseq.wdl - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.TPM.txt diff --git a/tests/tools/test_kraken2.yaml b/tests/tools/test_kraken2.yaml deleted file mode 100644 index 22564eaf6..000000000 --- a/tests/tools/test_kraken2.yaml +++ /dev/null @@ -1,55 +0,0 @@ -- name: download_taxonomy - tags: - - kraken - - reference - - slow - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t download_taxonomy tools/kraken2.wdl - files: - - path: output/outputs.json - contains: - - kraken2_taxonomy.tar.gz - -- name: download_library - tags: - - kraken - - reference - - slow - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t download_library tools/kraken2.wdl library_name='fungi' - files: - - path: output/outputs.json - contains: - - kraken2_fungi_library.tar.gz - -- name: create_library_from_fastas - tags: - - kraken - - reference - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t create_library_from_fastas -i tests/tools/input_json/kraken_create_library_from_fastas.json tools/kraken2.wdl - files: - - path: output/outputs.json - contains: - - kraken2_custom_library.tar.gz - -- name: build_db - tags: - - kraken - - reference - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t build_db -i tests/tools/input_json/kraken_build_db.json tools/kraken2.wdl - files: - - path: output/outputs.json - contains: - - kraken2_db.tar.gz - -- name: kraken - tags: - - kraken - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t kraken tools/kraken2.wdl read_one_fastq_gz="tests/input/test_R1.fq.gz" read_two_fastq_gz="tests/input/test_R2.fq.gz" db="tests/input/kraken2_db.mini.tar.gz" - files: - - path: output/outputs.json - contains: - - test.kraken2.txt diff --git a/tests/tools/test_librarian.yaml b/tests/tools/test_librarian.yaml deleted file mode 100644 index a0033514e..000000000 --- a/tests/tools/test_librarian.yaml +++ /dev/null @@ -1,10 +0,0 @@ -- name: librarian - tags: - - librarian - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t librarian tools/librarian.wdl read_one_fastq="tests/input/test_R1.fq.gz" - files: - - path: output/outputs.json - contains: - - test.librarian.tar.gz - - librarian_heatmap.txt diff --git a/tests/tools/test_md5sum.yaml b/tests/tools/test_md5sum.yaml deleted file mode 100644 index a1199b897..000000000 --- a/tests/tools/test_md5sum.yaml +++ /dev/null @@ -1,11 +0,0 @@ -- name: compute_checksum - tags: - - md5sum - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t compute_checksum tools/md5sum.wdl file="tests/input/test.bwa_aln_pe.chrY_chrM.bam" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.bam.md5 - # contains: - # - "77fa2f59b0083202c73b0c80b60b24f6" diff --git a/tests/tools/test_mosdepth.yaml b/tests/tools/test_mosdepth.yaml deleted file mode 100644 index e0b33e21f..000000000 --- a/tests/tools/test_mosdepth.yaml +++ /dev/null @@ -1,10 +0,0 @@ -- name: mosdepth_coverage - tags: - - mosdepth - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t coverage tools/mosdepth.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" bam_index="tests/input/test.bwa_aln_pe.chrY_chrM.bam.bai" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.mosdepth.summary.txt - - test.bwa_aln_pe.chrY_chrM.mosdepth.global.dist.txt diff --git a/tests/tools/test_multiqc.yaml b/tests/tools/test_multiqc.yaml deleted file mode 100644 index c2537df7b..000000000 --- a/tests/tools/test_multiqc.yaml +++ /dev/null @@ -1,20 +0,0 @@ -- name: multiqc - tags: - - multiqc - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t multiqc -i tests/tools/input_json/multiqc.json tools/multiqc.wdl - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.multiqc.html - -- name: multiqc_empty - tags: - - multiqc - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t multiqc -i tests/tools/input_json/multiqc_empty.json tools/multiqc.wdl - exit_code: 1 - stderr: - contains: - - "No analysis results found" - - "MultiQC didn't find any valid files" \ No newline at end of file diff --git a/tests/tools/test_ngsderive.yaml b/tests/tools/test_ngsderive.yaml deleted file mode 100644 index b94534d5e..000000000 --- a/tests/tools/test_ngsderive.yaml +++ /dev/null @@ -1,68 +0,0 @@ -- name: strandedness - tags: - - ngsderive - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t strandedness tools/ngsderive.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" bam_index="tests/input/test.bwa_aln_pe.chrY_chrM.bam.bai" gene_model="tests/input/gencode.v31.chrY_chrM.gtf.gz" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.strandedness.tsv - - "Unstranded" - -- name: instrument - tags: - - ngsderive - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t instrument tools/ngsderive.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.instrument.tsv - - "multiple instruments" - # - "unknown confidence" - -- name: read_length - tags: - - ngsderive - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t read_length tools/ngsderive.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" bam_index="tests/input/test.bwa_aln_pe.chrY_chrM.bam.bai" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.readlength.tsv - # contains: - # - "150=20000" - -- name: encoding - tags: - - ngsderive - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t encoding -i tests/tools/input_json/ngsderive_encoding.json tools/ngsderive.wdl - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.encoding.tsv - # contains: - # - "ASCII range: 74-74" - # - "Illumina 1.3" - -- name: junction_annotation - tags: - - ngsderive - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t junction_annotation tools/ngsderive.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" bam_index="tests/input/test.bwa_aln_pe.chrY_chrM.bam.bai" gene_model="tests/input/gencode.v31.chrY_chrM.gtf.gz" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.junction_summary.tsv - - test.bwa_aln_pe.chrY_chrM.junctions.tsv.gz - -- name: endedness - tags: - - ngsderive - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t endedness tools/ngsderive.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.endedness.tsv diff --git a/tests/tools/test_picard.yaml b/tests/tools/test_picard.yaml deleted file mode 100644 index ca99cab36..000000000 --- a/tests/tools/test_picard.yaml +++ /dev/null @@ -1,163 +0,0 @@ -- name: picard_mark_duplicates - tags: - - picard - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t mark_duplicates tools/picard.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.MarkDuplicates.bam - - test.bwa_aln_pe.chrY_chrM.MarkDuplicates.bam.bai - - test.bwa_aln_pe.chrY_chrM.MarkDuplicates.bam.md5 - - test.bwa_aln_pe.chrY_chrM.MarkDuplicates.metrics.txt - -- name: picard_validate_bam - tags: - - picard - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t validate_bam tools/picard.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.ValidateSamFile.txt - -- name: picard_bam_to_fastq - tags: - - picard - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t bam_to_fastq tools/picard.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.R1.fastq.gz - - test.bwa_aln_pe.chrY_chrM.R2.fastq.gz - -- name: picard_sort - tags: - - picard - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t sort tools/picard.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" memory_gb=16 - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.sorted.bam - - test.bwa_aln_pe.chrY_chrM.sorted.bam.bai - -- name: picard_sort_queryname - tags: - - picard - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t sort tools/picard.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" memory_gb=16 sort_order="queryname" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.sorted.bam - must_not_contain: - - test.bwa_aln_pe.chrY_chrM.sorted.bam.bai - -- name: picard_merge_sam_files - tags: - - picard - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t merge_sam_files -i tests/tools/input_json/picard_merge_sam_files.json tools/picard.wdl - files: - - path: output/outputs.json - contains: - - test.merged.bam - -- name: picard_clean_sam - tags: - - picard - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t clean_sam tools/picard.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.cleaned.bam - -- name: picard_collect_wgs_metrics - tags: - - picard - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t collect_wgs_metrics tools/picard.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" reference_fasta="tests/input/GRCh38.chrY_chrM.fa" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.CollectWgsMetrics.txt - -- name: picard_collect_alignment_summary_metrics - tags: - - picard - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t collect_alignment_summary_metrics tools/picard.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.CollectAlignmentSummaryMetrics.txt - - test.bwa_aln_pe.chrY_chrM.CollectAlignmentSummaryMetrics.pdf - -- name: picard_collect_gc_bias_metrics - tags: - - picard - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t collect_gc_bias_metrics tools/picard.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" reference_fasta=tests/input/GRCh38.chrY_chrM.fa - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.CollectGcBiasMetrics.txt - - test.bwa_aln_pe.chrY_chrM.CollectGcBiasMetrics.summary.txt - - test.bwa_aln_pe.chrY_chrM.CollectGcBiasMetrics.pdf - -- name: picard_collect_insert_size_metrics - tags: - - picard - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t collect_insert_size_metrics tools/picard.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.CollectInsertSizeMetrics.txt - - test.bwa_aln_pe.chrY_chrM.CollectInsertSizeMetrics.pdf - -- name: picard_quality_score_distribution - tags: - - picard - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t quality_score_distribution tools/picard.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.QualityScoreDistribution.txt - - test.bwa_aln_pe.chrY_chrM.QualityScoreDistribution.pdf - -- name: picard_merge_vcfs - tags: - - picard - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t merge_vcfs -i tests/tools/input_json/picard_merge_vcfs.json tools/picard.wdl - files: - - path: output/outputs.json - contains: - - test.vcf.gz - -- name: picard_scatter_interval_list - tags: - - picard - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t scatter_interval_list tools/picard.wdl interval_list="tests/input/wgs_calling_regions.hg38.interval_list" scatter_count=3 - files: - - path: output/outputs.json - contains: - - 1scattered.interval_list - - 2scattered.interval_list - - 3scattered.interval_list - -- name: picard_create_sequence_dictionary - tags: - - picard - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t create_sequence_dictionary tools/picard.wdl fasta="tests/input/GRCh38.chrY_chrM.fa" outfile_name="GRCh38.chrY_chrM.dict" - files: - - path: output/outputs.json - contains: - - GRCh38.chrY_chrM.dict \ No newline at end of file diff --git a/tests/tools/test_qualimap.yaml b/tests/tools/test_qualimap.yaml deleted file mode 100644 index 405b31517..000000000 --- a/tests/tools/test_qualimap.yaml +++ /dev/null @@ -1,11 +0,0 @@ -- name: qualimap_rnaseq - tags: - - qualimap - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t rnaseq tools/qualimap.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" gtf="tests/input/gencode.v31.chrY_chrM.gtf.gz" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.qualimap_rnaseq_results.tar.gz - - rnaseq_qc_results.txt - - coverage_profile_along_genes_(total).txt diff --git a/tests/tools/test_sambamba.yaml b/tests/tools/test_sambamba.yaml deleted file mode 100644 index 4a1464a2b..000000000 --- a/tests/tools/test_sambamba.yaml +++ /dev/null @@ -1,51 +0,0 @@ -- name: sambamba_index - tags: - - sambamba - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t index tools/sambamba.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.bam.bai - -- name: sambamba_merge - tags: - - sambamba - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t merge -i tests/tools/input_json/sambamba_merge.json tools/sambamba.wdl - files: - - path: output/outputs.json - contains: - - test.merged.bam - -- name: sambamba_sort - tags: - - sambamba - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t sort tools/sambamba.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.sorted.bam - -- name: sambamba_flagstat - tags: - - sambamba - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t flagstat tools/sambamba.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.flagstat.txt - -- name: sambamba_markdup - tags: - - sambamba - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t markdup tools/sambamba.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.markdup.bam - - test.bwa_aln_pe.chrY_chrM.markdup.bam.bai - - test.bwa_aln_pe.chrY_chrM.markdup_log.txt \ No newline at end of file diff --git a/tests/tools/test_samtools.py b/tests/tools/test_samtools.py deleted file mode 100644 index 272ed7b76..000000000 --- a/tests/tools/test_samtools.py +++ /dev/null @@ -1,53 +0,0 @@ -"""TODO: rewrite this so it is runner agnostic""" - -# import pytest -# import pathlib -# from collections import OrderedDict - -# import pysam -# import fastq - - -# @pytest.mark.workflow('samtools_split') -# def test_samtools_split(workflow_dir): -# bam = pathlib.Path(workflow_dir, 'test-output/out/split_bams/0/test.1.bam') -# samfile = pysam.AlignmentFile(bam, "rb") -# bam_header = OrderedDict((k, v) for k, v in samfile.header.items()) -# read_groups = [read_group['ID'] for read_group in bam_header.get('RG', []) if 'ID' in read_group] -# assert len(read_groups) == 1 -# assert read_groups[0] == "1" - -# second_bam = pathlib.Path(workflow_dir, 'test-output/out/split_bams/1/test.2.bam') -# second_samfile = pysam.AlignmentFile(second_bam, "rb") -# second_bam_header = OrderedDict((k, v) for k, v in second_samfile.header.items()) -# second_read_groups = [read_group['ID'] for read_group in second_bam_header.get('RG', []) if 'ID' in read_group] -# assert len(second_read_groups) == 1 -# assert second_read_groups[0] == "2" - -# @pytest.mark.workflow('samtools_merge') -# def test_samtools_merge(workflow_dir): -# bam = pathlib.Path(workflow_dir, 'test-output/out/merged_bam/test.bam') -# samfile = pysam.AlignmentFile(bam, "rb") -# bam_header = OrderedDict((k, v) for k, v in samfile.header.items()) -# read_groups = [read_group['ID'] for read_group in bam_header.get('RG', []) if 'ID' in read_group] -# assert len(read_groups) == 2 -# assert read_groups[0] == "test2" -# assert read_groups[1] == "test.bwa_aln_pe.chrY_chrM" - -# @pytest.mark.workflow('samtools_collate', 'samtools_collate_to_fastq') -# def test_samtools_collate(workflow_dir): -# bam = pathlib.Path(workflow_dir, 'test-output/out/collated_bam/test.bwa_aln_pe.chrY_chrM.collated.bam') -# samfile = pysam.AlignmentFile(bam, "rb") - -# reads = list(samfile.fetch(until_eof=True)) -# for c in range(0, 100, 2): -# assert reads[c].query_name == reads[c+1].query_name -# assert reads[c].is_read1 != reads[c+1].is_read1 - -# @pytest.mark.workflow('samtools_bam_to_fastq', 'samtools_collate_to_fastq') -# def test_samtools_bam_to_fastq(workflow_dir): -# fq1 = fastq.read(pathlib.Path(workflow_dir, 'test-output/out/read_one_fastq_gz/test.bwa_aln_pe.chrY_chrM.R1.fastq.gz')) -# fq2 = fastq.read(pathlib.Path(workflow_dir, 'test-output/out/read_two_fastq_gz/test.bwa_aln_pe.chrY_chrM.R2.fastq.gz')) - -# for r1, r2 in zip(fq1, fq2): -# assert r1.head.removesuffix("/1") == r2.head.removesuffix("/2") diff --git a/tests/tools/test_samtools.yaml b/tests/tools/test_samtools.yaml deleted file mode 100644 index 4d08c8583..000000000 --- a/tests/tools/test_samtools.yaml +++ /dev/null @@ -1,125 +0,0 @@ -- name: samtools_quickcheck - tags: - - samtools - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t quickcheck tools/samtools.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" - -- name: samtools_split - tags: - - samtools - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t split tools/samtools.wdl bam="tests/input/test.bam" - files: - - path: output/outputs.json - contains: - - test.1.bam - - test.2.bam - -- name: samtools_split_unaccounted - tags: - - samtools - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t split tools/samtools.wdl bam="tests/input/test.unaccounted_read.bam" - exit_code: 1 - stderr: - contains: - - "There are reads present with bad or missing RG tags!" - -- name: samtools_split_extra_RG - tags: - - samtools - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t split tools/samtools.wdl bam="tests/input/test.extra_RG.bam" - exit_code: 1 - stderr: - contains: - - "No reads are in output BAM test.extra_RG.no_match.bam!" - - "This is likely caused by malformed RG records." - -- name: samtools_flagstat - tags: - - samtools - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t flagstat tools/samtools.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.flagstat.txt - # contains: - # - "20000" - # - "0 + 0 secondary" - # - "20000 + 0 in total (QC-passed reads + QC-failed reads)" - # - "10000 + 0 read1" - # - "10000 + 0 read2" - -- name: samtools_index - tags: - - samtools - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t index tools/samtools.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.bam.bai - -- name: samtools_subsample - tags: - - samtools - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t subsample -i tests/tools/input_json/samtools_subsample.json tools/samtools.wdl - files: - - path: output/outputs.json - contains: - - test.sampled.bam - -- name: samtools_merge - tags: - - samtools - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t merge -i tests/tools/input_json/samtools_merge.json tools/samtools.wdl - files: - - path: output/outputs.json - contains: - - test.merged.bam - -- name: samtools_addreplacerg - tags: - - samtools - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t addreplacerg tools/samtools.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" read_group_id="test" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.addreplacerg.bam - -- name: samtools_collate - tags: - - samtools - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t collate tools/samtools.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.collated.bam - -- name: samtools_bam_to_fastq - tags: - - samtools - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t bam_to_fastq tools/samtools.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" retain_collated_bam=true - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.collated.bam - - test.bwa_aln_pe.chrY_chrM.R1.fastq.gz - - test.bwa_aln_pe.chrY_chrM.R2.fastq.gz - -- name: samtools_faidx - tags: - - samtools - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t faidx tools/samtools.wdl fasta="tests/input/test.fa" - files: - - path: output/outputs.json - contains: - - test.fa.fai diff --git a/tests/tools/test_star.yaml b/tests/tools/test_star.yaml deleted file mode 100644 index 159f217e7..000000000 --- a/tests/tools/test_star.yaml +++ /dev/null @@ -1,43 +0,0 @@ -- name: build_star_db - tags: - - star - - reference - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t build_star_db tools/star.wdl reference_fasta=tests/input/GRCh38.chrY_chrM.fa gtf=tests/input/gencode.v31.chrY_chrM.gtf.gz - files: - - path: output/outputs.json - contains: - - star_db.tar.gz - -- name: star_alignment_PE_1_pair - tags: - - star - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t alignment -i tests/tools/input_json/star_alignment_PE_1_pair.json tools/star.wdl - files: - - path: output/outputs.json - contains: - - test.Log.final.out - - test.Aligned.out.bam - -- name: star_alignment_PE_2_pairs - tags: - - star - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t alignment -i tests/tools/input_json/star_alignment_PE_2_pairs.json tools/star.wdl - files: - - path: output/outputs.json - contains: - - test.Log.final.out - - test.Aligned.out.bam - -- name: star_alignment_SE - tags: - - star - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t alignment -i tests/tools/input_json/star_alignment_SE.json tools/star.wdl - files: - - path: output/outputs.json - contains: - - test.Log.final.out - - test.Aligned.out.bam \ No newline at end of file diff --git a/tests/tools/test_util.yaml b/tests/tools/test_util.yaml deleted file mode 100644 index 695da9859..000000000 --- a/tests/tools/test_util.yaml +++ /dev/null @@ -1,94 +0,0 @@ -- name: download - tags: - - util - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -i tests/tools/input_json/util_download.json -t download tools/util.wdl - files: - - path: output/outputs.json - contains: - - license.txt - # md5sum: cf3575bd84ab3151c7e9700b5f1a9746 - # contains: - # - "MIT License" - -- name: split_string - tags: - - util - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t split_string -i tests/tools/input_json/util_split_string.json tools/util.wdl - stdout: - contains: - - "rg1" - - "rg2" - must_not_contain: - - "rg1 , rg2" - -- name: calc_feature_lengths - tags: - - util - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t calc_feature_lengths tools/util.wdl gtf="tests/input/gencode.v31.chrY_chrM.gtf.gz" - files: - - path: output/outputs.json - contains: - - gencode.v31.chrY_chrM.genelengths.txt - # contains: - # - "AL954722.1" - -- name: compression_integrity - tags: - - util - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t compression_integrity tools/util.wdl bgzipped_file="tests/input/test.bwa_aln_pe.chrY_chrM.bam" - -# TODO: This does not test that the record was properly added to the header. -- name: add_to_bam_header - tags: - - util - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t add_to_bam_header -i tests/tools/input_json/util_add_to_bam_header.json tools/util.wdl - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.reheader.bam - -- name: unpack_tarball - tags: - - util - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t unpack_tarball tools/util.wdl tarball="tests/input/test.tar.gz" - files: - - path: output/outputs.json - contains: - - test_file_a - - test_file_b - -- name: make_coverage_regions_bed - tags: - - util - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t make_coverage_regions_bed tools/util.wdl gtf="tests/input/gencode.v31.chrY_chrM.gtf.gz" feature_type="exon" - files: - - path: output/outputs.json - contains: - - gencode.v31.chrY_chrM.exon.bed - -- name: global_phred_scores - tags: - - util - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t global_phred_scores tools/util.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.global_PHRED_scores.tsv - -- name: split_fastq - tags: - - util - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t split_fastq tools/util.wdl fastq="tests/input/test_R1.fq.gz" prefix="test.R1." - files: - - path: output/outputs.json - contains: - - test.R1.000000.fastq.gz \ No newline at end of file diff --git a/tests/workflows/_test_methylation-preprocess.yaml b/tests/workflows/_test_methylation-preprocess.yaml deleted file mode 100644 index 3bfe379b8..000000000 --- a/tests/workflows/_test_methylation-preprocess.yaml +++ /dev/null @@ -1,15 +0,0 @@ -- name: process_raw_idats - tags: - - methylation - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t process_raw_idats -i tests/workflows/input_json/process_raw_idats.json workflows/methylation/methylation-preprocess.wdl - files: - - path: output/outputs.json - contains: - - 201533520001_R03C01.beta_swan_norm_unfiltered.csv - - 201533520001_R03C01.beta_swan_norm_unfiltered.genomic.csv - - 201533520001_R03C01.annotation.csv - - 201533520001_R03C01.beta.csv - - 201533520001_R03C01.cn_values.csv - - 201533520001_R03C01.m_values.csv - - 201533520001_R03C01.probeNames.csv \ No newline at end of file diff --git a/tests/workflows/input_json/combine_data.json b/tests/workflows/input_json/combine_data.json deleted file mode 100644 index ec1edd324..000000000 --- a/tests/workflows/input_json/combine_data.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "combine_data.files_to_combine": [ - "../../input/201533520001_R01C01.beta_swan_norm_unfiltered.genomic.csv", - "../../input/201533520001_R03C01.beta_swan_norm_unfiltered.genomic.csv" - ], - "combine_data.combined_file_name": "combined_beta.csv" -} \ No newline at end of file diff --git a/tests/workflows/input_json/dnaseq-standard-fastq.json b/tests/workflows/input_json/dnaseq-standard-fastq.json deleted file mode 100644 index f1db97b82..000000000 --- a/tests/workflows/input_json/dnaseq-standard-fastq.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "dnaseq_standard_fastq_experimental.read_one_fastqs_gz": [ - "../../input/test_R1.fq.gz" - ], - "dnaseq_standard_fastq_experimental.read_two_fastqs_gz": [ - "../../input/test_R2.fq.gz" - ], - "dnaseq_standard_fastq_experimental.read_groups": [ - { - "ID": "test", - "PI": 150, - "PL": "ILLUMINA", - "SM": "Sample", - "LB": "Sample" - } - ], - "dnaseq_standard_fastq_experimental.bwa_db": "../../input/GRCh38.chrY_chrM.bwa_db.tar.gz" -} \ No newline at end of file diff --git a/tests/workflows/input_json/dnaseq-standard.json b/tests/workflows/input_json/dnaseq-standard.json deleted file mode 100644 index 06e8d8cac..000000000 --- a/tests/workflows/input_json/dnaseq-standard.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "dnaseq_standard_experimental.bam": "../../input/test.bwa_aln_pe.chrY_chrM.bam", - "dnaseq_standard_experimental.bwa_db": "../../input/GRCh38.chrY_chrM.bwa_db.tar.gz" -} \ No newline at end of file diff --git a/tests/workflows/input_json/filter_probes.json b/tests/workflows/input_json/filter_probes.json deleted file mode 100644 index 8b7d0a024..000000000 --- a/tests/workflows/input_json/filter_probes.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "filter_probes.beta_values": "../../input/combined_beta.csv", - "filter_probes.num_probes": 1000 -} \ No newline at end of file diff --git a/tests/workflows/input_json/generate_umap.json b/tests/workflows/input_json/generate_umap.json deleted file mode 100644 index 670c02298..000000000 --- a/tests/workflows/input_json/generate_umap.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "generate_umap.filtered_beta_values": "../../input/filtered_beta.csv" -} \ No newline at end of file diff --git a/tests/workflows/input_json/plot_umap.json b/tests/workflows/input_json/plot_umap.json deleted file mode 100644 index 86444c490..000000000 --- a/tests/workflows/input_json/plot_umap.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "plot_umap.umap": "../../input/umap.csv" -} \ No newline at end of file diff --git a/tests/workflows/input_json/process_raw_idats.json b/tests/workflows/input_json/process_raw_idats.json deleted file mode 100644 index b1dbbbc77..000000000 --- a/tests/workflows/input_json/process_raw_idats.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "process_raw_idats.idats": { - "left": "../../input/201533520001_R03C01_Grn.idat", - "right": "../../input/201533520001_R03C01_Red.idat" - } -} \ No newline at end of file diff --git a/tests/workflows/input_json/qc-standard.json b/tests/workflows/input_json/qc-standard.json deleted file mode 100644 index 0f552166e..000000000 --- a/tests/workflows/input_json/qc-standard.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "quality_check_standard.bam": "../../input/test.bwa_aln_pe.chrY_chrM.bam", - "quality_check_standard.bam_index": "../../input/test.bwa_aln_pe.chrY_chrM.bam.bai", - "quality_check_standard.gtf": "../../input/gencode.v31.chrY_chrM.gtf.gz", - "quality_check_standard.kraken_db": "../../input/kraken2_db.mini.tar.gz", - "quality_check_standard.rna": true, - "quality_check_standard.coverage_beds": [ - "../../input/gencode.v31.chrY_chrM.gene.bed" - ] -} \ No newline at end of file diff --git a/tests/workflows/input_json/rnaseq-standard-fastq.json b/tests/workflows/input_json/rnaseq-standard-fastq.json deleted file mode 100644 index 452d4c932..000000000 --- a/tests/workflows/input_json/rnaseq-standard-fastq.json +++ /dev/null @@ -1,21 +0,0 @@ -{ - "rnaseq_standard_fastq.read_one_fastqs_gz": [ - "../../input/test_R1.fq.gz" - ], - "rnaseq_standard_fastq.read_two_fastqs_gz": [ - "../../input/test_R2.fq.gz" - ], - "rnaseq_standard_fastq.read_groups": [ - { - "ID": "test", - "PI": 150, - "PL": "ILLUMINA", - "SM": "Sample", - "LB": "Sample" - } - ], - "rnaseq_standard_fastq.prefix": "test", - "rnaseq_standard_fastq.gtf": "../../input/gencode.v31.chrY_chrM.gtf.gz", - "rnaseq_standard_fastq.star_db": "../../input/star_db.chrY_chrM.tar.gz", - "rnaseq_standard_fastq.strandedness": "Unstranded" -} \ No newline at end of file diff --git a/tests/workflows/input_json/rnaseq-standard.json b/tests/workflows/input_json/rnaseq-standard.json deleted file mode 100644 index a42cbc5a1..000000000 --- a/tests/workflows/input_json/rnaseq-standard.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "rnaseq_standard.bam": "../../input/test.bwa_aln_pe.chrY_chrM.bam", - "rnaseq_standard.gtf": "../../input/gencode.v31.chrY_chrM.gtf.gz", - "rnaseq_standard.star_db": "../../input/star_db.chrY_chrM.tar.gz", - "rnaseq_standard.strandedness": "Unstranded" -} \ No newline at end of file diff --git a/tests/workflows/input_json/rnaseq-variant-calling.json b/tests/workflows/input_json/rnaseq-variant-calling.json deleted file mode 100644 index e108c5298..000000000 --- a/tests/workflows/input_json/rnaseq-variant-calling.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - "rnaseq_variant_calling.bam": "../../input/test.bwa_aln_pe.chrY_chrM.bam", - "rnaseq_variant_calling.bam_index": "../../input/test.bwa_aln_pe.chrY_chrM.bam.bai", - "rnaseq_variant_calling.fasta": "../../input/GRCh38.chrY_chrM.fa", - "rnaseq_variant_calling.fasta_index": "../../input/GRCh38.chrY_chrM.fa.fai", - "rnaseq_variant_calling.dict": "../../input/GRCh38.chrY_chrM.dict", - "rnaseq_variant_calling.calling_interval_list": "../../input/wgs_calling_regions.hg38.interval_list", - "rnaseq_variant_calling.known_vcfs": [ - "../../input/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz" - ], - "rnaseq_variant_calling.known_vcf_indexes": [ - "../../input/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz.tbi" - ], - "rnaseq_variant_calling.dbSNP_vcf": "../../input/Homo_sapiens_assembly38.dbsnp138.top5000.vcf", - "rnaseq_variant_calling.dbSNP_vcf_index": "../../input/Homo_sapiens_assembly38.dbsnp138.top5000.vcf.idx" -} \ No newline at end of file diff --git a/tests/workflows/test_methylation-cohort.yaml b/tests/workflows/test_methylation-cohort.yaml deleted file mode 100644 index f981b75a7..000000000 --- a/tests/workflows/test_methylation-cohort.yaml +++ /dev/null @@ -1,40 +0,0 @@ -- name: combine_data - tags: - - methylation - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t combine_data -i tests/workflows/input_json/combine_data.json workflows/methylation/methylation-cohort.wdl - files: - - path: output/outputs.json - contains: - - combined_beta.csv - -- name: filter_probes - tags: - - methylation - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t filter_probes -i tests/workflows/input_json/filter_probes.json workflows/methylation/methylation-cohort.wdl - files: - - path: output/outputs.json - contains: - - filtered.beta.csv - - filtered.probes.csv - -- name: generate_umap - tags: - - methylation - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t generate_umap -i tests/workflows/input_json/generate_umap.json workflows/methylation/methylation-cohort.wdl - files: - - path: output/outputs.json - contains: - - umap.csv - -- name: plot_umap - tags: - - methylation - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t plot_umap -i tests/workflows/input_json/plot_umap.json workflows/methylation/methylation-cohort.wdl - files: - - path: output/outputs.json - contains: - - umap.png From d2898f06c9751ed306fba91bfe2e3fe3b6738059 Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Sun, 28 Dec 2025 08:47:05 -0500 Subject: [PATCH 14/37] revise: change validate_read_group logic --- data_structures/read_group.wdl | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/data_structures/read_group.wdl b/data_structures/read_group.wdl index 151f58ac8..719fdb409 100644 --- a/data_structures/read_group.wdl +++ b/data_structures/read_group.wdl @@ -156,15 +156,13 @@ task validate_read_group { input { ReadGroup read_group - Array[String] required_fields = [] + Array[String] required_fields = ["SM"] Boolean restrictive = true } # The SAM spec allows any printable ASCII character in header fields. String sam_spec_pattern = "[\\ -~]+" # We have the opinion that is too permissive for ID and SM. - String id_pattern = "id" - String sample_pattern = "sample.?" String restrictive_pattern = "\\ " # Disallow spaces Array[String] platforms = [ "CAPILLARY", "DNBSEQ", "ELEMENT", "HELICOS", "ILLUMINA", "IONTORRENT", "LS454", @@ -174,11 +172,9 @@ task validate_read_group { command <<< exit_code=0 if ~{restrictive}; then - if [[ ~{read_group.ID} =~ ^~{id_pattern}$ ]] \ - || [[ ~{read_group.ID} =~ ~{restrictive_pattern} ]] + if [[ "~{read_group.ID}" =~ ~{restrictive_pattern} ]] then - >&2 echo "ID (~{read_group.ID}) must not match patterns:" - >&2 echo "'~{id_pattern}' or '~{restrictive_pattern}'" + >&2 echo "ID must not contain spaces" exit_code=1 fi fi @@ -194,11 +190,9 @@ task validate_read_group { fi if ~{defined(read_group.SM)}; then if ~{restrictive}; then - if [[ "~{read_group.SM}" =~ ^~{sample_pattern}$ ]] \ - || [[ "~{read_group.SM}" =~ ~{restrictive_pattern} ]] + if [[ "~{read_group.SM}" =~ ~{restrictive_pattern} ]] then - >&2 echo "SM must not match patterns:" - >&2 echo "'~{sample_pattern}' or '~{restrictive_pattern}'" + >&2 echo "SM must not contain spaces" exit_code=1 fi fi From 6c910ce58f812ce5e512ad4c61286d669aa539f3 Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Sun, 28 Dec 2025 08:47:21 -0500 Subject: [PATCH 15/37] test data_structures --- data_structures/test/flag_filter.yaml | 22 ++++--- data_structures/test/read_group.yaml | 90 +++++++++++++++++++++++++-- 2 files changed, 98 insertions(+), 14 deletions(-) diff --git a/data_structures/test/flag_filter.yaml b/data_structures/test/flag_filter.yaml index 4da14bcdd..fb6a55927 100644 --- a/data_structures/test/flag_filter.yaml +++ b/data_structures/test/flag_filter.yaml @@ -1,19 +1,23 @@ validate_string_is_12bit_int: - - name: decimal_passes + - name: valid_numbers inputs: number: - "5" - - name: hexadecimal_passes - inputs: - number: - "0x900" + - "01" + - "4095" assertions: stderr: - Input number \(.*\) is valid - - name: too_big_hexadecimal_fails + - name: invalid_numbers inputs: number: - "0x1000" + - "" + - "string" + - this is not a number + - "000000000011" + - "-1" assertions: exit_code: 42 stderr: @@ -38,9 +42,9 @@ validate_flag_filter: - name: invalid_FlagFilter_fails inputs: flags: - - include_if_all: "" - exclude_if_any: this is not a number - include_if_any: "000000000011" - exclude_if_all: "4095" + - include_if_all: "3" + exclude_if_any: "0xF04" + include_if_any: "03" + exclude_if_all: "" # empty string should trigger a fail assertions: should_fail: true diff --git a/data_structures/test/read_group.yaml b/data_structures/test/read_group.yaml index a4367d8af..a5435be74 100644 --- a/data_structures/test/read_group.yaml +++ b/data_structures/test/read_group.yaml @@ -1,11 +1,91 @@ -read_group_to_string: - - name: bad_id +validate_read_group: + - name: valid_read_groups inputs: read_group: - - ID: id, + - ID: id + SM: sample + - ID: R2 + SM: sampleA + LB: spaces are allowed in LB + BC: barcode with a space + PU: platform_unit + PL: ILLUMINA + CN: center_name + DT: date + DS: description + PI: 1 + PG: program_group + PM: platform_model + FO: ACMG + KS: key_sequence + - name: id_with_spaces + inputs: + read_group: + - ID: ids should not have spaces # this is a problem SM: sample_a LB: library + BC: barcode + PU: platform_unit + PL: ILLUMINA + CN: center_name + DT: date + DS: description + PI: 1 + PG: program_group + PM: platform_model + FO: ACMG + KS: key_sequence + assertions: + exit_code: 1 + stderr: + - ID must not contain spaces + - name: sample_with_spaces + inputs: + read_group: + - ID: R123 + SM: samples should not have spaces + LB: library can have spaces though + BC: barcode + PU: platform_unit + PL: ILLUMINA + CN: center_name + DT: date + DS: description + PI: 1 + PG: program_group + PM: platform_model + FO: ACMG + KS: key_sequence + assertions: + exit_code: 1 + stderr: + - SM must not contain spaces + - name: spaces_allowed + inputs: + read_group: + - ID: technically permissable but gross + SM: same here! + restrictive: + - false + - name: missing_sample + inputs: + read_group: + - ID: R123 + LB: library assertions: exit_code: 1 - stdout: - - ID (.*) must not match pattern + stderr: + - SM is required + - name: missing_sample_allowed + inputs: + read_group: + - ID: R1 + LB: lib + required_fields: + - [] + +get_read_groups: + - name: works + inputs: + bam: + - test.bam From 64476ad2a7ed9412dd26ae52e1bde13ae8ba8bc5 Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Sun, 28 Dec 2025 09:20:28 -0500 Subject: [PATCH 16/37] chore: log TODO --- data_structures/test/read_group.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/data_structures/test/read_group.yaml b/data_structures/test/read_group.yaml index a5435be74..d255bb66b 100644 --- a/data_structures/test/read_group.yaml +++ b/data_structures/test/read_group.yaml @@ -89,3 +89,5 @@ get_read_groups: inputs: bam: - test.bam + +# TODO: test read_group_to_string From c76f2016c009db0999dd8b835c4471c635782a12 Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Sun, 28 Dec 2025 12:10:33 -0500 Subject: [PATCH 17/37] more tests --- tools/test/arriba.yaml | 42 +++++++++++++++++++++++++++++++++++++++ tools/test/bwa.yaml | 35 ++++++++++++++++++++++++++++++++ tools/test/deeptools.yaml | 7 +++++++ 3 files changed, 84 insertions(+) create mode 100644 tools/test/arriba.yaml create mode 100644 tools/test/bwa.yaml create mode 100644 tools/test/deeptools.yaml diff --git a/tools/test/arriba.yaml b/tools/test/arriba.yaml new file mode 100644 index 000000000..b274dd692 --- /dev/null +++ b/tools/test/arriba.yaml @@ -0,0 +1,42 @@ +arriba: + - name: works + inputs: + bam: + - Aligned.sortedByCoord.chr9_chr22.bam + gtf: + - gencode.v31.chr9_chr22.gtf.gz + reference_fasta_gz: + - GRCh38.chr9_chr22.fa.gz + disable_filters: + - [ blacklist ] + prefix: + - fusions +arriba_tsv_to_vcf: + - name: works + inputs: + fusions: + - fusions.BCR_ABL1.tsv + reference_fasta: + - GRCh38.chr9_chr22.fa.gz + prefix: + - fusions +arriba_extract_fusion_supporting_alignments: + - name: works + inputs: + fusions: + - fusions.BCR_ABL1.tsv + bam: + - Aligned.sortedByCoord.chr9_chr22.bam + bam_index: + - Aligned.sortedByCoord.chr9_chr22.bam.bai + prefix: + - fusions +arriba_annotate_exon_numbers: + - name: works + inputs: + fusions: + - fusions.BCR_ABL1.tsv + gtf: + - gencode.v31.chr9_chr22.gtf.gz + prefix: + - fusions \ No newline at end of file diff --git a/tools/test/bwa.yaml b/tools/test/bwa.yaml new file mode 100644 index 000000000..55004379e --- /dev/null +++ b/tools/test/bwa.yaml @@ -0,0 +1,35 @@ +bwa_aln: + - name: works + inputs: + fastq: + - test_R1.fq.gz + read_group: + - "@RG\\tID:test\\tSM:test" + bwa_db_tar_gz: + - GRCh38.chrY_chrM.bwa_db.tar.gz +bwa_aln_pe: + - name: works + inputs: + read_one_fastq_gz: + - test_R1.fq.gz + read_two_fastq_gz: + - test_R2.fq.gz + read_group: + - "@RG\\tID:test\\tSM:test" + bwa_db_tar_gz: + - GRCh38.chrY_chrM.bwa_db.tar.gz +bwa_mem: + - name: works + inputs: + read_one_fastq_gz: + - test_R1.fq.gz + read_group: + - "@RG\\tID:test\\tSM:test" + bwa_db_tar_gz: + - GRCh38.chrY_chrM.bwa_db.tar.gz +build_bwa_db: + - name: works + tags: [ reference, slow ] + inputs: + reference_fasta: + - GRCh38.chrY_chrM.fa \ No newline at end of file diff --git a/tools/test/deeptools.yaml b/tools/test/deeptools.yaml new file mode 100644 index 000000000..5b71a1a26 --- /dev/null +++ b/tools/test/deeptools.yaml @@ -0,0 +1,7 @@ +bam_coverage: + - name: works + inputs: + bam: + - test.bwa_aln_pe.chrY_chrM.bam + bam_index: + - test.bwa_aln_pe.chrY_chrM.bam.bai \ No newline at end of file From 2fb9d234124ff6cbd3536a83defeede9a6dccc29 Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Sun, 28 Dec 2025 12:11:51 -0500 Subject: [PATCH 18/37] WIP: remove broken CI elements --- .github/workflows/build-and-test.yaml | 3 -- .github/workflows/pytest.yaml | 56 --------------------------- 2 files changed, 59 deletions(-) delete mode 100644 .github/workflows/pytest.yaml diff --git a/.github/workflows/build-and-test.yaml b/.github/workflows/build-and-test.yaml index 6d042f359..04c5a79bc 100644 --- a/.github/workflows/build-and-test.yaml +++ b/.github/workflows/build-and-test.yaml @@ -7,6 +7,3 @@ jobs: call-build: uses: ./.github/workflows/docker-build.yaml secrets: inherit - call-test: - uses: ./.github/workflows/pytest.yaml - needs: call-build diff --git a/.github/workflows/pytest.yaml b/.github/workflows/pytest.yaml deleted file mode 100644 index 64c0b310b..000000000 --- a/.github/workflows/pytest.yaml +++ /dev/null @@ -1,56 +0,0 @@ -name: pytest-workflow Check - -on: - workflow_call: - -jobs: - list-tags: - runs-on: ubuntu-latest - outputs: - tags: ${{ steps.set-tags.outputs.tags }} - steps: - - name: checkout - uses: actions/checkout@v4 - - name: set tags - id: set-tags - # remove the "reference" tag as it's redundant with other tags - run: echo "tags=$(find tests -name '*.yaml' -exec yq --output-format yaml '.[].tags[] ' {} \; | sort | uniq | grep -vE 'reference|slow' | jq -ncR '[inputs]')" >> $GITHUB_OUTPUT - pytest_check: - needs: list-tags - runs-on: ubuntu-latest - strategy: - matrix: - tag: ${{ fromJson(needs.list-tags.outputs.tags) }} - runner: [sprocket, miniwdl] - fail-fast: false - steps: - - uses: actions/checkout@v4 - with: - lfs: true - - name: Update Rust - if: matrix.runner == 'sprocket' - run: rustup update stable && rustup default stable - - name: Build Sprocket - if: matrix.runner == 'sprocket' - run: | - cargo install sprocket --locked - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: '3.10' - - name: Install miniwdl and pytest-workflow - run: | - python -m pip install --upgrade pip - pip install -r requirements-ci.txt - - name: filter tests - # don't run slow tests in CI - run: | - find tests -name '*.yaml' -exec yq --output-format yaml -i 'del(.[] | select(.tags[] | test("slow") ) )' {} \; - - name: Update containers - run: | - ./developer_scripts/update_container_tags.sh ${GITHUB_REF##*/} - - name: Run pytest-workflow - env: - RUNNER: ${{ matrix.runner }} - run: | - pytest --basetemp /home/runner/work/pytest --tag ${{ matrix.tag }} From d1e0d998d4b084a9de9002f147d3e6b219cbb08d Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Mon, 29 Dec 2025 07:23:23 -0500 Subject: [PATCH 19/37] add test depth (more bams and fastqs tested) --- tools/test/arriba.yaml | 13 +++++++------ tools/test/bwa.yaml | 17 +++++++++++++---- tools/test/deeptools.yaml | 15 +++++++++++---- 3 files changed, 31 insertions(+), 14 deletions(-) diff --git a/tools/test/arriba.yaml b/tools/test/arriba.yaml index b274dd692..e55e9e9d2 100644 --- a/tools/test/arriba.yaml +++ b/tools/test/arriba.yaml @@ -23,12 +23,13 @@ arriba_tsv_to_vcf: arriba_extract_fusion_supporting_alignments: - name: works inputs: - fusions: - - fusions.BCR_ABL1.tsv - bam: - - Aligned.sortedByCoord.chr9_chr22.bam - bam_index: - - Aligned.sortedByCoord.chr9_chr22.bam.bai + $files: + fusions: + - fusions.BCR_ABL1.tsv + bam: + - Aligned.sortedByCoord.chr9_chr22.bam + bam_index: + - Aligned.sortedByCoord.chr9_chr22.bam.bai prefix: - fusions arriba_annotate_exon_numbers: diff --git a/tools/test/bwa.yaml b/tools/test/bwa.yaml index 55004379e..6dc3b3d9e 100644 --- a/tools/test/bwa.yaml +++ b/tools/test/bwa.yaml @@ -3,6 +3,9 @@ bwa_aln: inputs: fastq: - test_R1.fq.gz + - test_R2.fq.gz + - random10k.r1.fq.gz + - random10k.r2.fq.gz read_group: - "@RG\\tID:test\\tSM:test" bwa_db_tar_gz: @@ -10,10 +13,13 @@ bwa_aln: bwa_aln_pe: - name: works inputs: - read_one_fastq_gz: - - test_R1.fq.gz - read_two_fastq_gz: - - test_R2.fq.gz + $files: + read_one_fastq_gz: + - test_R1.fq.gz + - random10k.r1.fq.gz + read_two_fastq_gz: + - test_R2.fq.gz + - random10k.r2.fq.gz read_group: - "@RG\\tID:test\\tSM:test" bwa_db_tar_gz: @@ -23,6 +29,9 @@ bwa_mem: inputs: read_one_fastq_gz: - test_R1.fq.gz + - test_R2.fq.gz + - random10k.r1.fq.gz + - random10k.r2.fq.gz read_group: - "@RG\\tID:test\\tSM:test" bwa_db_tar_gz: diff --git a/tools/test/deeptools.yaml b/tools/test/deeptools.yaml index 5b71a1a26..cba7fff3a 100644 --- a/tools/test/deeptools.yaml +++ b/tools/test/deeptools.yaml @@ -1,7 +1,14 @@ bam_coverage: - name: works inputs: - bam: - - test.bwa_aln_pe.chrY_chrM.bam - bam_index: - - test.bwa_aln_pe.chrY_chrM.bam.bai \ No newline at end of file + $files: + bam: + - test.bwa_aln_pe.chrY_chrM.bam + - Aligned.sortedByCoord.chr9_chr22.bam + - test_rnaseq_variant.bam + - test.bam + bam_index: + - test.bwa_aln_pe.chrY_chrM.bam.bai + - Aligned.sortedByCoord.chr9_chr22.bam.bai + - test_rnaseq_variant.bam.bai + - test.bam.bai \ No newline at end of file From 51ad295b4907811c5461f3a537e6fc9cd0aac643 Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Mon, 29 Dec 2025 07:26:17 -0500 Subject: [PATCH 20/37] Update read_group.yaml --- data_structures/test/read_group.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/data_structures/test/read_group.yaml b/data_structures/test/read_group.yaml index d255bb66b..d91c53972 100644 --- a/data_structures/test/read_group.yaml +++ b/data_structures/test/read_group.yaml @@ -88,6 +88,9 @@ get_read_groups: - name: works inputs: bam: + - test.bwa_aln_pe.chrY_chrM.bam + - Aligned.sortedByCoord.chr9_chr22.bam + - test_rnaseq_variant.bam - test.bam # TODO: test read_group_to_string From 2b7d0ddedf9e9f1dae67eaaf5b98f15a78de4168 Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Mon, 29 Dec 2025 07:40:07 -0500 Subject: [PATCH 21/37] nesting for test fixtures --- data_structures/test/read_group.yaml | 8 ++--- .../Aligned.sortedByCoord.chr9_chr22.bam | 0 .../Aligned.sortedByCoord.chr9_chr22.bam.bai | 0 .../test.PE.2_RGs.Aligned.out.sorted.bam | 0 test/fixtures/{ => bams}/test.bam | 0 test/fixtures/{ => bams}/test.bam.bai | 0 .../{ => bams}/test.bwa_aln_pe.chrY_chrM.bam | 0 .../test.bwa_aln_pe.chrY_chrM.bam.bai | 0 test/fixtures/{ => bams}/test.extra_RG.bam | 0 .../{ => bams}/test.unaccounted_read.bam | 0 test/fixtures/{ => bams}/test2.bam | 0 .../{ => bams}/test_rnaseq_variant.bam | 0 .../{ => bams}/test_rnaseq_variant.bam.bai | 0 test/fixtures/{ => fastqs}/random10k.r1.fq.gz | 0 test/fixtures/{ => fastqs}/random10k.r2.fq.gz | 0 test/fixtures/{ => fastqs}/test_R1.fq.gz | 0 test/fixtures/{ => fastqs}/test_R2.fq.gz | 0 .../{ => reference}/GRCh38.chr1_chr19.dict | 0 .../{ => reference}/GRCh38.chr1_chr19.fa | 0 .../{ => reference}/GRCh38.chr1_chr19.fa.fai | 0 .../{ => reference}/GRCh38.chr9_chr22.fa.gz | 0 .../GRCh38.chrY_chrM.bwa_db.tar.gz | 0 .../{ => reference}/GRCh38.chrY_chrM.dict | 0 .../{ => reference}/GRCh38.chrY_chrM.fa | 0 .../{ => reference}/GRCh38.chrY_chrM.fa.fai | 0 .../gencode.v31.chr9_chr22.gtf.gz | 0 .../gencode.v31.chrY_chrM.gene.bed | 0 .../gencode.v31.chrY_chrM.genelengths.txt | 0 .../gencode.v31.chrY_chrM.gtf.gz | 0 .../kraken2_C_elegans_library.tar.gz | 0 .../{ => reference}/kraken2_db.mini.tar.gz | 0 .../{ => reference}/kraken2_taxonomy.tar.gz | 0 .../{ => reference}/star_db.chrY_chrM.tar.gz | 0 test/fixtures/{ => reference}/test.fa | 0 ...mo_sapiens_assembly38.dbsnp138.top5000.vcf | 0 ...apiens_assembly38.dbsnp138.top5000.vcf.idx | Bin ...and_1000G_gold_standard.indels.hg38.vcf.gz | 0 ...1000G_gold_standard.indels.hg38.vcf.gz.tbi | Bin test/fixtures/{ => vcfs}/test1.vcf.gz | 0 test/fixtures/{ => vcfs}/test1.vcf.gz.tbi | Bin test/fixtures/{ => vcfs}/test2.vcf.gz | 0 test/fixtures/{ => vcfs}/test2.vcf.gz.tbi | Bin tools/test/arriba.yaml | 14 ++++---- tools/test/bwa.yaml | 32 +++++++++--------- tools/test/deeptools.yaml | 16 ++++----- tools/test/picard.yaml | 2 +- tools/test/samtools.yaml | 12 +++---- 47 files changed, 42 insertions(+), 42 deletions(-) rename test/fixtures/{ => bams}/Aligned.sortedByCoord.chr9_chr22.bam (100%) rename test/fixtures/{ => bams}/Aligned.sortedByCoord.chr9_chr22.bam.bai (100%) rename test/fixtures/{ => bams}/test.PE.2_RGs.Aligned.out.sorted.bam (100%) rename test/fixtures/{ => bams}/test.bam (100%) rename test/fixtures/{ => bams}/test.bam.bai (100%) rename test/fixtures/{ => bams}/test.bwa_aln_pe.chrY_chrM.bam (100%) rename test/fixtures/{ => bams}/test.bwa_aln_pe.chrY_chrM.bam.bai (100%) rename test/fixtures/{ => bams}/test.extra_RG.bam (100%) rename test/fixtures/{ => bams}/test.unaccounted_read.bam (100%) rename test/fixtures/{ => bams}/test2.bam (100%) rename test/fixtures/{ => bams}/test_rnaseq_variant.bam (100%) rename test/fixtures/{ => bams}/test_rnaseq_variant.bam.bai (100%) rename test/fixtures/{ => fastqs}/random10k.r1.fq.gz (100%) rename test/fixtures/{ => fastqs}/random10k.r2.fq.gz (100%) rename test/fixtures/{ => fastqs}/test_R1.fq.gz (100%) rename test/fixtures/{ => fastqs}/test_R2.fq.gz (100%) rename test/fixtures/{ => reference}/GRCh38.chr1_chr19.dict (100%) rename test/fixtures/{ => reference}/GRCh38.chr1_chr19.fa (100%) rename test/fixtures/{ => reference}/GRCh38.chr1_chr19.fa.fai (100%) rename test/fixtures/{ => reference}/GRCh38.chr9_chr22.fa.gz (100%) rename test/fixtures/{ => reference}/GRCh38.chrY_chrM.bwa_db.tar.gz (100%) rename test/fixtures/{ => reference}/GRCh38.chrY_chrM.dict (100%) rename test/fixtures/{ => reference}/GRCh38.chrY_chrM.fa (100%) rename test/fixtures/{ => reference}/GRCh38.chrY_chrM.fa.fai (100%) rename test/fixtures/{ => reference}/gencode.v31.chr9_chr22.gtf.gz (100%) rename test/fixtures/{ => reference}/gencode.v31.chrY_chrM.gene.bed (100%) rename test/fixtures/{ => reference}/gencode.v31.chrY_chrM.genelengths.txt (100%) rename test/fixtures/{ => reference}/gencode.v31.chrY_chrM.gtf.gz (100%) rename test/fixtures/{ => reference}/kraken2_C_elegans_library.tar.gz (100%) rename test/fixtures/{ => reference}/kraken2_db.mini.tar.gz (100%) rename test/fixtures/{ => reference}/kraken2_taxonomy.tar.gz (100%) rename test/fixtures/{ => reference}/star_db.chrY_chrM.tar.gz (100%) rename test/fixtures/{ => reference}/test.fa (100%) rename test/fixtures/{ => vcfs}/Homo_sapiens_assembly38.dbsnp138.top5000.vcf (100%) rename test/fixtures/{ => vcfs}/Homo_sapiens_assembly38.dbsnp138.top5000.vcf.idx (100%) rename test/fixtures/{ => vcfs}/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz (100%) rename test/fixtures/{ => vcfs}/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz.tbi (100%) rename test/fixtures/{ => vcfs}/test1.vcf.gz (100%) rename test/fixtures/{ => vcfs}/test1.vcf.gz.tbi (100%) rename test/fixtures/{ => vcfs}/test2.vcf.gz (100%) rename test/fixtures/{ => vcfs}/test2.vcf.gz.tbi (100%) diff --git a/data_structures/test/read_group.yaml b/data_structures/test/read_group.yaml index d91c53972..b5257e9e0 100644 --- a/data_structures/test/read_group.yaml +++ b/data_structures/test/read_group.yaml @@ -88,9 +88,9 @@ get_read_groups: - name: works inputs: bam: - - test.bwa_aln_pe.chrY_chrM.bam - - Aligned.sortedByCoord.chr9_chr22.bam - - test_rnaseq_variant.bam - - test.bam + - bams/test.bwa_aln_pe.chrY_chrM.bam + - bams/Aligned.sortedByCoord.chr9_chr22.bam + - bams/test_rnaseq_variant.bam + - bams/test.bam # TODO: test read_group_to_string diff --git a/test/fixtures/Aligned.sortedByCoord.chr9_chr22.bam b/test/fixtures/bams/Aligned.sortedByCoord.chr9_chr22.bam similarity index 100% rename from test/fixtures/Aligned.sortedByCoord.chr9_chr22.bam rename to test/fixtures/bams/Aligned.sortedByCoord.chr9_chr22.bam diff --git a/test/fixtures/Aligned.sortedByCoord.chr9_chr22.bam.bai b/test/fixtures/bams/Aligned.sortedByCoord.chr9_chr22.bam.bai similarity index 100% rename from test/fixtures/Aligned.sortedByCoord.chr9_chr22.bam.bai rename to test/fixtures/bams/Aligned.sortedByCoord.chr9_chr22.bam.bai diff --git a/test/fixtures/test.PE.2_RGs.Aligned.out.sorted.bam b/test/fixtures/bams/test.PE.2_RGs.Aligned.out.sorted.bam similarity index 100% rename from test/fixtures/test.PE.2_RGs.Aligned.out.sorted.bam rename to test/fixtures/bams/test.PE.2_RGs.Aligned.out.sorted.bam diff --git a/test/fixtures/test.bam b/test/fixtures/bams/test.bam similarity index 100% rename from test/fixtures/test.bam rename to test/fixtures/bams/test.bam diff --git a/test/fixtures/test.bam.bai b/test/fixtures/bams/test.bam.bai similarity index 100% rename from test/fixtures/test.bam.bai rename to test/fixtures/bams/test.bam.bai diff --git a/test/fixtures/test.bwa_aln_pe.chrY_chrM.bam b/test/fixtures/bams/test.bwa_aln_pe.chrY_chrM.bam similarity index 100% rename from test/fixtures/test.bwa_aln_pe.chrY_chrM.bam rename to test/fixtures/bams/test.bwa_aln_pe.chrY_chrM.bam diff --git a/test/fixtures/test.bwa_aln_pe.chrY_chrM.bam.bai b/test/fixtures/bams/test.bwa_aln_pe.chrY_chrM.bam.bai similarity index 100% rename from test/fixtures/test.bwa_aln_pe.chrY_chrM.bam.bai rename to test/fixtures/bams/test.bwa_aln_pe.chrY_chrM.bam.bai diff --git a/test/fixtures/test.extra_RG.bam b/test/fixtures/bams/test.extra_RG.bam similarity index 100% rename from test/fixtures/test.extra_RG.bam rename to test/fixtures/bams/test.extra_RG.bam diff --git a/test/fixtures/test.unaccounted_read.bam b/test/fixtures/bams/test.unaccounted_read.bam similarity index 100% rename from test/fixtures/test.unaccounted_read.bam rename to test/fixtures/bams/test.unaccounted_read.bam diff --git a/test/fixtures/test2.bam b/test/fixtures/bams/test2.bam similarity index 100% rename from test/fixtures/test2.bam rename to test/fixtures/bams/test2.bam diff --git a/test/fixtures/test_rnaseq_variant.bam b/test/fixtures/bams/test_rnaseq_variant.bam similarity index 100% rename from test/fixtures/test_rnaseq_variant.bam rename to test/fixtures/bams/test_rnaseq_variant.bam diff --git a/test/fixtures/test_rnaseq_variant.bam.bai b/test/fixtures/bams/test_rnaseq_variant.bam.bai similarity index 100% rename from test/fixtures/test_rnaseq_variant.bam.bai rename to test/fixtures/bams/test_rnaseq_variant.bam.bai diff --git a/test/fixtures/random10k.r1.fq.gz b/test/fixtures/fastqs/random10k.r1.fq.gz similarity index 100% rename from test/fixtures/random10k.r1.fq.gz rename to test/fixtures/fastqs/random10k.r1.fq.gz diff --git a/test/fixtures/random10k.r2.fq.gz b/test/fixtures/fastqs/random10k.r2.fq.gz similarity index 100% rename from test/fixtures/random10k.r2.fq.gz rename to test/fixtures/fastqs/random10k.r2.fq.gz diff --git a/test/fixtures/test_R1.fq.gz b/test/fixtures/fastqs/test_R1.fq.gz similarity index 100% rename from test/fixtures/test_R1.fq.gz rename to test/fixtures/fastqs/test_R1.fq.gz diff --git a/test/fixtures/test_R2.fq.gz b/test/fixtures/fastqs/test_R2.fq.gz similarity index 100% rename from test/fixtures/test_R2.fq.gz rename to test/fixtures/fastqs/test_R2.fq.gz diff --git a/test/fixtures/GRCh38.chr1_chr19.dict b/test/fixtures/reference/GRCh38.chr1_chr19.dict similarity index 100% rename from test/fixtures/GRCh38.chr1_chr19.dict rename to test/fixtures/reference/GRCh38.chr1_chr19.dict diff --git a/test/fixtures/GRCh38.chr1_chr19.fa b/test/fixtures/reference/GRCh38.chr1_chr19.fa similarity index 100% rename from test/fixtures/GRCh38.chr1_chr19.fa rename to test/fixtures/reference/GRCh38.chr1_chr19.fa diff --git a/test/fixtures/GRCh38.chr1_chr19.fa.fai b/test/fixtures/reference/GRCh38.chr1_chr19.fa.fai similarity index 100% rename from test/fixtures/GRCh38.chr1_chr19.fa.fai rename to test/fixtures/reference/GRCh38.chr1_chr19.fa.fai diff --git a/test/fixtures/GRCh38.chr9_chr22.fa.gz b/test/fixtures/reference/GRCh38.chr9_chr22.fa.gz similarity index 100% rename from test/fixtures/GRCh38.chr9_chr22.fa.gz rename to test/fixtures/reference/GRCh38.chr9_chr22.fa.gz diff --git a/test/fixtures/GRCh38.chrY_chrM.bwa_db.tar.gz b/test/fixtures/reference/GRCh38.chrY_chrM.bwa_db.tar.gz similarity index 100% rename from test/fixtures/GRCh38.chrY_chrM.bwa_db.tar.gz rename to test/fixtures/reference/GRCh38.chrY_chrM.bwa_db.tar.gz diff --git a/test/fixtures/GRCh38.chrY_chrM.dict b/test/fixtures/reference/GRCh38.chrY_chrM.dict similarity index 100% rename from test/fixtures/GRCh38.chrY_chrM.dict rename to test/fixtures/reference/GRCh38.chrY_chrM.dict diff --git a/test/fixtures/GRCh38.chrY_chrM.fa b/test/fixtures/reference/GRCh38.chrY_chrM.fa similarity index 100% rename from test/fixtures/GRCh38.chrY_chrM.fa rename to test/fixtures/reference/GRCh38.chrY_chrM.fa diff --git a/test/fixtures/GRCh38.chrY_chrM.fa.fai b/test/fixtures/reference/GRCh38.chrY_chrM.fa.fai similarity index 100% rename from test/fixtures/GRCh38.chrY_chrM.fa.fai rename to test/fixtures/reference/GRCh38.chrY_chrM.fa.fai diff --git a/test/fixtures/gencode.v31.chr9_chr22.gtf.gz b/test/fixtures/reference/gencode.v31.chr9_chr22.gtf.gz similarity index 100% rename from test/fixtures/gencode.v31.chr9_chr22.gtf.gz rename to test/fixtures/reference/gencode.v31.chr9_chr22.gtf.gz diff --git a/test/fixtures/gencode.v31.chrY_chrM.gene.bed b/test/fixtures/reference/gencode.v31.chrY_chrM.gene.bed similarity index 100% rename from test/fixtures/gencode.v31.chrY_chrM.gene.bed rename to test/fixtures/reference/gencode.v31.chrY_chrM.gene.bed diff --git a/test/fixtures/gencode.v31.chrY_chrM.genelengths.txt b/test/fixtures/reference/gencode.v31.chrY_chrM.genelengths.txt similarity index 100% rename from test/fixtures/gencode.v31.chrY_chrM.genelengths.txt rename to test/fixtures/reference/gencode.v31.chrY_chrM.genelengths.txt diff --git a/test/fixtures/gencode.v31.chrY_chrM.gtf.gz b/test/fixtures/reference/gencode.v31.chrY_chrM.gtf.gz similarity index 100% rename from test/fixtures/gencode.v31.chrY_chrM.gtf.gz rename to test/fixtures/reference/gencode.v31.chrY_chrM.gtf.gz diff --git a/test/fixtures/kraken2_C_elegans_library.tar.gz b/test/fixtures/reference/kraken2_C_elegans_library.tar.gz similarity index 100% rename from test/fixtures/kraken2_C_elegans_library.tar.gz rename to test/fixtures/reference/kraken2_C_elegans_library.tar.gz diff --git a/test/fixtures/kraken2_db.mini.tar.gz b/test/fixtures/reference/kraken2_db.mini.tar.gz similarity index 100% rename from test/fixtures/kraken2_db.mini.tar.gz rename to test/fixtures/reference/kraken2_db.mini.tar.gz diff --git a/test/fixtures/kraken2_taxonomy.tar.gz b/test/fixtures/reference/kraken2_taxonomy.tar.gz similarity index 100% rename from test/fixtures/kraken2_taxonomy.tar.gz rename to test/fixtures/reference/kraken2_taxonomy.tar.gz diff --git a/test/fixtures/star_db.chrY_chrM.tar.gz b/test/fixtures/reference/star_db.chrY_chrM.tar.gz similarity index 100% rename from test/fixtures/star_db.chrY_chrM.tar.gz rename to test/fixtures/reference/star_db.chrY_chrM.tar.gz diff --git a/test/fixtures/test.fa b/test/fixtures/reference/test.fa similarity index 100% rename from test/fixtures/test.fa rename to test/fixtures/reference/test.fa diff --git a/test/fixtures/Homo_sapiens_assembly38.dbsnp138.top5000.vcf b/test/fixtures/vcfs/Homo_sapiens_assembly38.dbsnp138.top5000.vcf similarity index 100% rename from test/fixtures/Homo_sapiens_assembly38.dbsnp138.top5000.vcf rename to test/fixtures/vcfs/Homo_sapiens_assembly38.dbsnp138.top5000.vcf diff --git a/test/fixtures/Homo_sapiens_assembly38.dbsnp138.top5000.vcf.idx b/test/fixtures/vcfs/Homo_sapiens_assembly38.dbsnp138.top5000.vcf.idx similarity index 100% rename from test/fixtures/Homo_sapiens_assembly38.dbsnp138.top5000.vcf.idx rename to test/fixtures/vcfs/Homo_sapiens_assembly38.dbsnp138.top5000.vcf.idx diff --git a/test/fixtures/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz b/test/fixtures/vcfs/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz similarity index 100% rename from test/fixtures/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz rename to test/fixtures/vcfs/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz diff --git a/test/fixtures/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz.tbi b/test/fixtures/vcfs/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz.tbi similarity index 100% rename from test/fixtures/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz.tbi rename to test/fixtures/vcfs/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz.tbi diff --git a/test/fixtures/test1.vcf.gz b/test/fixtures/vcfs/test1.vcf.gz similarity index 100% rename from test/fixtures/test1.vcf.gz rename to test/fixtures/vcfs/test1.vcf.gz diff --git a/test/fixtures/test1.vcf.gz.tbi b/test/fixtures/vcfs/test1.vcf.gz.tbi similarity index 100% rename from test/fixtures/test1.vcf.gz.tbi rename to test/fixtures/vcfs/test1.vcf.gz.tbi diff --git a/test/fixtures/test2.vcf.gz b/test/fixtures/vcfs/test2.vcf.gz similarity index 100% rename from test/fixtures/test2.vcf.gz rename to test/fixtures/vcfs/test2.vcf.gz diff --git a/test/fixtures/test2.vcf.gz.tbi b/test/fixtures/vcfs/test2.vcf.gz.tbi similarity index 100% rename from test/fixtures/test2.vcf.gz.tbi rename to test/fixtures/vcfs/test2.vcf.gz.tbi diff --git a/tools/test/arriba.yaml b/tools/test/arriba.yaml index e55e9e9d2..e96b4d8ce 100644 --- a/tools/test/arriba.yaml +++ b/tools/test/arriba.yaml @@ -2,11 +2,11 @@ arriba: - name: works inputs: bam: - - Aligned.sortedByCoord.chr9_chr22.bam + - bams/Aligned.sortedByCoord.chr9_chr22.bam gtf: - - gencode.v31.chr9_chr22.gtf.gz + - reference/gencode.v31.chr9_chr22.gtf.gz reference_fasta_gz: - - GRCh38.chr9_chr22.fa.gz + - reference/GRCh38.chr9_chr22.fa.gz disable_filters: - [ blacklist ] prefix: @@ -17,7 +17,7 @@ arriba_tsv_to_vcf: fusions: - fusions.BCR_ABL1.tsv reference_fasta: - - GRCh38.chr9_chr22.fa.gz + - reference/GRCh38.chr9_chr22.fa.gz prefix: - fusions arriba_extract_fusion_supporting_alignments: @@ -27,9 +27,9 @@ arriba_extract_fusion_supporting_alignments: fusions: - fusions.BCR_ABL1.tsv bam: - - Aligned.sortedByCoord.chr9_chr22.bam + - bams/Aligned.sortedByCoord.chr9_chr22.bam bam_index: - - Aligned.sortedByCoord.chr9_chr22.bam.bai + - bams/Aligned.sortedByCoord.chr9_chr22.bam.bai prefix: - fusions arriba_annotate_exon_numbers: @@ -38,6 +38,6 @@ arriba_annotate_exon_numbers: fusions: - fusions.BCR_ABL1.tsv gtf: - - gencode.v31.chr9_chr22.gtf.gz + - reference/gencode.v31.chr9_chr22.gtf.gz prefix: - fusions \ No newline at end of file diff --git a/tools/test/bwa.yaml b/tools/test/bwa.yaml index 6dc3b3d9e..bc05f3524 100644 --- a/tools/test/bwa.yaml +++ b/tools/test/bwa.yaml @@ -2,43 +2,43 @@ bwa_aln: - name: works inputs: fastq: - - test_R1.fq.gz - - test_R2.fq.gz - - random10k.r1.fq.gz - - random10k.r2.fq.gz + - fastqs/test_R1.fq.gz + - fastqs/test_R2.fq.gz + - fastqs/random10k.r1.fq.gz + - fastqs/random10k.r2.fq.gz read_group: - "@RG\\tID:test\\tSM:test" bwa_db_tar_gz: - - GRCh38.chrY_chrM.bwa_db.tar.gz + - reference/GRCh38.chrY_chrM.bwa_db.tar.gz bwa_aln_pe: - name: works inputs: $files: read_one_fastq_gz: - - test_R1.fq.gz - - random10k.r1.fq.gz + - fastqs/test_R1.fq.gz + - fastqs/random10k.r1.fq.gz read_two_fastq_gz: - - test_R2.fq.gz - - random10k.r2.fq.gz + - fastqs/test_R2.fq.gz + - fastqs/random10k.r2.fq.gz read_group: - "@RG\\tID:test\\tSM:test" bwa_db_tar_gz: - - GRCh38.chrY_chrM.bwa_db.tar.gz + - reference/GRCh38.chrY_chrM.bwa_db.tar.gz bwa_mem: - name: works inputs: read_one_fastq_gz: - - test_R1.fq.gz - - test_R2.fq.gz - - random10k.r1.fq.gz - - random10k.r2.fq.gz + - fastqs/test_R1.fq.gz + - fastqs/test_R2.fq.gz + - fastqs/random10k.r1.fq.gz + - fastqs/random10k.r2.fq.gz read_group: - "@RG\\tID:test\\tSM:test" bwa_db_tar_gz: - - GRCh38.chrY_chrM.bwa_db.tar.gz + - reference/GRCh38.chrY_chrM.bwa_db.tar.gz build_bwa_db: - name: works tags: [ reference, slow ] inputs: reference_fasta: - - GRCh38.chrY_chrM.fa \ No newline at end of file + - reference/GRCh38.chrY_chrM.fa \ No newline at end of file diff --git a/tools/test/deeptools.yaml b/tools/test/deeptools.yaml index cba7fff3a..e44caf285 100644 --- a/tools/test/deeptools.yaml +++ b/tools/test/deeptools.yaml @@ -3,12 +3,12 @@ bam_coverage: inputs: $files: bam: - - test.bwa_aln_pe.chrY_chrM.bam - - Aligned.sortedByCoord.chr9_chr22.bam - - test_rnaseq_variant.bam - - test.bam + - bams/test.bwa_aln_pe.chrY_chrM.bam + - bams/Aligned.sortedByCoord.chr9_chr22.bam + - bams/test_rnaseq_variant.bam + - bams/test.bam bam_index: - - test.bwa_aln_pe.chrY_chrM.bam.bai - - Aligned.sortedByCoord.chr9_chr22.bam.bai - - test_rnaseq_variant.bam.bai - - test.bam.bai \ No newline at end of file + - bams/test.bwa_aln_pe.chrY_chrM.bam.bai + - bams/Aligned.sortedByCoord.chr9_chr22.bam.bai + - bams/test_rnaseq_variant.bam.bai + - bams/test.bam.bai \ No newline at end of file diff --git a/tools/test/picard.yaml b/tools/test/picard.yaml index 7d031e5dd..1c9f80721 100644 --- a/tools/test/picard.yaml +++ b/tools/test/picard.yaml @@ -2,7 +2,7 @@ merge_sam_files: - name: Merge works inputs: bams: - - [test.bwa_aln_pe.chrY_chrM.bam, test.PE.2_RGs.Aligned.out.sorted.bam] + - [bams/test.bwa_aln_pe.chrY_chrM.bam, bams/test.PE.2_RGs.Aligned.out.sorted.bam] prefix: - test.merged assertions: diff --git a/tools/test/samtools.yaml b/tools/test/samtools.yaml index feca44f2d..52a5de1b1 100644 --- a/tools/test/samtools.yaml +++ b/tools/test/samtools.yaml @@ -2,9 +2,9 @@ bam_to_fastq: - name: kitchen_sink inputs: bam: - - Aligned.sortedByCoord.chr9_chr22.bam - - test_rnaseq_variant.bam - - test.bwa_aln_pe.chrY_chrM.bam + - bams/Aligned.sortedByCoord.chr9_chr22.bam + - bams/test_rnaseq_variant.bam + - bams/test.bwa_aln_pe.chrY_chrM.bam bitwise_filter: - include_if_all: "0x0" exclude_if_any: "0x900" @@ -34,6 +34,6 @@ bam_to_fastq: - true - false bam: - - Aligned.sortedByCoord.chr9_chr22.bam - - test_rnaseq_variant.bam - - test.bwa_aln_pe.chrY_chrM.bam + - bams/Aligned.sortedByCoord.chr9_chr22.bam + - bams/test_rnaseq_variant.bam + - bams/test.bwa_aln_pe.chrY_chrM.bam From b3b9c2702a1f3b3b470cc078ac931f3b599d0236 Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Mon, 29 Dec 2025 10:27:29 -0500 Subject: [PATCH 22/37] deprecate fastqc --- tools/fastqc.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/fastqc.wdl b/tools/fastqc.wdl index a79de80b8..cf6fe238c 100755 --- a/tools/fastqc.wdl +++ b/tools/fastqc.wdl @@ -5,6 +5,8 @@ version 1.1 task fastqc { meta { description: "Generates a FastQC quality control metrics report for the input BAM file" + warning: "**[DEPRECATED]** We prefer the analysis provided by `fastp` which computes similar metrics but is faster and more robust. Please see the `fastp` task in `fastp.wdl` instead of using FastQC!" + deprecated: true outputs: { raw_data: "A zip archive of raw FastQC data. Can be parsed by MultiQC.", results: "A gzipped tar archive of all FastQC output files", From 038909d57b539665a829f7c97756886483b31bde Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Mon, 29 Dec 2025 10:27:38 -0500 Subject: [PATCH 23/37] log TODO --- tools/test/arriba.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/test/arriba.yaml b/tools/test/arriba.yaml index e96b4d8ce..8f51f6456 100644 --- a/tools/test/arriba.yaml +++ b/tools/test/arriba.yaml @@ -1,3 +1,4 @@ +# TODO: test advanced options arriba: - name: works inputs: From 77271b361496d1cf2bc137e8910acd17ec902432 Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Mon, 29 Dec 2025 10:28:04 -0500 Subject: [PATCH 24/37] tests for fastp and fq --- tools/test/fastp.yaml | 11 +++++++++ tools/test/fq.yaml | 56 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+) create mode 100644 tools/test/fastp.yaml create mode 100644 tools/test/fq.yaml diff --git a/tools/test/fastp.yaml b/tools/test/fastp.yaml new file mode 100644 index 000000000..851d0ce33 --- /dev/null +++ b/tools/test/fastp.yaml @@ -0,0 +1,11 @@ +# TODO: test advanced options +fastp: + - name: works + inputs: + $files: + read_one_fastq: + - fastqs/test_R1.fq.gz + - fastqs/random10k.r1.fq.gz + read_two_fastq: + - fastqs/test_R2.fq.gz + - fastqs/random10k.r2.fq.gz \ No newline at end of file diff --git a/tools/test/fq.yaml b/tools/test/fq.yaml new file mode 100644 index 000000000..9f5fc6095 --- /dev/null +++ b/tools/test/fq.yaml @@ -0,0 +1,56 @@ +# TODO: add lint tests for malformed fastqs +fqlint: + - name: valid_fastqs + inputs: + $files: + read_one_fastq: + - fastqs/test_R1.fq.gz + - fastqs/random10k.r1.fq.gz + read_two_fastq: + - fastqs/test_R2.fq.gz + - fastqs/random10k.r2.fq.gz +subsample: + - name: works + inputs: + $files: + read_one_fastq: + - fastqs/test_R1.fq.gz + - fastqs/random10k.r1.fq.gz + read_two_fastq: + - fastqs/test_R2.fq.gz + - fastqs/random10k.r2.fq.gz + $sampling_controls: + record_count: + - 1000 + - 500 + - -1 # negative should disable + - 0 # so should zero + probability: + - 0.0 # 0 should disable + - 1.0 # 1 should also disable + - 0.5 + - 0.25 + - name: conflicting_args + inputs: + $files: + read_one_fastq: + - fastqs/random10k.r1.fq.gz + read_two_fastq: + - fastqs/random10k.r2.fq.gz + record_count: + - -1 + - 0 + probability: + - 0.0 + - 1.0 + assertions: + exit_code: 2 + - name: neither_count_nor_probability_specified + inputs: + $files: + read_one_fastq: + - fastqs/random10k.r1.fq.gz + read_two_fastq: + - fastqs/random10k.r2.fq.gz + assertions: + exit_code: 2 From 5ed086613d0dd5eae4687c61fb32a8fc6843b420 Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Mon, 29 Dec 2025 10:58:11 -0500 Subject: [PATCH 25/37] mv reference vcfs to right dir --- .../Homo_sapiens_assembly38.dbsnp138.top5000.vcf | 0 ...Homo_sapiens_assembly38.dbsnp138.top5000.vcf.idx | Bin ...Mills_and_1000G_gold_standard.indels.hg38.vcf.gz | 0 ...s_and_1000G_gold_standard.indels.hg38.vcf.gz.tbi | Bin 4 files changed, 0 insertions(+), 0 deletions(-) rename test/fixtures/{vcfs => reference}/Homo_sapiens_assembly38.dbsnp138.top5000.vcf (100%) rename test/fixtures/{vcfs => reference}/Homo_sapiens_assembly38.dbsnp138.top5000.vcf.idx (100%) rename test/fixtures/{vcfs => reference}/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz (100%) rename test/fixtures/{vcfs => reference}/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz.tbi (100%) diff --git a/test/fixtures/vcfs/Homo_sapiens_assembly38.dbsnp138.top5000.vcf b/test/fixtures/reference/Homo_sapiens_assembly38.dbsnp138.top5000.vcf similarity index 100% rename from test/fixtures/vcfs/Homo_sapiens_assembly38.dbsnp138.top5000.vcf rename to test/fixtures/reference/Homo_sapiens_assembly38.dbsnp138.top5000.vcf diff --git a/test/fixtures/vcfs/Homo_sapiens_assembly38.dbsnp138.top5000.vcf.idx b/test/fixtures/reference/Homo_sapiens_assembly38.dbsnp138.top5000.vcf.idx similarity index 100% rename from test/fixtures/vcfs/Homo_sapiens_assembly38.dbsnp138.top5000.vcf.idx rename to test/fixtures/reference/Homo_sapiens_assembly38.dbsnp138.top5000.vcf.idx diff --git a/test/fixtures/vcfs/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz b/test/fixtures/reference/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz similarity index 100% rename from test/fixtures/vcfs/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz rename to test/fixtures/reference/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz diff --git a/test/fixtures/vcfs/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz.tbi b/test/fixtures/reference/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz.tbi similarity index 100% rename from test/fixtures/vcfs/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz.tbi rename to test/fixtures/reference/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz.tbi From 8a186b64a95f38074dd51b518aefcee1e27b70bc Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Mon, 29 Dec 2025 11:34:13 -0500 Subject: [PATCH 26/37] Create gatk4.yaml --- tools/test/gatk4.yaml | 100 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 100 insertions(+) create mode 100644 tools/test/gatk4.yaml diff --git a/tools/test/gatk4.yaml b/tools/test/gatk4.yaml new file mode 100644 index 000000000..73bd4fe9d --- /dev/null +++ b/tools/test/gatk4.yaml @@ -0,0 +1,100 @@ +apply_bqsr: + - name: works + inputs: + $files: + bam: + - bams/test_rnaseq_variant.bam + bam_index: + - bams/test_rnaseq_variant.bam.bai + recalibration_report: + - test_rnaseq_variant.recal.txt +base_recalibrator: + - name: works + inputs: + $sample: + bam: + - bams/test_rnaseq_variant.bam + bam_index: + - bams/test_rnaseq_variant.bam.bai + $reference: + fasta: + - reference/GRCh38.chr1_chr19.fa + fasta_index: + - reference/GRCh38.chr1_chr19.fa.fai + dict: + - reference/GRCh38.chr1_chr19.dict + $dbsnp: + dbSNP_vcf: + - reference/Homo_sapiens_assembly38.dbsnp138.top5000.vcf + dbSNP_vcf_index: + - reference/Homo_sapiens_assembly38.dbsnp138.top5000.vcf.idx + $known_indels: + known_indels_sites_vcfs: + - [ reference/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz ] + known_indels_sites_indices: + - [ reference/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz.tbi ] +haplotype_caller: + - name: works + tags: [ slow ] + inputs: + $sample: + bam: + - bams/test_rnaseq_variant.bam + bam_index: + - bams/test_rnaseq_variant.bam.bai + $reference: + fasta: + - reference/GRCh38.chr1_chr19.fa + fasta_index: + - reference/GRCh38.chr1_chr19.fa.fai + dict: + - reference/GRCh38.chr1_chr19.dict + $dbsnp: + dbSNP_vcf: + - reference/Homo_sapiens_assembly38.dbsnp138.top5000.vcf + dbSNP_vcf_index: + - reference/Homo_sapiens_assembly38.dbsnp138.top5000.vcf.idx + interval_list: + - chr1_chr19.interval_list +split_n_cigar_reads: + - name: works + inputs: + $sample: + bam: + - bams/test_rnaseq_variant.bam + bam_index: + - bams/test_rnaseq_variant.bam.bai + $reference: + fasta: + - reference/GRCh38.chr1_chr19.fa + fasta_index: + - reference/GRCh38.chr1_chr19.fa.fai + dict: + - reference/GRCh38.chr1_chr19.dict + prefix: + - split +variant_filtration: + - name: works + inputs: + $sample: + vcf: + - vcfs/test1.vcf.gz + - vcfs/test2.vcf.gz + vcf_index: + - vcfs/test1.vcf.gz.tbi + - vcfs/test2.vcf.gz.tbi + $reference: + fasta: + - reference/GRCh38.chr1_chr19.fa + fasta_index: + - reference/GRCh38.chr1_chr19.fa.fai + dict: + - reference/GRCh38.chr1_chr19.dict +mark_duplicates_spark: + - name: works + inputs: + bam: + - bams/test.bwa_aln_pe.chrY_chrM.bam + - bams/Aligned.sortedByCoord.chr9_chr22.bam + - bams/test_rnaseq_variant.bam + - bams/test.bam \ No newline at end of file From 52da94cb055553ea3e6f0e821a682e6ddcba75c5 Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Mon, 29 Dec 2025 14:31:25 -0500 Subject: [PATCH 27/37] WIP --- .gitignore | 1 + tools/htseq.wdl | 15 +++------------ tools/test/gatk4.yaml | 1 + tools/test/htseq.yaml | 37 +++++++++++++++++++++++++++++++++++++ 4 files changed, 42 insertions(+), 12 deletions(-) create mode 100644 tools/test/htseq.yaml diff --git a/.gitignore b/.gitignore index c1ff16be3..2bab65b34 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +pytest/ # Ignore common bioinformatics formats used in these workflows. # But only if they are in the root of this repo /*.fastq.gz diff --git a/tools/htseq.wdl b/tools/htseq.wdl index 95d604e02..bcaca9ce0 100755 --- a/tools/htseq.wdl +++ b/tools/htseq.wdl @@ -34,15 +34,6 @@ task count { description: "GFF attribute to be used as feature ID", group: "Common", } - mode: { - description: "Mode to handle reads overlapping more than one feature. `union` is recommended for most use-cases.", - external_help: "https://htseq.readthedocs.io/en/latest/htseqcount.html#htseq-count-counting-reads-within-features", - choices: [ - "union", - "intersection-strict", - "intersection-nonempty", - ], - } include_custom_header: { description: "Include a custom header for the output file? If true, the first line of the output file will be `~{idattr}\t~{prefix}`.", warning: "This is not an official feature of HTSeq. This may break downstream tools that expect the typical headerless HTSeq output format.", @@ -80,7 +71,6 @@ task count { String prefix = basename(bam, ".bam") String feature_type = "exon" String idattr = "gene_name" - String mode = "union" Boolean include_custom_header = true Boolean pos_sorted = false Boolean nonunique = false @@ -93,6 +83,9 @@ task count { String outfile_name = prefix + ".feature-counts.txt" + # the docs recommend this for most use cases, so we hardcode + String mode = "union" + Float bam_size = size(bam, "GiB") Float gtf_size = size(gtf, "GiB") @@ -107,8 +100,6 @@ task count { if ~{include_custom_header}; then echo -e "~{idattr}\t~{prefix}" > "~{outfile_name}" - else - true > "~{outfile_name}" # ensure file is empty fi # 9223372036854776000 == max 64 bit Float diff --git a/tools/test/gatk4.yaml b/tools/test/gatk4.yaml index 73bd4fe9d..bfb256ae6 100644 --- a/tools/test/gatk4.yaml +++ b/tools/test/gatk4.yaml @@ -1,3 +1,4 @@ +# TODO: advanced options apply_bqsr: - name: works inputs: diff --git a/tools/test/htseq.yaml b/tools/test/htseq.yaml new file mode 100644 index 000000000..1a2356883 --- /dev/null +++ b/tools/test/htseq.yaml @@ -0,0 +1,37 @@ +count: + - name: kitchen_sink + tags: [ slow ] + inputs: + $files: + bam: + - bams/test.bwa_aln_pe.chrY_chrM.bam + gtf: + - reference/gencode.v31.chrY_chrM.gtf.gz + pos_sorted: + - false + strandedness: + - yes + - no + - reverse + include_custom_header: + - true + - false + nonunique: + - true + - false + secondary_alignments: + - true + - false + supplementary_alignments: + - true + - false + minaqual: + - 0 + - 10 +calc_tpm: + - name: works + inputs: + counts: + - test.bwa_aln_pe.chrY_chrM.feature-counts.txt + feature_lengths: + - reference/gencode.v31.chrY_chrM.genelengths.txt \ No newline at end of file From 8658e212da835e8437c332eb1a3e2f987152bec6 Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Sat, 3 Jan 2026 11:28:20 -0500 Subject: [PATCH 28/37] Update flag_filter.yaml --- data_structures/test/flag_filter.yaml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/data_structures/test/flag_filter.yaml b/data_structures/test/flag_filter.yaml index fb6a55927..cd64d0cca 100644 --- a/data_structures/test/flag_filter.yaml +++ b/data_structures/test/flag_filter.yaml @@ -6,6 +6,8 @@ validate_string_is_12bit_int: - "0x900" - "01" - "4095" + - "0" + - "072" assertions: stderr: - Input number \(.*\) is valid @@ -14,10 +16,11 @@ validate_string_is_12bit_int: number: - "0x1000" - "" - - "string" + - string - this is not a number - "000000000011" - "-1" + - "08" assertions: exit_code: 42 stderr: @@ -26,6 +29,7 @@ validate_string_is_12bit_int: inputs: number: - "4096" + - "9999" assertions: exit_code: 42 stderr: From 275865919980ee0b24f91b8b7239e5de66dc29df Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Sun, 4 Jan 2026 11:41:04 -0500 Subject: [PATCH 29/37] fix: SE works in addition to PE --- tools/fq.wdl | 2 +- tools/test/bwa.yaml | 16 +++++++++++----- tools/test/fastp.yaml | 6 +++++- tools/test/fq.yaml | 8 ++++++++ 4 files changed, 25 insertions(+), 7 deletions(-) diff --git a/tools/fq.wdl b/tools/fq.wdl index 55148a99d..e5eab4763 100755 --- a/tools/fq.wdl +++ b/tools/fq.wdl @@ -154,7 +154,7 @@ task subsample { ~{probability_arg} \ ~{record_count_arg} \ --r1-dst "~{r1_dst}" \ - ~{"--r2-dst '" + r2_dst + "'"} \ + ~{if defined(read_two_fastq) then "--r2-dst '" + r2_dst + "'" else ""} \ "~{read_one_fastq}" \ ~{"'" + read_two_fastq + "'"} >>> diff --git a/tools/test/bwa.yaml b/tools/test/bwa.yaml index bc05f3524..f5e1d7c15 100644 --- a/tools/test/bwa.yaml +++ b/tools/test/bwa.yaml @@ -27,11 +27,17 @@ bwa_aln_pe: bwa_mem: - name: works inputs: - read_one_fastq_gz: - - fastqs/test_R1.fq.gz - - fastqs/test_R2.fq.gz - - fastqs/random10k.r1.fq.gz - - fastqs/random10k.r2.fq.gz + $samples: + read_one_fastq_gz: + - fastqs/test_R1.fq.gz + - fastqs/test_R2.fq.gz + - fastqs/random10k.r1.fq.gz + - fastqs/random10k.r2.fq.gz + read_two_fastq_gz: + - fastqs/test_R2.fq.gz + - null + - fastqs/random10k.r2.fq.gz + - null read_group: - "@RG\\tID:test\\tSM:test" bwa_db_tar_gz: diff --git a/tools/test/fastp.yaml b/tools/test/fastp.yaml index 851d0ce33..272624876 100644 --- a/tools/test/fastp.yaml +++ b/tools/test/fastp.yaml @@ -6,6 +6,10 @@ fastp: read_one_fastq: - fastqs/test_R1.fq.gz - fastqs/random10k.r1.fq.gz + - fastqs/test_R2.fq.gz + - fastqs/random10k.r2.fq.gz read_two_fastq: - fastqs/test_R2.fq.gz - - fastqs/random10k.r2.fq.gz \ No newline at end of file + - fastqs/random10k.r2.fq.gz + - null + - null \ No newline at end of file diff --git a/tools/test/fq.yaml b/tools/test/fq.yaml index 9f5fc6095..c8c48084a 100644 --- a/tools/test/fq.yaml +++ b/tools/test/fq.yaml @@ -6,9 +6,13 @@ fqlint: read_one_fastq: - fastqs/test_R1.fq.gz - fastqs/random10k.r1.fq.gz + - fastqs/test_R2.fq.gz + - fastqs/random10k.r2.fq.gz read_two_fastq: - fastqs/test_R2.fq.gz - fastqs/random10k.r2.fq.gz + - null + - null subsample: - name: works inputs: @@ -16,9 +20,13 @@ subsample: read_one_fastq: - fastqs/test_R1.fq.gz - fastqs/random10k.r1.fq.gz + - fastqs/test_R2.fq.gz + - fastqs/random10k.r2.fq.gz read_two_fastq: - fastqs/test_R2.fq.gz - fastqs/random10k.r2.fq.gz + - null + - null $sampling_controls: record_count: - 1000 From 108983536cfe8e6ab678eb226516fd78b0388dd6 Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Tue, 27 Jan 2026 11:18:46 -0500 Subject: [PATCH 30/37] chore: don't specify defaults in two (disagreeing) places --- data_structures/read_group.wdl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/data_structures/read_group.wdl b/data_structures/read_group.wdl index 719fdb409..e0e75d715 100644 --- a/data_structures/read_group.wdl +++ b/data_structures/read_group.wdl @@ -83,7 +83,7 @@ workflow read_group_to_string { input { ReadGroup read_group - Array[String] required_fields = [] + Array[String] required_fields = ["SM"] Boolean format_as_sam_record = false Boolean restrictive = true } @@ -156,8 +156,8 @@ task validate_read_group { input { ReadGroup read_group - Array[String] required_fields = ["SM"] - Boolean restrictive = true + Array[String] required_fields + Boolean restrictive } # The SAM spec allows any printable ASCII character in header fields. @@ -374,7 +374,7 @@ task inner_read_group_to_string { input { ReadGroup read_group - Boolean format_as_sam_record = false + Boolean format_as_sam_record } String delimiter = if format_as_sam_record then "\\t" else " " From 5b690422c77bad542d0ab60b7ac7ca5e550e1bdb Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Wed, 28 Jan 2026 12:46:35 -0500 Subject: [PATCH 31/37] add a few of (experimental) output assertions --- data_structures/test/read_group.yaml | 29 +++++----- tools/test/fastp.yaml | 79 +++++++++++++++++++++++----- 2 files changed, 79 insertions(+), 29 deletions(-) diff --git a/data_structures/test/read_group.yaml b/data_structures/test/read_group.yaml index b5257e9e0..12ef8e5ad 100644 --- a/data_structures/test/read_group.yaml +++ b/data_structures/test/read_group.yaml @@ -1,11 +1,11 @@ -validate_read_group: +read_group_to_string: - name: valid_read_groups inputs: read_group: - - ID: id - SM: sample - - ID: R2 - SM: sampleA + - ID: R1 + SM: sampleFoo + - ID: R1 + SM: sampleFoo LB: spaces are allowed in LB BC: barcode with a space PU: platform_unit @@ -18,6 +18,11 @@ validate_read_group: PM: platform_model FO: ACMG KS: key_sequence + assertions: + outputs: + validated_read_group: + - Contains: R1 + - Contains: sampleFoo - name: id_with_spaces inputs: read_group: @@ -36,9 +41,7 @@ validate_read_group: FO: ACMG KS: key_sequence assertions: - exit_code: 1 - stderr: - - ID must not contain spaces + should_fail: true - name: sample_with_spaces inputs: read_group: @@ -57,9 +60,7 @@ validate_read_group: FO: ACMG KS: key_sequence assertions: - exit_code: 1 - stderr: - - SM must not contain spaces + should_fail: true - name: spaces_allowed inputs: read_group: @@ -73,9 +74,7 @@ validate_read_group: - ID: R123 LB: library assertions: - exit_code: 1 - stderr: - - SM is required + should_fail: true - name: missing_sample_allowed inputs: read_group: @@ -92,5 +91,3 @@ get_read_groups: - bams/Aligned.sortedByCoord.chr9_chr22.bam - bams/test_rnaseq_variant.bam - bams/test.bam - -# TODO: test read_group_to_string diff --git a/tools/test/fastp.yaml b/tools/test/fastp.yaml index 272624876..56429b143 100644 --- a/tools/test/fastp.yaml +++ b/tools/test/fastp.yaml @@ -1,15 +1,68 @@ # TODO: test advanced options fastp: - - name: works - inputs: - $files: - read_one_fastq: - - fastqs/test_R1.fq.gz - - fastqs/random10k.r1.fq.gz - - fastqs/test_R2.fq.gz - - fastqs/random10k.r2.fq.gz - read_two_fastq: - - fastqs/test_R2.fq.gz - - fastqs/random10k.r2.fq.gz - - null - - null \ No newline at end of file + - name: SE_trimming + inputs: + read_one_fastq: + - fastqs/test_R1.fq.gz + - fastqs/random10k.r1.fq.gz + - fastqs/test_R2.fq.gz + - fastqs/random10k.r2.fq.gz + assertions: + outputs: + single_end_reads_fastq_gz: + - Defined: true + read_one_fastq_gz: + - Defined: false + read_two_fastq_gz: + - Defined: false + - name: PE_trimming + inputs: + read_one_fastq: + - fastqs/test_R1.fq.gz + - fastqs/random10k.r1.fq.gz + read_two_fastq: + - fastqs/test_R2.fq.gz + - fastqs/random10k.r2.fq.gz + assertions: + outputs: + single_end_reads_fastq_gz: + - Defined: false + read_one_fastq_gz: + - Defined: true + read_two_fastq_gz: + - Defined: true + - name: SE_qc + inputs: + read_one_fastq: + - fastqs/test_R1.fq.gz + - fastqs/random10k.r1.fq.gz + - fastqs/test_R2.fq.gz + - fastqs/random10k.r2.fq.gz + output_fastq: + - false + assertions: + outputs: + single_end_reads_fastq_gz: + - Defined: false + read_one_fastq_gz: + - Defined: false + read_two_fastq_gz: + - Defined: false + - name: PE_qc + inputs: + read_one_fastq: + - fastqs/test_R1.fq.gz + - fastqs/random10k.r1.fq.gz + read_two_fastq: + - fastqs/test_R2.fq.gz + - fastqs/random10k.r2.fq.gz + output_fastq: + - false + assertions: + outputs: + single_end_reads_fastq_gz: + - Defined: false + read_one_fastq_gz: + - Defined: false + read_two_fastq_gz: + - Defined: false \ No newline at end of file From b1f9d8fc2e58ff87263466b74dfed3487e2aafcc Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Thu, 19 Mar 2026 14:27:24 -0400 Subject: [PATCH 32/37] sprocket format --- data_structures/flag_filter.wdl | 9 +- data_structures/read_group.wdl | 35 ++- tools/arriba.wdl | 75 +++++-- tools/bwa.wdl | 59 ++--- tools/deeptools.wdl | 3 +- tools/fastp.wdl | 79 ++++--- tools/fastqc.wdl | 1 - tools/fq.wdl | 26 ++- tools/gatk4.wdl | 106 +++++---- tools/htseq.wdl | 38 +++- tools/kraken2.wdl | 91 ++++---- tools/librarian.wdl | 9 +- tools/md5sum.wdl | 3 +- tools/mosdepth.wdl | 6 +- tools/ngsderive.wdl | 33 ++- tools/picard.wdl | 51 +++-- tools/qualimap.wdl | 23 +- tools/sambamba.wdl | 21 +- tools/samtools.wdl | 210 ++++++++++++------ tools/star.wdl | 64 +++--- tools/util.wdl | 34 +-- workflows/chipseq/chipseq-standard.wdl | 53 +++-- workflows/dnaseq/dnaseq-core.wdl | 47 ++-- workflows/dnaseq/dnaseq-standard-fastq.wdl | 15 +- workflows/dnaseq/dnaseq-standard.wdl | 13 +- workflows/general/alignment-post.wdl | 32 ++- workflows/general/bam-to-fastqs.wdl | 22 +- workflows/general/samtools-merge.wdl | 27 +-- workflows/methylation/methylation-cohort.wdl | 81 ++++--- .../methylation/methylation-preprocess.wdl | 6 +- .../methylation/methylation-standard.wdl | 8 +- workflows/qc/markdups-post.wdl | 6 +- workflows/qc/quality-check-standard.wdl | 163 +++++++++----- workflows/reference/bwa-db-build.wdl | 4 +- workflows/reference/gatk-reference.wdl | 20 +- workflows/reference/qc-reference.wdl | 25 ++- workflows/reference/star-db-build.wdl | 6 +- workflows/rnaseq/rnaseq-core.wdl | 36 ++- workflows/rnaseq/rnaseq-standard-fastq.wdl | 16 +- workflows/rnaseq/rnaseq-standard.wdl | 5 +- workflows/rnaseq/rnaseq-variant-calling.wdl | 12 +- 41 files changed, 934 insertions(+), 639 deletions(-) diff --git a/data_structures/flag_filter.wdl b/data_structures/flag_filter.wdl index 27d81a0be..263b7f456 100644 --- a/data_structures/flag_filter.wdl +++ b/data_structures/flag_filter.wdl @@ -58,7 +58,6 @@ ## In short, those are all flags corresponding to the quality of the read ## and them being `true` may indicate that the read is of low quality and ## should be excluded. - version 1.1 struct FlagFilter { @@ -127,15 +126,15 @@ workflow validate_flag_filter { } call validate_string_is_12bit_int as validate_include_if_any { input: - number = flags.include_if_any + number = flags.include_if_any, } call validate_string_is_12bit_int as validate_include_if_all { input: - number = flags.include_if_all + number = flags.include_if_all, } call validate_string_is_12bit_int as validate_exclude_if_any { input: - number = flags.exclude_if_any + number = flags.exclude_if_any, } call validate_string_is_12bit_int as validate_exclude_if_all { input: - number = flags.exclude_if_all + number = flags.exclude_if_all, } } diff --git a/data_structures/read_group.wdl b/data_structures/read_group.wdl index e0e75d715..df81982d6 100644 --- a/data_structures/read_group.wdl +++ b/data_structures/read_group.wdl @@ -40,7 +40,6 @@ ## } ## } ## ``` - version 1.1 #@ except: SnakeCase @@ -83,7 +82,9 @@ workflow read_group_to_string { input { ReadGroup read_group - Array[String] required_fields = ["SM"] + Array[String] required_fields = [ + "SM", + ] Boolean format_as_sam_record = false Boolean restrictive = true } @@ -99,8 +100,7 @@ workflow read_group_to_string { } output { - String validated_read_group - = inner_read_group_to_string.stringified_read_group + String validated_read_group = inner_read_group_to_string.stringified_read_group } } @@ -109,7 +109,7 @@ task get_read_groups { description: "Gets read group information from a BAM file and writes it out as JSON which is converted to a WDL struct." warning: "This task will uppercase any lowercase `PL` values it finds, as is required by the [SAM specification](https://samtools.github.io/hts-specs/SAMv1.pdf)." outputs: { - read_groups: "An array of `ReadGroup` structs containing read group information." + read_groups: "An array of `ReadGroup` structs containing read group information.", } } @@ -165,8 +165,18 @@ task validate_read_group { # We have the opinion that is too permissive for ID and SM. String restrictive_pattern = "\\ " # Disallow spaces Array[String] platforms = [ - "CAPILLARY", "DNBSEQ", "ELEMENT", "HELICOS", "ILLUMINA", "IONTORRENT", "LS454", - "ONT", "PACBIO", "SINGULAR", "SOLID", "ULTIMA", + "CAPILLARY", + "DNBSEQ", + "ELEMENT", + "HELICOS", + "ILLUMINA", + "IONTORRENT", + "LS454", + "ONT", + "PACBIO", + "SINGULAR", + "SOLID", + "ULTIMA", ] command <<< @@ -262,7 +272,10 @@ task validate_read_group { fi fi if [ "$(echo "~{sep(" ", required_fields)}" | grep -Ewc "KS")" -eq 1 ]; then - if [ -z "~{if defined(read_group.KS) then read_group.KS else ""}" ]; then + if [ -z "~{if defined(read_group.KS) + then read_group.KS + else "" + }" ]; then >&2 echo "KS is required" exit_code=1 fi @@ -360,7 +373,7 @@ task inner_read_group_to_string { description: "Converts a `ReadGroup` struct to a `String` **without any validation**." warning: "Please use the `read_group_to_string` workflow, which has validation of the `ReadGroup` contents." outputs: { - stringified_read_group: "Input `ReadGroup` as a string" + stringified_read_group: "Input `ReadGroup` as a string", } } @@ -377,7 +390,9 @@ task inner_read_group_to_string { Boolean format_as_sam_record } - String delimiter = if format_as_sam_record then "\\t" else " " + String delimiter = if format_as_sam_record + then "\\t" + else " " command <<< if ~{format_as_sam_record}; then diff --git a/tools/arriba.wdl b/tools/arriba.wdl index 8ea5c8e3c..84da082c0 100644 --- a/tools/arriba.wdl +++ b/tools/arriba.wdl @@ -1,5 +1,4 @@ ## [Homepage](https://arriba.readthedocs.io/en/latest/) - version 1.1 task arriba { @@ -138,14 +137,40 @@ task arriba { File? protein_domains File? wgs_svs Array[String] interesting_contigs = [ - "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", - "15", "16", "17", "18", "19", "20", "21", "22", "X", "Y", "AC_*", "NC_*", + "1", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "X", + "Y", + "AC_*", + "NC_*", + ] + Array[String] viral_contigs = [ + "AC_*", + "NC_*", ] - Array[String] viral_contigs = ["AC_*", "NC_*"] Array[String] disable_filters = [] #@ except: LineWidth - String feature_name - = "gene_name=gene_name|gene_id,gene_id=gene_id,transcript_id=transcript_id,feature_exon=exon,feature_CDS=CDS" + String feature_name = "gene_name=gene_name|gene_id,gene_id=gene_id,transcript_id=transcript_id,feature_exon=exon,feature_CDS=CDS" String prefix = basename(bam, ".bam") + ".fusions" String strandedness = "auto" Boolean mark_duplicates = true @@ -176,10 +201,8 @@ task arriba { } Int bam_size_gb = ceil(size(bam, "GiB")) - Int disk_size_gb = bam_size_gb - + ceil(size(gtf, "GiB")) - + ceil(size(reference_fasta_gz, "GiB")) - + modify_disk_size_gb + Int disk_size_gb = bam_size_gb + ceil(size(gtf, "GiB")) + ceil(size(reference_fasta_gz, + "GiB")) + modify_disk_size_gb Int memory_gb = bam_size_gb + modify_memory_gb command <<< @@ -198,18 +221,15 @@ task arriba { ~{"-d '" + wgs_svs + "'"} \ -D ~{max_genomic_breakpoint_distance} \ -s "~{strandedness}" \ - ~{( - if length(interesting_contigs) > 0 + ~{(if length(interesting_contigs) > 0 then "-i " + sep(",", quote(interesting_contigs)) else "" )} \ - ~{( - if length(viral_contigs) > 0 + ~{(if length(viral_contigs) > 0 then "-v " + sep(",", quote(viral_contigs)) else "" )} \ - ~{( - if length(disable_filters) > 0 + ~{(if length(disable_filters) > 0 then "-f " + sep(",", quote(disable_filters)) else "" )} \ @@ -232,9 +252,18 @@ task arriba { -l ~{max_itd_length} \ -z ~{min_itd_allele_fraction} \ -Z ~{min_itd_supporting_reads} \ - ~{if mark_duplicates then "" else "-u"} \ - ~{if report_additional_columns then "-X" else ""} \ - ~{if fill_gaps then "-I" else ""} + ~{if mark_duplicates + then "" + else "-u" + } \ + ~{if report_additional_columns + then "-X" + else "" + } \ + ~{if fill_gaps + then "-I" + else "" + } >>> output { @@ -255,7 +284,7 @@ task arriba_tsv_to_vcf { meta { description: "Convert Arriba TSV format fusions to VCF format." outputs: { - fusions_vcf: "Output file of fusions in VCF format" + fusions_vcf: "Output file of fusions in VCF format", } } @@ -274,9 +303,7 @@ task arriba_tsv_to_vcf { } Int input_size_gb = ceil(size(fusions, "GiB")) - Int disk_size_gb = ceil(input_size_gb) - + (ceil(size(reference_fasta, "GiB")) * 3) - + modify_disk_size_gb + Int disk_size_gb = ceil(input_size_gb) + (ceil(size(reference_fasta, "GiB")) * 3) + modify_disk_size_gb command <<< set -euo pipefail @@ -356,7 +383,7 @@ task arriba_annotate_exon_numbers { meta { description: "Annotate fusions with exon numbers." outputs: { - fusion_tsv: "TSV file with fusions annotated with exon numbers" + fusion_tsv: "TSV file with fusions annotated with exon numbers", } } diff --git a/tools/bwa.wdl b/tools/bwa.wdl index dbba3f2e7..3a7b3cb0d 100644 --- a/tools/bwa.wdl +++ b/tools/bwa.wdl @@ -1,12 +1,11 @@ ## [Homepage](https://github.com/lh3/bwa) - version 1.1 task bwa_aln { meta { description: "Maps Single-End FASTQ files to BAM format using bwa aln" outputs: { - bam: "Aligned BAM format file" + bam: "Aligned BAM format file", } } @@ -36,9 +35,7 @@ task bwa_aln { File fastq File bwa_db_tar_gz String read_group - String prefix = sub( - basename(fastq), - "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$", + String prefix = sub(basename(fastq), "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$", "" # Once replacing with capturing groups is supported, replace with group 3 ) Boolean use_all_cores = false @@ -50,8 +47,7 @@ task bwa_aln { Float input_fastq_size = size(fastq, "GiB") Float reference_size = size(bwa_db_tar_gz, "GiB") - Int disk_size_gb = ( - ceil((input_fastq_size + reference_size) * 2) + 10 + modify_disk_size_gb + Int disk_size_gb = (ceil((input_fastq_size + reference_size) * 2) + 10 + modify_disk_size_gb ) command <<< @@ -98,7 +94,7 @@ task bwa_aln_pe { meta { description: "Maps Paired-End FASTQ files to BAM format using bwa aln" outputs: { - bam: "Aligned BAM format file" + bam: "Aligned BAM format file", } } @@ -132,11 +128,8 @@ task bwa_aln_pe { File read_two_fastq_gz File bwa_db_tar_gz String read_group - String prefix = sub( - basename(read_one_fastq_gz), - "([_\\.][rR][12])?(\\.subsampled)?\\.(fastq|fq)(\\.gz)?$", - "" - ) + String prefix = sub(basename(read_one_fastq_gz), "([_\\.][rR][12])?(\\.subsampled)?\\.(fastq|fq)(\\.gz)?$", + "") Boolean use_all_cores = false Int ncpu = 4 Int modify_disk_size_gb = 0 @@ -144,12 +137,10 @@ task bwa_aln_pe { String output_bam = prefix + ".bam" - Float input_fastq_size = ( - size(read_one_fastq_gz, "GiB") + size(read_two_fastq_gz, "GiB") - ) + Float input_fastq_size = (size(read_one_fastq_gz, "GiB") + size(read_two_fastq_gz, "GiB" + )) Float reference_size = size(bwa_db_tar_gz, "GiB") - Int disk_size_gb = ( - ceil((input_fastq_size + reference_size) * 2) + 5 + modify_disk_size_gb + Int disk_size_gb = (ceil((input_fastq_size + reference_size) * 2) + 5 + modify_disk_size_gb ) command <<< @@ -202,7 +193,7 @@ task bwa_mem { meta { description: "Maps FASTQ files to BAM format using bwa mem" outputs: { - bam: "Aligned BAM format file" + bam: "Aligned BAM format file", } } @@ -230,11 +221,8 @@ task bwa_mem { File bwa_db_tar_gz String read_group File? read_two_fastq_gz - String prefix = sub( - basename(read_one_fastq_gz), - "([_\\.][rR][12])?(\\.subsampled)?\\.(fastq|fq)(\\.gz)?$", - "" - ) + String prefix = sub(basename(read_one_fastq_gz), "([_\\.][rR][12])?(\\.subsampled)?\\.(fastq|fq)(\\.gz)?$", + "") Boolean use_all_cores = false Int ncpu = 4 Int modify_disk_size_gb = 0 @@ -242,11 +230,10 @@ task bwa_mem { String output_bam = prefix + ".bam" - Float input_fastq_size = size(read_one_fastq_gz, "GiB") - + size(read_two_fastq_gz, "GiB") + Float input_fastq_size = size(read_one_fastq_gz, "GiB") + size(read_two_fastq_gz, "GiB" + ) Float reference_size = size(bwa_db_tar_gz, "GiB") - Int disk_size_gb = ( - ceil((input_fastq_size + reference_size) * 2) + 10 + modify_disk_size_gb + Int disk_size_gb = (ceil((input_fastq_size + reference_size) * 2) + 10 + modify_disk_size_gb ) command <<< @@ -271,9 +258,10 @@ task bwa_mem { -R "~{read_group}" \ bwa_db/"$PREFIX" \ "~{basename(read_one_fastq_gz)}" \ - ~{( - if defined(read_two_fastq_gz) - then "'" + basename(select_first([read_two_fastq_gz])) + "'" + ~{(if defined(read_two_fastq_gz) + then "'" + basename(select_first([ + read_two_fastq_gz, + ])) + "'" else "" )} \ | samtools view --no-PG --threads "$samtools_cores" -hb - \ @@ -281,9 +269,10 @@ task bwa_mem { rm -r bwa_db rm "~{basename(read_one_fastq_gz)}" - ~{( - if defined(read_two_fastq_gz) - then "rm '" + basename(select_first([read_two_fastq_gz])) + "'" + ~{(if defined(read_two_fastq_gz) + then "rm '" + basename(select_first([ + read_two_fastq_gz, + ])) + "'" else "" )} >>> @@ -305,7 +294,7 @@ task build_bwa_db { meta { description: "Creates a BWA index and returns it as a compressed tar archive" outputs: { - bwa_db_tar_gz: "Tarballed bwa reference files" + bwa_db_tar_gz: "Tarballed bwa reference files", } } diff --git a/tools/deeptools.wdl b/tools/deeptools.wdl index 91b5e1b06..d30738abc 100755 --- a/tools/deeptools.wdl +++ b/tools/deeptools.wdl @@ -1,12 +1,11 @@ ## [Homepage](https://deeptools.readthedocs.io/en/develop/index.html) - version 1.1 task bam_coverage { meta { description: "Generates a BigWig coverage track using bamCoverage from DeepTools" outputs: { - bigwig: "BigWig format coverage file" + bigwig: "BigWig format coverage file", } } diff --git a/tools/fastp.wdl b/tools/fastp.wdl index 42b78e34e..64d1ea545 100644 --- a/tools/fastp.wdl +++ b/tools/fastp.wdl @@ -95,9 +95,7 @@ task fastp { input { File read_one_fastq File? read_two_fastq - String prefix = sub( - basename(read_one_fastq), - "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$", + String prefix = sub(basename(read_one_fastq), "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$", "" # Once replacing with capturing groups is supported, replace with group 3 ) + ".trimmed" Boolean output_fastq = true @@ -114,7 +112,9 @@ task fastp { Boolean phred64 = false Boolean use_all_cores = false Int first_n_reads = 0 - Int duplicate_accuracy = if deduplicate then 3 else 1 + Int duplicate_accuracy = if deduplicate + then 3 + else 1 Int n_base_limit = 5 Int qualified_quality = 15 Int unqualified_percent = 40 @@ -148,9 +148,9 @@ task fastp { Float input_size = size(read_one_fastq, "GB") + size(read_two_fastq, "GB") Int disk_size_gb = ceil(input_size) * 2 + 10 + modify_disk_size_gb - command <<< + command <<< set -euo pipefail - + n_cores=~{ncpu} if ~{use_all_cores}; then n_cores=$(nproc) @@ -159,41 +159,71 @@ task fastp { fastp \ -i "~{read_one_fastq}" \ ~{"-I '" + read_two_fastq + "'"} \ - ~{( - if output_fastq - then "-o '" + ( - if defined(read_two_fastq) + ~{(if output_fastq + then "-o '" + (if defined(read_two_fastq) then "~{prefix}.R1.fastq.gz" else "~{prefix}.fastq.gz" ) + "'" else "" )} \ - ~{( - if (defined(read_two_fastq) && output_fastq) + ~{(if (defined(read_two_fastq) && output_fastq) then "-O '" + prefix + ".R2.fastq.gz'" else "" )} \ --reads_to_process ~{first_n_reads} \ - ~{if deduplicate then "--dedup" else ""} \ + ~{if deduplicate + then "--dedup" + else "" + } \ --dup_calc_accuracy ~{duplicate_accuracy} \ - ~{if disable_duplicate_eval then "--dont_eval_duplication" else ""} \ - ~{if phred64 then "--phred64" else ""} \ - ~{if disable_quality_filter then "--disable_quality_filtering" else ""} \ + ~{if disable_duplicate_eval + then "--dont_eval_duplication" + else "" + } \ + ~{if phred64 + then "--phred64" + else "" + } \ + ~{if disable_quality_filter + then "--disable_quality_filtering" + else "" + } \ -n ~{n_base_limit} \ -q ~{qualified_quality} \ -u ~{unqualified_percent} \ -e ~{average_quality} \ - ~{if disable_length_filter then "--disable_length_filtering" else ""} \ + ~{if disable_length_filter + then "--disable_length_filtering" + else "" + } \ -l ~{length_required} \ --length_limit ~{length_limit} \ - ~{if enable_complexity_filter then "-y" else ""} \ + ~{if enable_complexity_filter + then "-y" + else "" + } \ -Y ~{complexity_threshold} \ - ~{if enable_overrepresentation_eval then "-p" else ""} \ + ~{if enable_overrepresentation_eval + then "-p" + else "" + } \ -P ~{overrepresentation_sampling} \ - ~{if disable_adapter_trimming then "--disable_adapter_trimming" else ""} \ - ~{if enable_pe_adapter_trimming then "-2" else ""} \ - ~{if allow_gap_overlap_trimming then "--allow_gap_overlap_trimming" else ""} \ - ~{if enable_base_correction then "-c" else ""} \ + ~{if disable_adapter_trimming + then "--disable_adapter_trimming" + else "" + } \ + ~{if enable_pe_adapter_trimming + then "-2" + else "" + } \ + ~{if allow_gap_overlap_trimming + then "--allow_gap_overlap_trimming" + else "" + } \ + ~{if enable_base_correction + then "-c" + else "" + } \ --overlap_len_require ~{overlap_len_require} \ --overlap_diff_limit ~{overlap_diff_limit} \ --overlap_diff_percent_limit ~{overlap_diff_percent_limit} \ @@ -219,8 +249,7 @@ task fastp { runtime { cpu: ncpu - memory: ( - if disable_duplicate_eval + memory: (if disable_duplicate_eval then "4 GB" else dup_acc_to_mem[duplicate_accuracy] ) diff --git a/tools/fastqc.wdl b/tools/fastqc.wdl index cf6fe238c..06aee4034 100755 --- a/tools/fastqc.wdl +++ b/tools/fastqc.wdl @@ -1,5 +1,4 @@ ## [Homepage](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) - version 1.1 task fastqc { diff --git a/tools/fq.wdl b/tools/fq.wdl index e5eab4763..b353597f2 100755 --- a/tools/fq.wdl +++ b/tools/fq.wdl @@ -1,5 +1,4 @@ ## [Homepage](https://github.com/stjude-rust-labs/fq) - version 1.1 task fqlint { @@ -67,9 +66,7 @@ task fqlint { Float read1_size = size(read_one_fastq, "GiB") Float read2_size = size(read_two_fastq, "GiB") - Int memory_gb = ( - ceil((read1_size + read2_size) * 0.25) + 1 + modify_memory_gb - ) + Int memory_gb = (ceil((read1_size + read2_size) * 0.25) + 1 + modify_memory_gb) Int disk_size_gb = ceil((read1_size + read2_size) * 2) + modify_disk_size_gb @@ -78,7 +75,10 @@ task fqlint { ~{sep(" ", prefix("--disable-validator ", squote(disable_validator_codes)))} \ --single-read-validation-level "~{single_read_validation_level}" \ --paired-read-validation-level "~{paired_read_validation_level}" \ - --lint-mode ~{if panic then "panic" else "log"} \ + --lint-mode ~{if panic + then "panic" + else "log" + } \ "~{read_one_fastq}" \ ~{"'" + read_two_fastq + "'"} >>> @@ -123,9 +123,7 @@ task subsample { input { File read_one_fastq File? read_two_fastq - String prefix = sub( - basename(read_one_fastq), - "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$", + String prefix = sub(basename(read_one_fastq), "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$", "" # Once replacing with capturing groups is supported, replace with group 3 ) Float probability = 1.0 @@ -138,12 +136,13 @@ task subsample { Int disk_size_gb = ceil((read1_size + read2_size) * 2) + modify_disk_size_gb - String probability_arg = ( - if (probability < 1.0 && probability > 0) + String probability_arg = (if (probability < 1.0 && probability > 0) then "-p ~{probability}" else "" ) - String record_count_arg = if (record_count > 0) then "-n ~{record_count}" else "" + String record_count_arg = if (record_count > 0) + then "-n ~{record_count}" + else "" String r1_dst = prefix + ".R1.subsampled.fastq.gz" String r2_dst = prefix + ".R2.subsampled.fastq.gz" @@ -154,7 +153,10 @@ task subsample { ~{probability_arg} \ ~{record_count_arg} \ --r1-dst "~{r1_dst}" \ - ~{if defined(read_two_fastq) then "--r2-dst '" + r2_dst + "'" else ""} \ + ~{if defined(read_two_fastq) + then "--r2-dst '" + r2_dst + "'" + else "" + } \ "~{read_one_fastq}" \ ~{"'" + read_two_fastq + "'"} >>> diff --git a/tools/gatk4.wdl b/tools/gatk4.wdl index fe0bac91f..e4a709e18 100644 --- a/tools/gatk4.wdl +++ b/tools/gatk4.wdl @@ -1,5 +1,4 @@ ## [Homepage](https://software.broadinstitute.org/gatk) - version 1.1 task split_n_cigar_reads { @@ -13,7 +12,7 @@ task split_n_cigar_reads { } } - parameter_meta { + parameter_meta { bam: "Input BAM format file to with unsplit reads containing Ns in their CIGAR strings." bam_index: "BAM index file corresponding to the input BAM" fasta: "Reference genome in FASTA format. Must be uncompressed." @@ -37,23 +36,21 @@ task split_n_cigar_reads { Int ncpu = 8 } - Int disk_size_gb = ceil(size(bam, "GB") + 1) * 3 - + ceil(size(fasta, "GB")) - + modify_disk_size_gb + Int disk_size_gb = ceil(size(bam, "GB") + 1) * 3 + ceil(size(fasta, "GB")) + modify_disk_size_gb Int java_heap_size = ceil(memory_gb * 0.9) command <<< - set -euo pipefail - - gatk \ - --java-options "-Xms4000m -Xmx~{java_heap_size}g" \ - SplitNCigarReads \ - -R "~{fasta}" \ - -I "~{bam}" \ - -O "~{prefix}.bam" \ - -OBM true - # GATK is unreasonable and uses the plain ".bai" suffix. - mv "~{prefix}.bai" "~{prefix}.bam.bai" + set -euo pipefail + + gatk \ + --java-options "-Xms4000m -Xmx~{java_heap_size}g" \ + SplitNCigarReads \ + -R "~{fasta}" \ + -I "~{bam}" \ + -O "~{prefix}.bam" \ + -OBM true + # GATK is unreasonable and uses the plain ".bai" suffix. + mv "~{prefix}.bai" "~{prefix}.bam.bai" >>> output { @@ -76,11 +73,11 @@ task base_recalibrator { description: "Generates recalibration report for base quality score recalibration." external_help: "https://gatk.broadinstitute.org/hc/en-us/articles/360036897372-BaseRecalibratorSpark-BETA" outputs: { - recalibration_report: "Recalibration report file" + recalibration_report: "Recalibration report file", } } - parameter_meta { + parameter_meta { bam: "Input BAM format file on which to recabilbrate base quality scores" bam_index: "BAM index file corresponding to the input BAM" fasta: "Reference genome in FASTA format" @@ -114,23 +111,22 @@ task base_recalibrator { Int memory_gb = 25 Int modify_disk_size_gb = 0 Int ncpu = 4 - } + } - Int disk_size_gb = ceil(size(bam, "GB") + 1) * 3 - + ceil(size(fasta, "GB")) - + modify_disk_size_gb + Int disk_size_gb = ceil(size(bam, "GB") + 1) * 3 + ceil(size(fasta, "GB")) + modify_disk_size_gb Int java_heap_size = ceil(memory_gb * 0.9) #@ except: LineWidth command <<< # shellcheck disable=SC2102 gatk \ - --java-options "-XX:GCTimeLimit=50 -XX:GCHeapFreeLimit=10 -Xms4000m -Xmx~{java_heap_size}g" \ + --java-options "-XX:GCTimeLimit=50 -XX:GCHeapFreeLimit=10 -Xms4000m -Xmx~{ + java_heap_size + }g" \ BaseRecalibratorSpark \ -R "~{fasta}" \ -I "~{bam}" \ - ~{( - if use_original_quality_scores + ~{(if use_original_quality_scores then "--use-original-qualities" else "" )} \ @@ -163,7 +159,7 @@ task apply_bqsr { } } - parameter_meta { + parameter_meta { bam: "Input BAM format file on which to apply base quality score recalibration" bam_index: "BAM index file corresponding to the input BAM" recalibration_report: "Recalibration report file" @@ -194,11 +190,16 @@ task apply_bqsr { # shellcheck disable=SC2102 gatk \ - --java-options "-XX:GCTimeLimit=50 -XX:GCHeapFreeLimit=10 -Xms3000m -Xmx~{java_heap_size}g" \ + --java-options "-XX:GCTimeLimit=50 -XX:GCHeapFreeLimit=10 -Xms3000m -Xmx~{ + java_heap_size + }g" \ ApplyBQSRSpark \ --spark-master local[~{ncpu}] \ -I "~{bam}" \ - ~{if use_original_quality_scores then "--use-original-qualities" else "" } \ + ~{if use_original_quality_scores + then "--use-original-qualities" + else "" + } \ -O "~{prefix}.bqsr.bam" \ --bqsr-recal-file "~{recalibration_report}" >>> @@ -227,7 +228,7 @@ task haplotype_caller { } } - parameter_meta { + parameter_meta { bam: "Input BAM format file on which to call variants" bam_index: "BAM index file corresponding to the input BAM" interval_list: { @@ -269,10 +270,7 @@ task haplotype_caller { Int ncpu = 4 } - Int disk_size_gb = ceil(size(bam, "GB") * 2) - + 30 - + ceil(size(fasta, "GB")) - + modify_disk_size_gb + Int disk_size_gb = ceil(size(bam, "GB") * 2) + 30 + ceil(size(fasta, "GB")) + modify_disk_size_gb Int java_heap_size = ceil(memory_gb * 0.9) #@ except: LineWidth @@ -284,7 +282,10 @@ task haplotype_caller { -I "~{bam}" \ -L "~{interval_list}" \ -O "~{prefix}.vcf.gz" \ - ~{if use_soft_clipped_bases then "" else "--dont-use-soft-clipped-bases"} \ + ~{if use_soft_clipped_bases + then "" + else "--dont-use-soft-clipped-bases" + } \ --standard-min-confidence-threshold-for-calling ~{stand_call_conf} \ --dbsnp "~{dbSNP_vcf}" >>> @@ -313,7 +314,7 @@ task variant_filtration { } } - parameter_meta { + parameter_meta { vcf: "Input VCF format file to filter" vcf_index: "VCF index file corresponding to the input VCF" fasta: "Reference genome in FASTA format" @@ -340,8 +341,14 @@ task variant_filtration { File fasta File fasta_index File dict - Array[String] filter_names = ["FS", "QD"] - Array[String] filter_expressions = ["FS > 30.0", "QD < 2.0"] + Array[String] filter_names = [ + "FS", + "QD", + ] + Array[String] filter_expressions = [ + "FS > 30.0", + "QD < 2.0", + ] String prefix = basename(vcf, ".vcf.gz") Int cluster = 3 Int window = 35 @@ -377,7 +384,7 @@ task variant_filtration { } task mark_duplicates_spark { - meta { + meta { description: "Marks duplicate reads in the input BAM file using GATK's Spark implementation of Picard's MarkDuplicates." external_help: "https://gatk.broadinstitute.org/hc/en-us/articles/13832682540699-MarkDuplicatesSpark" outputs: { @@ -427,7 +434,7 @@ task mark_duplicates_spark { group: "Common", } optical_distance: { - description: "Maximum distance between read coordinates to consider them optical duplicates. If `0`, then optical duplicate marking is disabled.", + description: "Maximum distance between read coordinates to consider them optical duplicates. If `0`, then optical duplicate marking is disabled.", help: "Suggested settings of 100 for unpatterned versions of the Illumina platform (e.g. HiSeq) or 2500 for patterned flowcell models (e.g. NovaSeq). Calculation of distance depends on coordinate data embedded in the read names, typically produced by the Illumina sequencing machines.", warning: "Optical duplicate detection will not work on non-standard names without modifying `read_name_regex`.", } @@ -452,13 +459,10 @@ task mark_duplicates_spark { Float bam_size = size(bam, "GiB") Int memory_gb = min(ceil(bam_size + 15), 50) + modify_memory_gb - Int disk_size_gb = ( - ( - if create_bam - then ceil((bam_size * 2) + 10) - else ceil(bam_size + 10) - ) + modify_disk_size_gb - ) + Int disk_size_gb = ((if create_bam + then ceil((bam_size * 2) + 10) + else ceil(bam_size + 10) + ) + modify_disk_size_gb) Int java_heap_size = ceil(memory_gb * 0.9) @@ -470,12 +474,16 @@ task mark_duplicates_spark { --java-options "-Xmx~{java_heap_size}g" \ -I "~{bam}" \ -M "~{prefix}.metrics.txt" \ - -O "~{if create_bam then prefix + ".bam" else "/dev/null"}" \ + -O "~{if create_bam + then prefix + ".bam" + else "/dev/null" + }" \ --create-output-bam-index ~{create_bam} \ --read-validation-stringency "~{validation_stringency}" \ --duplicate-scoring-strategy "~{duplicate_scoring_strategy}" \ - --read-name-regex '~{ - if (optical_distance > 0) then read_name_regex else "null" + --read-name-regex '~{if (optical_distance > 0) + then read_name_regex + else "null" }' \ --duplicate-tagging-policy "~{tagging_policy}" \ --optical-duplicate-pixel-distance ~{optical_distance} \ diff --git a/tools/htseq.wdl b/tools/htseq.wdl index bcaca9ce0..cb8b16014 100755 --- a/tools/htseq.wdl +++ b/tools/htseq.wdl @@ -1,5 +1,4 @@ ## [Homepage](https://github.com/htseq/htseq) - version 1.1 task count { @@ -9,7 +8,7 @@ task count { feature_counts: { description: "A two column TSV file. First column is feature names and second column is counts.", help: "Presence of a header is determined by the `include_custom_header` parameter.", - } + }, } } @@ -89,10 +88,14 @@ task count { Float bam_size = size(bam, "GiB") Float gtf_size = size(gtf, "GiB") - Int memory_gb = (if pos_sorted then ceil(bam_size) + 4 else 4) + modify_memory_gb + Int memory_gb = (if pos_sorted + then ceil(bam_size) + 4 + else 4 + ) + modify_memory_gb - Int disk_size_gb = ceil( - (bam_size + gtf_size) * if pos_sorted then 4 else 1 + Int disk_size_gb = ceil((bam_size + gtf_size) * if pos_sorted + then 4 + else 1 ) + 10 + modify_disk_size_gb command <<< @@ -105,16 +108,24 @@ task count { # 9223372036854776000 == max 64 bit Float htseq-count -f bam \ --max-reads-in-buffer 9223372036854776000 \ - -r ~{if pos_sorted then "pos" else "name"} \ + -r ~{if pos_sorted + then "pos" + else "name" + } \ -s "~{strandedness}" \ -a ~{minaqual} \ -t "~{feature_type}" \ -m "~{mode}" \ -i "~{idattr}" \ - --nonunique ~{if nonunique then "all" else "none"} \ - --secondary-alignments ~{if secondary_alignments then "score" else "ignore"} \ - --supplementary-alignments ~{( - if supplementary_alignments + --nonunique ~{if nonunique + then "all" + else "none" + } \ + --secondary-alignments ~{if secondary_alignments + then "score" + else "ignore" + } \ + --supplementary-alignments ~{(if supplementary_alignments then "score" else "ignore" )} \ @@ -139,7 +150,7 @@ task calc_tpm { meta { description: "Given a feature counts file and a feature lengths file, calculate Transcripts Per Million (TPM)" outputs: { - tpm_file: "Transcripts Per Million (TPM) file. A two column headered TSV file." + tpm_file: "Transcripts Per Million (TPM) file. A two column headered TSV file.", } } @@ -171,7 +182,10 @@ task calc_tpm { "~{counts}" \ "~{feature_lengths}" \ "~{outfile_name}" \ - ~{if has_header then "--counts_has_header" else ""} + ~{if has_header + then "--counts_has_header" + else "" + } >>> output { diff --git a/tools/kraken2.wdl b/tools/kraken2.wdl index b501ffa48..5d2081918 100644 --- a/tools/kraken2.wdl +++ b/tools/kraken2.wdl @@ -1,5 +1,4 @@ ## [Homepage](https://github.com/DerrickWood/kraken2) - version 1.1 task download_taxonomy { @@ -9,7 +8,7 @@ task download_taxonomy { taxonomy: { description: "The NCBI taxonomy, which is needed by the `build_db` task.", warning: "This output is not human-readable or meant for anything other than building a Kraken2 database.", - } + }, } } @@ -27,7 +26,10 @@ task download_taxonomy { set -euo pipefail kraken2-build --download-taxonomy \ - ~{if protein then "--protein" else ""} \ + ~{if protein + then "--protein" + else "" + } \ --use-ftp \ --db "~{db_name}" 2>&1 \ | awk '/gunzip:/ { print; exit 42 } !/gunzip:/ { print }' 1>&2 @@ -58,7 +60,7 @@ task download_library { library: { description: "A library of reference genomes, which is needed by the `build_db` task.", warning: "This output is not human-readable or meant for anything other than building a Kraken2 database.", - } + }, } } @@ -97,21 +99,24 @@ task download_library { String db_name = "kraken2_" + library_name + "_library" #@ except: ExpressionSpacing - Int disk_size_gb = ( - ( - if library_name == "bacteria" then 300 - else if library_name == "nr" then 600 - else if library_name == "nt" then 2500 - else 25 - ) + modify_disk_size_gb - ) + Int disk_size_gb = ((if library_name == "bacteria" + then 300 + else if library_name == "nr" + then 600 + else if library_name == "nt" + then 2500 + else 25 + ) + modify_disk_size_gb) command <<< set -euo pipefail kraken2-build --download-library \ "~{library_name}" \ - ~{if protein then "--protein" else ""} \ + ~{if protein + then "--protein" + else "" + } \ --use-ftp \ --db "~{db_name}" 2>&1 \ | awk '/gunzip:/ { print; exit 42 } !/gunzip:/ { print }' 1>&2 @@ -140,7 +145,7 @@ task create_library_from_fastas { custom_library: { description: "Kraken2 compatible library, which is needed by the `build_db` task.", warning: "This output is not human-readable or meant for anything other than building a Kraken2 database.", - } + }, } } @@ -172,7 +177,10 @@ task create_library_from_fastas { while read -r fasta; do gunzip -c "$fasta" > tmp.fa kraken2-build \ - ~{if protein then "--protein" else ""} \ + ~{if protein + then "--protein" + else "" + } \ --add-to-library tmp.fa \ --db "~{db_name}" done < fastas.txt @@ -200,7 +208,7 @@ task build_db { meta { description: "Builds a custom Kraken2 database" outputs: { - built_db: "A complete Kraken2 database" + built_db: "A complete Kraken2 database", } } @@ -238,9 +246,15 @@ task build_db { String db_name = "kraken2_db" Boolean protein = false Boolean use_all_cores = false - Int kmer_len = if protein then 15 else 35 - Int minimizer_len = if protein then 12 else 31 - Int minimizer_spaces = if protein then 0 else 7 + Int kmer_len = if protein + then 15 + else 35 + Int minimizer_len = if protein + then 12 + else 31 + Int minimizer_spaces = if protein + then 0 + else 7 Int max_db_size_gb = -1 Int ncpu = 4 Int modify_memory_gb = 0 @@ -249,13 +263,10 @@ task build_db { Float tarballs_size = size(tarballs, "GiB") Int disk_size_gb = ceil(tarballs_size * 6) + 10 + modify_disk_size_gb - Int memory_gb = ( - ( - if (max_db_size_gb > 0) - then ceil(max_db_size_gb * 1.2) - else ceil(tarballs_size * 2) - ) + modify_memory_gb - ) + Int memory_gb = ((if (max_db_size_gb > 0) + then ceil(max_db_size_gb * 1.2) + else ceil(tarballs_size * 2) + ) + modify_memory_gb) String max_db_size_bytes = "~{max_db_size_gb}000000000" @@ -277,12 +288,14 @@ task build_db { >&2 echo "*** start DB build ***" kraken2-build --build \ - ~{if protein then "--protein" else ""} \ + ~{if protein + then "--protein" + else "" + } \ --kmer-len ~{kmer_len} \ --minimizer-len ~{minimizer_len} \ --minimizer-spaces ~{minimizer_spaces} \ - ~{( - if (max_db_size_gb > 0) + ~{(if (max_db_size_gb > 0) then "--max-db-size '" + max_db_size_bytes + "'" else "" )} \ @@ -359,9 +372,7 @@ task kraken { File read_two_fastq_gz #@ except: InputName File db - String prefix = sub( - basename(read_one_fastq_gz), - "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$", + String prefix = sub(basename(read_one_fastq_gz), "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$", "" # Once replacing with capturing groups is supported, replace with group 3 ) Boolean store_sequences = false @@ -376,11 +387,9 @@ task kraken { Float db_size = size(db, "GiB") Float read1_size = size(read_one_fastq_gz, "GiB") Float read2_size = size(read_two_fastq_gz, "GiB") - Int disk_size_gb_calculation = ( - ceil((db_size * 2) + read1_size + read2_size) + 10 + modify_disk_size_gb + Int disk_size_gb_calculation = (ceil((db_size * 2) + read1_size + read2_size) + 10 + modify_disk_size_gb ) - Int disk_size_gb = ( - if store_sequences + Int disk_size_gb = (if store_sequences then disk_size_gb_calculation + ceil(read1_size + read2_size) else disk_size_gb_calculation ) @@ -403,12 +412,18 @@ task kraken { kraken2 --db kraken2_db/ \ --paired \ - --output ~{if store_sequences then "'" + out_sequences + "'" else "-"} \ + --output ~{if store_sequences + then "'" + out_sequences + "'" + else "-" + } \ --threads "$n_cores" \ --minimum-base-quality ~{min_base_quality} \ --report "~{out_report}" \ --report-zero-counts \ - ~{if use_names then "--use-names" else ""} \ + ~{if use_names + then "--use-names" + else "" + } \ "~{read_one_fastq_gz}" \ "~{read_two_fastq_gz}" diff --git a/tools/librarian.wdl b/tools/librarian.wdl index faa136618..88b2fd078 100644 --- a/tools/librarian.wdl +++ b/tools/librarian.wdl @@ -1,5 +1,4 @@ ## # librarian - version 1.1 task librarian { @@ -24,18 +23,14 @@ task librarian { input { File read_one_fastq - String prefix = sub( - basename(read_one_fastq), - "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$", + String prefix = sub(basename(read_one_fastq), "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$", "" # Once replacing with capturing groups is supported, replace with group 3 ) + ".librarian" Int modify_disk_size_gb = 0 } Float read1_size = size(read_one_fastq, "GiB") - Int disk_size_gb = ( - ceil(read1_size) + 10 + modify_disk_size_gb - ) + Int disk_size_gb = (ceil(read1_size) + 10 + modify_disk_size_gb) command <<< set -euo pipefail diff --git a/tools/md5sum.wdl b/tools/md5sum.wdl index e967e55c3..1e79260a0 100755 --- a/tools/md5sum.wdl +++ b/tools/md5sum.wdl @@ -1,12 +1,11 @@ ## [Homepage](https://github.com/coreutils/coreutils) - version 1.1 task compute_checksum { meta { description: "Generates an MD5 checksum for the input file" outputs: { - md5sum: "STDOUT of the `md5sum` command that has been redirected to a file" + md5sum: "STDOUT of the `md5sum` command that has been redirected to a file", } } diff --git a/tools/mosdepth.wdl b/tools/mosdepth.wdl index 69b81d1ac..fdf4775c9 100644 --- a/tools/mosdepth.wdl +++ b/tools/mosdepth.wdl @@ -1,5 +1,4 @@ ## [Homepage](https://github.com/brentp/mosdepth) - version 1.1 task coverage { @@ -53,7 +52,10 @@ task coverage { -n \ ~{"-b '" + coverage_bed + "'"} \ -Q ~{min_mapping_quality} \ - ~{if (use_fast_mode) then "-x" else ""} \ + ~{if (use_fast_mode) + then "-x" + else "" + } \ "~{prefix}" \ "$CWD_BAM" diff --git a/tools/ngsderive.wdl b/tools/ngsderive.wdl index 3abaac343..72cb92ced 100644 --- a/tools/ngsderive.wdl +++ b/tools/ngsderive.wdl @@ -1,5 +1,4 @@ ## [Homepage](https://github.com/stjudecloud/ngsderive) - version 1.1 task strandedness { @@ -61,7 +60,10 @@ task strandedness { ln -s "~{gene_model}" "$CWD_GFF" ngsderive strandedness --verbose \ - ~{if split_by_rg then "--split-by-rg" else ""} \ + ~{if split_by_rg + then "--split-by-rg" + else "" + } \ -m ~{min_reads_per_gene} \ -n ~{num_genes} \ -q ~{min_mapq} \ @@ -398,21 +400,30 @@ task endedness { } Float bam_size = size(bam, "GiB") - Int memory_gb = ( - if calc_rpt - then ( - ceil(bam_size * 2.5) + 4 + modify_memory_gb - ) + Int memory_gb = (if calc_rpt + then (ceil(bam_size * 2.5) + 4 + modify_memory_gb) else 4 ) Int disk_size_gb = ceil(bam_size) + 10 + modify_disk_size_gb command <<< ngsderive endedness --verbose \ - ~{if lenient then "--lenient" else ""} \ - ~{if calc_rpt then "-r" else ""} \ - ~{if round_rpt then "--round-rpt" else ""} \ - ~{if split_by_rg then "--split-by-rg" else ""} \ + ~{if lenient + then "--lenient" + else "" + } \ + ~{if calc_rpt + then "-r" + else "" + } \ + ~{if round_rpt + then "--round-rpt" + else "" + } \ + ~{if split_by_rg + then "--split-by-rg" + else "" + } \ --paired-deviance ~{paired_deviance} \ -n ~{num_reads} \ "~{bam}" \ diff --git a/tools/picard.wdl b/tools/picard.wdl index 8f35947d1..3c8f7d4ce 100755 --- a/tools/picard.wdl +++ b/tools/picard.wdl @@ -1,5 +1,4 @@ ## [Homepage](https://broadinstitute.github.io/picard/) - version 1.1 task mark_duplicates { @@ -84,13 +83,10 @@ task mark_duplicates { Float bam_size = size(bam, "GiB") Int memory_gb = min(ceil(bam_size + 12), 50) + modify_memory_gb - Int disk_size_gb = ( - ( - if create_bam - then ceil((bam_size * 2) + 10) - else ceil(bam_size + 10) - ) + modify_disk_size_gb - ) + Int disk_size_gb = ((if create_bam + then ceil((bam_size * 2) + 10) + else ceil(bam_size + 10) + ) + modify_disk_size_gb) Int java_heap_size = ceil(memory_gb * 0.9) @@ -100,13 +96,17 @@ task mark_duplicates { picard -Xmx~{java_heap_size}g MarkDuplicates \ -I "~{bam}" \ --METRICS_FILE "~{prefix}.metrics.txt" \ - -O "~{if create_bam then prefix + ".bam" else "/dev/null"}" \ + -O "~{if create_bam + then prefix + ".bam" + else "/dev/null" + }" \ --CREATE_INDEX ~{create_bam} \ --CREATE_MD5_FILE ~{create_bam} \ --VALIDATION_STRINGENCY "~{validation_stringency}" \ --DUPLICATE_SCORING_STRATEGY "~{duplicate_scoring_strategy}" \ - --READ_NAME_REGEX '~{ - if (optical_distance > 0) then read_name_regex else "null" + --READ_NAME_REGEX '~{if (optical_distance > 0) + then read_name_regex + else "null" }' \ --TAGGING_POLICY "~{tagging_policy}" \ --CLEAR_DT ~{clear_dt} \ @@ -194,9 +194,10 @@ task validate_bam { Int modify_disk_size_gb = 0 } - String mode_arg = if (summary_mode) then "--MODE SUMMARY" else "" - String stringency_arg = ( - if (index_validation_stringency_less_exhaustive) + String mode_arg = if (summary_mode) + then "--MODE SUMMARY" + else "" + String stringency_arg = (if (index_validation_stringency_less_exhaustive) then "--INDEX_VALIDATION_STRINGENCY LESS_EXHAUSTIVE" else "" ) @@ -418,8 +419,10 @@ task merge_sam_files { File merged_bam_md5 = outfile_name + ".md5" } - runtime{ - cpu: if threading then 2 else 1 + runtime { + cpu: if threading + then 2 + else 1 memory: "~{memory_gb} GB" disks: "~{disk_size_gb} GB" container: "quay.io/biocontainers/picard:3.1.1--hdfd78af_0" @@ -504,7 +507,7 @@ task collect_wgs_metrics { wgs_metrics: { description: "Output report of `picard CollectWgsMetrics`", external_help: "https://broadinstitute.github.io/picard/picard-metric-definitions.html#CollectWgsMetrics.WgsMetrics", - } + }, } } @@ -851,8 +854,7 @@ task bam_to_fastq { picard -Xmx~{java_heap_size}g SamToFastq INPUT="~{bam}" \ FASTQ="~{prefix}.R1.fastq" \ - ~{( - if paired + ~{(if paired then "SECOND_END_FASTQ='" + prefix + ".R2.fastq'" else "" )} \ @@ -860,7 +862,10 @@ task bam_to_fastq { VALIDATION_STRINGENCY=SILENT gzip "~{prefix}.R1.fastq" \ - ~{if paired then "'" + prefix + ".R2.fastq'" else ""} + ~{if paired + then "'" + prefix + ".R2.fastq'" + else "" + } >>> output { @@ -868,7 +873,7 @@ task bam_to_fastq { File? read_two_fastq_gz = "~{prefix}.R2.fastq.gz" } - runtime{ + runtime { memory: "~{memory_gb} GB" disks: "~{disk_size_gb} GB" container: "quay.io/biocontainers/picard:3.1.1--hdfd78af_0" @@ -932,7 +937,7 @@ task scatter_interval_list { } } - parameter_meta { + parameter_meta { interval_list: "Input interval list to split" scatter_count: "Number of interval lists to create" subdivision_mode: { @@ -999,7 +1004,7 @@ task create_sequence_dictionary { description: "Creates a sequence dictionary for the input FASTA file using Picard" external_help: "https://gatk.broadinstitute.org/hc/en-us/articles/13832748622491-CreateSequenceDictionary-Picard-" outputs: { - dictionary: "Sequence dictionary produced by `picard CreateSequenceDictionary`." + dictionary: "Sequence dictionary produced by `picard CreateSequenceDictionary`.", } } diff --git a/tools/qualimap.wdl b/tools/qualimap.wdl index 67cd89d14..4aec8447d 100755 --- a/tools/qualimap.wdl +++ b/tools/qualimap.wdl @@ -1,5 +1,4 @@ ## [Homepage](http://qualimap.bioinfo.cipf.es/) - version 1.1 task rnaseq { @@ -41,8 +40,12 @@ task rnaseq { } String out_tar_gz = prefix + ".tar.gz" - String name_sorted_arg = if (name_sorted) then "-s" else "" - String paired_end_arg = if (paired_end) then "-pe" else "" + String name_sorted_arg = if (name_sorted) + then "-s" + else "" + String paired_end_arg = if (paired_end) + then "-pe" + else "" Int java_heap_size = ceil(memory_gb * 0.9) Float bam_size = size(bam, "GiB") @@ -50,13 +53,10 @@ task rnaseq { # Qualimap has an inefficient name sorting algorithm and will # use an excessive amount of storage. - Int disk_size_gb = ( - ( - if name_sorted - then ceil(bam_size + gtf_size + 15) - else ceil(((bam_size + gtf_size) * 12) + 10) - ) + modify_disk_size_gb - ) + Int disk_size_gb = ((if name_sorted + then ceil(bam_size + gtf_size + 15) + else ceil(((bam_size + gtf_size) * 12) + 10) + ) + modify_disk_size_gb) command <<< set -euo pipefail @@ -81,8 +81,7 @@ task rnaseq { output { File raw_summary = "~{prefix}/rnaseq_qc_results.txt" - File raw_coverage - = "~{prefix}/raw_data_qualimapReport/coverage_profile_along_genes_(total).txt" + File raw_coverage = "~{prefix}/raw_data_qualimapReport/coverage_profile_along_genes_(total).txt" File results = out_tar_gz } diff --git a/tools/sambamba.wdl b/tools/sambamba.wdl index 726b4714f..ac1fe349b 100644 --- a/tools/sambamba.wdl +++ b/tools/sambamba.wdl @@ -1,12 +1,11 @@ ## [Homepage](https://lomereiter.github.io/sambamba/) - version 1.1 task index { meta { description: "Creates a `.bai` BAM index for the input BAM" outputs: { - bam_index: "A `.bai` BAM index associated with the input BAM. Filename will be `basename(bam) + '.bai'`." + bam_index: "A `.bai` BAM index associated with the input BAM. Filename will be `basename(bam) + '.bai'`.", } } @@ -63,7 +62,7 @@ task merge { meta { description: "Merges multiple sorted BAMs into a single BAM" outputs: { - merged_bam: "The BAM resulting from merging all the input BAMs" + merged_bam: "The BAM resulting from merging all the input BAMs", } } @@ -123,7 +122,7 @@ task sort { meta { description: "Sorts the input BAM file" outputs: { - sorted_bam: "The input BAM after it has been sorted according to `sort_order`" + sorted_bam: "The input BAM after it has been sorted according to `sort_order`", } } @@ -157,7 +156,10 @@ task sort { sambamba sort \ --nthreads ~{ncpu} \ -o "~{outfile_name}" \ - ~{if queryname_sort then "-n" else ""} \ + ~{if queryname_sort + then "-n" + else "" + } \ "~{bam}" >>> @@ -209,7 +211,10 @@ task markdup { command <<< sambamba markdup \ --nthreads ~{ncpu} \ - ~{if remove_duplicates then "--remove-duplicates" else ""} \ + ~{if remove_duplicates + then "--remove-duplicates" + else "" + } \ "~{bam}" \ "~{prefix}.markdup.bam" \ > "~{prefix}.markdup_log.txt" @@ -234,7 +239,7 @@ task flagstat { meta { description: "Produces a report containing statistics about the alignments based on the bit flags set in the BAM" outputs: { - flagstat_report: "`sambamba flagstat` STDOUT redirected to a file" + flagstat_report: "`sambamba flagstat` STDOUT redirected to a file", } } @@ -275,7 +280,7 @@ task flagstat { >>> output { - File flagstat_report = outfile_name + File flagstat_report = outfile_name } runtime { diff --git a/tools/samtools.wdl b/tools/samtools.wdl index 21a8cfd46..b75864496 100755 --- a/tools/samtools.wdl +++ b/tools/samtools.wdl @@ -1,5 +1,4 @@ ## [Homepage](http://samtools.sourceforge.net/) - version 1.1 import "../data_structures/flag_filter.wdl" @@ -38,7 +37,7 @@ task split { meta { description: "Runs Samtools split on the input BAM file. This splits the BAM by read group into one or more output files." outputs: { - split_bams: "The split BAM files. The extensions will contain read group IDs, and will end in `.bam`." + split_bams: "The split BAM files. The extensions will contain read group IDs, and will end in `.bam`.", } } @@ -131,12 +130,12 @@ task split { rm first_read.sam done fi - + exit $EXITCODE >>> output { - Array[File] split_bams = glob("*.bam") + Array[File] split_bams = glob("*.bam") } runtime { @@ -152,7 +151,7 @@ task flagstat { meta { description: "Produces a `samtools flagstat` report containing statistics about the alignments based on the bit flags set in the BAM" outputs: { - flagstat_report: "`samtools flagstat` STDOUT redirected to a file" + flagstat_report: "`samtools flagstat` STDOUT redirected to a file", } } @@ -195,7 +194,7 @@ task flagstat { >>> output { - File flagstat_report = outfile_name + File flagstat_report = outfile_name } runtime { @@ -210,7 +209,7 @@ task index { meta { description: "Creates a `.bai` BAM index for the input BAM" outputs: { - bam_index: "A `.bai` BAM index associated with the input BAM. Filename will be `basename(bam) + '.bai'`." + bam_index: "A `.bai` BAM index associated with the input BAM. Filename will be `basename(bam) + '.bai'`.", } } @@ -397,7 +396,6 @@ task subsample { fi rm first_read.sam fi - >>> output { @@ -419,7 +417,7 @@ task filter { description: "Filters a BAM based on its bitwise flag value." help: "This task is a wrapper around `samtools view`. This task will fail if there are no reads in the output BAM. This can happen either because the input BAM was empty or because the supplied `bitwise_filter` was too strict. If you want to down-sample a BAM, use the `subsample` task instead." outputs: { - filtered_bam: "BAM file that has been filtered based on the input flags" + filtered_bam: "BAM file that has been filtered based on the input flags", } } @@ -505,7 +503,7 @@ task merge { meta { description: "Merges multiple sorted BAMs into a single BAM" outputs: { - merged_bam: "The BAM resulting from merging all the input BAMs" + merged_bam: "The BAM resulting from merging all the input BAMs", } } @@ -584,11 +582,26 @@ task merge { samtools merge \ --threads "$n_cores" \ ~{"-h \"" + new_header + "\""} \ - ~{if name_sorted then "-n" else ""} \ - ~{if (region != "") then "-R \"" + region + "\"" else ""} \ - ~{if attach_rg then "-r" else ""} \ - ~{if combine_rg then "-c" else ""} \ - ~{if combine_pg then "-p" else ""} \ + ~{if name_sorted + then "-n" + else "" + } \ + ~{if (region != "") + then "-R \"" + region + "\"" + else "" + } \ + ~{if attach_rg + then "-r" + else "" + } \ + ~{if combine_rg + then "-c" + else "" + } \ + ~{if combine_pg + then "-p" + else "" + } \ "~{prefix}.bam" \ "${bams[@]}" @@ -613,7 +626,7 @@ task addreplacerg { meta { description: "Adds or replaces read group tags" outputs: { - tagged_bam: "The transformed input BAM after read group modifications have been applied" + tagged_bam: "The transformed input BAM after read group modifications have been applied", } } @@ -677,8 +690,14 @@ task addreplacerg { --threads "$n_cores" \ ~{sep(" ", prefix("-r ", squote(read_group_line)))} \ ~{"-R \"" + read_group_id + "\""} \ - -m ~{if orphan_only then "orphan_only" else "overwrite_all"} \ - ~{if overwrite_header_record then "-w" else ""} \ + -m ~{if orphan_only + then "orphan_only" + else "overwrite_all" + } \ + ~{if overwrite_header_record + then "-w" + else "" + } \ -o "~{outfile_name}" \ "~{bam}" >>> @@ -700,7 +719,7 @@ task collate { meta { description: "Runs `samtools collate` on the input BAM file. Shuffles and groups reads together by their names." outputs: { - collated_bam: "A collated BAM (reads sharing a name next to each other, no other guarantee of sort order)" + collated_bam: "A collated BAM (reads sharing a name next to each other, no other guarantee of sort order)", } } @@ -751,7 +770,10 @@ task collate { samtools collate \ --threads "$n_cores" \ - ~{if fast_mode then "-f" else ""} \ + ~{if fast_mode + then "-f" + else "" + } \ -o "~{outfile_name}" \ "~{bam}" >>> @@ -854,13 +876,12 @@ task bam_to_fastq { } Float bam_size = size(bam, "GiB") - Int memory_gb = ( - if (collated || !paired_end) + Int memory_gb = (if (collated || !paired_end) then 4 else (ceil(bam_size * 0.4) + 4) ) + modify_memory_gb - Int disk_size_gb = ceil(bam_size * ( - if (retain_collated_bam && !collated && paired_end) + Int disk_size_gb = ceil(bam_size * (if (retain_collated_bam && !collated && paired_end + ) then 5 else 2 )) + 10 + modify_disk_size_gb @@ -878,13 +899,18 @@ task bam_to_fastq { mkfifo bam_pipe if ! ~{collated} && ~{paired_end}; then samtools collate \ - ~{if retain_collated_bam then "" else "-u"} \ + ~{if retain_collated_bam + then "" + else "-u" + } \ --threads "$n_cores" \ - ~{if fast_mode then "-f" else ""} \ + ~{if fast_mode + then "-f" + else "" + } \ -O \ "~{bam}" \ - | tee ~{( - if retain_collated_bam + | tee ~{(if retain_collated_bam then "\"" + prefix + ".collated.bam\"" else "" )} \ @@ -900,32 +926,26 @@ task bam_to_fastq { -F "~{bitwise_filter.exclude_if_any}" \ --rf "~{bitwise_filter.include_if_any}" \ -G "~{bitwise_filter.exclude_if_all}" \ - ~{( - if append_read_number + ~{(if append_read_number then "-N" else "-n" )} \ - -1 ~{( - if paired_end + -1 ~{(if paired_end then "\"" + prefix + ".R1.fastq.gz\"" else "\"" + prefix + ".fastq.gz\"" )} \ - -2 ~{( - if paired_end + -2 ~{(if paired_end then "\"" + prefix + ".R2.fastq.gz\"" else "\"" + prefix + ".fastq.gz\"" )} \ - ~{( - if paired_end - then ( - if output_singletons + ~{(if paired_end + then (if output_singletons then "-s \"" + prefix + ".singleton.fastq.gz\"" else "-s junk.singleton.fastq.gz" ) else "" )} \ - -0 ~{( - if paired_end + -0 ~{(if paired_end then "junk.unknown_bit_setting.fastq.gz" else "\"" + prefix + ".fastq.gz\"" )} \ @@ -971,7 +991,7 @@ task fixmate { description: "Runs `samtools fixmate` on the input BAM file. This fills in mate coordinates and insert size fields among other tags and fields." warning: "This task assumes a name-sorted or name-collated input BAM. If you have a position-sorted BAM, please use the `position_sorted_fixmate` task." outputs: { - fixmate_bam: "The BAM resulting from running `samtools fixmate` on the input BAM" + fixmate_bam: "The BAM resulting from running `samtools fixmate` on the input BAM", } } @@ -1042,11 +1062,26 @@ task fixmate { samtools fixmate \ --threads "$n_cores" \ - ~{if remove_unaligned_and_secondary then "-r" else ""} \ - ~{if disable_proper_pair_check then "-p" else ""} \ - ~{if add_cigar then "-c" else ""} \ - ~{if add_mate_score then "-m" else ""} \ - ~{if disable_flag_sanitization then "-z off" else ""} \ + ~{if remove_unaligned_and_secondary + then "-r" + else "" + } \ + ~{if disable_proper_pair_check + then "-p" + else "" + } \ + ~{if add_cigar + then "-c" + else "" + } \ + ~{if add_mate_score + then "-m" + else "" + } \ + ~{if disable_flag_sanitization + then "-z off" + else "" + } \ "~{bam}" \ "~{prefix}~{extension}" >>> @@ -1070,7 +1105,7 @@ task position_sorted_fixmate { warning: "If you already have a collated BAM, please use the `fixmate` task." help: "`fixmate` fills in mate coordinates and insert size fields among other tags and fields. This task collates the input BAM, runs `fixmate`, and then resorts the output into a position-sorted BAM." outputs: { - fixmate_bam: "BAM file with mate information added" + fixmate_bam: "BAM file with mate information added", } } @@ -1137,18 +1172,36 @@ task position_sorted_fixmate { samtools collate \ --threads "$n_cores" \ - ~{if fast_mode then "-f" else ""} \ + ~{if fast_mode + then "-f" + else "" + } \ -u \ -O \ "~{bam}" \ | samtools fixmate \ --threads "$n_cores" \ -u \ - ~{if remove_unaligned_and_secondary then "-r" else ""} \ - ~{if disable_proper_pair_check then "-p" else ""} \ - ~{if add_cigar then "-c" else ""} \ - ~{if add_mate_score then "-m" else ""} \ - ~{if disable_flag_sanitization then "-z off" else ""} \ + ~{if remove_unaligned_and_secondary + then "-r" + else "" + } \ + ~{if disable_proper_pair_check + then "-p" + else "" + } \ + ~{if add_cigar + then "-c" + else "" + } \ + ~{if add_mate_score + then "-m" + else "" + } \ + ~{if disable_flag_sanitization + then "-z off" + else "" + } \ - \ - \ | samtools sort \ @@ -1279,25 +1332,54 @@ task markdup { samtools markdup \ --threads "$n_cores" \ - -f "~{prefix + if json then ".json" else ".txt"}" \ + -f "~{prefix + if json + then ".json" + else ".txt" + }" \ --read-coords '~{read_coords_regex}' \ --coords-order "~{coordinates_order}" \ - ~{if remove_duplicates then "-r" else ""} \ - ~{if mark_supp_or_sec_or_unmapped_as_duplicates then "-S" else ""} \ - ~{if mark_duplicates_with_do_tag then "-t" else ""} \ - ~{if duplicate_count then "--duplicate-count" else ""} \ - ~{if include_qc_fails then "--include-fails" else ""} \ - ~{if duplicates_of_duplicates_check then "" else "--no-multi-dup"} \ - ~{if use_read_groups then "--use-read-groups" else ""} \ + ~{if remove_duplicates + then "-r" + else "" + } \ + ~{if mark_supp_or_sec_or_unmapped_as_duplicates + then "-S" + else "" + } \ + ~{if mark_duplicates_with_do_tag + then "-t" + else "" + } \ + ~{if duplicate_count + then "--duplicate-count" + else "" + } \ + ~{if include_qc_fails + then "--include-fails" + else "" + } \ + ~{if duplicates_of_duplicates_check + then "" + else "--no-multi-dup" + } \ + ~{if use_read_groups + then "--use-read-groups" + else "" + } \ -l ~{max_readlen} \ -d ~{optical_distance} \ -c \ "~{bam}" \ - "~{if create_bam then prefix + ".bam" else "/dev/null"}" + "~{if create_bam + then prefix + ".bam" + else "/dev/null" + }" >>> output { - File markdup_report = prefix + if json then ".json" else ".txt" + File markdup_report = prefix + if json + then ".json" + else ".txt" File? markdup_bam = prefix + ".bam" } @@ -1314,7 +1396,7 @@ task faidx { meta { description: "Creates a `.fai` FASTA index for the input FASTA" outputs: { - fasta_index: "A `.fai` FASTA index associated with the input FASTA. Filename will be `basename(fasta) + '.fai'`." + fasta_index: "A `.fai` FASTA index associated with the input FASTA. Filename will be `basename(fasta) + '.fai'`.", } } diff --git a/tools/star.wdl b/tools/star.wdl index 47fd141e7..8b77ced1b 100755 --- a/tools/star.wdl +++ b/tools/star.wdl @@ -1,12 +1,11 @@ ## [Homepage](https://github.com/alexdobin/STAR) - version 1.1 task build_star_db { meta { description: "Runs STAR's build command to generate a STAR format reference for alignment" outputs: { - star_db: "A gzipped TAR file containing the STAR reference files. Suitable as the `star_db_tar_gz` input to the `alignment` task." + star_db: "A gzipped TAR file containing the STAR reference files. Suitable as the `star_db_tar_gz` input to the `alignment` task.", } } @@ -86,8 +85,7 @@ task build_star_db { Float reference_fasta_size = size(reference_fasta, "GiB") Float gtf_size = size(gtf, "GiB") - Int disk_size_gb = ( - ceil((reference_fasta_size + gtf_size) * 3) + 10 + modify_disk_size_gb + Int disk_size_gb = (ceil((reference_fasta_size + gtf_size) * 3) + 10 + modify_disk_size_gb ) # Leave 2GB as system overhead @@ -558,7 +556,11 @@ task alignment { Array[File] read_one_fastqs_gz Array[String] read_groups Array[File]? read_two_fastqs_gz - Array[Int] out_sj_filter_intron_max_vs_read_n = [50000, 100000, 200000] + Array[Int] out_sj_filter_intron_max_vs_read_n = [ + 50000, + 100000, + 200000, + ] SpliceJunctionMotifs out_sj_filter_overhang_min = SpliceJunctionMotifs { noncanonical_motifs: 30, GT_AG_and_CT_AC_motif: 12, @@ -595,9 +597,7 @@ task alignment { Pair[Int, Int] clip_3p_n_bases = (0, 0) Pair[Int, Int] clip_3p_after_adapter_n_bases = (0, 0) Pair[Int, Int] clip_5p_n_bases = (0, 0) - String prefix = sub( - basename(read_one_fastqs_gz[0]), - "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$", + String prefix = sub(basename(read_one_fastqs_gz[0]), "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$", "" # Once replacing with capturing groups is supported, replace with group 3 ) String read_name_separator = "/" @@ -699,16 +699,16 @@ task alignment { Int modify_disk_size_gb = 0 } - Array[File] read_twos = select_first([read_two_fastqs_gz, []]) + Array[File] read_twos = select_first([ + read_two_fastqs_gz, + [], + ]) Float read_one_fastqs_size = size(read_one_fastqs_gz, "GiB") Float read_two_fastqs_size = size(read_twos, "GiB") Float star_db_tar_gz_size = size(star_db_tar_gz, "GiB") - Int disk_size_gb = ( - ( - ceil(read_one_fastqs_size + read_two_fastqs_size + star_db_tar_gz_size) * 3 - ) + 10 + modify_disk_size_gb - ) + Int disk_size_gb = ((ceil(read_one_fastqs_size + read_two_fastqs_size + star_db_tar_gz_size + ) * 3) + 10 + modify_disk_size_gb) command <<< set -euo pipefail @@ -733,9 +733,9 @@ task alignment { --outFileNamePrefix "~{prefix + "."}" \ --twopassMode "~{twopass_mode}" \ --outSAMattrRGline ~{sep(" , ", read_groups)} \ - --outSJfilterIntronMaxVsReadN ~{ - sep(" ", quote(out_sj_filter_intron_max_vs_read_n)) - } \ + --outSJfilterIntronMaxVsReadN ~{sep(" ", quote( + out_sj_filter_intron_max_vs_read_n + ))} \ --outSJfilterOverhangMin ~{sep(" ", quote([ out_sj_filter_overhang_min.noncanonical_motifs, out_sj_filter_overhang_min.GT_AG_and_CT_AC_motif, @@ -766,33 +766,31 @@ task alignment { align_sj_stitch_mismatch_n_max.GC_AG_and_CT_GC_motif, align_sj_stitch_mismatch_n_max.AT_AC_and_GT_AT_motif, ]))} \ - --clip3pAdapterSeq "~{clip_3p_adapter_seq.left}" ~{( - if (length(read_twos) != 0) + --clip3pAdapterSeq "~{clip_3p_adapter_seq.left}" ~{(if (length(read_twos) != 0 + ) then "'" + clip_3p_adapter_seq.right + "'" else "" )} \ - --clip3pAdapterMMp ~{clip_3p_adapter_mmp.left} ~{( - if (length(read_twos) != 0) + --clip3pAdapterMMp ~{clip_3p_adapter_mmp.left} ~{(if (length(read_twos) != 0) then clip_3p_adapter_mmp.right else None )} \ - --alignEndsProtrude ~{align_ends_protrude.left} "~{( - if (length(read_twos) != 0) + --alignEndsProtrude ~{align_ends_protrude.left} "~{(if (length(read_twos) != 0 + ) then align_ends_protrude.right else None )}" \ - --clip3pNbases ~{clip_3p_n_bases.left} ~{( - if (length(read_twos) != 0) + --clip3pNbases ~{clip_3p_n_bases.left} ~{(if (length(read_twos) != 0) then clip_3p_n_bases.right else None )} \ - --clip3pAfterAdapterNbases ~{clip_3p_after_adapter_n_bases.left} ~{( - if (length(read_twos) != 0) + --clip3pAfterAdapterNbases ~{clip_3p_after_adapter_n_bases.left} ~{(if (length( + read_twos + ) != 0) then clip_3p_after_adapter_n_bases.right else None )} \ - --clip5pNbases ~{clip_5p_n_bases.left} ~{( - if (length(read_twos) != 0) + --clip5pNbases ~{clip_5p_n_bases.left} ~{(if (length(read_twos) != 0) then clip_5p_n_bases.right else None )} \ @@ -803,11 +801,11 @@ task alignment { --outSAMunmapped "~{out_sam_unmapped}" \ --outSAMorder "~{out_sam_order}" \ --outSAMreadID "~{out_sam_read_id}" \ - --outSAMtlen ~{( - if (out_sam_tlen == "left_plus") + --outSAMtlen ~{(if (out_sam_tlen == "left_plus") then "1" - else ( - if (out_sam_tlen == "left_any") then "2" else "error" + else (if (out_sam_tlen == "left_any") + then "2" + else "error" ) )} \ --outFilterType "~{out_filter_type}" \ diff --git a/tools/util.wdl b/tools/util.wdl index 3bb4c5963..be141df2c 100644 --- a/tools/util.wdl +++ b/tools/util.wdl @@ -1,12 +1,11 @@ ## # Utilities - version 1.1 task download { meta { description: "Uses wget to download a file from a remote URL to the local filesystem" outputs: { - downloaded_file: "File downloaded from provided URL" + downloaded_file: "File downloaded from provided URL", } } @@ -53,7 +52,7 @@ task split_string { description: "Split a string into an array of strings based on a delimiter" warning: "This implementation will result in a runtime error if the provided string has any embedded single quotes (`'`)!" outputs: { - split_strings: "Split string as an array" + split_strings: "Split string as an array", } } @@ -90,7 +89,7 @@ task calc_feature_lengths { description: "Calculate feature lengths from a GTF file using the non-overlapping exonic length algorithm" help: "The non-overlapping exonic length algorithm can be implemented as the sum of each base covered by at least one exon; where each base is given a value of 1 regardless of how many exons overlap it." outputs: { - feature_lengths: "A two column headered TSV file with feature names in the first column and feature lengths (as integers) in the second column" + feature_lengths: "A two column headered TSV file with feature names in the first column and feature lengths (as integers) in the second column", } } @@ -166,7 +165,7 @@ task add_to_bam_header { meta { description: "Adds another line of text to the bottom of a BAM header" outputs: { - reheadered_bam: "The BAM after its header has been modified" + reheadered_bam: "The BAM after its header has been modified", } } @@ -215,7 +214,7 @@ task unpack_tarball { meta { description: "Accepts a `.tar.gz` archive and converts it into a flat array of files. Any directory structure of the archive is ignored." outputs: { - tarball_contents: "An array of files found in the input tarball" + tarball_contents: "An array of files found in the input tarball", } } @@ -314,7 +313,7 @@ task global_phred_scores { meta { description: "Calculates statistics about PHRED scores of the input BAM" outputs: { - phred_scores: "Headered TSV file containing PHRED score statistics" + phred_scores: "Headered TSV file containing PHRED score statistics", } } @@ -339,7 +338,10 @@ task global_phred_scores { command <<< python3 /scripts/util/calc_global_phred_scores.py \ - ~{if fast_mode then "--fast_mode" else ""} \ + ~{if fast_mode + then "--fast_mode" + else "" + } \ "~{bam}" \ "~{prefix}" >>> @@ -384,13 +386,15 @@ task check_fastq_and_rg_concordance { Array[String]? read_two_names } - Array[String] read_twos = select_first([read_two_names, []]) + Array[String] read_twos = select_first([ + read_two_names, + [], + ]) command <<< python3 /scripts/util/check_FQs_and_RGs.py \ --read-one-fastqs "~{sep(",", read_one_names)}" \ - ~{( - if length(read_twos) > 0 + ~{(if length(read_twos) > 0 then "--read-two-fastqs \"" + sep(",", squote(read_twos)) + "\"" else "" )} \ @@ -407,7 +411,7 @@ task split_fastq { meta { description: "Splits a FASTQ into multiple files based on the number of reads per file" outputs: { - fastqs: "Array of FASTQ files, each containing a subset of the input FASTQ" + fastqs: "Array of FASTQ files, each containing a subset of the input FASTQ", } } @@ -427,11 +431,7 @@ task split_fastq { input { File fastq - String prefix = sub( - basename(fastq), - "(fastq|fq)\\.gz$", - "" - ) + String prefix = sub(basename(fastq), "(fastq|fq)\\.gz$", "") Int reads_per_file = 10000000 Int modify_disk_size_gb = 0 Int ncpu = 2 diff --git a/workflows/chipseq/chipseq-standard.wdl b/workflows/chipseq/chipseq-standard.wdl index d9f9290df..aa30759fe 100755 --- a/workflows/chipseq/chipseq-standard.wdl +++ b/workflows/chipseq/chipseq-standard.wdl @@ -9,11 +9,14 @@ import "../../tools/samtools.wdl" import "../../tools/util.wdl" import "../general/bam-to-fastqs.wdl" as b2fq #@ except: LineWidth -import "https://raw.githubusercontent.com/stjude/seaseq/2.3/workflows/workflows/mapping.wdl" as seaseq_map +import "https://raw.githubusercontent.com/stjude/seaseq/2.3/workflows/workflows/mapping.wdl" + as seaseq_map #@ except: LineWidth -import "https://raw.githubusercontent.com/stjude/seaseq/3.0/workflows/tasks/samtools.wdl" as seaseq_samtools +import "https://raw.githubusercontent.com/stjude/seaseq/3.0/workflows/tasks/samtools.wdl" + as seaseq_samtools #@ except: LineWidth -import "https://raw.githubusercontent.com/stjude/seaseq/3.0/workflows/tasks/seaseq_util.wdl" as seaseq_util +import "https://raw.githubusercontent.com/stjude/seaseq/3.0/workflows/tasks/seaseq_util.wdl" + as seaseq_util workflow chipseq_standard_experimental { meta { @@ -67,7 +70,10 @@ workflow chipseq_standard_experimental { use_all_cores, } } - File selected_bam = select_first([subsample.sampled_bam, bam]) + File selected_bam = select_first([ + subsample.sampled_bam, + bam, + ]) call read_group.get_read_groups after validate_input_bam { input: bam = selected_bam, @@ -79,7 +85,7 @@ workflow chipseq_standard_experimental { use_all_cores, } - scatter (pair in zip(bam_to_fastqs.read1s, get_read_groups.read_groups)){ + scatter (pair in zip(bam_to_fastqs.read1s, get_read_groups.read_groups)) { if (enable_read_trimming) { call fp.fastp as trim { input: read_one_fastq = pair.left, @@ -93,10 +99,13 @@ workflow chipseq_standard_experimental { } } - File chosen_fastq = select_first([trim.single_end_reads_fastq_gz, pair.left]) + File chosen_fastq = select_first([ + trim.single_end_reads_fastq_gz, + pair.left, + ]) call seaseq_util.basicfastqstats as basic_stats { input: - fastqfile = chosen_fastq + fastqfile = chosen_fastq, } call seaseq_map.mapping as bowtie_single_end_mapping { input: fastqfile = chosen_fastq, @@ -104,13 +113,11 @@ workflow chipseq_standard_experimental { metricsfile = basic_stats.metrics_out, blacklist = excludelist, } - File chosen_bam = select_first( - [ - bowtie_single_end_mapping.bklist_bam, - bowtie_single_end_mapping.mkdup_bam, - bowtie_single_end_mapping.sorted_bam, - ] - ) + File chosen_bam = select_first([ + bowtie_single_end_mapping.bklist_bam, + bowtie_single_end_mapping.mkdup_bam, + bowtie_single_end_mapping.sorted_bam, + ]) call read_group.read_group_to_string { input: read_group = pair.right, @@ -127,7 +134,7 @@ workflow chipseq_standard_experimental { } Array[File] aligned_bams = addreplacerg.tagged_bam - scatter(aligned_bam in aligned_bams){ + scatter (aligned_bam in aligned_bams) { call picard.clean_sam as picard_clean { input: bam = aligned_bam, } @@ -147,7 +154,9 @@ workflow chipseq_standard_experimental { use_all_cores, } #@ except: UnusedCall - call picard.validate_bam { input: bam = markdup.mkdupbam } + call picard.validate_bam { input: + bam = markdup.mkdupbam, + } call md5sum.compute_checksum { input: file = markdup.mkdupbam, @@ -164,9 +173,13 @@ workflow chipseq_standard_experimental { File bam_checksum = compute_checksum.md5sum File bam_index = samtools_index.bam_index File bigwig = deeptools_bam_coverage.bigwig - Array[File] fastp_reports = select_all(flatten([fastp.report, trim.report])) - Array[File] fastp_jsons = select_all(flatten( - [fastp.report_json, trim.report_json] - )) + Array[File] fastp_reports = select_all(flatten([ + fastp.report, + trim.report, + ])) + Array[File] fastp_jsons = select_all(flatten([ + fastp.report_json, + trim.report_json, + ])) } } diff --git a/workflows/dnaseq/dnaseq-core.wdl b/workflows/dnaseq/dnaseq-core.wdl index 62b027db2..21a1d3990 100644 --- a/workflows/dnaseq/dnaseq-core.wdl +++ b/workflows/dnaseq/dnaseq-core.wdl @@ -1,5 +1,4 @@ ## **WARNING:** this workflow is experimental! Use at your own risk! - version 1.1 import "../../tools/bwa.wdl" @@ -69,10 +68,7 @@ workflow dnaseq_core_experimental { read_groups, } - scatter (tuple in zip( - zip(read_one_fastqs_gz, read_two_fastqs_gz), - read_groups - )) { + scatter (tuple in zip(zip(read_one_fastqs_gz, read_two_fastqs_gz), read_groups)) { if (enable_read_trimming) { call fp.fastp as trim after validate { input: read_one_fastq = tuple.left.left, @@ -87,8 +83,14 @@ workflow dnaseq_core_experimental { output_fastq = enable_read_trimming, } } - File chosen_r1_fastq = select_first([trim.read_one_fastq_gz, tuple.left.left]) - File chosen_r2_fastq = select_first([trim.read_two_fastq_gz, tuple.left.right]) + File chosen_r1_fastq = select_first([ + trim.read_one_fastq_gz, + tuple.left.left, + ]) + File chosen_r2_fastq = select_first([ + trim.read_two_fastq_gz, + tuple.left.right, + ]) call util.split_fastq as read_ones after validate { input: fastq = chosen_r1_fastq, @@ -105,11 +107,8 @@ workflow dnaseq_core_experimental { read_one_fastq_gz = t.left, read_two_fastq_gz = t.right, bwa_db_tar_gz = bwa_db, - prefix = sub(sub( - basename(t.left), - "(\\.subsampled)?\\.(fastq|fq)(\\.gz)?$", - "" - ), "\\.([rR][12])\\.", "."), + prefix = sub(sub(basename(t.left), "(\\.subsampled)?\\.(fastq|fq)(\\.gz)?$", + ""), "\\.([rR][12])\\.", "."), read_group = tuple.right, use_all_cores, } @@ -119,17 +118,17 @@ workflow dnaseq_core_experimental { read_one_fastq_gz = t.left, read_two_fastq_gz = t.right, bwa_db_tar_gz = bwa_db, - prefix = sub(sub( - basename(t.left), - "(\\.subsampled)?\\.(fastq|fq)(\\.gz)?$", - "" - ), "\\.([rR][12])\\.", "."), + prefix = sub(sub(basename(t.left), "(\\.subsampled)?\\.(fastq|fq)(\\.gz)?$", + ""), "\\.([rR][12])\\.", "."), read_group = tuple.right, use_all_cores, } } call picard.sort as sort { input: - bam = select_first([bwa_mem.bam, bwa_aln_pe.bam]) + bam = select_first([ + bwa_mem.bam, + bwa_aln_pe.bam, + ]), } } } @@ -146,9 +145,13 @@ workflow dnaseq_core_experimental { output { File harmonized_bam = merge.merged_bam File harmonized_bam_index = index.bam_index - Array[File] fastp_reports = select_all(flatten([fastp.report, trim.report])) - Array[File] fastp_jsons = select_all(flatten( - [fastp.report_json, trim.report_json] - )) + Array[File] fastp_reports = select_all(flatten([ + fastp.report, + trim.report, + ])) + Array[File] fastp_jsons = select_all(flatten([ + fastp.report_json, + trim.report_json, + ])) } } diff --git a/workflows/dnaseq/dnaseq-standard-fastq.wdl b/workflows/dnaseq/dnaseq-standard-fastq.wdl index c0542c19d..fdf48606b 100644 --- a/workflows/dnaseq/dnaseq-standard-fastq.wdl +++ b/workflows/dnaseq/dnaseq-standard-fastq.wdl @@ -1,5 +1,4 @@ ## **WARNING:** this workflow is experimental! Use at your own risk! - version 1.1 import "../../data_structures/read_group.wdl" @@ -54,9 +53,7 @@ workflow dnaseq_standard_fastq_experimental { Array[File] read_one_fastqs_gz Array[File] read_two_fastqs_gz Array[ReadGroup] read_groups - String prefix = sub( - basename(read_one_fastqs_gz[0]), - "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$", + String prefix = sub(basename(read_one_fastqs_gz[0]), "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$", "" # Once replacing with capturing groups is supported, replace with group 3 ) String aligner = "mem" @@ -101,12 +98,10 @@ workflow dnaseq_standard_fastq_experimental { subsample.subsampled_read1, read_one_fastqs_gz, ]) - Array[File] selected_read_two_fastqs = select_all( - select_first([ - subsample.subsampled_read2, - read_two_fastqs_gz, - ]) - ) + Array[File] selected_read_two_fastqs = select_all(select_first([ + subsample.subsampled_read2, + read_two_fastqs_gz, + ])) call dnaseq_core_wf.dnaseq_core_experimental after fqlint { input: read_one_fastqs_gz = selected_read_one_fastqs, diff --git a/workflows/dnaseq/dnaseq-standard.wdl b/workflows/dnaseq/dnaseq-standard.wdl index 7b2e86154..194c18cea 100644 --- a/workflows/dnaseq/dnaseq-standard.wdl +++ b/workflows/dnaseq/dnaseq-standard.wdl @@ -1,5 +1,4 @@ ## **WARNING:** this workflow is experimental! Use at your own risk! - version 1.1 import "../../data_structures/read_group.wdl" @@ -55,7 +54,7 @@ workflow dnaseq_standard_experimental { } call parse_input { input: - aligner + aligner, } if (validate_input) { @@ -71,7 +70,10 @@ workflow dnaseq_standard_experimental { use_all_cores, } } - File selected_bam = select_first([subsample.sampled_bam, bam]) + File selected_bam = select_first([ + subsample.sampled_bam, + bam, + ]) call read_group.get_read_groups after parse_input { input: bam = selected_bam, @@ -95,7 +97,10 @@ workflow dnaseq_standard_experimental { SM: sample_override, } } - ReadGroup selected_rg = select_first([overriden_rg, rg]) + ReadGroup selected_rg = select_first([ + overriden_rg, + rg, + ]) call read_group.read_group_to_string { input: read_group = selected_rg, format_as_sam_record = true, diff --git a/workflows/general/alignment-post.wdl b/workflows/general/alignment-post.wdl index 53c18d64a..9caa5344c 100644 --- a/workflows/general/alignment-post.wdl +++ b/workflows/general/alignment-post.wdl @@ -4,7 +4,8 @@ import "../../tools/md5sum.wdl" import "../../tools/picard.wdl" import "../../tools/samtools.wdl" #@ except: LineWidth -import "https://raw.githubusercontent.com/stjude/XenoCP/4.0.0-alpha/wdl/workflows/xenocp.wdl" as xenocp_wf +import "https://raw.githubusercontent.com/stjude/XenoCP/4.0.0-alpha/wdl/workflows/xenocp.wdl" + as xenocp_wf workflow alignment_post { meta { @@ -12,7 +13,7 @@ workflow alignment_post { outputs: { processed_bam: "Input BAM after being transformed by standard processing", bam_index: "BAI index associated with `processed_bam`", - bam_checksum: "STDOUT of the `md5sum` command run on the input BAM that has been redirected to a file", + bam_checksum: "STDOUT of the `md5sum` command run on the input BAM that has been redirected to a file", validate_report: "Validation report produced by `picard ValidateSamFile`. Validation warnings and errors are logged.", } allowNestedInputs: true @@ -46,7 +47,9 @@ workflow alignment_post { Boolean use_all_cores = false } - call picard.sort as picard_sort { input: bam } + call picard.sort as picard_sort { input: + bam, + } if (cleanse_xenograft) { call samtools.index as pre_xenocp_index { input: @@ -57,14 +60,23 @@ workflow alignment_post { call xenocp_wf.xenocp { input: input_bam = picard_sort.sorted_bam, input_bai = pre_xenocp_index.bam_index, - reference_tar_gz = select_first([contaminant_db, ""]), - aligner = select_first([xenocp_aligner, "undefined"]), + reference_tar_gz = select_first([ + contaminant_db, + "", + ]), + aligner = select_first([ + xenocp_aligner, + "undefined", + ]), skip_duplicate_marking = true, } } if (mark_duplicates) { call picard.mark_duplicates as picard_markdup { input: - bam = select_first([xenocp.bam, picard_sort.sorted_bam]), + bam = select_first([ + xenocp.bam, + picard_sort.sorted_bam, + ]), } } @@ -79,9 +91,13 @@ workflow alignment_post { use_all_cores, } File aligned_bam_index = samtools_index.bam_index - call picard.validate_bam { input: bam = aligned_bam } + call picard.validate_bam { input: + bam = aligned_bam, + } - call md5sum.compute_checksum { input: file = aligned_bam } + call md5sum.compute_checksum { input: + file = aligned_bam, + } output { File processed_bam = aligned_bam diff --git a/workflows/general/bam-to-fastqs.wdl b/workflows/general/bam-to-fastqs.wdl index 409ac0133..573db7c7d 100644 --- a/workflows/general/bam-to-fastqs.wdl +++ b/workflows/general/bam-to-fastqs.wdl @@ -27,7 +27,9 @@ workflow bam_to_fastqs { Boolean use_all_cores = false } - call samtools.quickcheck { input: bam } + call samtools.quickcheck { input: + bam, + } call samtools.split after quickcheck { input: bam, @@ -42,11 +44,13 @@ workflow bam_to_fastqs { } if (paired_end) { - scatter (reads in - zip(bam_to_fastq.read_one_fastq_gz, bam_to_fastq.read_two_fastq_gz) - ) { + scatter (reads in zip(bam_to_fastq.read_one_fastq_gz, bam_to_fastq.read_two_fastq_gz + )) { call fq.fqlint { input: - read_one_fastq = select_first([reads.left, "undefined"]), + read_one_fastq = select_first([ + reads.left, + "undefined", + ]), read_two_fastq = reads.right, } } @@ -54,14 +58,16 @@ workflow bam_to_fastqs { if (!paired_end) { scatter (fq in bam_to_fastq.single_end_reads_fastq_gz) { call fq.fqlint as se_fqlint { input: - read_one_fastq = select_first([fq, "undefined"]), + read_one_fastq = select_first([ + fq, + "undefined", + ]), } } } output { - Array[File] read1s = ( - if paired_end + Array[File] read1s = (if paired_end then select_all(bam_to_fastq.read_one_fastq_gz) else select_all(bam_to_fastq.single_end_reads_fastq_gz) ) diff --git a/workflows/general/samtools-merge.wdl b/workflows/general/samtools-merge.wdl index a34585c99..fe6825278 100644 --- a/workflows/general/samtools-merge.wdl +++ b/workflows/general/samtools-merge.wdl @@ -1,21 +1,20 @@ ## **WARNING:** this workflow is experimental! Use at your own risk! - version 1.1 import "../../tools/samtools.wdl" workflow samtools_merge { - meta{ + meta { name: "Merge BAMs" description: "Runs `samtools merge`, with optional iteration to avoid maximum command line argument length" category: "Utility" outputs: { - merged_bam: "The BAM resulting from merging all the input BAMs" + merged_bam: "The BAM resulting from merging all the input BAMs", } allowNestedInputs: true } - parameter_meta{ + parameter_meta { bams: "BAMs to merge into a final BAM" prefix: "Prefix for output BAM." use_all_cores: "Use all cores? Recommended for cloud environments." @@ -31,22 +30,21 @@ workflow samtools_merge { Int bam_length = length(bams) - if (bam_length > max_length){ + if (bam_length > max_length) { # Find the number of merges required - scatter (merge_num in range((bam_length / max_length) + 1)){ + scatter (merge_num in range((bam_length / max_length) + 1)) { # Get the sublist of bams - scatter (bam_num in range(max_length)){ - Int num = ( - if merge_num > 0 + scatter (bam_num in range(max_length)) { + Int num = (if merge_num > 0 then bam_num + (merge_num * max_length) else bam_num ) - if (num < bam_length){ + if (num < bam_length) { File bam_list = bams[num] } } } - scatter (list in bam_list){ + scatter (list in bam_list) { call samtools.merge as inner_merge { input: bams = select_all(list), prefix, @@ -65,7 +63,7 @@ workflow samtools_merge { } } - if (bam_length < max_length){ + if (bam_length < max_length) { call samtools.merge as basic_merge { input: bams, prefix, @@ -76,6 +74,9 @@ workflow samtools_merge { } output { - File merged_bam = select_first([final_merge.merged_bam, basic_merge.merged_bam]) + File merged_bam = select_first([ + final_merge.merged_bam, + basic_merge.merged_bam, + ]) } } diff --git a/workflows/methylation/methylation-cohort.wdl b/workflows/methylation/methylation-cohort.wdl index 4632d5402..03a06cf6b 100644 --- a/workflows/methylation/methylation-cohort.wdl +++ b/workflows/methylation/methylation-cohort.wdl @@ -33,21 +33,20 @@ workflow methylation_cohort { Int beta_length = length(unfiltered_normalized_beta) Int pval_length = length(p_values) - if (beta_length > max_length){ - scatter (merge_num in range((beta_length / max_length) + 1)){ + if (beta_length > max_length) { + scatter (merge_num in range((beta_length / max_length) + 1)) { # Get the sublist of beta files - scatter (beta_num in range(max_length)){ - Int num = ( - if merge_num > 0 + scatter (beta_num in range(max_length)) { + Int num = (if merge_num > 0 then beta_num + (merge_num * max_length) else beta_num ) - if (num < beta_length){ + if (num < beta_length) { File bam_list = unfiltered_normalized_beta[num] } } } - scatter (iter_index in range(length(bam_list))){ + scatter (iter_index in range(length(bam_list))) { call combine_data as inner_merge { input: files_to_combine = select_all(bam_list[iter_index]), combined_file_name = "~{iter_index}.combined.csv", @@ -59,22 +58,21 @@ workflow methylation_cohort { combined_file_name = "combined_beta.csv", } - if (pval_length > 0 && !skip_pvalue_check){ + if (pval_length > 0 && !skip_pvalue_check) { # If p-values are provided, merge those as well - scatter (merge_num in range((pval_length / max_length) + 1)){ + scatter (merge_num in range((pval_length / max_length) + 1)) { # Get the sublist of p-value files - scatter (pval_num in range(max_length)){ - Int num_p = ( - if merge_num > 0 + scatter (pval_num in range(max_length)) { + Int num_p = (if merge_num > 0 then pval_num + (merge_num * max_length) else pval_num ) - if (num_p < pval_length){ + if (num_p < pval_length) { File pval_list = p_values[num_p] } } } - scatter (iter_index in range(length(pval_list))){ + scatter (iter_index in range(length(pval_list))) { call combine_data as inner_merge_pvals { input: files_to_combine = select_all(pval_list[iter_index]), combined_file_name = "~{iter_index}.pvals.combined.csv", @@ -88,12 +86,12 @@ workflow methylation_cohort { } } - if (beta_length <= max_length){ + if (beta_length <= max_length) { call combine_data as simple_merge { input: files_to_combine = unfiltered_normalized_beta, combined_file_name = "combined_beta.csv", } - if (pval_length > 0 && !skip_pvalue_check){ + if (pval_length > 0 && !skip_pvalue_check) { call combine_data as simple_merge_pval { input: files_to_combine = p_values, combined_file_name = "combined_pvals.csv", @@ -101,23 +99,19 @@ workflow methylation_cohort { } } - File? pval_file = ( - if (pval_length > 0 && !skip_pvalue_check) - then select_first( - [ - final_merge_pvals.combined_file, - simple_merge_pval.combined_file, - ]) + File? pval_file = (if (pval_length > 0 && !skip_pvalue_check) + then select_first([ + final_merge_pvals.combined_file, + simple_merge_pval.combined_file, + ]) else None ) call filter_probes { input: - beta_values = select_first( - [ - final_merge.combined_file, - simple_merge.combined_file, - ] - ), + beta_values = select_first([ + final_merge.combined_file, + simple_merge.combined_file, + ]), p_values = pval_file, num_probes, } @@ -131,12 +125,10 @@ workflow methylation_cohort { } output { - File combined_beta = select_first( - [ - final_merge.combined_file, - simple_merge.combined_file, - ] - ) + File combined_beta = select_first([ + final_merge.combined_file, + simple_merge.combined_file, + ]) File filtered_beta = filter_probes.filtered_beta_values File filtered_probeset = filter_probes.filtered_probes File umap_embedding = generate_umap.umap @@ -149,7 +141,7 @@ task combine_data { meta { description: "Combine data from multiple CSV files by column" outputs: { - combined_file: "Combined CSV file" + combined_file: "Combined CSV file", } } @@ -173,16 +165,19 @@ task combine_data { Int modify_memory_gb = 0 } - Int memory_gb = ceil(size(files_to_combine, "GiB") * - if simple_merge then 2 else 1) - + modify_memory_gb - + 2 + Int memory_gb = ceil(size(files_to_combine, "GiB") * if simple_merge + then 2 + else 1 + ) + modify_memory_gb + 2 Int disk_size_gb = ceil(size(files_to_combine, "GiB") * 2) + 2 command <<< python /scripts/methylation/combine.py \ --output-name "~{combined_file_name}" \ - ~{if simple_merge then "--simple-merge" else ""} \ + ~{if simple_merge + then "--simple-merge" + else "" + } \ ~{sep(" ", quote(files_to_combine))} >>> @@ -258,7 +253,7 @@ task generate_umap { meta { description: "Generate UMAP embedding" outputs: { - umap: "UMAP embedding for all samples" + umap: "UMAP embedding for all samples", } } @@ -297,7 +292,7 @@ task plot_umap { meta { description: "Plot UMAP embedding" outputs: { - umap_plot: "UMAP plot for all samples" + umap_plot: "UMAP plot for all samples", } } diff --git a/workflows/methylation/methylation-preprocess.wdl b/workflows/methylation/methylation-preprocess.wdl index 95132cfc8..f85ed8613 100644 --- a/workflows/methylation/methylation-preprocess.wdl +++ b/workflows/methylation/methylation-preprocess.wdl @@ -48,10 +48,8 @@ task process_raw_idats { >>> output { - File beta_swan_norm_unfiltered - = out_base + ".beta_swan_norm_unfiltered.csv" - File beta_swan_norm_unfiltered_genomic - = out_base + ".beta_swan_norm_unfiltered.genomic.csv" + File beta_swan_norm_unfiltered = out_base + ".beta_swan_norm_unfiltered.csv" + File beta_swan_norm_unfiltered_genomic = out_base + ".beta_swan_norm_unfiltered.genomic.csv" File annotation = out_base + ".annotation.csv" File beta_unnorm = out_base + ".beta.csv" File cn_values = out_base + ".cn_values.csv" diff --git a/workflows/methylation/methylation-standard.wdl b/workflows/methylation/methylation-standard.wdl index 618d41efb..23be2d06b 100644 --- a/workflows/methylation/methylation-standard.wdl +++ b/workflows/methylation/methylation-standard.wdl @@ -32,19 +32,17 @@ workflow methylation { scatter (pair in zip(green_idats, red_idats)) { call preprocess.process_raw_idats { input: - idats = pair + idats = pair, } } call cohort.methylation_cohort { input: - unfiltered_normalized_beta = - process_raw_idats.beta_swan_norm_unfiltered_genomic, + unfiltered_normalized_beta = process_raw_idats.beta_swan_norm_unfiltered_genomic, p_values = process_raw_idats.probe_pvalues, } output { - Array[File] beta_swan_norm_unfiltered_genomic = - process_raw_idats.beta_swan_norm_unfiltered_genomic + Array[File] beta_swan_norm_unfiltered_genomic = process_raw_idats.beta_swan_norm_unfiltered_genomic File combined_beta = methylation_cohort.combined_beta File filtered_beta = methylation_cohort.filtered_beta File filtered_probeset = methylation_cohort.filtered_probeset diff --git a/workflows/qc/markdups-post.wdl b/workflows/qc/markdups-post.wdl index 70771d8e8..2e0420fa5 100644 --- a/workflows/qc/markdups-post.wdl +++ b/workflows/qc/markdups-post.wdl @@ -5,7 +5,6 @@ ## whether a read is a duplicate or not. ## But the tasks called below produce different results depending on whether the ## input BAM has been duplicate marked or not. - version 1.1 import "../../tools/mosdepth.wdl" @@ -61,7 +60,7 @@ workflow markdups_post { bam_index = markdups_bam_index, prefix = prefix + "." + "whole_genome", } - scatter(coverage_pair in zip(coverage_beds, coverage_labels)) { + scatter (coverage_pair in zip(coverage_beds, coverage_labels)) { call mosdepth.coverage as regions_coverage { input: bam = markdups_bam, bam_index = markdups_bam_index, @@ -72,8 +71,7 @@ workflow markdups_post { output { File insert_size_metrics = collect_insert_size_metrics.insert_size_metrics - File insert_size_metrics_pdf - = collect_insert_size_metrics.insert_size_metrics_pdf + File insert_size_metrics_pdf = collect_insert_size_metrics.insert_size_metrics_pdf File flagstat_report = flagstat.flagstat_report File mosdepth_global_summary = wg_coverage.summary File mosdepth_global_dist = wg_coverage.global_dist diff --git a/workflows/qc/quality-check-standard.wdl b/workflows/qc/quality-check-standard.wdl index 1587b7fda..3762e2883 100644 --- a/workflows/qc/quality-check-standard.wdl +++ b/workflows/qc/quality-check-standard.wdl @@ -126,8 +126,7 @@ workflow quality_check_standard { File kraken_db File? gtf #@ except: LineWidth - File multiqc_config - = "https://raw.githubusercontent.com/stjudecloud/workflows/main/workflows/qc/multiqc_config/multiqc_config.yaml" + File multiqc_config = "https://raw.githubusercontent.com/stjudecloud/workflows/main/workflows/qc/multiqc_config/multiqc_config.yaml" Array[File] extra_multiqc_inputs = [] Array[File] coverage_beds = [] Array[String] coverage_labels = [] @@ -164,20 +163,24 @@ workflow quality_check_standard { coverage_labels, } call flag_filter.validate_flag_filter as kraken_filter_validator { input: - flags = standard_filter + flags = standard_filter, } if (run_comparative_kraken) { - call flag_filter.validate_flag_filter - as comparative_kraken_filter_validator - { input: - flags = comparative_filter + call flag_filter.validate_flag_filter as comparative_kraken_filter_validator { input: + flags = comparative_filter, } } - call md5sum.compute_checksum after parse_input { input: file = bam } + call md5sum.compute_checksum after parse_input { input: + file = bam, + } - call samtools.quickcheck after parse_input { input: bam } - call util.compression_integrity after parse_input { input: bgzipped_file = bam } + call samtools.quickcheck after parse_input { input: + bam, + } + call util.compression_integrity after parse_input { input: + bgzipped_file = bam, + } if (subsample_n_reads > 0) { call samtools.subsample after quickcheck { input: @@ -188,7 +191,10 @@ workflow quality_check_standard { } if (defined(subsample.sampled_bam)) { call samtools.index as subsample_index { input: - bam = select_first([subsample.sampled_bam, "undefined"]), + bam = select_first([ + subsample.sampled_bam, + "undefined", + ]), use_all_cores, } } @@ -203,8 +209,7 @@ workflow quality_check_standard { subsample_index.bam_index, bam_index, ]) - String post_subsample_prefix = ( - if (defined(subsample.sampled_bam)) + String post_subsample_prefix = (if (defined(subsample.sampled_bam)) then prefix + ".subsampled" else prefix ) @@ -235,7 +240,9 @@ workflow quality_check_standard { outfile_name = post_subsample_prefix + ".readlength.tsv", } call ngsderive.encoding after quickcheck { input: - ngs_files = [post_subsample_bam], + ngs_files = [ + post_subsample_bam, + ], outfile_name = post_subsample_prefix + ".encoding.tsv", num_reads = -1, } @@ -249,9 +256,7 @@ workflow quality_check_standard { prefix = post_subsample_prefix, } - call samtools.bam_to_fastq after quickcheck - after kraken_filter_validator - { input: + call samtools.bam_to_fastq after quickcheck after kraken_filter_validator { input: bam = post_subsample_bam, bitwise_filter = standard_filter, prefix = post_subsample_prefix, @@ -267,14 +272,24 @@ workflow quality_check_standard { } call fq.fqlint { input: - read_one_fastq = select_first([bam_to_fastq.read_one_fastq_gz, "undefined"]), - read_two_fastq = select_first([bam_to_fastq.read_two_fastq_gz, "undefined"]), + read_one_fastq = select_first([ + bam_to_fastq.read_one_fastq_gz, + "undefined", + ]), + read_two_fastq = select_first([ + bam_to_fastq.read_two_fastq_gz, + "undefined", + ]), } call kraken2.kraken after fqlint { input: - read_one_fastq_gz - = select_first([bam_to_fastq.read_one_fastq_gz, "undefined"]), - read_two_fastq_gz - = select_first([bam_to_fastq.read_two_fastq_gz, "undefined"]), + read_one_fastq_gz = select_first([ + bam_to_fastq.read_one_fastq_gz, + "undefined", + ]), + read_two_fastq_gz = select_first([ + bam_to_fastq.read_two_fastq_gz, + "undefined", + ]), db = kraken_db, store_sequences = store_kraken_sequences, prefix = post_subsample_prefix, @@ -282,23 +297,29 @@ workflow quality_check_standard { } if (run_fastp) { call fp.fastp after fqlint { input: - read_one_fastq - = select_first([bam_to_fastq.read_one_fastq_gz, "undefined"]), - read_two_fastq - = select_first([bam_to_fastq.read_two_fastq_gz, "undefined"]), + read_one_fastq = select_first([ + bam_to_fastq.read_one_fastq_gz, + "undefined", + ]), + read_two_fastq = select_first([ + bam_to_fastq.read_two_fastq_gz, + "undefined", + ]), output_fastq = false, } } if (run_librarian) { call libraran_tasks.librarian after fqlint { input: - read_one_fastq = select_first([bam_to_fastq.read_one_fastq_gz, "undefined"]), + read_one_fastq = select_first([ + bam_to_fastq.read_one_fastq_gz, + "undefined", + ]), } } if (run_comparative_kraken) { - call samtools.bam_to_fastq as alt_filtered_fastq after quickcheck - after comparative_kraken_filter_validator - { input: + call samtools.bam_to_fastq as alt_filtered_fastq after quickcheck after comparative_kraken_filter_validator { + input: bam = post_subsample_bam, bitwise_filter = comparative_filter, prefix = post_subsample_prefix + ".alt_filtered", @@ -315,16 +336,24 @@ workflow quality_check_standard { use_all_cores, } call fq.fqlint as alt_filtered_fqlint { input: - read_one_fastq - = select_first([alt_filtered_fastq.read_one_fastq_gz, "undefined"]), - read_two_fastq - = select_first([alt_filtered_fastq.read_two_fastq_gz, "undefined"]), + read_one_fastq = select_first([ + alt_filtered_fastq.read_one_fastq_gz, + "undefined", + ]), + read_two_fastq = select_first([ + alt_filtered_fastq.read_two_fastq_gz, + "undefined", + ]), } call kraken2.kraken as comparative_kraken after alt_filtered_fqlint { input: - read_one_fastq_gz - = select_first([alt_filtered_fastq.read_one_fastq_gz, "undefined"]), - read_two_fastq_gz - = select_first([alt_filtered_fastq.read_two_fastq_gz, "undefined"]), + read_one_fastq_gz = select_first([ + alt_filtered_fastq.read_one_fastq_gz, + "undefined", + ]), + read_two_fastq_gz = select_first([ + alt_filtered_fastq.read_two_fastq_gz, + "undefined", + ]), db = kraken_db, store_sequences = store_kraken_sequences, prefix = post_subsample_prefix + ".alt_filtered", @@ -337,8 +366,8 @@ workflow quality_check_standard { bam_index = post_subsample_bam_index, prefix = post_subsample_prefix + ".whole_genome", } - scatter(coverage_pair in zip(coverage_beds, parse_input.labels)) { - call mosdepth.coverage as regions_coverage after quickcheck { input: + scatter (coverage_pair in zip(coverage_beds, parse_input.labels)) { + call mosdepth.coverage as regions_coverage after quickcheck { input: bam = post_subsample_bam, bam_index = post_subsample_bam_index, coverage_bed = coverage_pair.left, @@ -350,19 +379,31 @@ workflow quality_check_standard { call ngsderive.junction_annotation after quickcheck { input: bam = post_subsample_bam, bam_index = post_subsample_bam_index, - gene_model = select_first([gtf, "undefined"]), + gene_model = select_first([ + gtf, + "undefined", + ]), prefix = post_subsample_prefix, } call ngsderive.strandedness after quickcheck { input: bam = post_subsample_bam, bam_index = post_subsample_bam_index, - gene_model = select_first([gtf, "undefined"]), + gene_model = select_first([ + gtf, + "undefined", + ]), outfile_name = post_subsample_prefix + ".strandedness.tsv", } call qualimap.rnaseq as qualimap_rnaseq { input: - bam = select_first([bam_to_fastq.collated_bam, "undefined"]), + bam = select_first([ + bam_to_fastq.collated_bam, + "undefined", + ]), prefix = post_subsample_prefix + ".qualimap_rnaseq_results", - gtf = select_first([gtf, "undefined"]), + gtf = select_first([ + gtf, + "undefined", + ]), name_sorted = true, paired_end = true, # matches default but prevents user from overriding } @@ -434,17 +475,27 @@ workflow quality_check_standard { ], regions_coverage.summary, select_all(regions_coverage.region_dist), - select_first([markdups_post.mosdepth_region_summary, []]), - select_first([markdups_post.mosdepth_region_dist, []]), - ( - if (mark_duplicates && optical_distance > 0) - then [markdups.mark_duplicates_metrics] + select_first([ + markdups_post.mosdepth_region_summary, + [], + ]), + select_first([ + markdups_post.mosdepth_region_dist, + [], + ]), + (if (mark_duplicates && optical_distance > 0) + then [ + markdups.mark_duplicates_metrics, + ] else [] ), ])) call multiqc_tasks.multiqc { input: - files = flatten([multiqc_files, extra_multiqc_inputs]), + files = flatten([ + multiqc_files, + extra_multiqc_inputs, + ]), config = multiqc_config, report_name = post_subsample_prefix + ".multiqc", } @@ -483,7 +534,10 @@ workflow quality_check_standard { File? kraken_sequences = kraken.sequences File? comparative_kraken_sequences = comparative_kraken.sequences File? junctions = junction_annotation.junctions - Array[File] intermediate_files = select_first([optional_files, []]) + Array[File] intermediate_files = select_first([ + optional_files, + [], + ]) } } @@ -491,7 +545,7 @@ task parse_input { meta { description: "Parses and validates the `quality_check_standard` workflow's provided inputs" outputs: { - labels: "An array of labels to use on the result coverage files associated with each coverage BED" + labels: "An array of labels to use on the result coverage files associated with each coverage BED", } } @@ -539,8 +593,7 @@ task parse_input { >>> output { - Array[String] labels = ( - if (coverage_beds_len > 0) + Array[String] labels = (if (coverage_beds_len > 0) then read_lines("labels.txt") else [] ) diff --git a/workflows/reference/bwa-db-build.wdl b/workflows/reference/bwa-db-build.wdl index 3aefef4f1..579385f1c 100644 --- a/workflows/reference/bwa-db-build.wdl +++ b/workflows/reference/bwa-db-build.wdl @@ -40,7 +40,7 @@ workflow bwa_db_build { } output { - File reference_fa = reference_download.downloaded_file - File bwa_db_tar_gz = build_bwa_db.bwa_db_tar_gz + File reference_fa = reference_download.downloaded_file + File bwa_db_tar_gz = build_bwa_db.bwa_db_tar_gz } } diff --git a/workflows/reference/gatk-reference.wdl b/workflows/reference/gatk-reference.wdl index 3b4dff835..2cf2e4087 100644 --- a/workflows/reference/gatk-reference.wdl +++ b/workflows/reference/gatk-reference.wdl @@ -88,16 +88,28 @@ workflow gatk_reference { if (defined(dbSNP_vcf_index_url) && defined(dbSNP_vcf_index_name)) { call util.download as dbsnp_index { input: - url = select_first([dbSNP_vcf_index_url, "undefined"]), - outfile_name = select_first([dbSNP_vcf_index_name, "undefined"]), + url = select_first([ + dbSNP_vcf_index_url, + "undefined", + ]), + outfile_name = select_first([ + dbSNP_vcf_index_name, + "undefined", + ]), disk_size_gb = dbSNP_vcf_index_disk_size_gb, } } if (defined(interval_list_url) && defined(interval_list_name)) { call util.download as intervals { input: - url = select_first([interval_list_url, "undefined"]), - outfile_name = select_first([interval_list_name, "undefined"]), + url = select_first([ + interval_list_url, + "undefined", + ]), + outfile_name = select_first([ + interval_list_name, + "undefined", + ]), disk_size_gb = interval_list_disk_size_gb, } } diff --git a/workflows/reference/qc-reference.wdl b/workflows/reference/qc-reference.wdl index 2d64b8901..28f67aebf 100644 --- a/workflows/reference/qc-reference.wdl +++ b/workflows/reference/qc-reference.wdl @@ -121,12 +121,12 @@ workflow qc_reference { } } - if ( - (length(kraken_fastas) > 0) - || (length(kraken_fasta_urls) > 0) - || (length(kraken_libraries) > 0) - ) { - call kraken2.download_taxonomy { input: protein } + if ((length(kraken_fastas) > 0) || (length(kraken_fasta_urls) > 0) || (length( + kraken_libraries + ) > 0)) { + call kraken2.download_taxonomy { input: + protein, + } } scatter (lib in kraken_libraries) { @@ -136,7 +136,10 @@ workflow qc_reference { } } - Array[File] custom_fastas = flatten([kraken_fastas, fastas_download.downloaded_file]) + Array[File] custom_fastas = flatten([ + kraken_fastas, + fastas_download.downloaded_file, + ]) if (length(custom_fastas) > 0) { call kraken2.create_library_from_fastas { input: fastas_gz = custom_fastas, @@ -145,9 +148,13 @@ workflow qc_reference { } Array[File] kraken_tarballs = flatten([ - select_all([download_taxonomy.taxonomy]), + select_all([ + download_taxonomy.taxonomy, + ]), download_library.library, - select_all([create_library_from_fastas.custom_library]), + select_all([ + create_library_from_fastas.custom_library, + ]), ]) if (length(kraken_tarballs) > 0) { call kraken2.build_db as kraken_build_db { input: diff --git a/workflows/reference/star-db-build.wdl b/workflows/reference/star-db-build.wdl index d3a99fbe2..d2d14b2a1 100644 --- a/workflows/reference/star-db-build.wdl +++ b/workflows/reference/star-db-build.wdl @@ -56,8 +56,8 @@ workflow star_db_build { } output { - File reference_fa = reference_download.downloaded_file - File gtf = gtf_download.downloaded_file - File star_db_tar_gz = build_star_db.star_db + File reference_fa = reference_download.downloaded_file + File gtf = gtf_download.downloaded_file + File star_db_tar_gz = build_star_db.star_db } } diff --git a/workflows/rnaseq/rnaseq-core.wdl b/workflows/rnaseq/rnaseq-core.wdl index b5d088317..afad29046 100644 --- a/workflows/rnaseq/rnaseq-core.wdl +++ b/workflows/rnaseq/rnaseq-core.wdl @@ -144,9 +144,7 @@ workflow rnaseq_core { GC_AG_and_CT_GC_motif: 5, AT_AC_and_GT_AT_motif: 5, } - String prefix = sub( - basename(read_one_fastqs_gz[0]), - "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$", + String prefix = sub(basename(read_one_fastqs_gz[0]), "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$", "" # Once replacing with capturing groups is supported, replace with group 3 ) String xenocp_aligner = "star" @@ -201,13 +199,11 @@ workflow rnaseq_core { } } - Array[File] chosen_r1s = ( - if enable_read_trimming + Array[File] chosen_r1s = (if enable_read_trimming then select_all(trim.read_one_fastq_gz) else read_one_fastqs_gz ) - Array[File] chosen_r2s = ( - if enable_read_trimming + Array[File] chosen_r2s = (if enable_read_trimming then select_all(trim.read_two_fastq_gz) else read_two_fastqs_gz ) @@ -252,8 +248,7 @@ workflow rnaseq_core { gene_model = gtf, } - String htseq_strandedness = ( - if (provided_strandedness != "") + String htseq_strandedness = (if (provided_strandedness != "") then htseq_strandedness_mapping[provided_strandedness] else htseq_strandedness_mapping[ngsderive_strandedness.strandedness_string] ) @@ -262,12 +257,11 @@ workflow rnaseq_core { bam = alignment_post.processed_bam, gtf, strandedness = htseq_strandedness, - prefix = basename(alignment_post.processed_bam, "bam") - + ( - if provided_strandedness == "" - then ngsderive_strandedness.strandedness_string - else provided_strandedness - ), + prefix = basename(alignment_post.processed_bam, "bam") + (if provided_strandedness + == "" + then ngsderive_strandedness.strandedness_string + else provided_strandedness + ), pos_sorted = true, } @@ -280,9 +274,13 @@ workflow rnaseq_core { File feature_counts = htseq_count.feature_counts File inferred_strandedness = ngsderive_strandedness.strandedness_file String inferred_strandedness_string = ngsderive_strandedness.strandedness_string - Array[File] fastp_reports = select_all(flatten([fastp.report, trim.report])) - Array[File] fastp_jsons = select_all(flatten( - [fastp.report_json, trim.report_json] - )) + Array[File] fastp_reports = select_all(flatten([ + fastp.report, + trim.report, + ])) + Array[File] fastp_jsons = select_all(flatten([ + fastp.report_json, + trim.report_json, + ])) } } diff --git a/workflows/rnaseq/rnaseq-standard-fastq.wdl b/workflows/rnaseq/rnaseq-standard-fastq.wdl index dee188b10..29ff3bfc6 100644 --- a/workflows/rnaseq/rnaseq-standard-fastq.wdl +++ b/workflows/rnaseq/rnaseq-standard-fastq.wdl @@ -73,9 +73,7 @@ workflow rnaseq_standard_fastq { Array[File] read_two_fastqs_gz Array[ReadGroup] read_groups File? contaminant_db - String prefix = sub( - basename(read_one_fastqs_gz[0]), - "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$", + String prefix = sub(basename(read_one_fastqs_gz[0]), "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$", "" # Once replacing with capturing groups is supported, replace with group 3 ) String xenocp_aligner = "star" @@ -100,7 +98,7 @@ workflow rnaseq_standard_fastq { } } - if (validate_input){ + if (validate_input) { scatter (reads in zip(read_one_fastqs_gz, read_two_fastqs_gz)) { call fq.fqlint after parse_input { input: read_one_fastq = reads.left, @@ -123,12 +121,10 @@ workflow rnaseq_standard_fastq { subsample.subsampled_read1, read_one_fastqs_gz, ]) - Array[File] selected_read_two_fastqs = select_all( - select_first([ - subsample.subsampled_read2, - read_two_fastqs_gz, - ]) - ) + Array[File] selected_read_two_fastqs = select_all(select_first([ + subsample.subsampled_read2, + read_two_fastqs_gz, + ])) call rnaseq_core_wf.rnaseq_core after fqlint { input: read_one_fastqs_gz = selected_read_one_fastqs, diff --git a/workflows/rnaseq/rnaseq-standard.wdl b/workflows/rnaseq/rnaseq-standard.wdl index c7278a72a..48f70ab95 100755 --- a/workflows/rnaseq/rnaseq-standard.wdl +++ b/workflows/rnaseq/rnaseq-standard.wdl @@ -92,7 +92,10 @@ workflow rnaseq_standard { use_all_cores, } } - File selected_bam = select_first([subsample.sampled_bam, bam]) + File selected_bam = select_first([ + subsample.sampled_bam, + bam, + ]) call read_group.get_read_groups after validate_input_bam { input: bam = selected_bam, diff --git a/workflows/rnaseq/rnaseq-variant-calling.wdl b/workflows/rnaseq/rnaseq-variant-calling.wdl index 8df2e61a1..ab7a8a445 100644 --- a/workflows/rnaseq/rnaseq-variant-calling.wdl +++ b/workflows/rnaseq/rnaseq-variant-calling.wdl @@ -54,7 +54,7 @@ workflow rnaseq_variant_calling { Int scatter_count = 6 } - if (!bam_is_dup_marked){ + if (!bam_is_dup_marked) { call picard.mark_duplicates { input: bam, create_bam = true, @@ -62,8 +62,14 @@ workflow rnaseq_variant_calling { } call gatk.split_n_cigar_reads { input: - bam = select_first([mark_duplicates.duplicate_marked_bam, bam]), - bam_index = select_first([mark_duplicates.duplicate_marked_bam_index, bam_index]), + bam = select_first([ + mark_duplicates.duplicate_marked_bam, + bam, + ]), + bam_index = select_first([ + mark_duplicates.duplicate_marked_bam_index, + bam_index, + ]), fasta, fasta_index, dict, From 8ee911e722ae748bc9f67b9b4464f53beff86a04 Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Mon, 23 Mar 2026 14:06:56 -0400 Subject: [PATCH 33/37] ci: rm redundant or unneeded actions --- .github/workflows/miniwdl-check.yaml | 29 --------------------------- .github/workflows/sprocket-check.yaml | 11 ---------- 2 files changed, 40 deletions(-) delete mode 100644 .github/workflows/miniwdl-check.yaml delete mode 100644 .github/workflows/sprocket-check.yaml diff --git a/.github/workflows/miniwdl-check.yaml b/.github/workflows/miniwdl-check.yaml deleted file mode 100644 index 14fa817bc..000000000 --- a/.github/workflows/miniwdl-check.yaml +++ /dev/null @@ -1,29 +0,0 @@ -name: Workflows Miniwdl Check - -on: [push] - -jobs: - miniwdl_check: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - name: Set up Python - uses: actions/setup-python@v1 - with: - python-version: '3.10' - - name: Install miniwdl - run: | - python -m pip install --upgrade pip - pip install miniwdl - - name: Run miniwdl - run: | - EXITCODE=0 - echo "Checking WDL files using \`miniwdl check\`." - shopt -s extglob - files=$(find ./!(template) -name '*.wdl') - for file in $files; do - echo " [***] $file [***]" - miniwdl check "$file" - EXITCODE=$(($? || EXITCODE)) - done - exit $EXITCODE diff --git a/.github/workflows/sprocket-check.yaml b/.github/workflows/sprocket-check.yaml deleted file mode 100644 index a2cc9ca39..000000000 --- a/.github/workflows/sprocket-check.yaml +++ /dev/null @@ -1,11 +0,0 @@ -name: Workflows Sprocket Check - -on: [push] - -jobs: - sprocket_check: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - name: Run sprocket - uses: stjude-rust-labs/sprocket-action@main From dd0e3699943ba61007e85ad9f665868f4b63a966 Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Mon, 23 Mar 2026 14:07:08 -0400 Subject: [PATCH 34/37] ci: add a format check --- .github/workflows/sprocket-lint.yaml | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/.github/workflows/sprocket-lint.yaml b/.github/workflows/sprocket-lint.yaml index e289f28f7..6ce5cbb60 100644 --- a/.github/workflows/sprocket-lint.yaml +++ b/.github/workflows/sprocket-lint.yaml @@ -3,12 +3,16 @@ name: Workflows Sprocket Lint on: [push] jobs: - sprocket_lint: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - name: Run sprocket - uses: stjude-rust-labs/sprocket-action@main - with: - lint: true - except: KnownRules + sprocket_lint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Run 'sprocket lint' + uses: stjude-rust-labs/sprocket-action@main + with: + lint: true + except: KnownRules + - name: Run 'sprocket format' + uses: stjude-rust-labs/sprocket-action@feat/format + with: + action: format From aa2c4bf657d168b37552798773c6886bbb859d57 Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Mon, 23 Mar 2026 14:16:57 -0400 Subject: [PATCH 35/37] sprocket lints --- data_structures/read_group.wdl | 4 ++-- sprocket.toml | 3 +++ tools/arriba.wdl | 1 - tools/gatk4.wdl | 3 --- tools/kraken2.wdl | 1 - workflows/chipseq/chipseq-standard.wdl | 3 --- workflows/general/alignment-post.wdl | 1 - workflows/qc/quality-check-standard.wdl | 1 - 8 files changed, 5 insertions(+), 12 deletions(-) diff --git a/data_structures/read_group.wdl b/data_structures/read_group.wdl index 30955325c..b4247a62b 100644 --- a/data_structures/read_group.wdl +++ b/data_structures/read_group.wdl @@ -149,14 +149,14 @@ task validate_read_group { } parameter_meta { - read_group: "`ReadGroup` struct to validate" required_fields: "Array of read group fields that must be defined. The ID field is always required and does not need to be specified." + read_group: "`ReadGroup` struct to validate" restrictive: "If true, run a stricter validation of field values. Otherwise, check against SAM spec-defined values." } input { - ReadGroup read_group Array[String] required_fields + ReadGroup read_group Boolean restrictive } diff --git a/sprocket.toml b/sprocket.toml index b84f31f61..912ce6a3b 100644 --- a/sprocket.toml +++ b/sprocket.toml @@ -3,6 +3,9 @@ all_lint_rules = true except = ["ContainerUri"] deny_notes = true +[format] +sort_inputs = true + [run.task] cpu_limit_behavior = "try_with_max" memory_limit_behavior = "try_with_max" diff --git a/tools/arriba.wdl b/tools/arriba.wdl index 84da082c0..fb5d0a149 100644 --- a/tools/arriba.wdl +++ b/tools/arriba.wdl @@ -169,7 +169,6 @@ task arriba { "NC_*", ] Array[String] disable_filters = [] - #@ except: LineWidth String feature_name = "gene_name=gene_name|gene_id,gene_id=gene_id,transcript_id=transcript_id,feature_exon=exon,feature_CDS=CDS" String prefix = basename(bam, ".bam") + ".fusions" String strandedness = "auto" diff --git a/tools/gatk4.wdl b/tools/gatk4.wdl index e4a709e18..c54a4e9f9 100644 --- a/tools/gatk4.wdl +++ b/tools/gatk4.wdl @@ -116,7 +116,6 @@ task base_recalibrator { Int disk_size_gb = ceil(size(bam, "GB") + 1) * 3 + ceil(size(fasta, "GB")) + modify_disk_size_gb Int java_heap_size = ceil(memory_gb * 0.9) - #@ except: LineWidth command <<< # shellcheck disable=SC2102 gatk \ @@ -184,7 +183,6 @@ task apply_bqsr { Int disk_size_gb = ceil(size(bam, "GB") * 2) + 30 + modify_disk_size_gb Int java_heap_size = ceil(memory_gb * 0.9) - #@ except: LineWidth command <<< set -euo pipefail @@ -273,7 +271,6 @@ task haplotype_caller { Int disk_size_gb = ceil(size(bam, "GB") * 2) + 30 + ceil(size(fasta, "GB")) + modify_disk_size_gb Int java_heap_size = ceil(memory_gb * 0.9) - #@ except: LineWidth command <<< gatk \ --java-options "-Xms6000m -Xmx~{java_heap_size}g -XX:GCTimeLimit=50 -XX:GCHeapFreeLimit=10" \ diff --git a/tools/kraken2.wdl b/tools/kraken2.wdl index 5d2081918..018921bd2 100644 --- a/tools/kraken2.wdl +++ b/tools/kraken2.wdl @@ -98,7 +98,6 @@ task download_library { String db_name = "kraken2_" + library_name + "_library" - #@ except: ExpressionSpacing Int disk_size_gb = ((if library_name == "bacteria" then 300 else if library_name == "nr" diff --git a/workflows/chipseq/chipseq-standard.wdl b/workflows/chipseq/chipseq-standard.wdl index aa30759fe..5f7296726 100755 --- a/workflows/chipseq/chipseq-standard.wdl +++ b/workflows/chipseq/chipseq-standard.wdl @@ -8,13 +8,10 @@ import "../../tools/picard.wdl" import "../../tools/samtools.wdl" import "../../tools/util.wdl" import "../general/bam-to-fastqs.wdl" as b2fq -#@ except: LineWidth import "https://raw.githubusercontent.com/stjude/seaseq/2.3/workflows/workflows/mapping.wdl" as seaseq_map -#@ except: LineWidth import "https://raw.githubusercontent.com/stjude/seaseq/3.0/workflows/tasks/samtools.wdl" as seaseq_samtools -#@ except: LineWidth import "https://raw.githubusercontent.com/stjude/seaseq/3.0/workflows/tasks/seaseq_util.wdl" as seaseq_util diff --git a/workflows/general/alignment-post.wdl b/workflows/general/alignment-post.wdl index 9caa5344c..618871b9d 100644 --- a/workflows/general/alignment-post.wdl +++ b/workflows/general/alignment-post.wdl @@ -3,7 +3,6 @@ version 1.1 import "../../tools/md5sum.wdl" import "../../tools/picard.wdl" import "../../tools/samtools.wdl" -#@ except: LineWidth import "https://raw.githubusercontent.com/stjude/XenoCP/4.0.0-alpha/wdl/workflows/xenocp.wdl" as xenocp_wf diff --git a/workflows/qc/quality-check-standard.wdl b/workflows/qc/quality-check-standard.wdl index 3762e2883..2f7a3bff3 100644 --- a/workflows/qc/quality-check-standard.wdl +++ b/workflows/qc/quality-check-standard.wdl @@ -125,7 +125,6 @@ workflow quality_check_standard { File bam_index File kraken_db File? gtf - #@ except: LineWidth File multiqc_config = "https://raw.githubusercontent.com/stjudecloud/workflows/main/workflows/qc/multiqc_config/multiqc_config.yaml" Array[File] extra_multiqc_inputs = [] Array[File] coverage_beds = [] From a95017588426f37d0c03c28b70c0973d9d576ce5 Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Mon, 23 Mar 2026 14:17:11 -0400 Subject: [PATCH 36/37] ignore failing methylation files --- .github/workflows/sprocket-lint.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/sprocket-lint.yaml b/.github/workflows/sprocket-lint.yaml index 6ce5cbb60..a7204d5b6 100644 --- a/.github/workflows/sprocket-lint.yaml +++ b/.github/workflows/sprocket-lint.yaml @@ -12,6 +12,7 @@ jobs: with: lint: true except: KnownRules + exclusions: methylation - name: Run 'sprocket format' uses: stjude-rust-labs/sprocket-action@feat/format with: From 3d4fde089ac8e4c17fe39048e9059fb959b603dd Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Mon, 23 Mar 2026 14:19:36 -0400 Subject: [PATCH 37/37] ci fix --- .github/workflows/sprocket-lint.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/sprocket-lint.yaml b/.github/workflows/sprocket-lint.yaml index a7204d5b6..60f48a8e5 100644 --- a/.github/workflows/sprocket-lint.yaml +++ b/.github/workflows/sprocket-lint.yaml @@ -12,7 +12,7 @@ jobs: with: lint: true except: KnownRules - exclusions: methylation + ignore-patterns: methylation - name: Run 'sprocket format' uses: stjude-rust-labs/sprocket-action@feat/format with: