Skip to content

Commit d4a859b

Browse files
committed
Support for distro tools in workflow testing.
If tool IDs from Galaxy's default tool panel are encountered in workflows being executed, served, or tested, they will be added to the tool conf generated for the Galaxy instance started by Planemo. Fixes #780.
1 parent 5b483c7 commit d4a859b

9 files changed

Lines changed: 219 additions & 6 deletions

File tree

planemo/galaxy/config.py

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -41,12 +41,16 @@
4141
gi,
4242
user_api_key,
4343
)
44+
from .distro_tools import (
45+
DISTRO_TOOLS_ID_TO_PATH
46+
)
4447
from .run import (
4548
DOWNLOAD_GALAXY,
4649
setup_common_startup_args,
4750
setup_venv,
4851
)
4952
from .workflows import (
53+
find_tool_ids,
5054
import_workflow,
5155
install_shed_repos,
5256
)
@@ -267,12 +271,13 @@ def config_join(*args):
267271
_handle_job_metrics(config_directory, kwds)
268272

269273
shed_tool_conf = "config/shed_tool_conf.xml"
270-
all_tool_paths = list(tool_paths) + list(kwds.get("extra_tools", []))
274+
all_tool_paths = _all_tool_paths(runnables, **kwds)
271275

272276
tool_directories = set([]) # Things to mount...
273277
for tool_path in all_tool_paths:
274278
directory = os.path.dirname(os.path.normpath(tool_path))
275-
tool_directories.add(directory)
279+
if os.path.exists(directory):
280+
tool_directories.add(directory)
276281

277282
# TODO: remap these.
278283
tool_volumes = []
@@ -396,7 +401,7 @@ def config_join(*args):
396401
_ensure_directory(tool_dependency_dir)
397402

398403
shed_tool_conf = kwds.get("shed_tool_conf") or config_join("shed_tools_conf.xml")
399-
all_tool_paths = list(tool_paths) + list(kwds.get("extra_tools", []))
404+
all_tool_paths = _all_tool_paths(runnables, **kwds)
400405
tool_definition = _tool_conf_entry_for(all_tool_paths)
401406
empty_tool_conf = config_join("empty_tool_conf.xml")
402407

@@ -536,6 +541,19 @@ def config_join(*args):
536541
)
537542

538543

544+
def _all_tool_paths(runnables, **kwds):
    """Collect every tool path that should be loaded for ``runnables``.

    The returned list contains, in order:

    - the ``path`` of each runnable whose ``has_tools`` is true,
    - every entry of ``kwds["extra_tools"]`` (if any),
    - the ``DISTRO_TOOLS_ID_TO_PATH`` entry for each tool id found in a
      runnable whose type name is ``galaxy_workflow``.

    A distro tool path is appended only if it is not already in the list,
    so a tool used by several steps (or several workflows) is listed once.
    """
    all_tool_paths = [r.path for r in runnables if r.has_tools]
    # ``or []`` tolerates ``extra_tools`` being present but set to None.
    all_tool_paths.extend(kwds.get("extra_tools") or [])

    for runnable in runnables:
        if runnable.type.name != "galaxy_workflow":
            continue

        for tool_id in find_tool_ids(runnable.path):
            distro_path = DISTRO_TOOLS_ID_TO_PATH.get(tool_id)
            if distro_path is not None and distro_path not in all_tool_paths:
                all_tool_paths.append(distro_path)

    return all_tool_paths
555+
556+
539557
def _shared_galaxy_properties(config_directory, kwds, for_tests):
540558
"""Setup properties useful for local and Docker Galaxy instances.
541559

planemo/galaxy/distro_tools.py

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
# file auto generated with scripts/tool_index_to_id_map.py
2+
DISTRO_TOOLS_ID_TO_PATH = {
3+
"ucsc_table_direct1": "data_source/ucsc_tablebrowser.xml",
4+
"MAF_To_Fasta1": "maf/maf_to_fasta.xml",
5+
"ratmine": "data_source/ratmine.xml",
6+
"mousemine": "data_source/mousemine.xml",
7+
"__SORTLIST__": "${model_tools_path}/sort_collection_list.xml",
8+
"__UNZIP_COLLECTION__": "${model_tools_path}/unzip_collection.xml",
9+
"cbi_rice_mart": "data_source/cbi_rice_mart.xml",
10+
"ucsc_table_direct_archaea1": "data_source/ucsc_tablebrowser_archaea.xml",
11+
"wig_to_bigWig": "filters/wig_to_bigwig.xml",
12+
"ebi_sra_main": "data_source/ebi_sra.xml",
13+
"metabolicmine": "data_source/metabolicmine.xml",
14+
"Extract_features1": "filters/gff/extract_GFF_Features.xml",
15+
"upload1": "data_source/upload.xml",
16+
"wc_gnu": "filters/wc_gnu.xml",
17+
"join1": "filters/joiner.xml",
18+
"random_lines1": "filters/randomlines.xml",
19+
"modENCODEfly": "data_source/fly_modencode.xml",
20+
"gff_filter_by_attribute": "filters/gff/gff_filter_by_attribute.xml",
21+
"gtf2bedgraph": "filters/gtf2bedgraph.xml",
22+
"ChangeCase": "filters/changeCase.xml",
23+
"__FLATTEN__": "${model_tools_path}/flatten_collection.xml",
24+
"wiggle2simple1": "filters/wiggle_to_simple.xml",
25+
"GeneBed_Maf_Fasta2": "maf/genebed_maf_to_fasta.xml",
26+
"trimmer": "filters/trimmer.xml",
27+
"createInterval": "filters/CreateInterval.xml",
28+
"gff_filter_by_feature_count": "filters/gff/gff_filter_by_feature_count.xml",
29+
"Interval2Maf1": "maf/interval2maf.xml",
30+
"genomespace_exporter": "genomespace/genomespace_exporter.xml",
31+
"Show tail1": "filters/tailWrapper.xml",
32+
"barchart_gnuplot": "plotting/bar_chart.xml",
33+
"microbial_import1": "data_source/microbial_import.xml",
34+
"axt_to_concat_fasta": "filters/axt_to_concat_fasta.xml",
35+
"__FILTER_FROM_FILE__": "${model_tools_path}/filter_from_file.xml",
36+
"Interval2Maf_pairwise1": "maf/interval2maf_pairwise.xml",
37+
"Show beginning1": "filters/headWrapper.xml",
38+
"axt_to_lav_1": "filters/axt_to_lav.xml",
39+
"modmine": "data_source/modmine.xml",
40+
"__EXPORT_HISTORY__": "${model_tools_path}/imp_exp/exp_history_to_archive.xml",
41+
"MAF_Thread_For_Species1": "maf/maf_thread_for_species.xml",
42+
"vcf_to_maf_customtrack1": "maf/vcf_to_maf_customtrack.xml",
43+
"MAF_To_BED1": "maf/maf_to_bed.xml",
44+
"__DATA_FETCH__": "${model_tools_path}/data_fetch.xml",
45+
"__IMPORT_HISTORY__": "${model_tools_path}/imp_exp/imp_history_from_archive.xml",
46+
"biomart": "data_source/biomart.xml",
47+
"Sff_extractor": "filters/sff_extractor.xml",
48+
"secure_hash_message_digest": "filters/secure_hash_message_digest.xml",
49+
"MAF_Reverse_Complement_1": "maf/maf_reverse_complement.xml",
50+
"mergeCols1": "filters/mergeCols.xml",
51+
"gff2bed1": "filters/gff2bed.xml",
52+
"Grouping1": "stats/grouping.xml",
53+
"maf_limit_size1": "maf/maf_limit_size.xml",
54+
"sort1": "filters/sorter.xml",
55+
"Convert characters1": "filters/convert_characters.xml",
56+
"MAF_To_Interval1": "maf/maf_to_interval.xml",
57+
"MAF_filter": "maf/maf_filter.xml",
58+
"MAF_split_blocks_by_species1": "maf/maf_split_by_species.xml",
59+
"genomespace_importer": "genomespace/genomespace_importer.xml",
60+
"gene2exon1": "filters/ucsc_gene_bed_to_exon_bed.xml",
61+
"Cut1": "filters/cutWrapper.xml",
62+
"Count1": "filters/uniq.xml",
63+
"MAF_Limit_To_Species1": "maf/maf_limit_to_species.xml",
64+
"ucsc_table_direct_test1": "data_source/ucsc_tablebrowser_test.xml",
65+
"wormbase": "data_source/wormbase.xml",
66+
"maf_stats1": "maf/maf_stats.xml",
67+
"zebrafishmine": "data_source/zebrafishmine.xml",
68+
"Paste1": "filters/pasteWrapper.xml",
69+
"Interval_Maf_Merged_Fasta2": "maf/interval_maf_to_merged_fasta.xml",
70+
"modENCODEworm": "data_source/worm_modencode.xml",
71+
"gtf_filter_by_attribute_values_list": "filters/gff/gtf_filter_by_attribute_values_list.xml",
72+
"Summary_Statistics1": "stats/gsummary.xml",
73+
"qual_stats_boxplot": "plotting/boxplot.xml",
74+
"cat1": "filters/catWrapper.xml",
75+
"maf_by_block_number1": "maf/maf_by_block_number.xml",
76+
"Grep1": "filters/grep.xml",
77+
"eupathdb": "data_source/eupathdb.xml",
78+
"__RELABEL_FROM_FILE__": "${model_tools_path}/relabel_from_file.xml",
79+
"__ZIP_COLLECTION__": "${model_tools_path}/zip_collection.xml",
80+
"lav_to_bed1": "filters/lav_to_bed.xml",
81+
"comp1": "filters/compare.xml",
82+
"bed_to_bigBed": "filters/bed_to_bigbed.xml",
83+
"liftOver1": "extract/liftOver_wrapper.xml",
84+
"bed2gff1": "filters/bed2gff.xml",
85+
"__MERGE_COLLECTION__": "${model_tools_path}/merge_collection.xml",
86+
"gramenemart": "data_source/gramene_mart.xml",
87+
"hbvar": "data_source/hbvar.xml",
88+
"yeastmine": "data_source/yeastmine.xml",
89+
"Filter1": "stats/filtering.xml",
90+
"flymine": "data_source/flymine.xml",
91+
"wormbase_test": "data_source/wormbase_test.xml",
92+
"__FILTER_FAILED_DATASETS__": "${model_tools_path}/filter_failed_collection.xml",
93+
"Extract genomic DNA 1": "extract/extract_genomic_dna.xml",
94+
"Remove beginning1": "filters/remove_beginning.xml",
95+
"flymine_test": "data_source/flymine_test.xml",
96+
"axt_to_fasta": "filters/axt_to_fasta.xml",
97+
"addValue": "filters/fixedValueColumn.xml",
98+
"__TAG_FROM_FILE__": "${model_tools_path}/tag_collection_from_file.xml"
99+
}

planemo/galaxy/workflows.py

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -69,11 +69,14 @@ def import_workflow(path, admin_gi, user_gi, from_path=False):
6969
return workflow
7070

7171

72-
def _raw_dict(path, importer):
72+
def _raw_dict(path, importer=None):
7373
if path.endswith(".ga"):
7474
with open(path, "r") as f:
7575
workflow = json.load(f)
7676
else:
77+
if importer is None:
78+
importer = DummyImporterGalaxyInterface()
79+
7780
workflow_directory = os.path.dirname(path)
7881
workflow_directory = os.path.abspath(workflow_directory)
7982
with open(path, "r") as f:
@@ -83,13 +86,22 @@ def _raw_dict(path, importer):
8386
return workflow
8487

8588

89+
def find_tool_ids(path):
    """Return the distinct tool ids referenced by the workflow at ``path``.

    The workflow is loaded with ``_raw_dict`` and each entry of its
    ``steps`` mapping is inspected. Steps with no ``tool_id`` value are
    skipped, and each id is reported only once, in the order it is first
    encountered while iterating the steps.
    """
    tool_ids = []
    workflow = _raw_dict(path)
    for step in workflow["steps"].values():
        tool_id = step.get("tool_id")
        # Skip steps without a tool and ids that were already collected.
        if tool_id and tool_id not in tool_ids:
            tool_ids.append(tool_id)

    return tool_ids
97+
98+
8699
WorkflowOutput = namedtuple("WorkflowOutput", ["order_index", "output_name", "label"])
87100

88101

89102
def describe_outputs(path):
90103
"""Return a list of :class:`WorkflowOutput` objects for target workflow."""
91-
importer = DummyImporterGalaxyInterface()
92-
workflow = _raw_dict(path, importer)
104+
workflow = _raw_dict(path)
93105
outputs = []
94106
for (order_index, step) in workflow["steps"].items():
95107
step_outputs = step.get("workflow_outputs", [])

planemo/test/_check_output.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,10 @@ def check_output(runnable, output_properties, test_properties, **kwds):
2222
job_output_files = kwds.get("job_output_files", None)
2323
item_label = "Output with path %s" % path
2424
problems = []
25+
if "asserts" in test_properties:
26+
# TODO: break fewer abstractions here...
27+
from galaxy.tools.parser.yaml import __to_test_assert_list
28+
test_properties["assert_list"] = __to_test_assert_list(test_properties["asserts"])
2529
try:
2630
verify(
2731
item_label,

scripts/tool_index.json

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

scripts/tool_index_to_id_map.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
"""Generate ``planemo/galaxy/distro_tools.py`` from ``tool_index.json``.

Reads the tool index stored next to this script and writes a Python module
defining ``DISTRO_TOOLS_ID_TO_PATH``, a mapping from tool id to the tool's
config file path. Paths are resolved relative to this script, so it can be
run from any working directory.
"""
import json
import os

SCRIPT_DIRECTORY = os.path.dirname(os.path.abspath(__file__))
TOOL_INDEX_PATH = os.path.join(SCRIPT_DIRECTORY, "tool_index.json")
DISTRO_TOOLS_PATH = os.path.join(
    SCRIPT_DIRECTORY, os.pardir, "planemo", "galaxy", "distro_tools.py"
)

HEADER = "# file auto generated with scripts/tool_index_to_id_map.py\n"
MODEL_TOOLS_MARKER = "lib/galaxy/tools/"
TOOLS_MARKER = "tools/"


def tool_path_for(config_file):
    """Return the path to record for ``config_file`` or ``None``.

    Config files below ``lib/galaxy/tools/`` are recorded with a
    ``${model_tools_path}/`` prefix; other config files containing
    ``tools/`` are recorded as the text after its first occurrence.
    """
    if not config_file:
        return None
    # Check the longer marker first since it also contains "tools/".
    if MODEL_TOOLS_MARKER in config_file:
        _, path = config_file.split(MODEL_TOOLS_MARKER, 1)
        return "${model_tools_path}/%s" % path
    if TOOLS_MARKER in config_file:
        _, path = config_file.split(TOOLS_MARKER, 1)
        return path
    return None


def build_tool_map(tool_index):
    """Build the tool id to path mapping from a list of tool descriptions.

    Each description is a dict with ``id`` and ``config_file`` keys. Tools
    whose id contains ``/`` are skipped, as are tools for which
    ``tool_path_for`` yields no path.
    """
    tool_map = {}
    for tool in tool_index:
        tool_id = tool["id"]
        if "/" in tool_id:
            continue
        path = tool_path_for(tool.get("config_file"))
        if path:
            tool_map[tool_id] = path
    return tool_map


def render_module(tool_map):
    """Return the source text of the generated ``distro_tools`` module."""
    # sort_keys keeps the generated file stable between runs.
    as_json = json.dumps(tool_map, indent=4, sort_keys=True)
    as_python = "DISTRO_TOOLS_ID_TO_PATH = %s\n" % as_json
    # Some json versions leave a trailing space after each comma when
    # indenting; strip it so the generated file has no trailing whitespace.
    as_python = as_python.replace(" \n", "\n")
    return HEADER + as_python


def main():
    """Read the tool index and (re)write the generated module."""
    with open(TOOL_INDEX_PATH, "r") as f:
        tool_index = json.load(f)

    with open(DISTRO_TOOLS_PATH, "w") as f:
        f.write(render_module(build_tool_map(tool_index)))


if __name__ == "__main__":
    main()
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
- doc: Test Galaxy finds Grouping1 tool.
2+
job:
3+
input1:
4+
class: File
5+
path: '1.bed'
6+
outputs:
7+
wf_output_1:
8+
asserts:
9+
has_line:
10+
line: 'chr16 206638'
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
class: GalaxyWorkflow
2+
name: "Test Workflow with Distro Tool"
3+
inputs:
4+
- id: input1
5+
outputs:
6+
- id: wf_output_1
7+
source: grouping#out_file1
8+
steps:
9+
- tool_id: Grouping1
10+
label: grouping
11+
state:
12+
input1:
13+
$link: input1
14+
groupcol: 1
15+
operations:
16+
- opcol: '2' # TODO: shouldn't need to be a string, but integer doesn't work here - does outside conditional :(
17+
optype: mean
18+
opround: 'no' # TODO: default shouldn't need to be specified, but does because in conditional :(

tests/test_cmd_test.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ def test_workflow_test_simple_yaml(self):
2929
test_command = self.append_profile_argument_if_needed(test_command)
3030
test_command += [
3131
"--no_dependency_resolution",
32+
"--galaxy_branch", "release_18.01", # Much better workflow output detection than master for now (pre-release of 18.01)
3233
"--extra_tools", random_lines,
3334
"--extra_tools", cat,
3435
test_artifact,
@@ -48,6 +49,7 @@ def test_workflow_test_simple_ga(self):
4849
test_command = self.append_profile_argument_if_needed(test_command)
4950
test_command += [
5051
"--no_dependency_resolution",
52+
"--galaxy_branch", "release_18.01", # Much better workflow output detection than master for now (pre-release of 18.01)
5153
"--extra_tools", cat,
5254
test_artifact,
5355
]
@@ -58,6 +60,28 @@ def test_workflow_test_simple_ga(self):
5860
# print(o.read())
5961
# raise
6062

63+
@skip_if_environ("PLANEMO_SKIP_GALAXY_TESTS")
def test_workflow_test_distro_tool(self):
    """Test testing a workflow that uses a Galaxy distribution tool.

    No ``--extra_tools`` are passed on the command line here, so the
    ``wf4-distro-tools.gxwf.yml`` workflow is expected to run using only
    tools that ship with Galaxy.
    """
    with self._isolate():
        test_artifact = os.path.join(TEST_DATA_DIR, "wf4-distro-tools.gxwf.yml")
        test_command = [
            "--verbose",
            "test",
        ]
        test_command = self.append_profile_argument_if_needed(test_command)
        test_command += [
            "--no_dependency_resolution",
            # Much better workflow output detection than master for now
            # (pre-release of 18.01).
            "--galaxy_branch", "release_18.01",
            test_artifact,
        ]
        self._check_exit_code(test_command, exit_code=0)
84+
6185
@skip_if_environ("PLANEMO_SKIP_CWLTOOL_TESTS")
6286
def test_cwltool_tool_test(self):
6387
"""Test testing a CWL tool with cwltool."""

0 commit comments

Comments
 (0)