Skip to content

Commit 1d13a9f

Browse files
authored
Merge pull request #89 from jmchilton/library_upload_2.0
Upload 2.0 support for data library creation.
2 parents 6bc39d8 + dea1ff2 commit 1d13a9f

4 files changed

Lines changed: 133 additions & 31 deletions

File tree

ephemeris/setup_data_libraries.py

Lines changed: 102 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,56 @@
1111
from .common_parser import get_common_args
1212

1313

14-
def setup_data_libraries(gi, data):
14+
def create_legacy(gi, desc):
    """Create a data library and populate it using the pre-18.05 library APIs.

    ``desc`` is a mapping with a ``destination`` entry and, optionally, a
    nested ``items`` list. The destination ``type`` must be ``"library"``;
    its ``name``, ``description`` and ``synopsis`` are passed to
    ``create_library`` (missing ones are passed as ``None``).

    Any entry that has an ``items`` key is treated as a container: a folder
    is created for it only when it also has a non-empty ``name``, otherwise
    its children land in the parent folder. Any other entry is uploaded from
    its ``url`` into the current folder.

    Raises:
        ValueError: if the destination is not a library, or if an item's
            ``src`` is anything other than ``"url"`` (including missing).
    """
    destination = desc["destination"]
    if destination["type"] != "library":
        raise ValueError("Only libraries may be created with pre-18.05 Galaxies using this script.")

    lib = gi.libraries.create_library(
        destination.get("name"),
        destination.get("description"),
        destination.get("synopsis"),
    )
    lib_id = lib['id']

    def populate_items(base_folder_id, has_items):
        # Container entry: optionally create a folder, then recurse.
        if "items" in has_items:
            name = has_items.get("name")
            folder_id = base_folder_id
            if name:
                folder = gi.libraries.create_folder(lib_id, name, base_folder_id=base_folder_id)
                # create_folder's result is indexed as a list of folder dicts.
                folder_id = folder[0]["id"]
            # An empty ``items:`` key parses as None in YAML; treat it as no children.
            for item in has_items["items"] or []:
                populate_items(folder_id, item)
            return

        # Leaf entry: the legacy API path can only fetch from URLs.
        if has_items.get("src") != "url":
            raise ValueError("For pre-18.05 Galaxies only items with a 'url' src are supported.")

        gi.libraries.upload_file_from_url(
            lib_id,
            has_items['url'],
            folder_id=base_folder_id,
            # Fall back to 'auto' (bioblend's own default) when no ext is given.
            file_type=has_items.get('ext', 'auto'),
        )

    # None as the base folder means "no explicit parent folder" for top-level entries.
    populate_items(None, desc)
48+
49+
50+
def create_batch_api(gi, desc):
    """Submit ``desc`` as a single target to the Galaxy ``tools/fetch`` endpoint.

    A new history is created first and its id is sent along with the
    request. The POST goes through the tool client's ``_post`` helper to
    ``<gi.url>/tools/fetch``; nothing is returned.
    """
    history_client = galaxy.histories.HistoryClient(gi)
    tool_client = galaxy.tools.ToolClient(gi)

    # The request carries the id of a freshly created history.
    new_history = history_client.create_history()
    fetch_url = "%s/tools/fetch" % gi.url

    tool_client._post(
        payload={
            'targets': [desc],
            'history_id': new_history["id"],
        },
        url=fetch_url,
    )
61+
62+
63+
def setup_data_libraries(gi, data, training=False, legacy=False):
1564
"""
1665
Load files into a Galaxy data library.
1766
By default all test-data tools from all installed tools
@@ -20,28 +69,55 @@ def setup_data_libraries(gi, data):
2069

2170
log.info("Importing data libraries.")
2271
jc = galaxy.jobs.JobsClient(gi)
72+
config = galaxy.config.ConfigClient(gi)
73+
version = config.get_version()
2374

24-
folders = dict()
25-
26-
libraries = yaml.safe_load(data)
27-
for lib in libraries['libraries']:
28-
folders[lib['name']] = lib['files']
75+
if legacy:
76+
create_func = create_legacy
77+
else:
78+
version_major = version.get("version_major", "16.01")
79+
create_func = create_batch_api if version_major >= "18.05" else create_legacy
80+
81+
library_def = yaml.safe_load(data)
82+
83+
def normalize_items(has_items):
    """Rewrite older training-material keys, in place, to the Galaxy batch format.

    Walks ``has_items`` recursively and applies three renames/defaults:
    a ``files`` key becomes ``items``; a child with a truthy ``url`` and no
    ``src`` gets ``src = "url"``; a child's ``file_type`` key becomes ``ext``.
    The mapping is mutated and nothing is returned.
    """
    # "files" is the older spelling of "items".
    if "files" in has_items:
        has_items["items"] = has_items.pop("files")

    for child in has_items.get("items", []):
        # Normalize the subtree first, then the child's own keys.
        normalize_items(child)

        # A URL given without an explicit source is a URL upload.
        if child.get("src") is None and child.get("url"):
            child["src"] = "url"

        # "file_type" is the older spelling of "ext".
        if "file_type" in child:
            child["ext"] = child.pop("file_type")
99+
100+
# Normalize library definitions to allow older ephemeris style and native Galaxy batch
101+
# upload formats.
102+
if "libraries" in library_def:
103+
# File contains multiple definitions.
104+
library_def["items"] = library_def.pop("libraries")
105+
106+
if "destination" not in library_def:
107+
library_def["destination"] = {"type": "library"}
108+
destination = library_def["destination"]
109+
110+
if training:
111+
destination["name"] = destination.get("name", 'Training Data')
112+
destination["description"] = destination.get("description", 'Data pulled from online archives.')
113+
else:
114+
destination["name"] = destination.get("name", 'New Data Library')
115+
destination["description"] = destination.get("description", '')
29116

30-
if folders:
31-
log.info("Create 'Test Data' library.")
32-
lib = gi.libraries.create_library('Training Data', 'Data pulled from online archives.')
33-
lib_id = lib['id']
117+
normalize_items(library_def)
34118

35-
for fname, urls in folders.items():
36-
log.info("Creating folder: %s" % fname)
37-
folder = gi.libraries.create_folder(lib_id, fname)
38-
for url in urls:
39-
gi.libraries.upload_file_from_url(
40-
lib_id,
41-
url['url'],
42-
folder_id=folder[0]['id'],
43-
file_type=url['file_type']
44-
)
119+
if library_def:
120+
create_func(gi, library_def)
45121

46122
no_break = True
47123
while True:
@@ -62,9 +138,13 @@ def _parser():
62138
parent = get_common_args()
63139
parser = argparse.ArgumentParser(
64140
parents=[parent],
65-
description='Populate the Galaxy data library with test data.'
141+
description='Populate the Galaxy data library with data.'
66142
)
67143
parser.add_argument('-i', '--infile', required=True, type=argparse.FileType('r'))
144+
parser.add_argument('--training', default=False, action="store_true",
145+
help="Set defaults that make sense for training data.")
146+
parser.add_argument('--legacy', default=False, action="store_true",
147+
help="Use legacy APIs even for newer Galaxies that should have a batch upload API enabled.")
68148
return parser
69149

70150

@@ -80,7 +160,7 @@ def main():
80160
if args.verbose:
81161
log.basicConfig(level=log.DEBUG)
82162

83-
setup_data_libraries(gi, args.infile)
163+
setup_data_libraries(gi, args.infile, training=args.training, legacy=args.legacy)
84164

85165

86166
if __name__ == '__main__':

tests/library_data_example.yaml

Lines changed: 21 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,21 @@
1-
libraries:
2-
- name: "Test data"
3-
files:
4-
- url: https://raw.githubusercontent.com/eteriSokhoyan/test-data/master/cliques-high-representatives.fa
5-
file_type: fasta
6-
- name: "Test data segmentation-fold"
7-
files:
8-
- url: https://raw.githubusercontent.com/yhoogstrate/segmentation-fold/55d0bb28b01e613844ca35cf21fa41379fd72770/scripts/energy-estimation-utility/tests/test-data/workflow-test_cd-box_kturns.xml
9-
file_type: xml
1+
destination:
2+
type: library
3+
name: "Cool Training Library"
4+
description: "A longer description."
5+
synopsis: "Optional - does anyone ever set this?"
6+
items:
7+
- name: "Test Folder 1"
8+
description: "Description of what is in Test Folder 1" # Only populated with new API.
9+
items:
10+
- url: https://raw.githubusercontent.com/eteriSokhoyan/test-data/master/cliques-high-representatives.fa
11+
src: url
12+
ext: fasta
13+
info: "A cool longer description." # Only populated with new API.
14+
dbkey: "hg19" # Only populated with new API.
15+
- name: "Test data segmentation-fold"
16+
items:
17+
- url: https://raw.githubusercontent.com/yhoogstrate/segmentation-fold/55d0bb28b01e613844ca35cf21fa41379fd72770/scripts/energy-estimation-utility/tests/test-data/workflow-test_cd-box_kturns.xml
18+
name: workflow-test_cd-box_kturns.xml # Only populated with new API.
19+
info: Downloaded from https://raw.githubusercontent.com/yhoogstrate/segmentation-fold/55d0bb28b01e613844ca35cf21fa41379fd72770/scripts/energy-estimation-utility/tests/test-data/workflow-test_cd-box_kturns.xml
20+
src: url
21+
ext: xml
tests/library_data_example_legacy.yaml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
libraries:
2+
- name: "Test data"
3+
files:
4+
- url: https://raw.githubusercontent.com/eteriSokhoyan/test-data/master/cliques-high-representatives.fa
5+
file_type: fasta
6+
- name: "Test data segmentation-fold"
7+
files:
8+
- url: https://raw.githubusercontent.com/yhoogstrate/segmentation-fold/55d0bb28b01e613844ca35cf21fa41379fd72770/scripts/energy-estimation-utility/tests/test-data/workflow-test_cd-box_kturns.xml
9+
file_type: xml

tests/test_workflow_and_data.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ workflow-install -a admin -g http://localhost:$WEB_PORT -w "$TEST_DATA"/test_wor
1818
echo "Populate data libraries"
1919
setup-data-libraries --user admin@galaxy.org -p admin -g http://localhost:$WEB_PORT -i "$TEST_DATA"/library_data_example.yaml
2020
setup-data-libraries -a admin -g http://localhost:$WEB_PORT -i "$TEST_DATA"/library_data_example.yaml
21+
setup-data-libraries -a admin -g http://localhost:$WEB_PORT -i "$TEST_DATA"/library_data_example_legacy.yaml
2122

2223
echo "Get tool list from Galaxy"
2324
get-tool-list -g http://localhost:$WEB_PORT -o result_tool_list.yaml

0 commit comments

Comments
 (0)