Skip to content

Commit aed3eb3

Browse files
committed
Upload 2.0 support for data library creation.
Converts the older-style library YAML format to the modern version, and attempts to implement a subset of the modern format with legacy APIs when needed.
1 parent e818203 commit aed3eb3

3 files changed

Lines changed: 125 additions & 31 deletions

File tree

ephemeris/setup_data_libraries.py

Lines changed: 97 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,53 @@
1111
from .common_parser import get_common_args
1212

1313

14-
def setup_data_libraries(gi, data):
14+
def create_legacy(gi, desc):
    """Create a data library using pre-18.05 (legacy) Galaxy library APIs.

    :param gi: Galaxy instance object exposing a ``libraries`` client
        (bioblend-style: ``create_library``, ``create_folder``,
        ``upload_file_from_url``).
    :param desc: Normalized library description containing a ``destination``
        of type ``library`` and a nested tree of ``items`` (folders) whose
        leaves are ``src: url`` entries carrying ``url`` and ``ext`` keys.
    """
    destination = desc["destination"]
    assert destination["type"] == "library"
    library_name = destination.get("name")
    library_description = destination.get("description")
    library_synopsis = destination.get("synopsis")

    lib = gi.libraries.create_library(library_name, library_description, library_synopsis)
    lib_id = lib['id']
    folder_id = None

    def populate_items(base_folder_id, has_items):
        # Recursively walk the item tree, creating folders for named
        # containers and uploading leaf entries.
        if "items" in has_items:
            name = has_items.get("name")
            # An unnamed container (e.g. the top-level description) reuses
            # the parent folder instead of creating a new one.
            folder_id = base_folder_id
            if name:
                folder = gi.libraries.create_folder(lib_id, name, base_folder_id=base_folder_id)
                folder_id = folder[0]["id"]
            for item in has_items["items"]:
                populate_items(folder_id, item)
        else:
            src = has_items["src"]
            # Legacy library APIs can only fetch from URLs; other sources
            # require the 18.05+ batch upload endpoint.
            assert src == "url", "For pre-18.05 Galaxies, only URL uploads are supported."
            gi.libraries.upload_file_from_url(
                lib_id,
                has_items['url'],
                folder_id=base_folder_id,
                file_type=has_items['ext']
            )

    populate_items(folder_id, desc)
45+
46+
47+
def create_batch_api(gi, desc):
    """Create a data library via the modern (18.05+) batch ``tools/fetch`` API.

    :param gi: Galaxy instance object (bioblend ``GalaxyInstance``).
    :param desc: Library description in Galaxy's native batch upload format
        (posted unchanged as a fetch target).
    :return: Decoded response of the ``tools/fetch`` request.
    """
    hc = galaxy.histories.HistoryClient(gi)
    tc = galaxy.tools.ToolClient(gi)

    # The fetch API runs the upload as a job, which requires a history.
    history = hc.create_history()
    url = "%s/tools/fetch" % gi.url
    payload = {
        'targets': [desc],
        'history_id': history["id"]
    }
    # NOTE(review): ToolClient exposes no public fetch wrapper here, so the
    # private _post helper is used to hit the endpoint directly.
    response = tc._post(payload=payload, url=url)
    # Return the response so callers can inspect the created job/outputs;
    # previously it was computed and discarded.
    return response
58+
59+
60+
def setup_data_libraries(gi, data, training=False, legacy=False):
1561
"""
1662
Load files into a Galaxy data library.
1763
By default all test-data tools from all installed tools
@@ -20,28 +66,55 @@ def setup_data_libraries(gi, data):
2066

2167
log.info("Importing data libraries.")
2268
jc = galaxy.jobs.JobsClient(gi)
69+
config = galaxy.config.ConfigClient(gi)
70+
version = config.get_version()
2371

24-
folders = dict()
25-
26-
libraries = yaml.safe_load(data)
27-
for lib in libraries['libraries']:
28-
folders[lib['name']] = lib['files']
72+
if legacy:
73+
create_func = create_legacy
74+
else:
75+
version_major = version.get("version_major", "16.01")
76+
create_func = create_batch_api if version_major >= "18.05" else create_legacy
77+
78+
library_def = yaml.safe_load(data)
79+
80+
def normalize_items(has_items):
81+
# Synchronize Galaxy batch format with older training material style.
82+
if "files" in has_items:
83+
items = has_items.pop("files")
84+
has_items["items"] = items
85+
86+
items = has_items.get("items", [])
87+
for item in items:
88+
normalize_items(item)
89+
src = item.get("src")
90+
url = item.get("url")
91+
if src is None and url:
92+
item["src"] = "url"
93+
if "file_type" in item:
94+
ext = item.pop("file_type")
95+
item["ext"] = ext
96+
97+
# Normalize library definitions to allow older ephemeris style and native Galaxy batch
98+
# upload formats.
99+
if "libraries" in library_def:
100+
# File contains multiple definitions.
101+
library_def["items"] = library_def.pop("libraries")
102+
103+
if "destination" not in library_def:
104+
library_def["destination"] = {"type": "library"}
105+
destination = library_def["destination"]
106+
107+
if training:
108+
destination["name"] = destination.get("name", 'Training Data')
109+
destination["description"] = destination.get("description", 'Data pulled from online archives.')
110+
else:
111+
destination["name"] = destination.get("name", 'New Data Library')
112+
destination["description"] = destination.get("description", '')
29113

30-
if folders:
31-
log.info("Create 'Test Data' library.")
32-
lib = gi.libraries.create_library('Training Data', 'Data pulled from online archives.')
33-
lib_id = lib['id']
114+
normalize_items(library_def)
34115

35-
for fname, urls in folders.items():
36-
log.info("Creating folder: %s" % fname)
37-
folder = gi.libraries.create_folder(lib_id, fname)
38-
for url in urls:
39-
gi.libraries.upload_file_from_url(
40-
lib_id,
41-
url['url'],
42-
folder_id=folder[0]['id'],
43-
file_type=url['file_type']
44-
)
116+
if library_def:
117+
create_func(gi, library_def)
45118

46119
no_break = True
47120
while True:
@@ -62,9 +135,11 @@ def _parser():
62135
parent = get_common_args()
63136
parser = argparse.ArgumentParser(
64137
parents=[parent],
65-
description='Populate the Galaxy data library with test data.'
138+
description='Populate the Galaxy data library with data.'
66139
)
67140
parser.add_argument('-i', '--infile', required=True, type=argparse.FileType('r'))
141+
parser.add_argument('--training', default=False, action="store_true", help="Set defaults that make sense for training data.")
142+
parser.add_argument('--legacy', default=False, action="store_true", help="Use legacy APIs even for newer Galaxies that should have a batch upload API enabled.")
68143
return parser
69144

70145

@@ -80,7 +155,7 @@ def main():
80155
if args.verbose:
81156
log.basicConfig(level=log.DEBUG)
82157

83-
setup_data_libraries(gi, args.infile)
158+
setup_data_libraries(gi, args.infile, training=args.training, legacy=args.legacy)
84159

85160

86161
if __name__ == '__main__':

tests/library_data_example.yaml

Lines changed: 19 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,19 @@
1-
libraries:
2-
- name: "Test data"
3-
files:
4-
- url: https://raw.githubusercontent.com/eteriSokhoyan/test-data/master/cliques-high-representatives.fa
5-
file_type: fasta
6-
- name: "Test data segmentation-fold"
7-
files:
8-
- url: https://raw.githubusercontent.com/yhoogstrate/segmentation-fold/55d0bb28b01e613844ca35cf21fa41379fd72770/scripts/energy-estimation-utility/tests/test-data/workflow-test_cd-box_kturns.xml
9-
file_type: xml
1+
destination:
2+
type: library
3+
name: "Cool Training Library"
4+
description: "A longer description."
5+
synopsis: "Optional - does anyone ever set this?"
6+
items:
7+
- name: "Test Folder 1"
8+
description: "Description of what is in Test Folder 1" # Only populated with new API.
9+
items:
10+
- url: https://raw.githubusercontent.com/eteriSokhoyan/test-data/master/cliques-high-representatives.fa
11+
src: url
12+
ext: fasta
13+
info: "A cool longer description." # Only populated with new API.
14+
dbkey: "hg19" # Only populated with new API.
15+
- name: "Test data segmentation-fold"
16+
items:
17+
- url: https://raw.githubusercontent.com/yhoogstrate/segmentation-fold/55d0bb28b01e613844ca35cf21fa41379fd72770/scripts/energy-estimation-utility/tests/test-data/workflow-test_cd-box_kturns.xml
18+
src: url
19+
ext: xml
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
libraries:
2+
- name: "Test data"
3+
files:
4+
- url: https://raw.githubusercontent.com/eteriSokhoyan/test-data/master/cliques-high-representatives.fa
5+
file_type: fasta
6+
- name: "Test data segmentation-fold"
7+
files:
8+
- url: https://raw.githubusercontent.com/yhoogstrate/segmentation-fold/55d0bb28b01e613844ca35cf21fa41379fd72770/scripts/energy-estimation-utility/tests/test-data/workflow-test_cd-box_kturns.xml
9+
file_type: xml

0 commit comments

Comments
 (0)