-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathapp.py
More file actions
106 lines (89 loc) · 3.89 KB
/
app.py
File metadata and controls
106 lines (89 loc) · 3.89 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
import os
from flask import Flask, request, jsonify, send_file, make_response
from flask_cors import cross_origin
import io
import json
import csv
# Flask application instance; the routes below are registered against it.
app = Flask(__name__)
def load_pre_processed_file(filename):
    """Load the pre-computed results JSON file.

    Returns the parsed object, or an empty dict when the file is missing,
    unreadable, or contains invalid JSON.
    """
    try:
        # Context manager ensures the file handle is closed (the old code
        # leaked it via json.load(open(...))).
        with open(filename, "r") as handle:
            return json.load(handle)
    except (OSError, json.JSONDecodeError) as exc:
        # Bug fix: the original f-string never interpolated the filename.
        print(f"Cannot load file {filename}: {exc}")
        return {}
def parse_mgi_ids(req):
    """Extract MGI accession IDs from a Flask request.

    Accepts either an uploaded file named "file" (one ID per line) or a
    JSON body containing an "mgi_ids" list. Returns [] when neither is
    present.
    """
    if "file" in req.files:
        uploaded = req.files["file"]
        # splitlines() also handles Windows (\r\n) line endings, which
        # split("\n") left as trailing \r on each ID.
        return uploaded.read().decode("utf-8").strip().splitlines()
    # silent=True avoids raising on non-JSON bodies.
    payload = req.get_json(silent=True)
    if payload and "mgi_ids" in payload:
        # Bug fix: read from the passed-in request, not the global `request`.
        return payload["mgi_ids"]
    return []
def transform_data_for_tabular_formats(data):
    """Flatten gene records into one row per allele for tabular export.

    Each row is a copy of the allele dict enriched with its parent gene's
    fields; list-valued columns are joined into comma-separated strings.
    Returns the rows sorted by allele name. The input is not modified.
    """
    # Columns whose list values are flattened to comma-separated strings.
    list_columns = (
        "significantLifeStages",
        "significantPhenotypes",
        "significantSystems",
        "notSignificantSystems",
        "notSignificantPhenotypes",
    )
    results = []
    for gene_data in data:
        for allele in gene_data["alleles"]:
            # Bug fix: copy the allele instead of mutating it in place.
            # The caller shallow-copies gene dicts only, so the original
            # code corrupted the module-level cached dataset: lists were
            # replaced by joined strings, and a later request would then
            # join the individual characters of those strings.
            row = dict(allele)
            row["mouseGeneSymbol"] = gene_data["mouseGeneSymbol"]
            row["humanGeneSymbols"] = gene_data["humanGeneSymbols"]
            row["humanGeneIds"] = gene_data["humanGeneIds"]
            for column in list_columns:
                row[column] = ", ".join(row[column])
            results.append(row)
    return sorted(results, key=lambda row: row["allele"])
# Path to the pre-computed results file; override via the
# PRE_PROCESSED_DATA_PATH environment variable.
PRE_PROCESSED_DATA_PATH = os.environ.get(
    "PRE_PROCESSED_DATA_PATH", "pre-computed-results.json")
# Loaded once at startup and shared by all request handlers.
preprocessed_results_data = load_pre_processed_file(PRE_PROCESSED_DATA_PATH)
@app.route("/mi/impc/batch-query", methods=["POST"])
@cross_origin()
def query_data():
    """Return the pre-processed records for the requested MGI IDs as JSON.

    Responds 400 when no IDs are supplied; unknown IDs are silently
    skipped.
    """
    mgi_ids = parse_mgi_ids(request)
    if not mgi_ids:
        return jsonify({"error": "No MGI accession IDs provided"}), 400
    # De-duplicate while preserving request order.
    mgi_ids = list(dict.fromkeys(mgi_ids))
    # Keep only the IDs present in the pre-processed dataset.
    # (Also drops the unused `response_format` local the old code computed.)
    filtered_data = [
        preprocessed_results_data[gene_id]
        for gene_id in mgi_ids
        if gene_id in preprocessed_results_data
    ]
    return jsonify(filtered_data)
@app.route("/mi/impc/download-preprocessed-data", methods=["POST"])
@cross_origin()
def download_summary_data():
    """Download the pre-processed data for the requested MGI IDs.

    The Accept header selects the format: "text/tab-separated-values"
    yields a TSV attachment; anything else yields a JSON attachment.
    Responds 400 when no IDs are supplied.
    """
    # De-duplicate while preserving order (consistent with query_data;
    # the old list(set(...)) shuffled the IDs).
    mgi_ids = list(dict.fromkeys(parse_mgi_ids(request)))
    if not mgi_ids:
        return jsonify({"error": "No MGI accession IDs provided"}), 400
    response_format = request.headers.get("Accept", "application/json").lower()
    # Shallow-copy each gene record so response-side tweaks cannot touch
    # the shared cached dataset.
    filtered_data = [
        dict(preprocessed_results_data[mgi_id])
        for mgi_id in mgi_ids
        if mgi_id in preprocessed_results_data
    ]
    if response_format == "text/tab-separated-values":
        data_formatted = transform_data_for_tabular_formats(filtered_data)
        # Guard against an empty result set (the old code raised
        # IndexError on data_formatted[0]).
        keys = sorted(data_formatted[0].keys()) if data_formatted else []
        string_io = io.StringIO()
        writer = csv.DictWriter(string_io, keys, delimiter="\t", lineterminator="\n")
        writer.writeheader()
        writer.writerows(data_formatted)
        output = make_response(string_io.getvalue())
        output.headers["Content-Disposition"] = "attachment; filename=batch-query-summary-data.tsv"
        output.headers["Content-type"] = "text/tsv"
        return output
    # Default to JSON for any other Accept value: the old code returned
    # None (a 500 error) for unrecognized formats.
    return send_file(
        io.BytesIO(json.dumps(filtered_data).encode("utf8")),
        mimetype="application/json",
        as_attachment=True,
        # Bug fix: the JSON payload was previously served with a
        # misleading .zip filename.
        download_name="batch-query-results.json"
    )
@app.route("/mi/impc/batch-query/health-check", methods=["GET"])
def health_check():
    """Liveness probe: always reports the service as up."""
    status_payload = {"status": "ok"}
    return jsonify(status_payload), 200
if __name__ == "__main__":
    # Development entry point; the PORT env var overrides the default 5000.
    # NOTE(review): debug=True should be disabled in production.
    port = int(os.environ.get('PORT', 5000))
    app.run(debug=True, host="0.0.0.0", port=port)