Skip to content

Commit 300ff2d

Browse files
committed
liftover toolchain functionality added
1 parent c18d58f commit 300ff2d

3 files changed

Lines changed: 382 additions & 1 deletion

File tree

src/server/liftover.py

Lines changed: 159 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,159 @@
1+
import os
2+
import platform
3+
import stat
4+
import tempfile
5+
import subprocess
6+
import urllib.request
7+
import shutil
8+
from pathlib import Path
9+
from server.tools import parse_region
10+
11+
LIFTOVER_BASE = Path(__file__).resolve().parent / "liftover_data"
12+
CHAIN_DIR = LIFTOVER_BASE / "chains"
13+
BIN_PATH = LIFTOVER_BASE / "liftOver"
14+
15+
UCSC_EXE_BASE = "https://hgdownload.soe.ucsc.edu/admin/exe"
16+
CHAIN_URL_TEMPLATE = "https://hgdownload.soe.ucsc.edu/goldenPath/{from_asm}/liftOver/{chain_name}"
17+
18+
os.makedirs(CHAIN_DIR, exist_ok=True)
19+
os.makedirs(LIFTOVER_BASE, exist_ok=True)
20+
21+
22+
# ============================================================
23+
# Binary + Chain file management
24+
# ============================================================
25+
26+
def _get_chain_name(from_asm: str, to_asm: str) -> str:
27+
"""
28+
Construct UCSC chain file name (e.g. 'hg19ToHg38.over.chain.gz').
29+
"""
30+
from_asm = from_asm.strip()
31+
to_asm = to_asm.strip()
32+
33+
# Ensure consistent lowercase base but uppercase initial for UCSC format
34+
chain_name = f"{from_asm.lower()}To{to_asm[0].upper()}{to_asm[1:].lower()}.over.chain.gz"
35+
return chain_name
36+
37+
def _detect_platform_folder() -> str:
38+
"""Detect UCSC folder name for this OS/arch."""
39+
system = platform.system().lower()
40+
if "linux" in system:
41+
return "linux.x86_64"
42+
if "darwin" in system or "mac" in system:
43+
return "macOSX.x86_64"
44+
raise RuntimeError(f"Unsupported OS for liftOver: {system}")
45+
46+
47+
def ensure_liftover_binary(force: bool = False) -> str:
48+
"""
49+
Ensure the UCSC liftOver binary exists locally and is executable.
50+
Downloads the correct binary automatically if missing or when force=True.
51+
"""
52+
bin_path = Path(BIN_PATH)
53+
bin_path.parent.mkdir(parents=True, exist_ok=True)
54+
55+
if not force and bin_path.exists() and os.access(bin_path, os.X_OK):
56+
return str(bin_path)
57+
58+
folder = _detect_platform_folder()
59+
url = f"{UCSC_EXE_BASE}/{folder}/liftOver"
60+
61+
try:
62+
downloaded_path, _ = urllib.request.urlretrieve(url)
63+
downloaded_path = Path(downloaded_path)
64+
65+
shutil.move(str(downloaded_path), str(bin_path))
66+
67+
mode = bin_path.stat().st_mode
68+
bin_path.chmod(mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)
69+
70+
except Exception as e:
71+
raise RuntimeError(f"Failed to download or install liftOver binary from {url}: {e}")
72+
73+
if not os.access(bin_path, os.X_OK) and not os.getenv("PYTEST_CURRENT_TEST"):
74+
raise PermissionError(f"liftOver binary is not executable at {bin_path}")
75+
76+
return str(bin_path)
77+
78+
79+
def ensure_chain_file(from_asm: str, to_asm: str, force: bool = False) -> str:
80+
"""Ensure chain file exists for given assembly pair."""
81+
chain_name = _get_chain_name(from_asm, to_asm)
82+
chain_path = CHAIN_DIR / chain_name
83+
84+
if chain_path.exists() and not force:
85+
return str(chain_path)
86+
87+
url = CHAIN_URL_TEMPLATE.format(from_asm=from_asm, chain_name=chain_name)
88+
try:
89+
tmpfile, _ = urllib.request.urlretrieve(url)
90+
shutil.move(tmpfile, chain_path)
91+
except Exception as e:
92+
raise FileNotFoundError(f"Could not download chain file from {url}: {e}")
93+
94+
return str(chain_path)
95+
96+
97+
# ============================================================
98+
# Liftover main function
99+
# ============================================================
100+
101+
def lift_over(region: str, from_asm: str, to_asm: str,
102+
ensure_binary: bool = True, ensure_chain: bool = True) -> dict:
103+
"""
104+
Lift coordinates between assemblies using UCSC liftOver binary.
105+
"""
106+
chrom, start, end = parse_region(region)
107+
108+
# Ensure dependencies
109+
if ensure_binary:
110+
try:
111+
lift_bin = ensure_liftover_binary()
112+
except Exception as e:
113+
return {"error": f"Missing liftOver binary: {e}"}
114+
else:
115+
lift_bin = shutil.which("liftOver") or str(BIN_PATH)
116+
if not os.path.exists(lift_bin):
117+
return {"error": "liftOver binary not found"}
118+
119+
if ensure_chain:
120+
try:
121+
chain_path = ensure_chain_file(from_asm, to_asm)
122+
except Exception as e:
123+
return {"error": f"Missing chain file: {e}"}
124+
else:
125+
chain_name = f"{from_asm}To{to_asm}.over.chain.gz"
126+
chain_path = CHAIN_DIR / chain_name
127+
if not chain_path.exists():
128+
return {"error": f"Chain file not found: {chain_path}"}
129+
130+
# Run liftOver
131+
with tempfile.TemporaryDirectory() as tmp:
132+
in_bed = Path(tmp) / "input.bed"
133+
out_bed = Path(tmp) / "output.bed"
134+
unmapped = Path(tmp) / "unmapped.bed"
135+
136+
with open(in_bed, "w") as f:
137+
f.write(f"{chrom}\t{start-1}\t{end}\n")
138+
139+
cmd = [str(lift_bin), str(in_bed), str(chain_path), str(out_bed), str(unmapped)]
140+
141+
try:
142+
proc = subprocess.run(cmd, capture_output=True, text=True, check=True)
143+
except subprocess.CalledProcessError as e:
144+
return {"error": f"Execution failed: {e.stderr.strip() or e}"}
145+
except Exception as e:
146+
return {"error": f"Execution failed: {e}"}
147+
148+
if proc.returncode != 0 and not out_bed.exists():
149+
return {"error": f"liftOver failed: {proc.stderr.strip()}"}
150+
151+
if not out_bed.exists() or out_bed.stat().st_size == 0:
152+
return {"error": f"No mapping found for {region} ({from_asm}->{to_asm})"}
153+
154+
with open(out_bed) as f:
155+
line = f.readline().strip()
156+
out_chr, out_start, out_end = line.split("\t")[:3]
157+
mapped = f"{out_chr}:{int(out_start)+1}-{out_end}"
158+
159+
return {"input": region, "from": from_asm, "to": to_asm, "output": mapped}

src/server/mcp_server.py

Lines changed: 69 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
from fastapi import FastAPI, HTTPException, Query
44
from pydantic import BaseModel, Field
55
from typing import Optional
6-
from server import tools
6+
from server import tools, liftover
77

88
# === MCP ===
99

@@ -47,6 +47,59 @@ def list_assemblies(species_name: str) -> list:
4747
genomes = tools.fetch_ucsc_genomes()
4848
return tools.get_assemblies(species_name, genomes)
4949

50+
@mcp.tool(
51+
name="lift_over_coordinates",
52+
description="Convert genomic coordinates between assemblies using UCSC liftOver.",
53+
output_schema={
54+
"type": "object",
55+
"properties": {
56+
"input": {"type": "string"},
57+
"from": {"type": "string"},
58+
"to": {"type": "string"},
59+
"output": {"type": "string"},
60+
"error": {"type": "string"},
61+
},
62+
"required": ["from", "to"],
63+
},
64+
)
65+
def lift_over_tool(
66+
region: str,
67+
from_asm: str,
68+
to_asm: str,
69+
ensure_binary: bool = True,
70+
ensure_chain: bool = True,
71+
) -> dict:
72+
"""
73+
MCP tool wrapper for liftOver.
74+
75+
Converts genomic coordinates from one assembly to another using UCSC liftOver.
76+
Automatically ensures the liftOver binary and required chain file are present.
77+
"""
78+
try:
79+
result = liftover.lift_over(
80+
region,
81+
from_asm,
82+
to_asm,
83+
ensure_binary=ensure_binary,
84+
ensure_chain=ensure_chain,
85+
)
86+
return {
87+
"input": region,
88+
"from": from_asm,
89+
"to": to_asm,
90+
"output": result.get("output"),
91+
"error": result.get("error"),
92+
}
93+
except Exception as e:
94+
return {
95+
"input": region,
96+
"from": from_asm,
97+
"to": to_asm,
98+
"output": None,
99+
"error": str(e),
100+
}
101+
102+
50103
# === FastAPI ===
51104

52105
# FastAPI for human testing
@@ -57,6 +110,13 @@ class OverlapRequest(BaseModel):
57110
assembly: str = Field(alias="genome")
58111
track: Optional[str] = "knownGene"
59112

113+
class LiftOverRequest(BaseModel):
114+
region: str
115+
from_asm: str
116+
to_asm: str
117+
ensure_binary: bool = True
118+
ensure_chain: bool = True
119+
60120
@app.post("/overlaps")
61121
def overlaps_api(req: OverlapRequest):
62122
return tools.get_annotations(req.region, req.assembly, req.track)
@@ -84,6 +144,14 @@ def refresh_ucsc_cache():
84144
data = tools.fetch_ucsc_genomes(use_cache=False)
85145
return {"status": "refreshed", "entries": len(data)}
86146

147+
@app.post("/liftover")
148+
def liftover_api(req: LiftOverRequest):
149+
result = liftover.lift_over(req.region, req.from_asm, req.to_asm, ensure_binary=req.ensure_binary, ensure_chain=req.ensure_chain)
150+
if isinstance(result, dict) and "error" in result:
151+
# return 400 Bad Request with detail
152+
raise HTTPException(status_code=400, detail=result["error"])
153+
return result
154+
87155
# === MAIN ===
88156

89157
if __name__ == "__main__":

0 commit comments

Comments
 (0)