|
14 | 14 | import sys |
15 | 15 | import time |
16 | 16 | import traceback |
| 17 | +from dataclasses import ( |
| 18 | + dataclass, |
| 19 | + field, |
| 20 | +) |
17 | 21 | from json import loads |
18 | 22 | from typing import ( |
19 | 23 | Any, |
|
27 | 31 | import yaml |
28 | 32 | from packaging.version import Version |
29 | 33 | from pulsar.client.staging import COMMAND_VERSION_FILENAME |
30 | | -from sqlalchemy import select |
| 34 | +from sqlalchemy import ( |
| 35 | + and_, |
| 36 | + func, |
| 37 | + select, |
| 38 | + update, |
| 39 | +) |
31 | 40 |
|
32 | 41 | from galaxy import ( |
33 | 42 | model, |
@@ -296,6 +305,18 @@ def job_config_xml_to_dict(config, root): |
296 | 305 | return config_dict |
297 | 306 |
|
298 | 307 |
|
@dataclass
class JobConfigurationLimits:
    """Typed container for the limits stored on ``JobConfiguration.limits``.

    A freshly constructed instance has every limit unset: scalar limits are
    ``None`` and each mapping-valued limit is its own empty dict (built via
    ``default_factory`` so instances never share a mapping).
    """

    # Cap on concurrently active jobs for a logged-in user.
    registered_user_concurrent_jobs: Optional[int] = None
    # Cap on concurrently active jobs for an anonymous session.
    anonymous_user_concurrent_jobs: Optional[int] = None

    # Walltime limit as configured text.
    walltime: Optional[str] = None
    # NOTE(review): presumably the parsed form of ``walltime`` -- confirm
    # against the code that fills this in.
    walltime_delta: Optional[datetime.timedelta] = None
    # NOTE(review): schema of this mapping is not visible here -- confirm.
    total_walltime: Dict[str, Any] = field(default_factory=dict)

    # NOTE(review): units are not visible here -- confirm against the parser.
    output_size: Optional[int] = None

    # Per-user cap on active jobs, looked up by destination id.
    destination_user_concurrent_jobs: Dict[str, int] = field(default_factory=dict)
    # Overall cap on active jobs, looked up by destination id or destination tag.
    destination_total_concurrent_jobs: Dict[str, int] = field(default_factory=dict)
| 318 | + |
| 319 | + |
299 | 320 | class JobConfiguration(ConfiguresHandlers): |
300 | 321 | """A parser and interface to advanced job management features. |
301 | 322 |
|
@@ -344,16 +365,7 @@ def __init__(self, app: MinimalManagerApp): |
344 | 365 | self.resource_groups = {} |
345 | 366 | self.default_resource_group = None |
346 | 367 | self.resource_parameters = {} |
347 | | - self.limits = Bunch( |
348 | | - registered_user_concurrent_jobs=None, |
349 | | - anonymous_user_concurrent_jobs=None, |
350 | | - walltime=None, |
351 | | - walltime_delta=None, |
352 | | - total_walltime={}, |
353 | | - output_size=None, |
354 | | - destination_user_concurrent_jobs={}, |
355 | | - destination_total_concurrent_jobs={}, |
356 | | - ) |
| 368 | + self.limits = JobConfigurationLimits() |
357 | 369 |
|
358 | 370 | default_resubmits = [] |
359 | 371 | default_resubmit_condition = self.app.config.default_job_resubmission_condition |
@@ -1610,12 +1622,142 @@ def get_destination_configuration(self, key, default=None): |
1610 | 1622 | dest_params = self.job_destination.params |
1611 | 1623 | return self.get_job().get_destination_configuration(dest_params, self.app.config, key, default) |
1612 | 1624 |
|
def queue_with_limit(self, job: Job, job_destination: JobDestination):
    """Queue ``job`` on ``job_destination`` only if no concurrency limit is hit.

    Every applicable limit from ``self.app.job_config.limits`` becomes a
    ``count(...) < limit`` condition in the WHERE clause of a single UPDATE
    on the job row, so the limit check and the state change are issued as
    one statement. Jobs counted as active are those in the QUEUED, RUNNING
    or RESUBMITTED state.

    :param job: the job to move to the queued state.
    :param job_destination: destination whose id, params and runner are
        written to the job row, and whose id/tags select the limits to apply.
    :returns: ``True`` if the UPDATE matched the job row (the job is now
        queued on ``job_destination``), ``False`` if no row was updated.
    """
    limits = self.app.job_config.limits
    total_limits = limits.destination_total_concurrent_jobs
    job_columns = model.Job.table.c
    active_states = [
        model.Job.states.QUEUED,
        model.Job.states.RUNNING,
        model.Job.states.RESUBMITTED,
    ]

    def active_job_count(*criteria):
        # Scalar subquery counting active jobs that also match ``criteria``.
        return (
            select(func.count(job_columns.id))
            .where(and_(job_columns.state.in_(active_states), *criteria))
            .scalar_subquery()
        )

    # Compare against None (not truthiness) so a configured tag limit of 0 is
    # enforced, the same way the per-destination limit below is handled.
    destination_tag_limits = {}
    for tag in job_destination.tags or ():
        tag_limit = total_limits.get(tag)
        if tag_limit is not None:
            destination_tag_limits[tag] = tag_limit

    # The UPDATE must always target exactly this job.
    conditions = [job_columns.id == job.id]

    if job.user_id:
        registered_limit = limits.registered_user_concurrent_jobs
        if registered_limit is not None:
            conditions.append(active_job_count(job_columns.user_id == job.user_id) < registered_limit)

        destination_user_limit = limits.destination_user_concurrent_jobs.get(job_destination.id)
        if destination_user_limit is not None:
            conditions.append(
                active_job_count(
                    job_columns.destination_id == job_destination.id,
                    job_columns.user_id == job.user_id,
                )
                < destination_user_limit
            )
    # NOTE(review): an anonymous limit of 0 is skipped by this truthiness
    # test, while the registered limit above is compared against None --
    # confirm which meaning of 0 is intended.
    elif limits.anonymous_user_concurrent_jobs and job.galaxy_session and job.galaxy_session.id:
        conditions.append(
            active_job_count(job_columns.session_id == job.galaxy_session.id)
            < limits.anonymous_user_concurrent_jobs
        )

    destination_total_limit = total_limits.get(job_destination.id)
    if destination_total_limit is not None:
        conditions.append(
            active_job_count(job_columns.destination_id == job_destination.id) < destination_total_limit
        )

    for tag, limit in destination_tag_limits.items():
        # A tag limit applies to the active jobs of every destination carrying the tag.
        destination_ids = {destination.id for destination in self.app.job_config.get_destinations(tag)}
        conditions.append(active_job_count(job_columns.destination_id.in_(destination_ids)) < limit)

    update_stmt = (
        update(model.Job)
        .where(*conditions)
        .values(
            state=model.Job.states.QUEUED,
            destination_id=job_destination.id,
            destination_params=job_destination.params,
            job_runner_name=job_destination.runner,
        )
    )

    result = self.sa_session.execute(update_stmt)
    self.sa_session.commit()

    # No matched row means at least one limit condition was false.
    return result.rowcount > 0
| 1753 | + |
1613 | 1754 | def enqueue(self): |
1614 | 1755 | job = self.get_job() |
1615 | 1756 | # Change to queued state before handing to worker thread so the runner won't pick it up again |
1616 | | - self.change_state(model.Job.states.QUEUED, flush=False, job=job) |
1617 | | - # Persist the destination so that the job will be included in counts if using concurrency limits |
1618 | | - self.set_job_destination(self.job_destination, None, flush=False, job=job) |
| 1757 | + if self.is_task: |
| 1758 | + self.change_state(model.Job.states.QUEUED, flush=False, job=job) |
| 1759 | + elif not self.queue_with_limit(job, self.job_destination): |
| 1760 | + return False |
1619 | 1761 | # Set object store after job destination so can leverage parameters... |
1620 | 1762 | self._set_object_store_ids(job) |
1621 | 1763 | # Now that we have the object store id, check if we are over the limit |
|
0 commit comments