Skip to content

Commit c038b8e

Browse files
authored
Merge pull request #828 from MemPalace/performance-optimization-regex-caching-3001788084232137906
⚡ Optimize entity detection with regex caching and pre-compilation
2 parents f7fcd51 + d886a62 commit c038b8e

1 file changed

Lines changed: 7 additions & 5 deletions

File tree

mempalace/entity_detector.py

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,7 @@
1717

1818
import re
1919
import os
20+
import functools
2021
from pathlib import Path
2122
from collections import defaultdict
2223

@@ -60,6 +61,8 @@
6061
r"\btheir\b",
6162
]
6263

64+
PRONOUN_RE = re.compile("|".join(PRONOUN_PATTERNS), re.IGNORECASE)
65+
6366
# Person signals — dialogue markers
6467
DIALOGUE_PATTERNS = [
6568
r"^>\s*{name}[:\s]", # > Speaker: ...
@@ -466,6 +469,7 @@ def extract_candidates(text: str) -> dict:
466469
# ==================== SIGNAL SCORING ====================
467470

468471

472+
@functools.lru_cache(maxsize=128)
469473
def _build_patterns(name: str) -> dict:
470474
"""Pre-compile all regex patterns for a single entity name."""
471475
n = re.escape(name)
@@ -515,11 +519,9 @@ def score_entity(name: str, text: str, lines: list) -> dict:
515519
name_line_indices = [i for i, line in enumerate(lines) if name_lower in line.lower()]
516520
pronoun_hits = 0
517521
for idx in name_line_indices:
518-
window_text = " ".join(lines[max(0, idx - 2) : idx + 3]).lower()
519-
for pronoun_pattern in PRONOUN_PATTERNS:
520-
if re.search(pronoun_pattern, window_text):
521-
pronoun_hits += 1
522-
break
522+
window_text = " ".join(lines[max(0, idx - 2) : idx + 3])
523+
if PRONOUN_RE.search(window_text):
524+
pronoun_hits += 1
523525
if pronoun_hits > 0:
524526
person_score += pronoun_hits * 2
525527
person_signals.append(f"pronoun nearby ({pronoun_hits}x)")

0 commit comments

Comments
 (0)