Skip to content

Commit 9e76d34

Browse files
authored
Straighten interface/implementation boundary (python#21270)
This is a follow-up to python#21119 This avoids type-checking function signatures twice in two-phase checking. Luckily, things where already quite clean, only overloads needed some non-trivial logic (I guess this is why the daemon checks overloads as a whole). Some comments: * I didn't touch the daemon after all. When we will be more confident that this works well, we can simply set `impl_only=True` in the daemon (or actually just remove this flag altogether). * I found a bug while reading the code that we use bare (un-decorated) signatures for consistency checks for in-place/reverse special methods like `__iadd__()` and `__radd__()`, I just added TODOs for now, as these looks quite niche use cases. * Most of the PR is adding the missing docstring for `checker.py`.
1 parent dc6f6ee commit 9e76d34

2 files changed

Lines changed: 120 additions & 13 deletions

File tree

mypy/build.py

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3306,12 +3306,19 @@ def type_map(self) -> dict[Expression, Type]:
33063306
assert len(self.type_checker()._type_maps) == 1
33073307
return self.type_checker()._type_maps[0]
33083308

3309-
def type_check_second_pass(self, todo: Sequence[DeferredNode] | None = None) -> bool:
3309+
def type_check_second_pass(
3310+
self,
3311+
todo: Sequence[DeferredNode] | None = None,
3312+
recurse_into_functions: bool = True,
3313+
impl_only: bool = False,
3314+
) -> bool:
33103315
if self.options.semantic_analysis_only:
33113316
return False
33123317
t0 = time_ref()
33133318
with self.wrap_context():
3314-
result = self.type_checker().check_second_pass(todo=todo)
3319+
result = self.type_checker().check_second_pass(
3320+
todo=todo, recurse_into_functions=recurse_into_functions, impl_only=impl_only
3321+
)
33153322
self.time_spent_us += time_spent_us(t0)
33163323
return result
33173324

@@ -4758,7 +4765,7 @@ def process_stale_scc_interface(
47584765
for id in stale:
47594766
if id not in unfinished_modules:
47604767
continue
4761-
if not graph[id].type_check_second_pass():
4768+
if not graph[id].type_check_second_pass(recurse_into_functions=False):
47624769
unfinished_modules.discard(id)
47634770

47644771
t4 = time.time()
@@ -4816,7 +4823,7 @@ def process_stale_scc_implementation(
48164823
for _, node, info in tree.local_definitions(impl_only=True):
48174824
assert isinstance(node.node, (FuncDef, OverloadedFuncDef, Decorator))
48184825
todo.append(DeferredNode(node.node, info))
4819-
graph[id].type_check_second_pass(todo=todo)
4826+
graph[id].type_check_second_pass(todo=todo, impl_only=True)
48204827
if not checker.deferred_nodes:
48214828
unfinished_modules.discard(id)
48224829
graph[id].detect_possibly_undefined_vars()
@@ -4825,7 +4832,7 @@ def process_stale_scc_implementation(
48254832
for id in stale:
48264833
if id not in unfinished_modules:
48274834
continue
4828-
if not graph[id].type_check_second_pass():
4835+
if not graph[id].type_check_second_pass(impl_only=True):
48294836
unfinished_modules.discard(id)
48304837
graph[id].detect_possibly_undefined_vars()
48314838
graph[id].finish_passes()

mypy/checker.py

Lines changed: 108 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,62 @@
1-
"""Mypy type checker."""
1+
"""Mypy type checker.
2+
3+
Infer types of expressions and type-check the AST. We use multi-pass architecture,
4+
this means we re-visit the AST until either all types are inferred, or we hit the
5+
upper limit on number of passes (currently 3). The number of passes is intentionally
6+
low, mostly to keep performance predictable. An example of code where more than one
7+
pass is needed:
8+
9+
def foo() -> None:
10+
C().x += 1 # we don't know the type of x before we visit C
11+
12+
class C:
13+
def __init__(self) -> None:
14+
self.x = <some non-literal expression>
15+
16+
In this example, encountering attribute C.x will trigger handle_cannot_determine_type()
17+
which in turn will schedule foo() to be re-visited in the next pass. This process is
18+
called deferring, some notes on it:
19+
* Only a top-level function or method can be deferred. For historical reasons we don't
20+
defer module top-levels.
21+
* There are two reasons that can trigger deferral: a type of name/attribute used in
22+
a function body is not inferred yet, or type of superclass node is not known when
23+
checking LSP.
24+
* In first case, the actual deferred node will be always FuncDef, even if the actual
25+
top-level function is either a Decorator or an OverloadedFuncDef.
26+
* In the second case, the deferred node will be actual top-level node, unless FuncDef
27+
is sufficient (in case when deferring a method of a class nested in function).
28+
* When a function was deferred we suppress most errors, and infer all expressions as
29+
Any until the end of deferred function. This is done to avoid false positives. We may
30+
change this in future by adding a special kind of Any that signifies not ready type.
31+
32+
Second important aspect of architecture is two-phase checking. Currently, it is only
33+
used in parallel type-checking mode, but at some point it will be the default in
34+
sequential mode as well.
35+
36+
In first phase (called interface phase) we type-check only module top-levels, function
37+
signatures, and bodies of functions/methods that were found during semantic analysis as
38+
(possibly) affecting external module interface (most notably __init__ methods). This is
39+
done by setting recurse_into_functions flag to False. We handle all deferrals triggered
40+
during this phase (still with recurse_into_functions being False) before moving to
41+
the next one.
42+
43+
In second phase (called implementation phase) we type-check only the bodies of functions
44+
and methods not type-checked in interface phase. This is done by setting
45+
recurse_into_functions to True, and gathering all functions/methods with def_or_infer_vars
46+
flag being False. Note that we execute only parts of the relevant visit methods that
47+
were not executed before (i.e. we don't handle function signatures twice), see
48+
check_partial() method for more details.
49+
50+
The boundary between function signature logic and function body logic is somewhat arbitrary,
51+
and currently mostly sits where it was historically. Some rules about this are:
52+
* Any logic that can *change* function signature *must* be executed in phase one.
53+
* In general, we want to put as much of logic into phase two for better performance.
54+
* Try keeping things clear/consistent, right now the boundary always sits at the point when
55+
we push the function on the scope stack and call check_func_item().
56+
57+
Note: some parts of the type-checker live in other `check*.py` files, like `checkexpr.py`,
58+
the orchestration between various type-checking phases and passes is done in `build.py`.
59+
"""
260

361
from __future__ import annotations
462

@@ -559,13 +617,15 @@ def check_second_pass(
559617
todo: Sequence[DeferredNode | FineGrainedDeferredNode] | None = None,
560618
*,
561619
allow_constructor_cache: bool = True,
620+
recurse_into_functions: bool = True,
621+
impl_only: bool = False,
562622
) -> bool:
563623
"""Run second or following pass of type checking.
564624
565625
This goes through deferred nodes, returning True if there were any.
566626
"""
567627
self.allow_constructor_cache = allow_constructor_cache
568-
self.recurse_into_functions = True
628+
self.recurse_into_functions = recurse_into_functions
569629
with state.strict_optional_set(self.options.strict_optional), checker_state.set(self):
570630
if not todo and not self.deferred_nodes:
571631
return False
@@ -591,21 +651,51 @@ def check_second_pass(
591651
if active_typeinfo:
592652
stack.enter_context(self.tscope.class_scope(active_typeinfo))
593653
stack.enter_context(self.scope.push_class(active_typeinfo))
594-
self.check_partial(node)
654+
self.check_partial(node, impl_only=impl_only)
595655
return True
596656

597-
def check_partial(self, node: DeferredNodeType | FineGrainedDeferredNodeType) -> None:
657+
def check_partial(
658+
self, node: DeferredNodeType | FineGrainedDeferredNodeType, impl_only: bool
659+
) -> None:
598660
self.widened_vars = []
599661
if isinstance(node, MypyFile):
600662
self.check_top_level(node)
601663
else:
602-
self.recurse_into_functions = True
603664
with self.binder.top_frame_context():
604-
self.accept(node)
665+
# TODO: use impl_only in the daemon as well.
666+
if not impl_only:
667+
self.accept(node)
668+
return
669+
if isinstance(node, (FuncDef, Decorator)):
670+
self.check_partial_impl(node)
671+
else:
672+
# Overloads need some special logic, since settable
673+
# properties are stored as overloads.
674+
for i, item in enumerate(node.items):
675+
# Setter and/or deleter can technically be empty.
676+
allow_empty = not node.is_property or i > 0
677+
assert isinstance(item, Decorator)
678+
# Although the actual bodies of overload items are empty, we
679+
# still need to execute some logic that doesn't affect signature.
680+
with self.tscope.function_scope(item.func):
681+
self.check_func_item(
682+
item.func, name=node.name, allow_empty=allow_empty
683+
)
684+
if node.impl is not None:
685+
# Handle the implementation as a regular function.
686+
with self.enter_overload_impl(node.impl):
687+
self.check_partial_impl(node.impl)
688+
689+
def check_partial_impl(self, impl: FuncDef | Decorator) -> None:
690+
"""Check only the body (not the signature) of a non-overloaded function."""
691+
if isinstance(impl, FuncDef):
692+
self.visit_func_def_impl(impl)
693+
else:
694+
with self.tscope.function_scope(impl.func):
695+
self.check_func_item(impl.func, name=impl.func.name)
605696

606697
def check_top_level(self, node: MypyFile) -> None:
607698
"""Check only the top-level of a module, skipping function definitions."""
608-
self.recurse_into_functions = False
609699
with self.enter_partial_types():
610700
with self.binder.top_frame_context():
611701
for d in node.defs:
@@ -774,7 +864,12 @@ def _visit_overloaded_func_def(self, defn: OverloadedFuncDef) -> None:
774864
# it may be not safe to optimize them away completely.
775865
if not self.can_skip_diagnostics:
776866
self.check_default_params(fdef.func)
777-
self.check_func_item(fdef.func, name=fdef.func.name, allow_empty=True)
867+
if self.recurse_into_functions or fdef.func.def_or_infer_vars:
868+
with (
869+
self.tscope.function_scope(fdef.func),
870+
self.set_recurse_into_functions(),
871+
):
872+
self.check_func_item(fdef.func, name=fdef.func.name, allow_empty=True)
778873
else:
779874
# Perform full check for real overloads to infer type of all decorated
780875
# overload variants.
@@ -1208,6 +1303,9 @@ def visit_func_def(self, defn: FuncDef) -> None:
12081303
self.check_func_def_override(defn, new_type)
12091304
if not self.recurse_into_functions and not defn.def_or_infer_vars:
12101305
return
1306+
self.visit_func_def_impl(defn)
1307+
1308+
def visit_func_def_impl(self, defn: FuncDef) -> None:
12111309
with self.tscope.function_scope(defn), self.set_recurse_into_functions():
12121310
self.check_func_item(defn, name=defn.name)
12131311
if not self.can_skip_diagnostics:
@@ -1219,6 +1317,7 @@ def visit_func_def(self, defn: FuncDef) -> None:
12191317
# checked when the decorator is.
12201318
found_method_base_classes = self.check_method_override(defn)
12211319
self.check_explicit_override_decorator(defn, found_method_base_classes)
1320+
# TODO: we should use decorated signature for this check.
12221321
self.check_inplace_operator_method(defn)
12231322

12241323
def check_func_item(
@@ -1563,6 +1662,7 @@ def check_funcdef_item(
15631662
and self.is_reverse_op_method(name)
15641663
and defn not in self.overload_impl_stack
15651664
):
1665+
# TODO: we should use decorated signature for this check.
15661666
self.check_reverse_op_method(item, typ, name, defn)
15671667
elif name in ("__getattr__", "__getattribute__"):
15681668
self.check_getattr_method(typ, defn, name)

0 commit comments

Comments
 (0)