1515
1616sys .path .insert (0 , os .path .dirname (os .path .dirname (os .path .abspath (__file__ ))))
1717
18+ from librt import base64
19+ from librt .internal import ReadBuffer , WriteBuffer
20+
21+ from mypy .cache import CacheMeta
1822from mypy .metastore import FilesystemMetadataStore , MetadataStore , SqliteMetadataStore
1923from mypy .util import json_dumps , json_loads
2024
@@ -31,40 +35,116 @@ def merge_deps(all: dict[str, set[str]], new: dict[str, set[str]]) -> None:
3135 all .setdefault (k , set ()).update (v )
3236
3337
def sort_deps(
    dependencies: list[str], suppressed: list[str], dep_prios: list[int], dep_lines: list[int]
) -> tuple[list[str], list[str], list[int], list[int]]:
    """Sort dependencies and suppressed independently, keeping prios/lines aligned.

    dep_prios and dep_lines are parallel to dependencies + suppressed; they are
    reordered so that they stay in sync with the two sorted name lists.
    """

    def _unzip(triples: list[tuple[str, int, int]]) -> tuple[list[str], list[int], list[int]]:
        # zip(*[]) yields nothing at all, so the empty case needs special handling.
        if not triples:
            return [], [], []
        names, prios, lines = zip(*triples)
        return list(names), list(prios), list(lines)

    combined = list(zip(dependencies + suppressed, dep_prios, dep_lines))
    split = len(dependencies)
    deps_out, prios_a, lines_a = _unzip(sorted(combined[:split]))
    supp_out, prios_b, lines_b = _unzip(sorted(combined[split:]))
    return deps_out, supp_out, prios_a + prios_b, lines_a + lines_b
65+
66+
def normalize_meta(meta: CacheMeta) -> None:
    """Normalize a CacheMeta instance in place to avoid spurious diffs.

    Mtimes are zeroed out and the dependency/suppressed lists are sorted
    deterministically, keeping the parallel prio/line lists aligned.
    """
    meta.mtime = meta.data_mtime = 0
    normalized = sort_deps(meta.dependencies, meta.suppressed, meta.dep_prios, meta.dep_lines)
    meta.dependencies, meta.suppressed, meta.dep_prios, meta.dep_lines = normalized
77+
78+
def serialize_meta_ff(meta: CacheMeta, version_prefix: bytes) -> bytes:
    """Serialize a CacheMeta instance back to fixed format binary.

    The returned bytes start with version_prefix, matching the on-disk layout
    that load() strips off before deserializing.
    """
    out = WriteBuffer()
    meta.write(out)
    return b"".join((version_prefix, out.getvalue()))
84+
85+
def normalize_json_meta(obj: dict[str, Any]) -> None:
    """Normalize a JSON meta dict in place to avoid spurious diffs.

    Mtimes are zeroed out; if dependency info is present, it is sorted
    deterministically with the parallel prio/line lists kept aligned.
    """
    obj["mtime"] = obj["data_mtime"] = 0
    if "dependencies" not in obj:
        return
    (
        obj["dependencies"],
        obj["suppressed"],
        obj["dep_prios"],
        obj["dep_lines"],
    ) = sort_deps(obj["dependencies"], obj["suppressed"], obj["dep_prios"], obj["dep_lines"])
97+
98+
def load(cache: MetadataStore, s: str) -> Any:
    """Load and normalize a cache entry.

    Returns:
        - For .meta.ff: normalized binary bytes (with version prefix)
        - For .data.ff: raw binary bytes
        - For .meta.json/.data.json/.deps.json: parsed and normalized dict/list
    """
    raw = cache.read(s)
    if s.endswith(".data.ff"):
        return raw
    if s.endswith(".meta.ff"):
        prefix, payload = raw[:2], raw[2:]
        meta = CacheMeta.read(ReadBuffer(payload), data_file="")
        if meta is None:
            # Can't deserialize (e.g. different mypy version). Fall back to
            # raw bytes -- we lose mtime normalization but the diff stays correct.
            return raw
        normalize_meta(meta)
        return serialize_meta_ff(meta, prefix)
    parsed = json_loads(raw)
    if s.endswith(".meta.json"):
        parsed = parsed if parsed is None else parsed  # no-op guard removed below
        normalize_json_meta(parsed)
    elif s.endswith(".deps.json"):
        # For deps files, sort the deps to avoid spurious mismatches
        for deps in parsed.values():
            deps.sort()
    return parsed
59128
60129
def encode_for_diff(s: str, obj: object) -> str:
    """Encode a cache entry value for inclusion in the JSON diff.

    Fixed format binary entries are base64-encoded, JSON entries are
    re-serialized as JSON strings.
    """
    if not isinstance(obj, bytes):
        return json_dumps(obj).decode()
    return base64.b64encode(obj).decode()
139+
140+
61141def main () -> None :
62142 parser = argparse .ArgumentParser ()
63143 parser .add_argument ("--verbose" , action = "store_true" , default = False , help = "Increase verbosity" )
64144 parser .add_argument ("--sqlite" , action = "store_true" , default = False , help = "Use a sqlite cache" )
65- parser .add_argument ("input_dir1" , help = "Input directory for the cache" )
66- parser .add_argument ("input_dir2" , help = "Input directory for the cache" )
67- parser .add_argument ("output" , help = "Output file" )
145+ parser .add_argument ("input_dir1" , help = "Input directory for the original cache" )
146+ parser .add_argument ("input_dir2" , help = "Input directory for the target cache" )
147+ parser .add_argument ("output" , help = "Output file with the diff from original cache " )
68148 args = parser .parse_args ()
69149
70150 cache1 = make_cache (args .input_dir1 , args .sqlite )
@@ -73,7 +153,7 @@ def main() -> None:
73153 type_misses : dict [str , int ] = defaultdict (int )
74154 type_hits : dict [str , int ] = defaultdict (int )
75155
76- updates : dict [str , bytes | None ] = {}
156+ updates : dict [str , str | None ] = {}
77157
78158 deps1 : dict [str , set [str ]] = {}
79159 deps2 : dict [str , set [str ]] = {}
@@ -96,10 +176,12 @@ def main() -> None:
96176 # so we can produce a much smaller direct diff of them.
97177 if ".deps." not in s :
98178 if obj2 is not None :
99- updates [s ] = json_dumps ( obj2 )
179+ updates [s ] = encode_for_diff ( s , obj2 )
100180 else :
101181 updates [s ] = None
102182 elif obj2 :
183+ # This is a deps file, with json data
184+ assert ".deps." in s
103185 merge_deps (deps1 , obj1 )
104186 merge_deps (deps2 , obj2 )
105187 else :
@@ -109,11 +191,15 @@ def main() -> None:
109191 cache1_all_set = set (cache1_all )
110192 for s in cache2 .list_all ():
111193 if s not in cache1_all_set :
112- updates [s ] = cache2 .read (s )
194+ raw = cache2 .read (s )
195+ if s .endswith (".ff" ):
196+ updates [s ] = base64 .b64encode (raw ).decode ()
197+ else :
198+ updates [s ] = raw .decode ()
113199
114200 # Compute what deps have been added and merge them all into the
115201 # @root deps file.
116- new_deps = {k : deps1 .get (k , set ()) - deps2 .get (k , set ()) for k in deps2 }
202+ new_deps = {k : deps2 .get (k , set ()) - deps1 .get (k , set ()) for k in deps2 }
117203 new_deps = {k : v for k , v in new_deps .items () if v }
118204 try :
119205 root_deps = load (cache1 , "@root.deps.json" )
@@ -122,7 +208,7 @@ def main() -> None:
122208 merge_deps (new_deps , root_deps )
123209
124210 new_deps_json = {k : list (v ) for k , v in new_deps .items () if v }
125- updates ["@root.deps.json" ] = json_dumps (new_deps_json )
211+ updates ["@root.deps.json" ] = json_dumps (new_deps_json ). decode ()
126212
127213 # Drop updates to deps.meta.json for size reasons. The diff
128214 # applier will manually fix it up.
0 commit comments