1212from typing import Iterator , Tuple
1313from utils import (
1414 create_graph_from_include_analysis ,
15+ get_include_analysis_edge_prevalence ,
1516 get_latest_include_analysis ,
1617)
1718
def minimum_edge_cut(
    include_analysis: IncludeAnalysisOutput,
    source: str,
    target: str,
    start_from_source_includes=False,
    prevalence_threshold: float = None,
) -> Iterator[Tuple[str, str, float]]:
    """Yield the include edges forming minimum edge cuts between source and target.

    A minimum edge cut is computed with ``nx.minimum_edge_cut`` on the graph
    built from ``include_analysis``, once per starting file, and the resulting
    edges are reported with their prevalence.

    Args:
        include_analysis: Include analysis data; its ``"files"`` list maps
            graph node indices to filenames, and its ``"includes"`` mapping is
            consulted when ``start_from_source_includes`` is set.
        source: Filename to cut from (or whose includes to cut from).
        target: Filename to cut to.
        start_from_source_includes: If true, compute one cut per entry of
            ``include_analysis["includes"][source]`` instead of a single cut
            starting at ``source`` itself, and report each edge only once.
        prevalence_threshold: If not ``None``, edges whose prevalence is below
            this value are skipped.

    Yields:
        ``(includer, included, prevalence)`` tuples, where ``prevalence`` is
        the value stored in the edge prevalence table for that edge
        (presumably a percentage, given it is compared against a float
        threshold -- confirm against ``get_include_analysis_edge_prevalence``).

    Raises:
        ValueError: If a starting file or ``target`` is not in
            ``include_analysis["files"]`` (raised by ``list.index``).
        KeyError: If ``source`` has no entry in ``include_analysis["includes"]``
            when ``start_from_source_includes`` is set, or an edge is missing
            from the prevalence table.
    """
    files = include_analysis["files"]
    DG: nx.DiGraph = create_graph_from_include_analysis(include_analysis)
    edge_prevalence = get_include_analysis_edge_prevalence(include_analysis)

    # Use a distinct name for the starting points so the `source` parameter
    # is not shadowed by the loop variable.
    if start_from_source_includes:
        start_files = include_analysis["includes"][source]
    else:
        start_files = [source]

    # Cuts computed from different starting files can share edges. A dict is
    # used as an insertion-ordered set so each edge is reported once and the
    # output order does not depend on string hash randomization (as iterating
    # a set of filename tuples would).
    unique_cuts = {}

    for start_file in start_files:
        edge_cut = nx.minimum_edge_cut(DG, files.index(start_file), files.index(target))

        for includer_idx, included_idx in edge_cut:
            unique_cuts[(files[includer_idx], files[included_idx])] = None

    for includer, included in unique_cuts:
        prevalence = edge_prevalence[includer][included]

        # Explicit None check: "no threshold given" is the only case that
        # disables filtering.
        if prevalence_threshold is not None and prevalence < prevalence_threshold:
            continue

        yield includer, included, prevalence
3461
3562
3663def main ():
@@ -43,6 +70,14 @@ def main():
4370 )
4471 parser .add_argument ("source" , help = "Source file." )
4572 parser .add_argument ("target" , help = "Target file." )
73+ parser .add_argument (
74+ "--start-from-source-includes" ,
75+ action = "store_true" ,
76+ help = "Start from includes of the source file, rather than the source file itself." ,
77+ )
78+ parser .add_argument (
79+ "--prevalence-threshold" , type = float , help = "Filter out edges with a prevalence percentage below the threshold."
80+ )
4681 parser .add_argument ("--verbose" , action = "store_true" , default = False , help = "Enable verbose logging." )
4782 args = parser .parse_args ()
4883
@@ -79,6 +114,8 @@ def main():
79114 include_analysis ,
80115 args .source ,
81116 args .target ,
117+ start_from_source_includes = args .start_from_source_includes ,
118+ prevalence_threshold = args .prevalence_threshold ,
82119 ):
83120 csv_writer .writerow (row )
84121
0 commit comments