5555from ontogpt .io .owl_exporter import OWLExporter
5656from ontogpt .io .rdf_exporter import RDFExporter
5757from ontogpt .io .template_loader import get_template_details , get_template_path
58+ from ontogpt .io .utils import read_text_with_fallbacks
5859from ontogpt .io .yaml_wrapper import dump_minimal_yaml
5960from ontogpt .templates .core import ExtractionResult
6061from ontogpt .utils .multilingual import multilingual_analysis
@@ -196,7 +197,7 @@ def parse_input(
196197 parse_tabular_input (str (f ), selectcols )
197198 if Path (f ).suffix in VALID_TABULAR_FORMATS
198199 or Path (f ).suffix in VALID_SPREADSHEET_FORMATS
199- else open ( f , "r" ). read ( )
200+ else read_text_with_fallbacks ( f )
200201 )
201202 for f in inputfiles
202203 if f .is_file ()
@@ -207,7 +208,7 @@ def parse_input(
207208 parse_tabular_input (str (f ), selectcols )
208209 if Path (f ).suffix in VALID_TABULAR_FORMATS
209210 or Path (f ).suffix in VALID_SPREADSHEET_FORMATS
210- else open ( f , "r" ). read ( )
211+ else read_text_with_fallbacks ( f )
211212 )
212213 for f in inputfiles
213214 if f .is_file ()
@@ -225,7 +226,7 @@ def parse_input(
225226 text = parse_tabular_input (input , selectcols )
226227 logging .info (f"Input text: { text } " )
227228 else :
228- text = open ( input , "rb" ). read (). decode ( encoding = "utf-8" , errors = "ignore" )
229+ text = read_text_with_fallbacks ( Path ( input ) )
229230 logging .info (f"Input text: { text } " )
230231 parsedlist = [text ]
231232 else :
@@ -1102,11 +1103,11 @@ def recipe_extract(
11021103 ke .client .cache_db_path = settings .cache_db
11031104
11041105 if recipes_urls_file :
1105- with open ( recipes_urls_file , "r" ) as f :
1106- urls = [line .strip () for line in f . readlines () if url in line ]
1107- if len (urls ) != 1 :
1108- raise ValueError (f"Found { len (urls )} URLs in { recipes_urls_file } " )
1109- url = urls [0 ]
1106+ content = read_text_with_fallbacks ( Path ( recipes_urls_file ))
1107+ urls = [line .strip () for line in content . splitlines () if url in line ]
1108+ if len (urls ) != 1 :
1109+ raise ValueError (f"Found { len (urls )} URLs in { recipes_urls_file } " )
1110+ url = urls [0 ]
11101111 scraper = scrape_me (url )
11111112
11121113 if dictionary :
@@ -1172,8 +1173,7 @@ def convert(
11721173 )
11731174
11741175 cls = ke .template_pyclass
1175- with open (input , "r" ) as f :
1176- data = yaml .safe_load (f )
1176+ data = yaml .safe_load (read_text_with_fallbacks (Path (input )))
11771177 obj = cls (** data ["extracted_object" ])
11781178 results = ExtractionResult (extracted_object = obj )
11791179 write_extraction (results , output , output_format , ke , template , cut_input_text )
@@ -1580,7 +1580,7 @@ def diagnose(
15801580 if not model :
15811581 model = DEFAULT_MODEL
15821582
1583- phenopackets = [json .load ( open ( f )) for f in phenopacket_files ]
1583+ phenopackets = [json .loads ( read_text_with_fallbacks ( Path ( f ) )) for f in phenopacket_files ]
15841584 engine = PhenoEngine (
15851585 model = model ,
15861586 temperature = temperature ,
@@ -1633,7 +1633,9 @@ def run_multilingual_analysis(
16331633 elif input_data_dir and Path (input_data_dir ).is_dir ():
16341634 logging .info (f"Input file directory: { input_data_dir } " )
16351635 inputfiles = Path (input_data_dir ).glob ("*.txt" )
1636- inputdict = {str (f .name ): open (f , "r" ).read () for f in inputfiles if f .is_file ()}
1636+ inputdict = {
1637+ str (f .name ): read_text_with_fallbacks (f ) for f in inputfiles if f .is_file ()
1638+ }
16371639 logging .info (f"Found { len (inputdict )} input files here." )
16381640
16391641 i = 0
@@ -1698,7 +1700,7 @@ def answer(
16981700 ** kwargs ,
16991701):
17001702 """Answer a set of questions defined in YAML."""
1701- qc = QuestionCollection (** yaml .safe_load (open ( inputfile )))
1703+ qc = QuestionCollection (** yaml .safe_load (read_text_with_fallbacks ( Path ( inputfile ) )))
17021704 engine = GenericEngine (
17031705 model = model ,
17041706 temperature = temperature ,
@@ -1947,7 +1949,7 @@ def complete(
19471949 """
19481950
19491951 if inputfile :
1950- text = open ( inputfile ). read ( )
1952+ text = read_text_with_fallbacks ( Path ( inputfile ))
19511953 else :
19521954 text = input .strip ()
19531955
@@ -2174,7 +2176,7 @@ def _get_templates() -> Dict[str, Tuple[str, str]]:
21742176 template_dir = Path (__file__ ).parent / "templates"
21752177 template_paths = [f for f in template_dir .glob ("*.yaml" )]
21762178 for template_path in template_paths :
2177- with open (template_path , "r" ) as template_file :
2179+ with open (template_path , "r" , encoding = "utf-8" ) as template_file :
21782180 data = yaml .safe_load (template_file )
21792181 if data ["id" ].startswith (http_prefixes ):
21802182 identifier = data ["id" ].split ("/" )[- 1 ]
0 commit comments