1919import logging
2020import posixpath
2121import threading
22+ from collections import OrderedDict
2223from concurrent .futures import Future , ThreadPoolExecutor
2324from contextlib import ExitStack
2425from dataclasses import asdict , dataclass
25- from functools import lru_cache , partial
26+ from functools import partial
2627from os import environ
2728from time import time
2829from typing import Any , Callable , Final , Literal
@@ -78,6 +79,12 @@ class CompletionRefs:
7879UploadData = dict [str , Callable [[], JsonEncodeable ]]
7980
8081
def is_system_instructions_hashable(
    system_instruction: list[types.MessagePart],
) -> bool:
    """Report whether a system instruction consists solely of text parts.

    Args:
        system_instruction: The message parts making up the system
            instruction.

    Returns:
        ``True`` when every part is a ``types.Text`` instance (which is
        trivially the case for an empty list), ``False`` as soon as any
        other kind of part is found.
    """
    for part in system_instruction:
        if not isinstance(part, types.Text):
            # A single non-text part is enough to rule the whole list out.
            return False
    return True
86+
87+
8188class UploadCompletionHook (CompletionHook ):
8289 """A completion hook using ``fsspec`` to upload to external storage
8390
@@ -98,10 +105,13 @@ def __init__(
98105 base_path : str ,
99106 max_size : int = 20 ,
100107 upload_format : Format | None = None ,
108+ lru_cache_max_size : int = 1024 ,
101109 ) -> None :
102110 self ._max_size = max_size
103111 self ._fs , base_path = fsspec .url_to_fs (base_path )
104112 self ._base_path = self ._fs .unstrip_protocol (base_path )
113+ self .lru_dict = OrderedDict ()
114+ self .lru_cache_max_size = lru_cache_max_size
105115
106116 if upload_format not in _FORMATS + (None ,):
107117 raise ValueError (
@@ -159,7 +169,7 @@ def _calculate_ref_path(
159169 # TODO: experimental with using the trace_id and span_id, or fetching
160170 # gen_ai.response.id from the active span.
161171 system_instruction_hash = None
162- if all ( isinstance ( x , types . Text ) for x in system_instruction ):
172+ if is_system_instructions_hashable ( system_instruction ):
163173 # Get a hash of the text.
164174 system_instruction_hash = hashlib .sha256 (
165175 "\n " .join (x .content for x in system_instruction ).encode ( # pyright: ignore[reportUnknownMemberType, reportAttributeAccessIssue, reportUnknownArgumentType]
@@ -181,10 +191,18 @@ def _calculate_ref_path(
181191 ),
182192 )
183193
184- @lru_cache (maxsize = 512 )
185194 def _file_exists (self , path : str ) -> bool :
195+ if path in self .lru_dict :
196+ self .lru_dict .move_to_end (path )
197+ return True
186198 # https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.exists
187- return self ._fs .exists (path )
199+ file_exists = self ._fs .exists (path )
200+ if not file_exists :
201+ return False
202+ self .lru_dict [path ] = True
203+ if len (self .lru_dict ) > self .lru_cache_max_size :
204+ self .lru_dict .popitem (last = False )
205+ return True
188206
189207 def _do_upload (
190208 self , path : str , json_encodeable : Callable [[], JsonEncodeable ]
@@ -214,6 +232,11 @@ def _do_upload(
214232 gen_ai_json_dump (message , file )
215233 file .write ("\n " )
216234
235+ if "_system_instruction" in path :
236+ self .lru_dict [path ] = True
237+ if len (self .lru_dict ) > self .lru_cache_max_size :
238+ self .lru_dict .popitem (last = False )
239+
217240 def on_completion (
218241 self ,
219242 * ,
0 commit comments