@@ -121,9 +121,9 @@ def error_check_if_exist(url: str)->bool:
121121 # add the headers parameter to make the request appears like coming
122122 # from browser, otherwise some websites will return 403
123123 headers = {
124- "User-Agent" : "Mozilla/5.0 (Windows NT 6.1; WOW64 ) "
124+ "User-Agent" : "Mozilla/5.0 (Windows NT 10.0; Win64; x64 ) "
125125 "AppleWebKit/537.36 (KHTML, like Gecko) "
126- "Chrome/56 .0.2924.76 Safari/537.36"
126+ "Chrome/117 .0.0.0 Safari/537.36"
127127 }
128128 req = requests .head (url , headers = headers )
129129 except requests .exceptions .SSLError :
@@ -132,6 +132,31 @@ def error_check_if_exist(url: str)->bool:
132132 return True
133133 return req .status_code >= 400
134134
def error_check_if_timeout(url: str) -> bool:
    """Report whether a HEAD request to ``url`` fails or returns an error status.

    The request carries a browser-like User-Agent header and a 10-second
    timeout.

    Args:
        url: Address to probe.

    Returns:
        True when the request raises any exception (a timeout included) or
        when the response status code is 400 or above; False otherwise.
    """
    # Browser-like User-Agent sent with the probe request.
    browser_headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/117.0.0.0 Safari/537.36"
    }
    try:
        response = requests.head(url, headers=browser_headers, timeout=10)
    except Exception:
        # A timeout is one of the exceptions caught here; it is not
        # distinguished from any other failure of the request.
        # NOTE(review): despite the function name, every failure is flagged,
        # not only timeouts - confirm this is what callers expect.
        return True
    else:
        return response.status_code >= 400
150+
# Collect the metadata keys whose URL is flagged by error_check_if_timeout,
# and reject the input when there is at least one.
timeout_url_error = [
    entry['metadata_attr']
    for entry in urls
    if error_check_if_timeout(entry['url'])
]
if timeout_url_error:
    timeout_url_error_str = ", ".join(timeout_url_error)
    # NOTE(review): presumably `_` is a gettext-style translation alias; if
    # so, an already-interpolated f-string cannot match a catalog entry -
    # consider a placeholder-based message. Confirm before changing.
    # NOTE(review): error_check_if_timeout also flags non-timeout failures
    # and HTTP statuses >= 400, so the "within 10 seconds" wording may not
    # always describe the actual cause.
    raise ValidationError(
        _(
            f"Please provide valid url link for the following key(s) in the metadata source: <strong>{timeout_url_error_str} </strong>. "
            "The website(s) cannot be reached within 10 seconds."
        )
    )
135160 url_error = [item for item in [url_item ['metadata_attr' ] for url_item in urls if error_check (url_item ['url' ], url_item ['forbidden_url' ])]]
136161 if len (url_error ) > 0 :
137162 url_error_str = ", " .join (url_error )
0 commit comments