-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.py
More file actions
40 lines (29 loc) · 1.03 KB
/
main.py
File metadata and controls
40 lines (29 loc) · 1.03 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import os
from settings.development import settings
from utils.downloader import download
from utils.tokenizer import tokenize
def main():
    """Run the download-then-tokenize pipeline, reporting progress on stdout.

    Reads ``file_url`` and ``local_folder`` from ``settings``, hands them to
    ``download``, then reads the local copy as UTF-8 text, passes it to
    ``tokenize`` and prints the token count followed by the first 30 tokens.

    A ``ValueError`` raised during the download step is printed and ends the
    run early. During the read/tokenize step a missing file, or any other
    ``Exception``, is printed instead of being propagated.
    """
    source_url = settings["file_url"]
    target_dir = settings["local_folder"]

    try:
        fetched = download(source_url, target_dir)
        # A truthy result is reported as a fresh download, a falsy one as a
        # file that was already present.
        status = (
            "✓ File downloaded successfully."
            if fetched
            else "✓ File already exists."
        )
        print(status)
    except ValueError as download_error:
        print(f"✗ Download error: {download_error}")
        return

    # Presumably the downloader stores the file under the URL's last path
    # segment inside local_folder -- verify against utils.downloader.
    basename = source_url.rsplit("/", 1)[-1]
    local_path = os.path.join(target_dir, basename)

    try:
        with open(local_path, encoding="utf-8") as handle:
            contents = handle.read()
        token_list = tokenize(contents)
        token_count = len(token_list)
        print(f"✓ Tokenization complete. Total tokens: {token_count}")
        # The preview slice is taken only after the summary line is printed,
        # so a failure here still leaves the count on stdout.
        print(f"First 30 tokens: {token_list[:30]}")
    except FileNotFoundError:
        print(f"✗ File not found: {local_path}")
    except Exception as failure:
        print(f"✗ Error during tokenization: {failure}")
# Script entry guard: run the pipeline only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()