Skip to content

Commit 01f9f0a

Browse files
committed
Address copilot comments
1 parent 79d0fd1 commit 01f9f0a

File tree

4 files changed

+7
-15
lines changed

4 files changed

+7
-15
lines changed

main.py

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -27,16 +27,15 @@ def main():
2727
print(f"✗ Download error: {ve}")
2828
return
2929

30-
file_name = file_url.split("/")[-1]
31-
file_path = os.path.join(local_folder, file_name)
30+
file_path = os.path.join(local_folder, file_url.split("/")[-1])
3231

3332
try:
3433
with open(file_path, "r", encoding="utf-8") as f:
3534
raw_text = f.read()
3635

3736
tokens = tokenize(raw_text)
3837
print(f"✓ Tokenization complete. Total tokens: {len(tokens)}")
39-
print(f"First 10 tokens: {tokens[:30]}")
38+
print(f"First 30 tokens: {tokens[:30]}")
4039

4140
except FileNotFoundError:
4241
print(f"✗ File not found: {file_path}")

settings/development.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2,5 +2,5 @@
22

33
settings = {
44
"file_url": "https://raw.githubusercontent.com/rasbt/LLMs-from-scratch/main/ch02/01_main-chapter-code/the-verdict.txt",
5-
"local_folder": f"{os.path.join(os.path.expanduser("~"), "data")}",
5+
"local_folder": os.path.join(os.path.expanduser("~"), "data"),
66
}

utils/downloader.py

Lines changed: 2 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -4,17 +4,10 @@
44
from settings.development import settings
55

66

7-
def assert_local_folder_exists(local_folder: str):
8-
"""Ensure the download folder exists."""
9-
10-
if not os.path.exists(local_folder):
11-
os.makedirs(local_folder, exist_ok=True)
12-
13-
14-
def download(file_url, local_folder) -> bool:
7+
def download(file_url: str, local_folder: str) -> bool:
158
"""Download a file from a predefined URL if it does not exist locally."""
169

17-
assert_local_folder_exists(local_folder)
10+
os.makedirs(local_folder, exist_ok=True)
1811

1912
file_name = file_url.split("/")[-1]
2013
file_path = os.path.join(local_folder, file_name)

utils/tokenizer.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
import re
22

33

4-
def tokenize(text):
4+
def tokenize(text: str) -> list[str]:
55
"""Tokenizes the input text into words.
66
77
Args:
@@ -11,7 +11,7 @@ def tokenize(text):
1111
list: A list of tokens (words).
1212
"""
1313

14-
result = re.split(r'([,.:;?_!"()\']|--|\s)', text)
14+
result = re.split(r"([,.:;?_!\"()']|--|\s)", text)
1515
result = [token.strip() for token in result if token.strip()]
1616
return result
1717

0 commit comments

Comments (0)