Skip to content

Commit 5a26fba

Browse files
authored
Merge pull request #8 from AnswerDotAI/local-first
look at local files first
2 parents 4aef130 + b8bc8bd commit 5a26fba

3 files changed

Lines changed: 77 additions & 6 deletions

File tree

llms_txt/_modidx.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,8 @@
1010
'llms_txt.core._parse_llms': ('core.html#_parse_llms', 'llms_txt/core.py'),
1111
'llms_txt.core._section': ('core.html#_section', 'llms_txt/core.py'),
1212
'llms_txt.core.create_ctx': ('core.html#create_ctx', 'llms_txt/core.py'),
13+
'llms_txt.core.find_root_dir': ('core.html#find_root_dir', 'llms_txt/core.py'),
14+
'llms_txt.core.get_doc_content': ('core.html#get_doc_content', 'llms_txt/core.py'),
1315
'llms_txt.core.get_sizes': ('core.html#get_sizes', 'llms_txt/core.py'),
1416
'llms_txt.core.llms_txt2ctx': ('core.html#llms_txt2ctx', 'llms_txt/core.py'),
1517
'llms_txt.core.mk_ctx': ('core.html#mk_ctx', 'llms_txt/core.py'),

llms_txt/core.py

Lines changed: 31 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,8 @@
33
# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/01_core.ipynb.
44

55
# %% auto 0
6-
__all__ = ['opt_re', 'named_re', 'search', 'parse_link', 'parse_llms_file', 'mk_ctx', 'get_sizes', 'create_ctx', 'llms_txt2ctx']
6+
__all__ = ['opt_re', 'named_re', 'search', 'parse_link', 'parse_llms_file', 'find_root_dir', 'get_doc_content', 'mk_ctx',
7+
'get_sizes', 'create_ctx', 'llms_txt2ctx']
78

89
# %% ../nbs/01_core.ipynb
910
import re
@@ -13,6 +14,7 @@
1314
from fastcore.xml import *
1415
from fastcore.script import *
1516
import httpx
17+
from urllib.parse import urlparse
1618

1719
# %% ../nbs/01_core.ipynb
1820
def opt_re(s):
@@ -65,14 +67,40 @@ def parse_llms_file(txt):
6567
# %% ../nbs/01_core.ipynb
6668
from fastcore.xml import Sections,Project,Doc
6769

70+
# %% ../nbs/01_core.ipynb
71+
def find_root_dir():
    "Find the root directory of the nbdev project by looking for settings.ini"
    # Search the current working directory first, then each ancestor in turn.
    # `Path.parents` ends with the filesystem root, so a `settings.ini` that
    # lives in the root directory itself is found as well.
    cwd = Path.cwd()
    for path in (cwd, *cwd.parents):
        if (path / 'settings.ini').exists(): return path
    # No directory on the way up contains a `settings.ini`.
    return None
78+
79+
# %% ../nbs/01_core.ipynb
80+
def get_doc_content(url):
    "Return the text for `url`: read from the project's local `_docs` dir when `url` is under `doc_host`, else fetched over HTTP."
    root_dir = find_root_dir()
    if root_dir:
        config = Config(root_dir, 'settings.ini')
        doc_host = config.get('doc_host')
        if doc_host and url.startswith(doc_host):
            # Map the URL's path component onto the `_docs` directory under the project root.
            relative_path = urlparse(url).path.lstrip('/')
            local_path = root_dir / '_docs' / relative_path
            # Paths containing `..` are skipped so a URL cannot reach files outside `_docs`.
            # `is_file` (rather than `exists`) makes a URL that maps to a directory, such as
            # the bare `doc_host`, fall through to the HTTP fetch instead of failing on read.
            if '..' not in local_path.parts and local_path.is_file():
                # Decode as UTF-8 explicitly so the result does not depend on the
                # platform's default encoding.
                return local_path.read_text(encoding='utf-8')
    # If not a local file or file doesn't exist, fetch from URL
    return httpx.get(url).text
94+
6895
# %% ../nbs/01_core.ipynb
6996
def _doc(kw):
    "Create a `Doc` FT object with the text retrieved from `url` as the child, and `kw` as attrs."
    # Pop `url` from a copy so the caller's dict keeps its entry and can be passed in again.
    kw = dict(kw)
    url = kw.pop('url')
    txt = get_doc_content(url)
    re_comment = re.compile('^<!--.*-->$', flags=re.MULTILINE)
    re_base64_img = re.compile(r'<img[^>]*src="data:image/[^"]*"[^>]*>')
    # Drop every line that is a whole HTML comment or that contains an `<img>` tag
    # with an inline base64 `data:image/` source.
    txt = '\n'.join(o for o in txt.splitlines() if not re_comment.search(o) and not re_base64_img.search(o))
    return Doc(txt, **kw)
76104

77105
# %% ../nbs/01_core.ipynb
78106
def _section(nm, items, n_workers=None):

nbs/01_core.ipynb

Lines changed: 44 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -49,7 +49,8 @@
4949
"from fastcore.utils import *\n",
5050
"from fastcore.xml import *\n",
5151
"from fastcore.script import *\n",
52-
"import httpx"
52+
"import httpx\n",
53+
"from urllib.parse import urlparse"
5354
]
5455
},
5556
{
@@ -673,6 +674,45 @@
673674
"from fastcore.xml import Sections,Project,Doc"
674675
]
675676
},
677+
{
678+
"cell_type": "code",
679+
"execution_count": null,
680+
"metadata": {},
681+
"outputs": [],
682+
"source": [
683+
"#| export\n",
684+
"def find_root_dir():\n",
685+
" \"Find the root directory of the nbdev project by looking for settings.ini\"\n",
686+
" path = Path.cwd()\n",
687+
" while path != path.parent:\n",
688+
" if (path / 'settings.ini').exists(): return path\n",
689+
" path = path.parent\n",
690+
" return None"
691+
]
692+
},
693+
{
694+
"cell_type": "code",
695+
"execution_count": null,
696+
"metadata": {},
697+
"outputs": [],
698+
"source": [
699+
"#|export\n",
700+
"def get_doc_content(url):\n",
701+
" \"Fetch content from local file if in nbdev repo.\"\n",
702+
" root_dir = find_root_dir()\n",
703+
" if root_dir:\n",
704+
" config = Config(root_dir, 'settings.ini')\n",
705+
" doc_host = config.get('doc_host')\n",
706+
" if doc_host and url.startswith(doc_host):\n",
707+
" parsed_url = urlparse(url)\n",
708+
" relative_path = parsed_url.path.lstrip('/')\n",
709+
" local_path = root_dir / '_docs' / relative_path\n",
710+
" if local_path.exists():\n",
711+
" with open(local_path, 'r') as f: return f.read()\n",
712+
" # If not a local file or file doesn't exist, fetch from URL\n",
713+
" return httpx.get(url).text"
714+
]
715+
},
676716
{
677717
"cell_type": "code",
678718
"execution_count": null,
@@ -683,10 +723,11 @@
683723
"def _doc(kw):\n",
684724
" \"Create a `Doc` FT object with the text retrieved from `url` as the child, and `kw` as attrs.\"\n",
685725
" url = kw.pop('url')\n",
726+
" txt = get_doc_content(url)\n",
686727
" re_comment = re.compile('^<!--.*-->$', flags=re.MULTILINE)\n",
687728
" re_base64_img = re.compile(r'<img[^>]*src=\"data:image/[^\"]*\"[^>]*>')\n",
688-
" txt = [o for o in httpx.get(url).text.splitlines() if not re_comment.search(o) and not re_base64_img.search(o)]\n",
689-
" return Doc('\\n'.join(txt), **kw)"
729+
" txt = '\\n'.join([o for o in txt.splitlines() if not re_comment.search(o) and not re_base64_img.search(o)])\n",
730+
" return Doc(txt, **kw)"
690731
]
691732
},
692733
{

0 commit comments

Comments
 (0)