|
3 | 3 | # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/01_core.ipynb. |
4 | 4 |
|
5 | 5 | # %% auto 0 |
6 | | -__all__ = ['opt_re', 'named_re', 'search', 'parse_link', 'parse_llms_file', 'mk_ctx', 'get_sizes', 'create_ctx', 'llms_txt2ctx'] |
| 6 | +__all__ = ['opt_re', 'named_re', 'search', 'parse_link', 'parse_llms_file', 'find_root_dir', 'get_doc_content', 'mk_ctx', |
| 7 | + 'get_sizes', 'create_ctx', 'llms_txt2ctx'] |
7 | 8 |
|
8 | 9 | # %% ../nbs/01_core.ipynb |
9 | 10 | import re |
|
13 | 14 | from fastcore.xml import * |
14 | 15 | from fastcore.script import * |
15 | 16 | import httpx |
| 17 | +from urllib.parse import urlparse |
16 | 18 |
|
17 | 19 | # %% ../nbs/01_core.ipynb |
18 | 20 | def opt_re(s): |
@@ -65,14 +67,40 @@ def parse_llms_file(txt): |
65 | 67 | # %% ../nbs/01_core.ipynb |
66 | 68 | from fastcore.xml import Sections,Project,Doc |
67 | 69 |
|
| 70 | +# %% ../nbs/01_core.ipynb |
def find_root_dir():
    """Find the root directory of the nbdev project by looking for settings.ini.

    Starts at the current working directory and walks upward through its
    ancestors. Returns the first directory that contains a `settings.ini`
    file, or `None` when no directory on that path has one.
    """
    cwd = Path.cwd()
    # `parents` runs all the way up to the filesystem root, so the root itself
    # is examined too (a `while path != path.parent` loop stops just short of it).
    for candidate in (cwd, *cwd.parents):
        # `is_file` rather than `exists`: a directory that happens to be named
        # `settings.ini` is not a usable config file.
        if (candidate / 'settings.ini').is_file(): return candidate
    return None
| 78 | + |
| 79 | +# %% ../nbs/01_core.ipynb |
def get_doc_content(url):
    """Return the text of the document at `url`, preferring a local copy when inside an nbdev repo.

    If `find_root_dir` locates a project root and `url` starts with the
    `doc_host` value read from that project's `settings.ini`, the path part of
    the URL is mapped onto `<root>/_docs/<path>` and that file is read from disk.
    In every other case (no project root, no `doc_host`, different host, or no
    matching local file) the content is fetched with `httpx.get`.
    """
    root_dir = find_root_dir()
    if root_dir:
        doc_host = Config(root_dir, 'settings.ini').get('doc_host')
        if doc_host and url.startswith(doc_host):
            relative_path = urlparse(url).path.lstrip('/')
            local_path = root_dir / '_docs' / relative_path
            # `..` segments could point outside `_docs`; never serve those from disk.
            stays_in_docs = '..' not in Path(relative_path).parts
            # `is_file` (not `exists`): a URL equal to `doc_host` maps to the `_docs`
            # directory itself, and trying to open a directory raises instead of
            # falling through to the network fetch below.
            if stays_in_docs and local_path.is_file():
                # Explicit encoding so the result does not depend on the platform locale.
                return local_path.read_text(encoding='utf-8')
    # If not a local file or file doesn't exist, fetch from URL
    return httpx.get(url).text
| 94 | + |
68 | 95 | # %% ../nbs/01_core.ipynb |
def _doc(kw):
    "Build a `Doc` FT object whose child is the filtered text fetched for `kw['url']`; the remaining `kw` entries become attrs."
    # NOTE: `pop` removes 'url' from the dict that was passed in.
    url = kw.pop('url')
    re_comment = re.compile('^<!--.*-->$', flags=re.MULTILINE)
    re_base64_img = re.compile(r'<img[^>]*src="data:image/[^"]*"[^>]*>')
    kept = []
    for line in get_doc_content(url).splitlines():
        # Skip lines that are a single HTML comment and lines holding an inline base64 image tag
        if re_comment.search(line) or re_base64_img.search(line): continue
        kept.append(line)
    return Doc('\n'.join(kept), **kw)
76 | 104 |
|
77 | 105 | # %% ../nbs/01_core.ipynb |
78 | 106 | def _section(nm, items, n_workers=None): |
|
0 commit comments