Skip to content

Commit ba5e5cd

Browse files
g0blinResearchg0blin
andauthored
Merge commit from fork
Co-authored-by: g0blin <g0blin@hackthebox.com>
1 parent 1db0c88 commit ba5e5cd

2 files changed

Lines changed: 114 additions & 2 deletions

File tree

nbconvert/preprocessors/extractattachments.py

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,22 @@ def preprocess_cell(self, cell, resources, index):
8282
for fname in cell.attachments:
8383
self.log.debug("Encountered attachment %s", fname)
8484

85+
# Sanitize: use only the basename to prevent path traversal
86+
safe_fname = os.path.basename(fname)
87+
if not safe_fname:
88+
self.log.warning(
89+
"Attachment filename '%s' is invalid (empty basename), skipping",
90+
fname,
91+
)
92+
continue
93+
if safe_fname != fname:
94+
self.log.warning(
95+
"Attachment filename '%s' contained path components, "
96+
"using basename '%s'",
97+
fname,
98+
safe_fname,
99+
)
100+
85101
# Add file for writer
86102

87103
# Right now I don't know of a situation where there would be multiple
@@ -94,7 +110,14 @@ def preprocess_cell(self, cell, resources, index):
94110
break
95111

96112
# FilesWriter wants path to be in attachment filename here
97-
new_filename = os.path.join(self.path_name, fname)
113+
new_filename = os.path.join(self.path_name, safe_fname)
114+
if new_filename in resources[self.resources_item_key]:
115+
self.log.warning(
116+
"Attachment filename '%s' (from '%s') overwrites a previous "
117+
"attachment with the same name",
118+
safe_fname,
119+
fname,
120+
)
98121
resources[self.resources_item_key][new_filename] = decoded
99122

100123
# Edit the reference to the attachment

tests/preprocessors/test_extractattachments.py

Lines changed: 90 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,9 @@
44
# Distributed under the terms of the Modified BSD License.
55

66
import os
7-
from base64 import b64decode
7+
from base64 import b64decode, b64encode
8+
9+
from nbformat import v4 as nbformat
810

911
from nbconvert.preprocessors.extractattachments import ExtractAttachmentsPreprocessor
1012

@@ -86,3 +88,90 @@ def test_use_separate_dir_config(self):
8688
src = nb.cells[-1].source
8789
# This shouldn't change on Windows
8890
self.assertEqual(src, "![image.png](notebook1_custom/image.png)")
91+
92+
def test_attachment_path_traversal_sanitised(self):
93+
"""Test that path traversal in attachment filenames is sanitised.
94+
95+
Crafted attachment filenames containing '../' sequences must not escape
96+
the output directory. The preprocessor should strip path components,
97+
store the file under its basename only, and update the cell source
98+
reference accordingly.
99+
"""
100+
malicious_fname = "../../../../../../../tmp/nbconvert_traversal/evil.php"
101+
malicious_content = b"<?php\n// I should not be here"
102+
b64_content = b64encode(malicious_content).decode("utf-8")
103+
attachments = {malicious_fname: {"text/plain": b64_content}}
104+
cell = nbformat.new_markdown_cell(
105+
source=f"![exploit](attachment:{malicious_fname})",
106+
attachments=attachments,
107+
)
108+
nb = nbformat.new_notebook(cells=[cell])
109+
res = self.build_resources()
110+
preprocessor = self.build_preprocessor()
111+
nb, res = preprocessor(nb, res)
112+
113+
# The output key must be the basename only - no traversal components
114+
self.assertIn("evil.php", res["outputs"])
115+
self.assertEqual(res["outputs"]["evil.php"], malicious_content)
116+
for key in res["outputs"]:
117+
self.assertNotIn("..", key)
118+
self.assertFalse(os.path.isabs(key))
119+
120+
# Cell source must reference the safe, flattened filename
121+
self.assertEqual(nb.cells[0].source, "![exploit](evil.php)")
122+
123+
def test_attachment_absolute_path_sanitised(self):
124+
"""Test that absolute paths in attachment filenames are sanitised.
125+
126+
An absolute path like '/tmp/absolute/test1' would cause os.path.join
127+
to discard the output directory prefix entirely, writing to the
128+
absolute location. The preprocessor must strip the path and use
129+
only the basename.
130+
"""
131+
abs_fname = "/tmp/absolute/test1"
132+
content = b"absolute path write test"
133+
b64_content = b64encode(content).decode("utf-8")
134+
attachments = {abs_fname: {"text/plain": b64_content}}
135+
cell = nbformat.new_markdown_cell(
136+
source=f"![file](attachment:{abs_fname})",
137+
attachments=attachments,
138+
)
139+
nb = nbformat.new_notebook(cells=[cell])
140+
res = self.build_resources()
141+
preprocessor = self.build_preprocessor()
142+
nb, res = preprocessor(nb, res)
143+
144+
# The output key must be the basename only - not the absolute path
145+
self.assertIn("test1", res["outputs"])
146+
self.assertNotIn("/tmp/absolute/test1", res["outputs"])
147+
self.assertEqual(res["outputs"]["test1"], content)
148+
for key in res["outputs"]:
149+
self.assertFalse(os.path.isabs(key))
150+
151+
# Cell source must reference the safe, flattened filename
152+
self.assertEqual(nb.cells[0].source, "![file](test1)")
153+
154+
def test_attachment_empty_basename_skipped(self):
155+
"""Test that filenames resolving to an empty basename are skipped.
156+
157+
A filename like '../../../tmp/' has an empty os.path.basename(),
158+
which would cause downstream errors if not caught. The preprocessor
159+
should skip these attachments entirely and log a warning.
160+
"""
161+
bad_fname = "../../../tmp/"
162+
b64_content = b64encode(b"should be skipped").decode("utf-8")
163+
attachments = {bad_fname: {"text/plain": b64_content}}
164+
cell = nbformat.new_markdown_cell(
165+
source=f"![x](attachment:{bad_fname})",
166+
attachments=attachments,
167+
)
168+
nb = nbformat.new_notebook(cells=[cell])
169+
res = self.build_resources()
170+
preprocessor = self.build_preprocessor()
171+
nb, res = preprocessor(nb, res)
172+
173+
# No outputs should be created for empty basename
174+
self.assertEqual(len(res["outputs"]), 0)
175+
176+
# Cell source should remain unchanged (attachment ref not rewritten)
177+
self.assertEqual(nb.cells[0].source, f"![x](attachment:{bad_fname})")

0 commit comments

Comments
 (0)