Skip to content

Commit 76e910e

Browse files
fix: sanitize genericode import inputs and secure XML parser (backport #53302) (#54174)
Co-authored-by: Shllokkk <shllokosan23@gmail.com>
1 parent c44ec7e commit 76e910e

2 files changed

Lines changed: 18 additions & 9 deletions

File tree

erpnext/edi/doctype/code_list/code_list.py

Lines changed: 7 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,7 @@
55

66
import frappe
77
from frappe.model.document import Document
8+
from frappe.utils import escape_html
89

910
if TYPE_CHECKING:
1011
from lxml.etree import Element
@@ -63,14 +64,16 @@ def get_default_code(self) -> str | None:
6364

6465
def from_genericode(self, root: "Element"):
6566
"""Extract Code List details from genericode XML"""
66-
self.title = root.find(".//Identification/ShortName").text
67+
self.title = escape_html(root.find(".//Identification/ShortName").text)
6768
self.version = root.find(".//Identification/Version").text
6869
self.canonical_uri = root.find(".//CanonicalUri").text
6970
# optionals
70-
self.description = getattr(root.find(".//Identification/LongName"), "text", None)
71-
self.publisher = getattr(root.find(".//Identification/Agency/ShortName"), "text", None)
71+
self.description = escape_html(getattr(root.find(".//Identification/LongName"), "text", None))
72+
self.publisher = escape_html(getattr(root.find(".//Identification/Agency/ShortName"), "text", None))
7273
if not self.publisher:
73-
self.publisher = getattr(root.find(".//Identification/Agency/LongName"), "text", None)
74+
self.publisher = escape_html(
75+
getattr(root.find(".//Identification/Agency/LongName"), "text", None)
76+
)
7477
self.publisher_id = getattr(root.find(".//Identification/Agency/Identifier"), "text", None)
7578
self.url = getattr(root.find(".//Identification/LocationUri"), "text", None)
7679

erpnext/edi/doctype/code_list/code_list_import.py

Lines changed: 11 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,7 @@
33
import frappe
44
import requests
55
from frappe import _
6+
from frappe.utils import escape_html
67
from lxml import etree
78

89
URL_PREFIXES = ("http://", "https://")
@@ -32,7 +33,12 @@ def import_genericode():
3233
content = f.read()
3334

3435
# Parse the xml content
35-
parser = etree.XMLParser(remove_blank_text=True)
36+
parser = etree.XMLParser(
37+
remove_blank_text=True,
38+
resolve_entities=False,
39+
load_dtd=False,
40+
no_network=True,
41+
)
3642
try:
3743
root = etree.fromstring(content, parser=parser)
3844
except Exception as e:
@@ -104,15 +110,15 @@ def get_genericode_columns_and_examples(root):
104110

105111
# Get column names
106112
for column in root.findall(".//Column"):
107-
column_id = column.get("Id")
113+
column_id = escape_html(column.get("Id"))
108114
columns.append(column_id)
109115
example_values[column_id] = []
110116
filterable_columns[column_id] = set()
111117

112118
# Get all values and count unique occurrences
113119
for row in root.findall(".//SimpleCodeList/Row"):
114120
for value in row.findall("Value"):
115-
column_id = value.get("ColumnRef")
121+
column_id = escape_html(value.get("ColumnRef"))
116122
if column_id not in columns:
117123
# Handle undeclared column
118124
columns.append(column_id)
@@ -123,7 +129,7 @@ def get_genericode_columns_and_examples(root):
123129
if simple_value is None:
124130
continue
125131

126-
filterable_columns[column_id].add(simple_value.text)
132+
filterable_columns[column_id].add(escape_html(simple_value.text))
127133

128134
# Get example values (up to 3) and filter columns with cardinality <= 5
129135
for row in root.findall(".//SimpleCodeList/Row")[:3]:
@@ -133,7 +139,7 @@ def get_genericode_columns_and_examples(root):
133139
if simple_value is None:
134140
continue
135141

136-
example_values[column_id].append(simple_value.text)
142+
example_values[column_id].append(escape_html(simple_value.text))
137143

138144
filterable_columns = {k: list(v) for k, v in filterable_columns.items() if len(v) <= 5}
139145

0 commit comments

Comments
 (0)