Skip to content

Commit d1644d1

Browse files
refactor: unify duckdb local connection flow for uploads and warehouse
1 parent 22bc18f commit d1644d1

3 files changed

Lines changed: 64 additions & 68 deletions

File tree

insights/api/__init__.py

Lines changed: 39 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,14 @@
11
# Copyright (c) 2022, Frappe Technologies Pvt. Ltd. and contributors
22
# For license information, please see license.txt
33

4-
import os
5-
64
import frappe
75
from frappe.defaults import get_user_default, set_user_default
86
from frappe.handler import is_valid_http_method, is_whitelisted
97
from frappe.monitor import add_data_to_monitor
108

119
from insights.api.shared import is_public
1210
from insights.decorators import insights_whitelist, validate_type
13-
from insights.insights.doctype.insights_data_source_v3.connectors.duckdb import (
14-
get_duckdb_connection,
15-
)
11+
from insights.insights.doctype.insights_data_source_v3.connectors.duckdb import get_duckdb_connection
1612
from insights.insights.doctype.insights_data_source_v3.ibis_utils import (
1713
get_columns_from_schema,
1814
)
@@ -126,30 +122,23 @@ def get_file_data(filename: str):
126122

127123
create_uploads_if_not_exists()
128124
ds = frappe.get_doc("Insights Data Source v3", "uploads")
129-
private_folder = frappe.utils.get_files_path(is_private=1)
130-
private_folder = os.path.realpath(private_folder)
131-
db = get_duckdb_connection(ds, read_only=True, allowed_dir=private_folder)
132-
try:
133-
if ext in ["xlsx"]:
134-
table = db.read_xlsx(file_path)
135-
elif ext in ["json", "jsonl"]:
136-
table = db.read_json(file_path)
137-
else:
138-
table = db.read_csv(file_path, table_name=file_name)
125+
db = get_duckdb_connection(ds, read_only=False, allow_private_files=True)
139126

127+
try:
128+
table = _read_uploaded_table(db, file_path, ext)
140129
columns = get_columns_from_schema(table.schema())
141130
rows = table.head(50).execute().fillna("").to_dict(orient="records")
142131
row_count = table.count().execute()
132+
133+
return {
134+
"tablename": file_name,
135+
"rows": rows,
136+
"columns": columns,
137+
"total_rows": int(row_count),
138+
}
143139
finally:
144140
db.disconnect()
145141

146-
return {
147-
"tablename": file_name,
148-
"rows": rows,
149-
"columns": columns,
150-
"total_rows": int(row_count),
151-
}
152-
153142

154143
@insights_whitelist()
155144
@validate_type
@@ -162,33 +151,46 @@ def import_csv_data(filename: str, tablename: str = ""):
162151

163152
create_uploads_if_not_exists()
164153
ds = frappe.get_doc("Insights Data Source v3", "uploads")
165-
private_folder = os.path.realpath(frappe.utils.get_files_path(is_private=1))
154+
db = get_duckdb_connection(ds, read_only=False, allow_private_files=True)
166155

167-
db = get_duckdb_connection(ds, read_only=False, allowed_dir=private_folder)
168156
try:
169-
if ext in ["xlsx"]:
170-
table = db.read_xlsx(file_path)
171-
elif ext in ["json", "jsonl"]:
172-
table = db.read_json(file_path)
173-
else:
174-
table = db.read_csv(file_path, table_name=table_name)
157+
table = _read_uploaded_table(db, file_path, ext)
175158
db.create_table(table_name, table, overwrite=True)
159+
except frappe.ValidationError:
160+
raise
176161
except Exception as e:
177162
frappe.log_error(e)
178-
if ext in ["xlsx"]:
163+
frappe.throw("Failed to import uploaded file data into Insights uploads table. Please try again.")
164+
finally:
165+
db.disconnect()
166+
167+
InsightsTablev3.bulk_create(ds.name, [table_name])
168+
169+
170+
def _read_uploaded_table(db, file_path: str, ext: str):
171+
try:
172+
if ext == "xlsx":
173+
return db.read_xlsx(file_path)
174+
175+
if ext in ["json", "jsonl"]:
176+
return db.read_json(file_path)
177+
178+
return db.read_csv(file_path)
179+
180+
except Exception as e:
181+
frappe.log_error(e)
182+
183+
if ext == "xlsx":
179184
frappe.throw(
180185
"Failed to read Excel data from uploaded file. Please ensure the file is a valid Excel format and try again."
181186
)
182-
elif ext in ["json", "jsonl"]:
187+
188+
if ext in ["json", "jsonl"]:
183189
frappe.throw(
184190
"Failed to read JSON data from uploaded file. Please ensure the file is a valid JSON or JSONL format and try again."
185191
)
186-
else:
187-
frappe.throw("Failed to read CSV data from uploaded file. Please try again.")
188-
finally:
189-
db.disconnect()
190192

191-
InsightsTablev3.bulk_create(ds.name, [table_name])
193+
frappe.throw("Failed to read CSV data from uploaded file. Please try again.")
192194

193195

194196
@frappe.whitelist(allow_guest=True)

insights/insights/doctype/insights_data_source_v3/connectors/duckdb.py

Lines changed: 19 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -10,36 +10,39 @@
1010
from frappe.utils import get_files_path
1111

1212

13-
def get_duckdb_connection(data_source, read_only=True, allowed_dir=None):
13+
def get_duckdb_connection(data_source, read_only=True, allowed_dir=None, allow_private_files=False):
1414
name = data_source.name or frappe.scrub(data_source.title)
1515
db_name = data_source.database_name
1616

1717
if db_name.startswith("http"):
1818
return get_http_duckdb_connection(data_source, name, db_name)
1919

20-
return get_local_duckdb_connection(db_name, read_only=read_only, allowed_dir=allowed_dir)
20+
path = os.path.join(get_files_path(is_private=1), f"{db_name}.duckdb")
21+
return get_local_duckdb_connection(
22+
path, read_only=read_only, allowed_dir=allowed_dir, allow_private_files=allow_private_files
23+
)
2124

2225

23-
def get_local_duckdb_connection(db_name, read_only=True, allowed_dir=None):
24-
private_folder = os.path.realpath(get_files_path(is_private=1))
25-
path = os.path.join(private_folder, f"{db_name}.duckdb")
26-
26+
def get_local_duckdb_connection(path, read_only=True, allowed_dir=None, allow_private_files=False):
2727
if not os.path.exists(path):
2828
db = ibis.duckdb.connect(path)
2929
db.disconnect()
3030

3131
db = ibis.duckdb.connect(path, read_only=read_only)
32+
33+
private_folder = os.path.realpath(get_files_path(is_private=1))
34+
private_folder = _escape_sql_path(private_folder)
3235
db.raw_sql(f"SET home_directory='{private_folder}'")
3336

34-
if allowed_dir:
35-
escaped_dir = allowed_dir.replace("'", "''")
36-
# Some environments start with external access already disabled.
37-
# Best effort: try enabling it first, then configure directory allowlist.
37+
if not read_only and (allowed_dir or allow_private_files):
38+
allowed_dir = _escape_sql_path(allowed_dir) if allowed_dir else private_folder
39+
3840
with suppress(Exception):
3941
db.raw_sql("SET enable_external_access = true")
40-
db.raw_sql(f"SET allowed_directories = ['{escaped_dir}']")
4142

42-
db.raw_sql("SET enable_external_access = false")
43+
db.raw_sql(f"SET allowed_directories = ['{allowed_dir}']")
44+
else:
45+
db.raw_sql("SET enable_external_access = false")
4346

4447
return db
4548

@@ -80,3 +83,7 @@ def get_http_secret(data_source, name, db_name):
8083
except Exception as e:
8184
frappe.log_error(title="Error creating HTTP Secret for DuckDB", message=str(e))
8285
return
86+
87+
88+
def _escape_sql_path(path: str) -> str:
89+
return path.replace("'", "''")

insights/insights/doctype/insights_data_source_v3/data_warehouse.py

Lines changed: 6 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
from ibis.expr.types import Expr
2121

2222
import insights
23+
from insights.insights.doctype.insights_data_source_v3.connectors.duckdb import get_local_duckdb_connection
2324
from insights.utils import InsightsDataSourcev3, InsightsTablev3
2425

2526
WAREHOUSE_DB_NAME = "insights"
@@ -39,31 +40,17 @@ def get_db_path(self) -> str:
3940
def get_connection(self, database: str | None = None, read_only: bool = True) -> DuckDBBackend:
4041
path = self.get_db_path()
4142

42-
if not os.path.exists(path):
43-
db = ibis.duckdb.connect(path)
44-
db.disconnect()
45-
46-
db = ibis.duckdb.connect(path, read_only=read_only)
47-
48-
if not read_only:
49-
self._configure_temp_directory_access(db)
43+
db = get_local_duckdb_connection(
44+
path,
45+
read_only=read_only,
46+
allowed_dir=str(Path(tempfile.gettempdir())) if not read_only else None,
47+
)
5048

5149
if database:
5250
db.raw_sql(f"USE '{database}'")
5351

5452
return db
5553

56-
def _configure_temp_directory_access(self, db: DuckDBBackend) -> None:
57-
tmp_dir = str(Path(tempfile.gettempdir())).replace("'", "''")
58-
59-
# Some environments start with external access already disabled.
60-
# Best effort: try enabling it first, then configure directory allowlist.
61-
with suppress(Exception):
62-
db.raw_sql("SET enable_external_access = true")
63-
64-
db.raw_sql(f"SET allowed_directories = ['{tmp_dir}']")
65-
db.raw_sql("SET enable_external_access = false")
66-
6754
def create_database(self, database: str):
6855
with self.get_write_connection() as db:
6956
with suppress(CatalogException):

0 commit comments

Comments
 (0)