Skip to content

Commit d96ba2e

Browse files
committed
Add xAPI analytics dashboard and remove unrelated changes
- Add ORM-based dashboard creation script with 7 charts including client app breakdown (Blueprint, CITE, Gallery, Steamfitter) - Auto-create dashboard on Superset startup via init-superset.sh - Add README documenting Superset integration and xAPI data model - Remove unrelated KC_SPI and CSP changes (belong in separate branch)
1 parent 26bed2b commit d96ba2e

File tree

8 files changed

+1075
-5
lines changed

8 files changed

+1075
-5
lines changed

Crucible.AppHost/AppHost.cs

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -193,10 +193,6 @@ public static IResourceBuilder<KeycloakResource> AddKeycloak(this IDistributedAp
193193
.WithEnvironment("KC_HTTPS_PORT", "8443")
194194
.WithEnvironment("KC_HOSTNAME_STRICT", "false")
195195
.WithEnvironment("KC_BOOTSTRAP_ADMIN_PASSWORD", "admin")
196-
// Configure SameSite cookie settings to help with iframe authentication
197-
.WithEnvironment("KC_SPI_STICKY_SESSION_ENCODER_INFINISPAN_SHOULD_ATTACH_ROUTE", "false")
198-
.WithEnvironment("KC_SPI_LOGIN_PROTOCOL_OPENID_CONNECT_LEGACY_LOGOUT_REDIRECT_URI", "true")
199-
.WithEnvironment("KC_SPI_COOKIE_DEFAULT_SAME_SITE", "None")
200196
// Limit Java heap to reduce memory usage (from ~636MB to ~400MB)
201197
.WithEnvironment("JAVA_OPTS", "-Xms256m -Xmx384m")
202198
.WithRealmImport($"{builder.AppHostDirectory}/resources/crucible-realm.json");
@@ -1004,6 +1000,7 @@ public static void AddSuperset(this IDistributedApplicationBuilder builder, IRes
10041000
.WithHttpHealthCheck(path: "/health", endpointName: "http")
10051001
.WithBindMount("./resources/superset/superset_config.py", "/app/superset_config.py", isReadOnly: true)
10061002
.WithBindMount("./resources/superset/init-superset.sh", "/app/init-superset.sh", isReadOnly: true)
1003+
.WithBindMount("./resources/superset/create-dashboard-orm.py", "/app/create-dashboard-orm.py", isReadOnly: true)
10071004
.WithEnvironment("SUPERSET_CONFIG_PATH", "/app/superset_config.py")
10081005
.WithEnvironment("SUPERSET_SECRET_KEY", "crucible-dev-superset-secret-key")
10091006
.WithEnvironment("KEYCLOAK_EXTERNAL_URL", "http://localhost:8080/realms/crucible")

Crucible.AppHost/resources/crucible-realm.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3575,7 +3575,7 @@
35753575
"referrerPolicy": "no-referrer",
35763576
"xRobotsTag": "none",
35773577
"xFrameOptions": "SAMEORIGIN",
3578-
"contentSecurityPolicy": "frame-src 'self'; frame-ancestors 'self' http://localhost:4301 http://localhost:4721 http://localhost:4723 http://localhost:4401 http://localhost:4725; object-src 'none';",
3578+
"contentSecurityPolicy": "frame-src 'self'; frame-ancestors 'self'; object-src 'none';",
35793579
"xXSSProtection": "1; mode=block",
35803580
"strictTransportSecurity": "max-age=31536000; includeSubDomains"
35813581
},
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
# Apache Superset Integration
2+
3+
Apache Superset provides business intelligence and data visualization for xAPI learning analytics data stored in LRsql.
4+
5+
## Architecture
6+
7+
- **Superset container** runs on port 8088 with a custom Dockerfile (`Dockerfile.SupersetCustom`) that adds PostgreSQL and OAuth dependencies
8+
- **PostgreSQL** stores Superset's own metadata in a `superset` database
9+
- **LRsql database** is auto-registered as a data source on startup
10+
- **Keycloak** provides OAuth SSO authentication
11+
12+
## Configuration Files
13+
14+
| File | Purpose |
15+
|------|---------|
16+
| `Dockerfile.SupersetCustom` | Custom image with psycopg2-binary and authlib |
17+
| `superset_config.py` | Superset configuration (OAuth, cache, security) |
18+
| `init-superset.sh` | Startup script: migrations, admin user, LRsql registration, dashboard creation |
19+
| `create-dashboard-orm.py` | Creates the starter xAPI analytics dashboard using Superset's internal ORM |
20+
| `create-dashboard.py` | Alternative: REST API-based dashboard creation (Python) |
21+
| `create-dashboard.js` | Alternative: REST API-based dashboard creation (Node.js) |
22+
| `create-dashboard.sh` | Alternative: REST API-based dashboard creation (bash) |
23+
24+
## Starter Dashboard
25+
26+
The `xAPI Learning Analytics` dashboard is automatically created on first startup with 7 charts:
27+
28+
1. **Activity by Client App** (pie) - Statement distribution across Crucible apps (Blueprint, CITE, Gallery, Steamfitter, etc.)
29+
2. **Client App Verb Breakdown** (stacked bar) - Which verbs each app generates
30+
3. **xAPI Verb Distribution** (pie) - Overall verb frequency
31+
4. **xAPI Verb Counts** (bar) - Verb counts ranked
32+
5. **xAPI Activity Over Time** (timeline) - Statement volume over time by verb
33+
6. **Top Learners by Activity** (table) - Most active learners
34+
7. **Most Active Learning Objects** (table) - Most referenced activity IRIs
35+
36+
## Authentication
37+
38+
- **Local admin**: `admin` / `admin` (created on startup)
39+
- **Keycloak SSO**: Click the Keycloak login option on the login page
40+
41+
## Accessing Superset
42+
43+
- Dashboard URL: http://localhost:8088/superset/dashboard/xapi-analytics/
44+
- SQL Lab: http://localhost:8088/sqllab/ (query LRsql data directly)
45+
46+
## xAPI Data Model
47+
48+
The LRsql database uses these key tables for analytics:
49+
50+
| Table | Purpose |
51+
|-------|---------|
52+
| `xapi_statement` | Core xAPI statements with verb_iri, timestamp, JSON payload |
53+
| `statement_to_actor` | Links statements to actors (Actor, Team, Authority) |
54+
| `statement_to_activity` | Links statements to activities (Object, context) |
55+
| `activity` | Activity definitions with IRI and JSON payload |
56+
| `actor` | Actor definitions |
57+
58+
### Client App Detection
59+
60+
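Client applications are identified primarily by the port number in each activity IRI, falling back to the application name appearing anywhere in the IRI (for example `cite` or `gallery`). IRIs that match neither are reported as `Other`: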
61+
62+
| Port | Application |
63+
|------|-------------|
64+
| 4724 | Blueprint |
65+
| 4720, 4721 | CITE |
66+
| 4722, 4723 | Gallery |
67+
| 4300, 4301 | Player |
| 4303 | Player VM |
68+
| 4400, 4401 | Steamfitter |
69+
| 4403 | Alloy |
70+
| 4310 | Caster |
71+
| 5000 | TopoMojo |
72+
| 8081 | Moodle |
73+
74+
### Cross-App Correlation
75+
76+
Activities across multiple Crucible apps can be correlated using:
77+
78+
- **Registration ID** (`registration` column) - UUID linking statements from a single exercise execution
79+
- **Context Activities** (`context.contextActivities.grouping` in payload) - Shared MSEL/exercise activity IRI
80+
81+
For full cross-app correlation, orchestrators (Alloy, Blueprint) should pass a shared registration UUID to all downstream apps when launching exercises.
Lines changed: 280 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,280 @@
"""Create the starter xAPI analytics dashboard using Superset's internal ORM.

This script runs inside the Superset container after init, using direct
database access to properly populate the dashboard_slices relationship
that the REST API doesn't handle.

What it does, in order:
  1. Exits (status 0) if a dashboard with slug "xapi-analytics" already
     exists, or if no registered database has "LRsql" in its name.
  2. Creates (or reuses) five virtual datasets on the LRsql database.
  3. Creates (or reuses, matched by chart name) seven charts.
  4. Builds the v2 grid layout and creates the published dashboard.

The only explicit commit is the one at the very end of the script.
"""
import json
import sys

from superset.app import create_app

app = create_app()

with app.app_context():
    # Model imports are kept inside the app context, exactly where the
    # original script had them (presumably they need an initialized app).
    from superset import db
    from superset.models.core import Database
    from superset.connectors.sqla.models import SqlaTable
    from superset.models.slice import Slice
    from superset.models.dashboard import Dashboard

    # Check if dashboard already exists
    existing = db.session.query(Dashboard).filter_by(slug="xapi-analytics").first()
    if existing:
        print("xAPI Analytics dashboard already exists")
        sys.exit(0)

    # Get LRsql database
    lrsql_db = db.session.query(Database).filter(
        Database.database_name.contains("LRsql")
    ).first()
    if not lrsql_db:
        print("LRsql database not found, skipping dashboard creation")
        sys.exit(0)

    print(f"LRsql database ID: {lrsql_db.id}")

    # Virtual datasets: (dataset name, SQL run against the LRsql database).
    #
    # xapi_client_activity classifies each activity IRI by port first and by
    # application-name substring second. Both documented ports are matched
    # for CITE (4720, 4721) and Gallery (4722, 4723). The port-only
    # 'Player VM' branch (4303) is evaluated before the generic '%player%'
    # name match so that it cannot be shadowed by it.
    datasets_config = [
        ("xapi_client_activity", """
SELECT
    CASE
        WHEN a.activity_iri LIKE '%:4724/%' OR a.activity_iri LIKE '%blueprint%' THEN 'Blueprint'
        WHEN a.activity_iri LIKE '%:4720/%' OR a.activity_iri LIKE '%:4721/%' OR a.activity_iri LIKE '%cite%' THEN 'CITE'
        WHEN a.activity_iri LIKE '%:4722/%' OR a.activity_iri LIKE '%:4723/%' OR a.activity_iri LIKE '%gallery%' THEN 'Gallery'
        WHEN a.activity_iri LIKE '%:4303/%' THEN 'Player VM'
        WHEN a.activity_iri LIKE '%:4300/%' OR a.activity_iri LIKE '%:4301/%' OR a.activity_iri LIKE '%player%' THEN 'Player'
        WHEN a.activity_iri LIKE '%:4400/%' OR a.activity_iri LIKE '%:4401/%' OR a.activity_iri LIKE '%steamfitter%' THEN 'Steamfitter'
        WHEN a.activity_iri LIKE '%:4403/%' OR a.activity_iri LIKE '%alloy%' THEN 'Alloy'
        WHEN a.activity_iri LIKE '%:4310/%' OR a.activity_iri LIKE '%caster%' THEN 'Caster'
        WHEN a.activity_iri LIKE '%:5000/%' OR a.activity_iri LIKE '%topomojo%' THEN 'TopoMojo'
        WHEN a.activity_iri LIKE '%:8081/%' OR a.activity_iri LIKE '%moodle%' THEN 'Moodle'
        ELSE 'Other'
    END AS client_app,
    REPLACE(REPLACE(s.verb_iri, 'http://adlnet.gov/expapi/verbs/', ''), 'https://w3id.org/xapi/dod-isd/verbs/', '') AS verb,
    COUNT(DISTINCT s.statement_id) AS statement_count
FROM xapi_statement s
JOIN statement_to_activity sta ON sta.statement_id = s.statement_id AND sta.usage = 'Object'
JOIN activity a ON a.activity_iri = sta.activity_iri
WHERE NOT s.is_voided
GROUP BY client_app, s.verb_iri
ORDER BY statement_count DESC"""),
        ("xapi_verb_frequency", """
SELECT
    REPLACE(REPLACE(verb_iri, 'http://adlnet.gov/expapi/verbs/', ''), 'https://w3id.org/xapi/dod-isd/verbs/', '') AS verb,
    verb_iri,
    COUNT(*) AS statement_count
FROM xapi_statement
WHERE NOT is_voided
GROUP BY verb_iri
ORDER BY statement_count DESC"""),
        ("xapi_activity_timeline", """
SELECT
    DATE_TRUNC('hour', timestamp) AS time_bucket,
    REPLACE(REPLACE(verb_iri, 'http://adlnet.gov/expapi/verbs/', ''), 'https://w3id.org/xapi/dod-isd/verbs/', '') AS verb,
    COUNT(*) AS statement_count
FROM xapi_statement
WHERE NOT is_voided AND timestamp IS NOT NULL
GROUP BY time_bucket, verb_iri
ORDER BY time_bucket"""),
        ("xapi_learner_activity", """
SELECT
    s.payload->'actor'->>'name' AS learner_name,
    sta.actor_ifi AS learner_id,
    REPLACE(REPLACE(s.verb_iri, 'http://adlnet.gov/expapi/verbs/', ''), 'https://w3id.org/xapi/dod-isd/verbs/', '') AS verb,
    COUNT(*) AS statement_count,
    MIN(s.timestamp) AS first_activity,
    MAX(s.timestamp) AS last_activity
FROM xapi_statement s
JOIN statement_to_actor sta ON sta.statement_id = s.statement_id AND sta.usage = 'Actor'
WHERE NOT s.is_voided
GROUP BY learner_name, learner_id, s.verb_iri
ORDER BY statement_count DESC"""),
        ("xapi_activity_objects", """
SELECT
    a.activity_iri,
    a.payload->>'name' AS activity_name,
    sta.usage AS context_type,
    COUNT(DISTINCT sta.statement_id) AS statement_count
FROM statement_to_activity sta
JOIN activity a ON a.activity_iri = sta.activity_iri
GROUP BY a.activity_iri, a.payload->>'name', sta.usage
ORDER BY statement_count DESC"""),
    ]

    # Dataset name -> SqlaTable, consumed by the chart definitions below.
    datasets = {}
    print("Creating datasets...")
    for name, sql in datasets_config:
        ds = db.session.query(SqlaTable).filter_by(
            table_name=name, database_id=lrsql_db.id
        ).first()
        if not ds:
            ds = SqlaTable(
                table_name=name,
                database_id=lrsql_db.id,
                schema="public",
                sql=sql,
                is_managed_externally=False,
            )
            db.session.add(ds)
            # Flush so the new row gets its primary key before it is used.
            db.session.flush()
            print(f" Dataset '{name}': {ds.id}")
        else:
            print(f" Dataset '{name}' already exists: {ds.id}")
        # Sync column metadata from the SQL query. Best effort: a failure
        # here is reported but does not stop dashboard creation.
        try:
            ds.fetch_metadata()
            print(f" Synced {len(ds.columns)} columns")
        except Exception as e:
            print(f" Warning: could not sync columns: {e}")
        datasets[name] = ds

    # Charts: (chart name, dataset name, form-data params). The chart's
    # viz_type is taken from params["viz_type"] so it is declared only once.
    charts_config = [
        ("Activity by Client App", "xapi_client_activity", {
            "viz_type": "pie",
            "groupby": ["client_app"],
            "metric": {"label": "statement_count", "expressionType": "SQL", "sqlExpression": "SUM(statement_count)"},
            "row_limit": 20,
            "sort_by_metric": True,
            "color_scheme": "supersetColors",
            "show_labels": True,
            "label_type": "key_percent",
        }),
        ("Client App Verb Breakdown", "xapi_client_activity", {
            "viz_type": "dist_bar",
            "groupby": ["client_app"],
            "metrics": [{"label": "statement_count", "expressionType": "SQL", "sqlExpression": "SUM(statement_count)"}],
            "columns": ["verb"],
            "row_limit": 50,
            "order_desc": True,
            "color_scheme": "supersetColors",
            "show_legend": True,
            "x_axis_label": "Client Application",
            "y_axis_label": "Statement Count",
        }),
        ("xAPI Verb Distribution", "xapi_verb_frequency", {
            "viz_type": "pie",
            "groupby": ["verb"],
            "metric": {"label": "statement_count", "expressionType": "SQL", "sqlExpression": "SUM(statement_count)"},
            "row_limit": 20,
            "sort_by_metric": True,
            "color_scheme": "supersetColors",
            "show_labels": True,
            "label_type": "key_percent",
        }),
        ("xAPI Verb Counts", "xapi_verb_frequency", {
            "viz_type": "dist_bar",
            "groupby": ["verb"],
            "metrics": [{"label": "statement_count", "expressionType": "SQL", "sqlExpression": "SUM(statement_count)"}],
            "row_limit": 20,
            "order_desc": True,
            "color_scheme": "supersetColors",
            "show_legend": False,
            "x_axis_label": "Verb",
            "y_axis_label": "Count",
        }),
        ("xAPI Activity Over Time", "xapi_activity_timeline", {
            "viz_type": "echarts_timeseries_line",
            "x_axis": "time_bucket",
            "metrics": [{"label": "statement_count", "expressionType": "SQL", "sqlExpression": "SUM(statement_count)"}],
            "groupby": ["verb"],
            "row_limit": 10000,
            "color_scheme": "supersetColors",
            "show_legend": True,
            "rich_tooltip": True,
        }),
        ("Top Learners by Activity", "xapi_learner_activity", {
            "viz_type": "table",
            "query_mode": "aggregate",
            "groupby": ["learner_name"],
            "metrics": [{"label": "total_statements", "expressionType": "SQL", "sqlExpression": "SUM(statement_count)"}],
            "order_desc": True,
            "row_limit": 50,
        }),
        ("Most Active Learning Objects", "xapi_activity_objects", {
            "viz_type": "table",
            "query_mode": "aggregate",
            "groupby": ["activity_iri", "activity_name"],
            "metrics": [{"label": "total_statements", "expressionType": "SQL", "sqlExpression": "SUM(statement_count)"}],
            "order_desc": True,
            "row_limit": 50,
        }),
    ]

    slices = []          # charts in creation order, attached to the dashboard
    slices_by_name = {}  # chart name -> Slice, used by the layout below
    print("Creating charts...")
    for name, ds_name, params in charts_config:
        ds = datasets[ds_name]
        chart = db.session.query(Slice).filter_by(slice_name=name).first()
        if not chart:
            chart = Slice(
                slice_name=name,
                datasource_id=ds.id,
                datasource_type="table",
                viz_type=params["viz_type"],
                params=json.dumps(params),
            )
            db.session.add(chart)
            # Flush so chart.id is available for the layout's chartId.
            db.session.flush()
            print(f" Chart '{name}': {chart.id}")
        else:
            print(f" Chart '{name}' already exists: {chart.id}")
        slices.append(chart)
        slices_by_name[name] = chart

    # Build dashboard layout: (chart name, grid width, row id). Charts are
    # bound by name rather than by position in charts_config, so reordering
    # one list cannot silently wire a layout slot to the wrong chart.
    chart_layout = [
        ("Activity by Client App", 6, "ROW-1"),
        ("Client App Verb Breakdown", 6, "ROW-1"),
        ("xAPI Verb Distribution", 6, "ROW-2"),
        ("xAPI Verb Counts", 6, "ROW-2"),
        ("xAPI Activity Over Time", 12, "ROW-3"),
        ("Top Learners by Activity", 6, "ROW-4"),
        ("Most Active Learning Objects", 6, "ROW-4"),
    ]

    # Layout component ids are CHART-1..CHART-N in layout order.
    chart_keys = {
        name: f"CHART-{n}"
        for n, (name, _, _) in enumerate(chart_layout, start=1)
    }

    row_ids = ["ROW-1", "ROW-2", "ROW-3", "ROW-4"]
    positions = {
        "DASHBOARD_VERSION_KEY": "v2",
        "ROOT_ID": {"type": "ROOT", "id": "ROOT_ID", "children": ["GRID_ID"]},
        "GRID_ID": {"type": "GRID", "id": "GRID_ID", "children": row_ids, "parents": ["ROOT_ID"]},
        "HEADER_ID": {"type": "HEADER", "id": "HEADER_ID", "meta": {"text": "xAPI Learning Analytics"}},
    }

    for row_id in row_ids:
        children = [chart_keys[name] for name, _, row in chart_layout if row == row_id]
        positions[row_id] = {
            "type": "ROW", "id": row_id, "children": children,
            "parents": ["ROOT_ID", "GRID_ID"],
            "meta": {"background": "BACKGROUND_TRANSPARENT"},
        }

    for name, width, row in chart_layout:
        key = chart_keys[name]
        positions[key] = {
            "type": "CHART", "id": key, "children": [],
            "parents": ["ROOT_ID", "GRID_ID", row],
            "meta": {"chartId": slices_by_name[name].id, "width": width, "height": 50, "sliceName": name},
        }

    # Create dashboard with slices properly associated
    print("Creating dashboard...")
    dashboard = Dashboard(
        dashboard_title="xAPI Learning Analytics",
        slug="xapi-analytics",
        published=True,
        position_json=json.dumps(positions),
        json_metadata=json.dumps({
            "default_filters": "{}",
            "expanded_slices": {},
            "refresh_frequency": 0,
            "timed_refresh_immune_slices": [],
            "color_scheme": "supersetColors",
        }),
    )
    dashboard.slices = slices  # This properly populates dashboard_slices!
    db.session.add(dashboard)
    db.session.commit()

    print(f"Dashboard created with {len(slices)} charts")
    print("Dashboard URL: /superset/dashboard/xapi-analytics/")

0 commit comments

Comments
 (0)