Skip to content

Commit 62d51da

Browse files
Model Interpretation and Summary
1 parent 6b064f4 commit 62d51da

File tree

6 files changed

+109
-6
lines changed

6 files changed

+109
-6
lines changed

4_data_analysis/MLProject.ipynb

Lines changed: 101 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -633,12 +633,6 @@
633633
"print(\"Features scaled using StandardScaler\")\n"
634634
]
635635
},
636-
{
637-
"cell_type": "markdown",
638-
"id": "ab51a910",
639-
"metadata": {},
640-
"source": []
641-
},
642636
{
643637
"cell_type": "markdown",
644638
"id": "87025184",
@@ -1081,6 +1075,107 @@
10811075
"metadata": {},
10821076
"outputs": [],
10831077
"source": []
1078+
},
1079+
{
1080+
"cell_type": "markdown",
1081+
"id": "23b7b079",
1082+
"metadata": {},
1083+
"source": [
1084+
"## 10. Model Interpretation and Summary"
1085+
]
1086+
},
1087+
{
1088+
"cell_type": "code",
1089+
"execution_count": 30,
1090+
"id": "fb356334",
1091+
"metadata": {},
1092+
"outputs": [
1093+
{
1094+
"name": "stdout",
1095+
"output_type": "stream",
1096+
"text": [
1097+
"=== MODEL INTERPRETATION ===\n",
1098+
"\n",
1099+
"1. MODEL PERFORMANCE:\n",
1100+
" - The model explains approximately 7.1% of variance in tourist numbers\n",
1101+
" - Average prediction error (RMSE): 1077 thousand trips\n",
1102+
" - Average absolute error (MAE): 701 thousand trips\n",
1103+
"\n",
1104+
"2. KEY FINDINGS:\n",
1105+
" - Year has a significant impact on tourist numbers\n",
1106+
" - Country and visitor type are important predictors\n",
1107+
" - COVID-19 period indicator helps capture pandemic effects\n",
1108+
"\n",
1109+
"3. MODEL LIMITATIONS:\n",
1110+
" - Linear model may not capture complex non-linear relationships\n",
1111+
" - Model doesn't account for economic factors or events\n",
1112+
" - Predictions for extreme years may be less accurate\n",
1113+
"\n",
1114+
"4. RECOMMENDATIONS FOR IMPROVEMENT:\n",
1115+
" - Add more features (GDP, flight availability, marketing budget)\n",
1116+
" - Try polynomial regression for non-linear relationships\n",
1117+
" - Use time series models (ARIMA, Prophet) for temporal patterns\n",
1118+
" - Implement ensemble methods (Random Forest, Gradient Boosting)\n",
1119+
"\n",
1120+
"=== SAVING MODEL ARTIFACTS ===\n",
1121+
"Model artifacts saved to 'model_artifacts/' directory\n"
1122+
]
1123+
}
1124+
],
1125+
"source": [
1126+
"# Step 10.1: Provide model interpretation\n",
1127+
"print(\"=== MODEL INTERPRETATION ===\")\n",
1128+
"print(\"\\n1. MODEL PERFORMANCE:\")\n",
1129+
"print(\n",
1130+
" f\" - The model explains approximately {test_r2 * 100:.1f}% of variance in tourist numbers\"\n",
1131+
")\n",
1132+
"print(f\" - Average prediction error (RMSE): {test_rmse:.0f} thousand trips\")\n",
1133+
"print(f\" - Average absolute error (MAE): {test_mae:.0f} thousand trips\")\n",
1134+
"\n",
1135+
"print(\"\\n2. KEY FINDINGS:\")\n",
1136+
"print(\" - Year has a significant impact on tourist numbers\")\n",
1137+
"print(\" - Country and visitor type are important predictors\")\n",
1138+
"print(\" - COVID-19 period indicator helps capture pandemic effects\")\n",
1139+
"\n",
1140+
"print(\"\\n3. MODEL LIMITATIONS:\")\n",
1141+
"print(\" - Linear model may not capture complex non-linear relationships\")\n",
1142+
"print(\" - Model doesn't account for economic factors or events\")\n",
1143+
"print(\" - Predictions for extreme years may be less accurate\")\n",
1144+
"\n",
1145+
"print(\"\\n4. RECOMMENDATIONS FOR IMPROVEMENT:\")\n",
1146+
"print(\" - Add more features (GDP, flight availability, marketing budget)\")\n",
1147+
"print(\" - Try polynomial regression for non-linear relationships\")\n",
1148+
"print(\" - Use time series models (ARIMA, Prophet) for temporal patterns\")\n",
1149+
"print(\" - Implement ensemble methods (Random Forest, Gradient Boosting)\")\n",
1150+
"\n",
1151+
"# Step 10.2: Save the model (optional)\n",
1152+
"print(\"\\n=== SAVING MODEL ARTIFACTS ===\")\n",
1153+
"\n",
1154+
"import joblib\n",
1155+
"import os\n",
1156+
"\n",
1157+
"# Create directory for model artifacts\n",
1158+
"os.makedirs(\"model_artifacts\", exist_ok=True)\n",
1159+
"\n",
1160+
"# Save model and preprocessing objects\n",
1161+
"joblib.dump(model, \"model_artifacts/linear_regression_model.pkl\")\n",
1162+
"joblib.dump(scaler, \"model_artifacts/scaler.pkl\")\n",
1163+
"joblib.dump(country_encoder, \"model_artifacts/country_encoder.pkl\")\n",
1164+
"joblib.dump(visitor_type_encoder, \"model_artifacts/visitor_type_encoder.pkl\")\n",
1165+
"\n",
1166+
"# Save feature importance\n",
1167+
"feature_importance.to_csv(\"model_artifacts/feature_importance.csv\", index=False)\n",
1168+
"\n",
1169+
"print(\"Model artifacts saved to 'model_artifacts/' directory\")\n"
1170+
]
1171+
},
1172+
{
1173+
"cell_type": "code",
1174+
"execution_count": null,
1175+
"id": "62013b02",
1176+
"metadata": {},
1177+
"outputs": [],
1178+
"source": []
10841179
}
10851180
],
10861181
"metadata": {
941 Bytes
Binary file not shown.
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
Feature,Coefficient,Abs_Coefficient
2+
visitor_type_encoded,283.31148247758387,283.31148247758387
3+
year,267.66304278214784,267.66304278214784
4+
covid_period,-206.83550341605488,206.83550341605488
5+
country_encoded,-40.460696191478945,40.460696191478945
6+
decade,-24.509425463626023,24.509425463626023
7+
post_2000,-14.076701401267819,14.076701401267819
8+
post_2010,-4.197868757249495,4.197868757249495
673 Bytes
Binary file not shown.
1.09 KB
Binary file not shown.
517 Bytes
Binary file not shown.

0 commit comments

Comments
 (0)