|
633 | 633 | "print(\"Features scaled using StandardScaler\")\n" |
634 | 634 | ] |
635 | 635 | }, |
636 | | - { |
637 | | - "cell_type": "markdown", |
638 | | - "id": "ab51a910", |
639 | | - "metadata": {}, |
640 | | - "source": [] |
641 | | - }, |
642 | 636 | { |
643 | 637 | "cell_type": "markdown", |
644 | 638 | "id": "87025184", |
|
1081 | 1075 | "metadata": {}, |
1082 | 1076 | "outputs": [], |
1083 | 1077 | "source": [] |
| 1078 | + }, |
| 1079 | + { |
| 1080 | + "cell_type": "markdown", |
| 1081 | + "id": "23b7b079", |
| 1082 | + "metadata": {}, |
| 1083 | + "source": [ |
| 1084 | + "## 10. Model Interpretation and Summary" |
| 1085 | + ] |
| 1086 | + }, |
| 1087 | + { |
| 1088 | + "cell_type": "code", |
| 1089 | + "execution_count": 30, |
| 1090 | + "id": "fb356334", |
| 1091 | + "metadata": {}, |
| 1092 | + "outputs": [ |
| 1093 | + { |
| 1094 | + "name": "stdout", |
| 1095 | + "output_type": "stream", |
| 1096 | + "text": [ |
| 1097 | + "=== MODEL INTERPRETATION ===\n", |
| 1098 | + "\n", |
| 1099 | + "1. MODEL PERFORMANCE:\n", |
| 1100 | + " - The model explains approximately 7.1% of variance in tourist numbers\n", |
| 1101 | + " - Average prediction error (RMSE): 1077 thousand trips\n", |
| 1102 | + " - Average absolute error (MAE): 701 thousand trips\n", |
| 1103 | + "\n", |
| 1104 | + "2. KEY FINDINGS:\n", |
| 1105 | + " - Year has a significant impact on tourist numbers\n", |
| 1106 | + " - Country and visitor type are important predictors\n", |
| 1107 | + " - COVID-19 period indicator helps capture pandemic effects\n", |
| 1108 | + "\n", |
| 1109 | + "3. MODEL LIMITATIONS:\n", |
| 1110 | + " - Linear model may not capture complex non-linear relationships\n", |
| 1111 | + " - Model doesn't account for economic factors or events\n", |
| 1112 | + " - Predictions for extreme years may be less accurate\n", |
| 1113 | + "\n", |
| 1114 | + "4. RECOMMENDATIONS FOR IMPROVEMENT:\n", |
| 1115 | + " - Add more features (GDP, flight availability, marketing budget)\n", |
| 1116 | + " - Try polynomial regression for non-linear relationships\n", |
| 1117 | + " - Use time series models (ARIMA, Prophet) for temporal patterns\n", |
| 1118 | + " - Implement ensemble methods (Random Forest, Gradient Boosting)\n", |
| 1119 | + "\n", |
| 1120 | + "=== SAVING MODEL ARTIFACTS ===\n", |
| 1121 | + "Model artifacts saved to 'model_artifacts/' directory\n" |
| 1122 | + ] |
| 1123 | + } |
| 1124 | + ], |
| 1125 | + "source": [ |
| 1126 | + "# Step 10.1: Provide model interpretation\n", |
| 1127 | + "print(\"=== MODEL INTERPRETATION ===\")\n", |
| 1128 | + "print(\"\\n1. MODEL PERFORMANCE:\")\n", |
| 1129 | + "print(\n", |
| 1130 | + " f\" - The model explains approximately {test_r2 * 100:.1f}% of variance in tourist numbers\"\n", |
| 1131 | + ")\n", |
| 1132 | + "print(f\" - Average prediction error (RMSE): {test_rmse:.0f} thousand trips\")\n", |
| 1133 | + "print(f\" - Average absolute error (MAE): {test_mae:.0f} thousand trips\")\n", |
| 1134 | + "\n", |
| 1135 | + "print(\"\\n2. KEY FINDINGS:\")\n", |
| 1136 | + "print(\" - Year has a significant impact on tourist numbers\")\n", |
| 1137 | + "print(\" - Country and visitor type are important predictors\")\n", |
| 1138 | + "print(\" - COVID-19 period indicator helps capture pandemic effects\")\n", |
| 1139 | + "\n", |
| 1140 | + "print(\"\\n3. MODEL LIMITATIONS:\")\n", |
| 1141 | + "print(\" - Linear model may not capture complex non-linear relationships\")\n", |
| 1142 | + "print(\" - Model doesn't account for economic factors or events\")\n", |
| 1143 | + "print(\" - Predictions for extreme years may be less accurate\")\n", |
| 1144 | + "\n", |
| 1145 | + "print(\"\\n4. RECOMMENDATIONS FOR IMPROVEMENT:\")\n", |
| 1146 | + "print(\" - Add more features (GDP, flight availability, marketing budget)\")\n", |
| 1147 | + "print(\" - Try polynomial regression for non-linear relationships\")\n", |
| 1148 | + "print(\" - Use time series models (ARIMA, Prophet) for temporal patterns\")\n", |
| 1149 | + "print(\" - Implement ensemble methods (Random Forest, Gradient Boosting)\")\n", |
| 1150 | + "\n", |
| 1151 | + "# Step 10.2: Save the model, preprocessing objects, and feature importance (optional)\n", |
| 1152 | + "print(\"\\n=== SAVING MODEL ARTIFACTS ===\")\n", |
| 1153 | + "\n", |
| 1154 | + "import joblib\n", |
| 1155 | + "import os\n", |
| 1156 | + "\n", |
| 1157 | + "# Create directory for model artifacts\n", |
| 1158 | + "os.makedirs(\"model_artifacts\", exist_ok=True)\n", |
| 1159 | + "\n", |
| 1160 | + "# Save model and preprocessing objects\n", |
| 1161 | + "joblib.dump(model, \"model_artifacts/linear_regression_model.pkl\")\n", |
| 1162 | + "joblib.dump(scaler, \"model_artifacts/scaler.pkl\")\n", |
| 1163 | + "joblib.dump(country_encoder, \"model_artifacts/country_encoder.pkl\")\n", |
| 1164 | + "joblib.dump(visitor_type_encoder, \"model_artifacts/visitor_type_encoder.pkl\")\n", |
| 1165 | + "\n", |
| 1166 | + "# Save feature importance\n", |
| 1167 | + "feature_importance.to_csv(\"model_artifacts/feature_importance.csv\", index=False)\n", |
| 1168 | + "\n", |
| 1169 | + "print(\"Model artifacts saved to 'model_artifacts/' directory\")\n" |
| 1170 | + ] |
| 1171 | + }, |
| 1172 | + { |
| 1173 | + "cell_type": "code", |
| 1174 | + "execution_count": null, |
| 1175 | + "id": "62013b02", |
| 1176 | + "metadata": {}, |
| 1177 | + "outputs": [], |
| 1178 | + "source": [] |
1084 | 1179 | } |
1085 | 1180 | ], |
1086 | 1181 | "metadata": { |
|
0 commit comments