-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpredict.py
More file actions
142 lines (117 loc) · 5.78 KB
/
Copy pathpredict.py
File metadata and controls
142 lines (117 loc) · 5.78 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
"""
PREDICT.PY
Author: Priyanshu Vijay
Description: Inference interface for the book-length predictive pipeline.
Loads serialized model components, dynamically matches categorical
features with the training baseline log, and runs an interactive
user prediction loop via the terminal console.
"""
import os
import joblib
import pandas as pd
def get_live_prediction():
print("🔮 Initializing Production Inference Engine...")
# 1. Path Verification for Serialized Artifacts
model_path = 'models/book_length_regressor.pkl'
features_path = 'models/model_features.pkl'
if not os.path.exists(model_path) or not os.path.exists(features_path):
raise FileNotFoundError("❌ Serialized model artifacts missing! Please run 'python train_model.py' first.")
# 2. Rehydrate the Saved Model Brain and Structural Matrix Features
print("💾 Rehydrating serialized artifacts from disk...")
model = joblib.load(model_path)
trained_features = joblib.load(features_path)
# Extract valid publishers and book types from one-hot encoded columns
valid_publishers = sorted([
col.replace("Publisher_", "") for col in trained_features if col.startswith("Publisher_")
])
valid_types = sorted([
col.replace("Book_Type_", "") for col in trained_features if col.startswith("Book_Type_")
])
print("\n💡 Model layers rehydrated successfully. Ready for real-time inference.")
# 3. Enhanced Options Loop
while True:
print("\n" + "="*55)
print("🤖 STEPHEN KING BOOK-LENGTH PREDICTION DASHBOARD")
print("="*55)
print("Options: Type your parameters below, or type 'exit' to quit.")
print("-"*55)
# Option A: Interactive Year Input
user_input_year = input("📅 Enter a custom Publication Year (e.g., 2027): ").strip()
if user_input_year.lower() == 'exit':
break
try:
user_year = int(user_input_year)
except ValueError:
print("❌ Error: Publication year must be a valid integer.")
continue
# Option B: Dynamic Book Type Selection Menu
print("\n📚 Available Book Types:")
# Display explicit choices including the baseline reference column dropped during get_dummies
display_types = ["Novel"] + valid_types if "Novel" not in valid_types else valid_types
for idx, b_type in enumerate(display_types, 1):
print(f" [{idx}] {b_type}")
print("-"*55)
user_input_type = input("📝 Select a Book Type (Enter number or text): ").strip()
if user_input_type.lower() == 'exit':
break
if user_input_type.isdigit():
t_idx = int(user_input_type) - 1
user_type = display_types[t_idx] if 0 <= t_idx < len(display_types) else "Novel"
else:
user_type = user_input_type
# Option C: Dynamic Publisher Selection Menu
print("\n🏢 Known Production Category Publishers:")
for idx, pub in enumerate(valid_publishers, 1):
print(f" [{idx}] {pub}")
print(f" [{len(valid_publishers)+1}] Other / New Publisher")
print("-"*55)
user_input_pub = input("📝 Select a Publisher (Enter number or text): ").strip()
if user_input_pub.lower() == 'exit':
break
if user_input_pub.isdigit():
p_idx = int(user_input_pub) - 1
if 0 <= p_idx < len(valid_publishers):
user_publisher = valid_publishers[p_idx]
else:
user_publisher = "Other"
else:
user_publisher = user_input_pub
# 4. Reconstruct the Matrix Row to Match Training Alignment
input_data = {col: 0 for col in trained_features}
# Assign continuous time attribute
if 'Year' in input_data:
input_data['Year'] = user_year
# 5. Apply One-Hot Encoding Logic dynamically for Publisher
encoded_publisher_col = f"Publisher_{user_publisher}"
if encoded_publisher_col in input_data:
input_data[encoded_publisher_col] = 1
print(f"\n🎯 Target constraint matched for category: '{user_publisher}'")
else:
if "Publisher_Other" in input_data:
input_data["Publisher_Other"] = 1
print(f"\n⚠️ Categorized under rare 'Other' group.")
else:
print(f"\n⚠️ Defaulting to baseline publisher variance.")
# 6. Apply One-Hot Encoding Logic dynamically for Book Type
encoded_type_col = f"Book_Type_{user_type}"
if encoded_type_col in input_data:
input_data[encoded_type_col] = 1
print(f"🎯 Target constraint matched for book type: '{user_type}'")
else:
print(f"⚠️ Defaulting to baseline type context ('Novel').")
# 7. Build DataFrame with exact matching matrix column ordering sequence
input_df = pd.DataFrame([input_data])[trained_features]
# 8. Execute Model Inference
predicted_pages = model.predict(input_df)[0]
if predicted_pages < 1:
predicted_pages = 1.0
print("\n==================================================")
print(f"📚 INFERENCE ML RESULT:")
print(f"👉 Predicted Expected Page Count: {predicted_pages:.1f} pages")
print("==================================================\n")
continue_query = input("🔄 Run another prediction? (y/n): ").strip().lower()
if continue_query not in ['y', 'yes']:
print("👋 Exiting inference dashboard. Deployment sequence complete!")
break
if __name__ == "__main__":
get_live_prediction()