-
-
Notifications
You must be signed in to change notification settings - Fork 186
Expand file tree
/
Copy pathcsdi_imputation_example.py
More file actions
63 lines (51 loc) · 1.87 KB
/
Copy pathcsdi_imputation_example.py
File metadata and controls
63 lines (51 loc) · 1.87 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
"""
A minimalist, standalone example of the PyPOTS CSDI model for time-series imputation.
This script is auto-generated by extracting hyperparameters from the test code.
"""
import numpy as np
from benchpots.datasets import preprocess_random_walk
from pypots.imputation import CSDI
from pypots.utils.metrics import calc_mse
def main():
    """Run a minimal end-to-end CSDI imputation demo on a random-walk dataset.

    The steps are: build the dataset with ``preprocess_random_walk``
    (``missing_rate=0.1``), assemble the train/val/test dictionaries, fit a
    small CSDI model on CPU for two epochs, impute the test split with two
    sampling rounds, average over those rounds, and print the MSE returned by
    ``calc_mse`` when given the NaN positions of the test input as the mask.
    """
    n_steps = 48
    n_features = 35

    # 1. Generate a random walk time-series dataset
    data = preprocess_random_walk(
        n_steps=n_steps,
        n_features=n_features,
        n_classes=5,
        n_samples_each_class=40,
        missing_rate=0.1,
    )

    # 2. Assemble the per-split dictionaries handed to the model
    train_set = {"X": data["train_X"], "X_ori": data["train_X_ori"]}
    val_set = {"X": data["val_X"], "X_ori": data["val_X_ori"]}
    test_set = {"X": data["test_X"], "X_ori": data["test_X_ori"]}

    # 3. Initialize the model (deliberately tiny settings so the demo runs fast)
    hyperparameters = dict(
        n_steps=n_steps,
        n_features=n_features,
        n_layers=1,
        n_channels=8,
        d_time_embedding=32,
        d_feature_embedding=3,
        d_diffusion_embedding=32,
        n_diffusion_steps=5,
        n_heads=1,
        epochs=2,
        device="cpu",
    )
    imputer = CSDI(**hyperparameters)

    # 4. Train the model
    print("🚀 Training the CSDI model...")
    imputer.fit(train_set, val_set)

    # 5. Impute missing values
    print("🔮 Imputing missing values...")
    # CSDI is a generative model, so it can be sampled several times to obtain
    # multiple predictions per data instance.
    sampled_imputations = imputer.predict(test_set, n_sampling_times=2)["imputation"]
    # Collapse axis 1 (the sampling-times axis) into a single estimate.
    mean_imputation = sampled_imputations.mean(axis=1)

    # 6. Evaluate against the intact test data, masking with the NaN positions
    #    of the model input
    ground_truth = data["test_X_ori"]
    evaluation_mask = np.isnan(test_set["X"])
    mse = calc_mse(mean_imputation, ground_truth, evaluation_mask)
    print(f"✅ The MSE of CSDI imputation is: {mse:.4f}")
# Run the demo only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()