-
Notifications
You must be signed in to change notification settings - Fork 40
Expand file tree
/
Copy pathdebug_identity_extraction.py
More file actions
243 lines (191 loc) · 8.78 KB
/
debug_identity_extraction.py
File metadata and controls
243 lines (191 loc) · 8.78 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
#!/usr/bin/env python3
"""
Debug identity extraction to see what's going wrong.
"""
import torch
import numpy as np
import sys
from pathlib import Path
from PIL import Image
import matplotlib.pyplot as plt
import logging

# Add paths
# Make the project-local `nemo` package importable when the script is run
# from the repository root (model modules are imported as
# 'models.stage_1.volumetric_avatar.va' relative to this directory).
sys.path.insert(0, 'nemo')

# Module-level logger at INFO so all diagnostic output below is printed.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
def _to_display(arr):
    """Normalize a cached image array to an HWC float image in [0, 1] for imshow.

    Handles, in order:
      * NCHW -> CHW (drop the leading batch dimension),
      * CHW  -> HWC (move channels last),
      * range normalization: [-1, 1] -> [0, 1], or 0..255 -> [0, 1].
    Arrays already in [0, 1] pass through unchanged.
    """
    if len(arr.shape) == 4:  # NCHW format: remove batch dim
        arr = arr[0]
    if arr.shape[0] == 3:  # CHW format: move channels last
        arr = np.transpose(arr, (1, 2, 0))
    # Normalize to [0, 1] for display
    if arr.min() < 0:
        arr = (arr + 1) / 2
    elif arr.max() > 1:
        arr = arr / 255.0
    return arr


def debug_identity():
    """Debug what identity is being extracted.

    Visual sanity-check pipeline:
      1. Inspect the source identity image (IMG_1.png) and its neighbors.
      2. Inspect what is stored in the HDF5 attribute cache.
      3. If a model checkpoint is available, run the volumetric-avatar model
         to extract the identity embedding from IMG_1.png and decode it back
         to an image.

    Writes three diagnostic PNGs to the current working directory:
    debug_identity.png, identity_reconstruction_test.png,
    identity_only_decode.png.  Requires a CUDA device for step 3.
    """
    # First, let's verify what IMG_1.png actually looks like
    img1_path = Path("nemo/data/IMG_1.png")
    if not img1_path.exists():
        logger.error(f"IMG_1.png not found at {img1_path}")
        return

    # Load and display IMG_1.png
    img1 = Image.open(img1_path)
    img1_np = np.array(img1)
    logger.info(f"IMG_1.png shape: {img1_np.shape}")
    logger.info(f"IMG_1.png dtype: {img1_np.dtype}")
    logger.info(f"IMG_1.png range: [{img1_np.min()}, {img1_np.max()}]")

    fig, axes = plt.subplots(2, 4, figsize=(16, 8))

    # Row 1: source images.  NOTE(review): sorted() is lexicographic, so
    # IMG_10 would sort before IMG_2 — presumably the data dir only holds
    # single-digit indices; verify if more images are added.
    img_files = sorted(Path("nemo/data").glob("IMG_*.png"))[:4]
    for idx, img_path in enumerate(img_files):
        axes[0, idx].imshow(Image.open(img_path))
        if idx == 0:
            # BUGFIX: the source-identity panel used to be drawn first and
            # then silently overwritten with a generic title by this loop;
            # label it here instead so the highlight survives.
            axes[0, idx].set_title("IMG_1.png (Source Identity)", fontsize=11, weight='bold', color='red')
        else:
            axes[0, idx].set_title(f"{img_path.name}", fontsize=10)
        axes[0, idx].axis('off')

    # Row 2: what is actually stored in the attribute cache?
    import h5py
    cache_path = Path("proper_face_attributes.h5")
    if cache_path.exists():
        with h5py.File(cache_path, 'r') as f:
            # Check identity frame in cache
            if 'identity_frame' in f:
                cached_identity = f['identity_frame'][:]
                logger.info(f"Cached identity shape: {cached_identity.shape}")
                logger.info(f"Cached identity range: [{cached_identity.min()}, {cached_identity.max()}]")
                axes[1, 0].imshow(_to_display(cached_identity))
                axes[1, 0].set_title("Cached Identity Frame", fontsize=10, color='blue')
                axes[1, 0].axis('off')
            # Check some cached frames
            for i in range(3):
                if f'frame_{i:04d}' in f:
                    frame = f[f'frame_{i:04d}/frame'][:]
                    axes[1, i+1].imshow(_to_display(frame))
                    axes[1, i+1].set_title(f"Cached Frame {i}", fontsize=10)
                    axes[1, i+1].axis('off')

    plt.suptitle("Identity Debug: What are we actually using?", fontsize=14, weight='bold')
    plt.tight_layout()
    plt.savefig("debug_identity.png", dpi=150, bbox_inches='tight')
    plt.close()
    logger.info("Saved debug_identity.png")

    # Now let's trace through the model to see what happens
    logger.info("\n" + "="*60)
    logger.info("Testing identity extraction from IMG_1.png")
    logger.info("="*60)

    # Load model (heavy third-party deps imported lazily so the image/cache
    # inspection above still works without them installed).
    import importlib
    from omegaconf import OmegaConf
    import cv2

    logger.info("Loading model...")
    emo_config = OmegaConf.load('./nemo/models/stage_1/volumetric_avatar/va.yaml')
    model = importlib.import_module('models.stage_1.volumetric_avatar.va').Model(
        emo_config, training=False
    )

    model_path = './nemo/logs/Retrain_with_17_V1_New_rand_MM_SEC_4_drop_02_stm_10_CV_05_1_1/checkpoints/328_model.pth'
    if not Path(model_path).exists():
        # BUGFIX: previously a missing checkpoint silently skipped the whole
        # model test; say so explicitly.
        logger.warning(f"Checkpoint not found at {model_path}; skipping model-based extraction")
        return

    # NOTE(review): torch.load on a pickle checkpoint executes arbitrary code
    # on load — acceptable for a trusted local checkpoint, never for
    # untrusted files (consider weights_only=True on newer PyTorch).
    model_dict = torch.load(model_path, map_location='cuda')
    model.load_state_dict(model_dict, strict=False)
    model = model.cuda()
    model.eval()

    # Load IMG_1 as tensor - handle RGBA by converting to RGB.
    # BUGFIX: guard on ndim so a grayscale (H, W) PNG no longer raises
    # IndexError on shape[2].
    if img1_np.ndim == 2:
        img1_rgb = np.stack([img1_np] * 3, axis=-1)
    elif img1_np.shape[2] == 4:
        img1_rgb = img1_np[:, :, :3]
    else:
        img1_rgb = img1_np
    img1_resized = cv2.resize(img1_rgb, (512, 512))
    # Map uint8 0..255 -> float [-1, 1], HWC -> NCHW on GPU.
    img1_tensor = torch.from_numpy(img1_resized).float() / 127.5 - 1.0
    img1_tensor = img1_tensor.permute(2, 0, 1).unsqueeze(0).cuda()
    logger.info(f"IMG_1 tensor shape: {img1_tensor.shape}")
    logger.info(f"IMG_1 tensor range: [{img1_tensor.min().item():.3f}, {img1_tensor.max().item():.3f}]")

    with torch.no_grad():
        # Extract face mask (threshold 0.6 binarizes the soft segmentation)
        face_mask, _, _, _ = model.face_idt.forward(img1_tensor)
        face_mask = (face_mask > 0.6).float()
        logger.info(f"Face mask shape: {face_mask.shape}")
        logger.info(f"Face mask coverage: {face_mask.mean().item():.3f}")

        # Masked image
        masked = img1_tensor * face_mask

        # Identity embedding
        idt_embed = model.idt_embedder_nw(masked)
        logger.info(f"Identity embedding shape: {idt_embed.shape}")
        logger.info(f"Identity embedding mean: {idt_embed.mean().item():.3f}")
        logger.info(f"Identity embedding std: {idt_embed.std().item():.3f}")

        # Let's also check what the decoder produces with just this identity.
        # Latent volume dimensions come from the model config.
        c = model.args.latent_volume_channels
        d = model.args.latent_volume_depth
        s = model.args.latent_volume_size

        # Get source latents and fold them into a (1, c, d, s, s) volume
        source_latents = model.local_encoder_nw(masked)
        source_volume = source_latents.view(1, c, d, s, s)
        if model.args.source_volume_num_blocks > 0:
            source_volume = model.volume_source_nw(source_volume)

        # Simple decode test: reuse the source's own head pose, no driver pose
        dummy_theta, _, _, _ = model.head_pose_regressor.forward(img1_tensor, True)
        dummy_dict = {
            'target_theta': dummy_theta,
            'target_pose_embed': None
        }
        embed_dict = {'idt': idt_embed}

        # Process volume, then flatten depth into channels for the 2D decoder
        processed = model.volume_process_nw(source_volume, embed_dict)
        latent_feats = processed.view(1, c * d, s, s)

        # Decode with identity only
        reconstructed, _, _, _ = model.decoder_nw(
            dummy_dict,
            embed_dict,
            latent_feats,
            False,
            stage_two=True
        )
        logger.info(f"Reconstructed shape: {reconstructed.shape}")
        logger.info(f"Reconstructed range: [{reconstructed.min().item():.3f}, {reconstructed.max().item():.3f}]")

        # Convert to HWC [0, 1] for saving; decoder output range is either
        # [0, 1] or [-1, 1] depending on its final activation.
        if reconstructed.min() >= 0:
            recon_np = reconstructed[0].cpu().permute(1, 2, 0).numpy()
        else:
            recon_np = (reconstructed[0].cpu().permute(1, 2, 0).numpy() + 1) / 2
        recon_np = np.clip(recon_np, 0, 1)

    # Create comparison figure: original vs. mask vs. identity-only decode
    fig, axes = plt.subplots(1, 3, figsize=(12, 4))
    axes[0].imshow(img1)
    axes[0].set_title("Original IMG_1.png", fontsize=11, weight='bold')
    axes[0].axis('off')

    mask_display = face_mask[0, 0].cpu().numpy()
    axes[1].imshow(mask_display, cmap='gray')
    axes[1].set_title("Extracted Face Mask", fontsize=11)
    axes[1].axis('off')

    axes[2].imshow(recon_np)
    axes[2].set_title("Reconstructed from Identity", fontsize=11, color='red')
    axes[2].axis('off')

    plt.suptitle("Identity Extraction Test", fontsize=14, weight='bold')
    plt.tight_layout()
    plt.savefig("identity_reconstruction_test.png", dpi=150, bbox_inches='tight')
    plt.close()
    logger.info("Saved identity_reconstruction_test.png")

    # Save the reconstruction separately
    recon_img = (recon_np * 255).astype(np.uint8)
    Image.fromarray(recon_img).save("identity_only_decode.png")
    logger.info("Saved identity_only_decode.png")

    logger.info("\n" + "="*60)
    logger.info("Debug complete! Check the generated images:")
    logger.info("  - debug_identity.png: Shows source images and cached data")
    logger.info("  - identity_reconstruction_test.png: Shows identity extraction")
    logger.info("  - identity_only_decode.png: Decoded using only IMG_1 identity")
    logger.info("="*60)
# Script entry point: run the identity-extraction debug pipeline.
if __name__ == "__main__":
    debug_identity()