-
Notifications
You must be signed in to change notification settings - Fork 1.1k
Expand file tree
/
Copy pathconvert_img_to_video.py
More file actions
152 lines (115 loc) · 4.9 KB
/
convert_img_to_video.py
File metadata and controls
152 lines (115 loc) · 4.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
import yaml
import cv2
import numpy as np
from pathlib import Path
class ImageProcessor:
    """Turn a YAML-described sequence of still images into a video.

    The configuration must contain an ``images`` list. Every entry needs a
    ``path`` and may additionally set:

    * ``duration`` - seconds the image stays on screen (default ``1.0``)
    * ``translation`` - ``[x, y]`` shift in percent of the image width and
      height (default ``[0, 0]``)
    * ``scale`` - zoom factor (default ``1.0``)

    The size of the first image becomes the frame size of the video.
    """

    def __init__(self, yaml_path):
        """Read the configuration at *yaml_path* and load every image.

        Raises:
            ValueError: if one of the listed images cannot be read.
        """
        # Explicit encoding so non-ASCII image paths do not depend on the
        # platform's locale default.
        with open(yaml_path, 'r', encoding='utf-8') as f:
            self.config = yaml.safe_load(f)
        self.images_info = []
        # (width, height) of the first loaded image; None until one is loaded.
        self.reference_size = None
        self._load_images()

    def _load_images(self):
        """Populate ``self.images_info`` from ``self.config['images']``.

        Raises:
            ValueError: if ``cv2.imread`` cannot load an image.
        """
        for img_config in self.config['images']:
            img = cv2.imread(img_config['path'])
            if img is None:
                raise ValueError(f"Cannot load image: {img_config['path']}")

            self.images_info.append({
                'image': img,
                'duration': float(img_config.get('duration', 1.0)),
                'translation': img_config.get('translation', [0, 0]),
                'scale': float(img_config.get('scale', 1.0)),
            })

            # The first image defines the output frame size as (width, height).
            if self.reference_size is None:
                self.reference_size = (img.shape[1], img.shape[0])

    def _translate_image(self, img, translation):
        """Shift *img* by ``translation`` percent, keeping its dimensions.

        ``translation[0]`` is a percentage of the width, ``translation[1]``
        a percentage of the height. The output has the same size as the input.
        """
        height, width = img.shape[:2]
        # Convert the percentages into whole-pixel offsets.
        tx = int(width * translation[0] / 100)
        ty = int(height * translation[1] / 100)
        M = np.float32([[1, 0, tx], [0, 1, ty]])
        return cv2.warpAffine(img, M, (width, height))

    def _crop_black_borders(self, img):
        """Crop *img* to the bounding box of its non-black pixels.

        A pixel counts as non-black when its grayscale value is above 1.
        If no such pixel exists, the image is returned unchanged.
        """
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        _, thresh = cv2.threshold(gray, 1, 255, cv2.THRESH_BINARY)
        coords = cv2.findNonZero(thresh)
        if coords is None:
            return img
        x, y, w, h = cv2.boundingRect(coords)
        return img[y:y + h, x:x + w]

    @staticmethod
    def _center_crop(img, target_size):
        """Return the centered ``target_size`` (width, height) window of *img*.

        Offsets are clamped at zero: a negative start would be read by NumPy
        as an index counted from the end and yield an off-center sliver.
        When *img* is smaller than the target along an axis, the full extent
        of that axis is returned.
        """
        target_w, target_h = target_size
        height, width = img.shape[:2]
        start_x = max((width - target_w) // 2, 0)
        start_y = max((height - target_h) // 2, 0)
        return img[start_y:start_y + target_h, start_x:start_x + target_w]

    def _scale_image(self, img, scale, target_size):
        """Zoom *img* by *scale* and return a ``target_size`` image.

        For ``scale <= 1`` the image is simply resized to ``target_size``
        (width, height). For larger values the image is enlarged by *scale*
        and center-cropped to ``target_size``.
        """
        if scale <= 1:
            return cv2.resize(img, target_size)

        height, width = img.shape[:2]
        scaled_width = int(width * scale)
        scaled_height = int(height * scale)
        scaled = cv2.resize(img, (scaled_width, scaled_height))

        cropped = self._center_crop(scaled, target_size)
        # After border cropping the enlarged image can still be smaller than
        # the target; stretch it so the result always has the target size.
        if cropped.shape[:2] != (target_size[1], target_size[0]):
            cropped = cv2.resize(cropped, target_size)
        return cropped

    def _transform_image(self, img, translation, scale):
        """Apply translation, black-border cropping and scaling, in that order.

        The result has the same (width, height) as the input *img*.
        """
        original_size = (img.shape[1], img.shape[0])
        translated = self._translate_image(img, translation)
        cropped = self._crop_black_borders(translated)
        return self._scale_image(cropped, scale, original_size)

    def create_video(self, output_path, fps=25):
        """Write all loaded images to *output_path* as an mp4 video.

        Each image is shown for ``int(duration * fps)`` frames. Afterwards
        the file is re-encoded through ``ffmpeg``.

        Raises:
            ValueError: if no images were loaded.
            RuntimeError: if the video writer cannot be opened.
        """
        # Fail early with a clear message instead of handing None to OpenCV.
        if not self.images_info or self.reference_size is None:
            raise ValueError("No images loaded; cannot create a video")

        output_path = str(output_path)  # accept pathlib.Path as well as str
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_path, fourcc, fps, self.reference_size)
        try:
            # An unopened writer silently discards every frame.
            if not out.isOpened():
                raise RuntimeError(f"Cannot open video writer for: {output_path}")

            frame_shape = (self.reference_size[1], self.reference_size[0])
            for info in self.images_info:
                transformed = self._transform_image(
                    info['image'],
                    info['translation'],
                    info['scale'],
                )

                # Images may differ in size from the first (reference) one.
                if transformed.shape[:2] != frame_shape:
                    transformed = cv2.resize(transformed, self.reference_size)

                n_frames = int(info['duration'] * fps)
                for _ in range(n_frames):
                    out.write(transformed)
        finally:
            out.release()

        self._improve_video_quality(output_path)

    def _improve_video_quality(self, video_path):
        """Re-encode *video_path* in place with ffmpeg (libx264, CRF 18).

        The re-encoded stream is written to a temporary sibling file that
        replaces the original only when ffmpeg succeeds; on failure the
        temporary file is removed and the original video is left untouched.

        Raises:
            OSError: if the ``ffmpeg`` executable cannot be started.
            subprocess.CalledProcessError: if ffmpeg exits with an error.
        """
        import os
        import subprocess

        video_path = str(video_path)
        temp_path = video_path + '.temp.mp4'
        cmd = [
            'ffmpeg', '-i', video_path,
            '-c:v', 'libx264',
            '-preset', 'slow',
            '-crf', '18',
            '-y',
            temp_path
        ]

        try:
            subprocess.run(cmd, check=True)
        except (OSError, subprocess.CalledProcessError):
            # Do not leave a partial file behind, and never let it replace
            # the video that was already written.
            try:
                os.remove(temp_path)
            except FileNotFoundError:
                pass
            raise

        os.replace(temp_path, video_path)
def main():
    """Render the images listed in tools/i2v_config.yaml to a 25 fps video."""
    config_path = 'tools/i2v_config.yaml'
    output_path = 'convertd_video.mp4'
    ImageProcessor(config_path).create_video(output_path, fps=25)
# Run the conversion only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()