WebAPI/PythonRpcServer/scenedetector.py at 1274d4ee7599ba5943d95929eb6a97f5f9a23454 · classtranscribe/WebAPI · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
import os
from cv2 import cv2
import json
import numpy as np
from skimage.metrics import structural_similarity as ssim
from datetime import datetime

# Root directory under which find_scenes creates one folder of scene images
# per video. Taken from the DATA_DIRECTORY environment variable; None if unset.
DATA_DIR = os.environ.get('DATA_DIRECTORY')


def find_scenes(video_path, min_scene_length=1, abs_min=0.75, abs_max=0.98, find_subscenes=True, max_subscenes_per_minute=12):
    """
    Detects scenes within a video.

    Calculates the structural similarity index measure (SSIM) between each pair of consecutive
    frames (downscaled to 320x240 grayscale), then uses the list of SSIMs to identify where scene
    changes are. One representative JPEG (the middle frame of the scene) is written per scene
    into DATA_DIR/<video name>/.

    Parameters:
    video_path (string): Path of the video to be used.
    min_scene_length (int): Minimum scene length in seconds. Default 1s
    abs_min (float): Minimum SSIM value for non-scene changes, i.e. any frame with SSIM < abs_min
        is defined as a scene change. Default 0.75
    abs_max (float): Accepted for interface compatibility. Not used by this implementation,
        which performs no subscene detection. Default 0.98
    find_subscenes (boolean): Accepted for interface compatibility. Not used by this
        implementation; top-level scenes are returned regardless of its value. Default True
    max_subscenes_per_minute (int): Accepted for interface compatibility. Not used by this
        implementation. Default 12

    Returns:
    string: List of dictionaries dumped to a JSON string. Each dict corresponds to a scene,
        with the key/item pairs being starting timestamp (start), image file name (img_file), ending
        timestamp (end), and boolean indicating if it's a scene or subscene (is_subscene, always
        False here). Timestamps are formatted as HH:MM:SS.mmm.

    Raises:
    IOError: If the video cannot be opened.
    ValueError: If no frame can be decoded, or a scene's representative frame cannot be read.
    Exception: Any other error is printed and re-raised unchanged.
    """

    cap = None
    try:
        # Folder name is the video's base name up to its first '.'. Working on the base name
        # (rather than the whole path) keeps dots in directory names or a leading './' from
        # truncating the name, and a path without any '.' no longer loses its last character.
        file_name = os.path.basename(video_path).split('.', 1)[0]
        directory = os.path.join(DATA_DIR, file_name)
        # exist_ok avoids the check-then-create race and also creates missing parents
        os.makedirs(directory, exist_ok=True)

        # Get the video capture and number of frames and fps
        cap = cv2.VideoCapture(video_path)
        if not cap.isOpened():
            raise IOError("Unable to open video: " + str(video_path))
        num_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        fps = int(cap.get(cv2.CAP_PROP_FPS))
        if num_frames <= 0:
            raise ValueError("Video reports no frames: " + str(video_path))

        # Minimum number of frames per scene
        min_frames = min_scene_length * fps
        # Stores the last frame read
        last_frame = None
        # List of similarities (SSIMs) between frames; index 0 stays 0 (no previous frame)
        similarities = np.zeros(num_frames)
        timestamps = np.zeros(num_frames)

        frames_read = 0
        for i in range(num_frames):
            ret, frame = cap.read()
            if not ret or frame is None:
                # The reported frame count is only an estimate; stop at the
                # first frame that cannot actually be decoded.
                break

            # Save the time stamp (in seconds) of each frame
            timestamps[i] = cap.get(cv2.CAP_PROP_POS_MSEC) / 1000

            # Resize and convert to grayscale before comparing
            curr_frame = cv2.cvtColor(cv2.resize(frame, (320, 240)), cv2.COLOR_BGR2GRAY)

            # Calculate the SSIM between the current frame and last frame
            if i >= 1:
                similarities[i] = ssim(last_frame, curr_frame)

            # Save the current frame for the next iteration
            last_frame = curr_frame
            frames_read = i + 1

        if frames_read == 0:
            raise ValueError("No frames could be read from video: " + str(video_path))

        # Drop the slots of frames that were announced but never decoded
        num_frames = frames_read
        similarities = similarities[:num_frames]
        timestamps = timestamps[:num_frames]

        # Find cuts by finding where SSIM < abs_min (plain ints, so they index and serialize cleanly)
        cuts = [int(c) for c in np.argwhere(similarities < abs_min).flatten()]
        # The first scene always starts at frame 0, even if abs_min is so low that
        # the zero-initialised similarities[0] does not register as a cut.
        if not cuts or cuts[0] != 0:
            cuts.insert(0, 0)

        # Get real scene cuts by filtering out those that happen within min_frames of the previous cut
        scene_cuts = [cuts[0]]
        for prev_cut, cut in zip(cuts, cuts[1:]):
            if cut >= prev_cut + min_frames:
                scene_cuts.append(cut)
        scene_cuts.append(num_frames - 1)

        # Placeholder until the real image path is known
        img_file = 'temp'

        # One scene per pair of consecutive cuts. These are top-level scenes, so they
        # are produced regardless of find_subscenes.
        scenes = [{'start': start,
                   'img_file': img_file,
                   'end': end,
                   'is_subscene': False,
                   }
                  for start, end in zip(scene_cuts, scene_cuts[1:])]

        # Write the image file for each scene and convert start/end to timestamp
        for i, scene in enumerate(scenes):
            # Use the middle frame of the scene as its representative image
            cap.set(cv2.CAP_PROP_POS_FRAMES, (scene['start'] + scene['end']) // 2)
            res, frame = cap.read()
            if not res or frame is None:
                raise ValueError("Could not read representative frame for scene %d" % i)
            img_file = os.path.join(directory, "%d.jpg" % i)
            cv2.imwrite(img_file, frame)
            # we dont want microsecond accuracy; the [:12] cuts off the last 3 unwanted digits
            scene['start'] = datetime.utcfromtimestamp(timestamps[scene['start']]).strftime("%H:%M:%S.%f")[:12]
            scene['end'] = datetime.utcfromtimestamp(timestamps[scene['end']]).strftime("%H:%M:%S.%f")[:12]
            scene['img_file'] = img_file

        return json.dumps(scenes)

    except Exception as e:
        print("findScene() throwing Exception:" + str(e))
        # Bare raise keeps the original traceback intact
        raise
    finally:
        # Always free the decoder / file handle, on success and on failure
        if cap is not None:
            cap.release()