-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathMFCCEngine.py
More file actions
100 lines (79 loc) · 3.85 KB
/
MFCCEngine.py
File metadata and controls
100 lines (79 loc) · 3.85 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
import os
import math
import librosa
from dtw import dtw
from numpy.linalg import norm
from .Engine import Engine
from model.Audiotrack import Audiotrack
from model.Fingerprint import Fingerprint, FingerprintType
from .utils import average_matches
class MFCCEngine(Engine):
    """Audio-matching engine based on MFCC descriptors compared with DTW.

    A track's fingerprint set is its MFCC matrix cut into fixed-width
    slices. Matching a query track means DTW-comparing each of its slices
    against every stored fingerprint and aggregating per-song distances.
    """

    def __init__(self, data_path, sample_size=200, n_mfcc=6, n_average=1):
        # data_path:   directory prefix prepended to audiotrack filenames.
        # sample_size: width (in MFCC frames) of each fingerprint slice.
        # n_mfcc:      number of MFCC coefficients extracted per frame.
        # n_average:   how many best sample matches are averaged per song.
        self.data_path = data_path
        self.sample_size = sample_size
        self.n_mfcc = n_mfcc
        # BUG FIX: was `self.n_average = n_mfcc`, which silently ignored
        # the n_average argument.
        self.n_average = n_average

    def extract_fingerprints(self, audiotrack, duration=120.0):
        """Return MFCC fingerprints for the first `duration` seconds of a track."""
        mfcc = self.__extract_mfcc(audiotrack.filename, duration)
        samples = self.__split_mfcc(mfcc)
        return [Fingerprint.create(FingerprintType.MFCC.value, audiotrack, sample)
                for sample in samples]

    def compare(self, lhs, rhs):
        """DTW distance between two MFCC matrices (transposed so frames are rows)."""
        dist, cost, acc_cost, path = dtw(
            lhs.T, rhs.T, dist=lambda x, y: norm(x - y, ord=1))
        return dist

    def find_matches(self, audiotrack, top_k):
        """Return the `top_k` closest songs to `audiotrack`.

        Each result is a dict {'filename': ..., 'distance': ...} with the
        distance normalized so all candidate distances sum to 1, sorted
        ascending (smaller = better match).
        """
        fingerprints = Fingerprint.objects(type=FingerprintType.MFCC.value)
        ref_mfcc = self.extract_fingerprints(audiotrack, 10.0)
        # Aggregate per-song distances across all query fragments.
        total_matches = {}
        for i, fragment in enumerate(ref_mfcc):
            print("Processing fragment %i of %i" % (i + 1, len(ref_mfcc)))
            sample_matches = self.__calc_fingerprints_distance(
                fragment.deserialize_data(), fingerprints)
            song_matches = average_matches(sample_matches, self.n_average)
            # Keep the worst (maximum) distance seen per song across
            # fragments. NOTE: the original comment said "average" but the
            # code used max; max behavior is preserved here.
            # BUG FIX: guard against songs missing from earlier fragments
            # (the old dict comprehension raised KeyError on new keys and
            # dropped keys absent from the current fragment's matches).
            for key, dist in song_matches.items():
                prev = total_matches.get(key)
                total_matches[key] = dist if prev is None else max(dist, prev)
        # Normalize, then sort ascending by distance.
        normalized_matches = self.__normalize_results(total_matches)
        sorted_matches = sorted(normalized_matches.items(), key=lambda x: x[1])
        return [{'filename': k, 'distance': v} for k, v in sorted_matches[:top_k]]

    def __normalize_results(self, results):
        """Scale a {filename: distance} dict so the distances sum to 1."""
        sum_distance = sum(results.values())
        return {key: (distance / sum_distance) for key, distance in results.items()}

    def __calc_fingerprints_distance(self, ref_mfcc, fingerprints):
        """Compare `ref_mfcc` against every stored fingerprint, sorted by distance."""
        matches = []
        for fingerprint in fingerprints:
            mfcc = fingerprint.deserialize_data()
            # Truncate the wider matrix so both have the same frame count.
            # BUG FIX: the original used an integer index (mfcc[:, n]),
            # which selects a single column instead of truncating; the
            # slice mfcc[:, :n] is what alignment requires.
            width = min(ref_mfcc.shape[1], mfcc.shape[1])
            dist = self.compare(ref_mfcc[:, :width], mfcc[:, :width])
            matches.append(
                {'filename': fingerprint.audiotrack.filename, 'distance': dist})
        return sorted(matches, key=lambda x: x['distance'])

    def __extract_mfcc(self, file_path, duration=120.0):
        """Extract the MFCC matrix from the audiotrack located at `file_path`."""
        signal, sr = librosa.load(
            path=os.path.join(self.data_path, file_path), duration=duration)
        # Keyword `y=` is required by librosa >= 0.10 (positional removed).
        mfcc = librosa.feature.mfcc(y=signal, n_mfcc=self.n_mfcc, sr=sr)
        return mfcc

    def __split_mfcc(self, mfcc):
        """Split a song's MFCC matrix into fixed-width sample slices."""
        n_samples = mfcc.shape[1] // self.sample_size
        samples = [mfcc[:, i * self.sample_size:(i + 1) * self.sample_size]
                   for i in range(n_samples)]
        # BUG FIX: only keep the trailing partial slice when it is
        # non-empty; the original always appended it, yielding a
        # zero-width sample when the length divided evenly.
        if mfcc.shape[1] % self.sample_size:
            samples.append(mfcc[:, n_samples * self.sample_size:])
        return samples