-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpitch.py
83 lines (62 loc) · 2.62 KB
/
pitch.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
# Aoife McDonagh
# 13411348
#
# Python script for determining the pitch of an audio sample
# Function to extract the pitch period from an audio sample
# Parameters:
# sequence: autocorrelation function for the audio frame
# minlag: minimum number of lag samples rounded up to the nearest integer
from numpy import dot
def extractT0(sequence, minlag):
    """Return the pitch period, in samples, from an autocorrelation sequence.

    Parameters:
        sequence: autocorrelation values for one audio frame; element ``i``
            is treated as the value at lag ``i + minlag``.
        minlag: lag, in samples, that ``sequence[0]`` corresponds to.

    Returns:
        The lag of the largest strictly positive value in ``sequence``.
        Ties resolve to the earliest index. If no value is greater than
        zero (including an empty sequence), ``minlag`` is returned.
    """
    peak = 0
    peak_location = 0
    # Scan every element, including the last one: the earlier
    # ``range(0, len(sequence) - 1)`` loop never examined the final lag.
    for i, value in enumerate(sequence):
        if value > peak:
            peak = value
            peak_location = i
    return peak_location + minlag
# Function to calculate the autocorrelation function for a given frame of audio
# Parameters:
# frame: low pass filtered speech frame
# minlag: minimum number of lag samples rounded up to the nearest integer
# maxlag: maximum number of lag samples rounded up to the nearest integer
def xcorr(frame, minlag, maxlag):
    """Compute the normalised autocorrelation of a frame over a lag range.

    Parameters:
        frame: one frame of samples (any sequence ``numpy.dot`` accepts).
        minlag: smallest lag, in samples, to evaluate (expected >= 0).
        maxlag: upper bound of the lag range, in samples (exclusive).

    Returns:
        A list of ``maxlag - minlag`` values. Element ``i`` is the sum of
        ``frame[n] * frame[n + lag]`` for ``lag = minlag + i``, divided by
        the number of overlapping samples. Lags that leave no overlap
        (``lag >= len(frame)``) contribute ``0.0``.
    """
    frame_size = len(frame)
    sequence = []
    # Element i must describe lag ``minlag + i`` because extractT0() adds
    # ``minlag`` back onto the peak index; the earlier ``i + minlag - 2``
    # shifted every reported period by two samples.
    for k in range(minlag, maxlag):
        overlap = frame_size - k  # number of sample pairs at this lag
        if overlap <= 0:
            sequence.append(0.0)
            continue
        # Use the whole frame (the last sample was previously dropped) and
        # normalise by the number of products actually summed.
        value = dot(frame[:overlap], frame[k:])
        sequence.append(value / overlap)
    return sequence
# Function to return the pitch frequency (Hz) of a 32ms *frame* of speech
def get_pitch_freq(frame, frame_rate, min_f0=80, max_f0=260):
    """Estimate the pitch frequency of one frame of speech.

    Parameters:
        frame: samples of a single speech frame.
        frame_rate: sampling rate of ``frame`` in samples per second.
        min_f0: lowest pitch frequency (Hz) to search for; default 80.
        max_f0: highest pitch frequency (Hz) to search for; default 260.

    Returns:
        The pitch frequency in Hz, computed as ``1 / (T0 * Ts)`` where
        ``T0`` is the pitch period in samples reported by ``extractT0`` and
        ``Ts`` is the sample period.
    """
    Ts = 1.0 / frame_rate  # sample period of the speech (s)

    # Convert the frequency search range into a lag range in samples.
    # The highest frequency gives the shortest lag and vice versa; both
    # bounds are rounded to an integer with round().
    min_lag = int(round((1.0 / max_f0) / Ts))
    max_lag = int(round((1.0 / min_f0) / Ts))

    # Autocorrelate over the lag range and locate the strongest peak.
    sequence = xcorr(frame, min_lag, max_lag)
    T0 = extractT0(sequence, min_lag)

    pitch_frequency = 1.0 / (T0 * Ts)
    return pitch_frequency
# Split speech into 32ms frames
# get pitch of each 32ms frame
# returns average pitch value of *entire speech sample*
# best to keep the speech signal short enough that the pitch does not change dramatically
def average_pitch(speech, frame_rate):
    """Return the mean pitch frequency over all complete 32 ms frames.

    Parameters:
        speech: sequence of speech samples (must support ``len`` and
            slicing).
        frame_rate: sampling rate of ``speech`` in samples per second.

    Returns:
        The arithmetic mean of ``get_pitch_freq`` over consecutive,
        non-overlapping frames of ``int(frame_rate * 0.032)`` samples.
        A trailing partial frame is ignored.

    Raises:
        ZeroDivisionError: if ``speech`` is shorter than one frame, so
            there are no pitch values to average.
    """
    frame_size = int(frame_rate * 0.032)  # samples per 32 ms frame

    # The "+ 1" keeps the final complete frame: with the earlier stop of
    # ``len(speech) - frame_size`` that frame was skipped whenever the
    # length was an exact multiple of the frame size (and a one-frame
    # input produced no frames at all).
    last_start = len(speech) - frame_size
    pitch_values = [
        get_pitch_freq(speech[start:start + frame_size], frame_rate)
        for start in range(0, last_start + 1, frame_size)
    ]

    return sum(pitch_values) / len(pitch_values)
# Script entry point: runs only when this file is executed directly.
# NOTE(review): `sys` is not imported anywhere in the visible source, so the
# call below would raise NameError when run as a script. Confirm, and add
# `import sys` if this entry point is meant to work.
# NOTE(review): sys.argv entries are strings, while average_pitch() does
# arithmetic on frame_rate and slices speech into frames. Presumably the
# arguments need loading/converting first; verify the intended usage.
if __name__ == "__main__" :
    print("pitch main: this should not be printed")
    average_pitch(sys.argv[1], sys.argv[2])