下面的代码在 TensorFlow 中使用音频文件
创建特征矩阵,但运行时出现 Python 类型错误:“float”对象不能被解释为索引
import numpy as np
import tensorflow as tf
# Glob pattern selecting every WAV file of the dataset.
directory = "audio_dataset/*.wav"
filenames = tf.train.match_filenames_once(directory)

# Run both initializers together; `init` is reused by every session below.
init = (tf.global_variables_initializer(), tf.local_variables_initializer())

# Scalar op: how many file names matched the pattern.
count_num_files = tf.size(filenames)

# Queue of file names plus a reader that yields (name, raw contents) pairs.
filename_queue = tf.train.string_input_producer(filenames)
reader = tf.WholeFileReader()
filename, file_contents = reader.read(filename_queue)

# Sanity check: print the name of every matched file once.
with tf.Session() as sess:
    sess.run(init)
    num_files = sess.run(count_num_files)
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)
    try:
        for _ in range(num_files):
            audio_file = sess.run(filename)
            print(audio_file)
    finally:
        # Ask the queue-runner threads to stop and wait for them, so they
        # are not left running after the session is closed.
        coord.request_stop()
        coord.join(threads)
下面使用一个把音频从时域转换到频域的工具包(Bregman Toolkit):
from bregman.suite import *
# Placeholder that is fed one chromagram matrix at a time via feed_dict.
chromo = tf.placeholder(tf.float32)
# For the fed matrix, the index of the largest entry along dimension 0
# (one index per column).
max_freqs = tf.argmax(chromo, 0)
def get_next_chromogram(sess):
    """Read the next audio file name from the queue and return its chromagram.

    Args:
        sess: session used to evaluate the module-level `filename` op.

    Returns:
        The `X` attribute of the `Chromagram` built for that file.
    """
    audio_file = sess.run(filename)
    # nfft, wfft and nhop are all int literals. The reported
    # "'float' object cannot be interpreted as an index" TypeError is raised
    # inside bregman itself (`P.empty(self._cqtN)` in features_base.py),
    # not by the values passed here.
    chromagram = Chromagram(audio_file, nfft=16384, wfft=8192, nhop=2205)
    return chromagram.X
def extract_feature_vector(sess, chromo_data):
    """Summarize a chromagram as a normalized histogram of its peak rows.

    Args:
        sess: session used to evaluate `max_freqs`.
        chromo_data: 2-D array unpacked as (num_features, num_samples).

    Returns:
        A float array of length `num_features`; entry `i` is the fraction of
        columns whose largest value sits in row `i`.
    """
    num_features, num_samples = np.shape(chromo_data)
    # Row index of the maximum in each column of the chromagram.
    freq_vals = sess.run(max_freqs, feed_dict={chromo: chromo_data})
    # One bin per row index: edges 0, 1, ..., num_features.
    hist, _ = np.histogram(freq_vals, bins=range(num_features + 1))
    # Convert to float first so the division is never integer division.
    return hist.astype(float) / num_samples
def get_dataset(sess):
    """Build the feature matrix with one row per audio file.

    Args:
        sess: session in which the input queue is read and `max_freqs` is
            evaluated.

    Returns:
        An `np.matrix` with one feature-vector row per matched file, or an
        empty list when no file matched (same as before).
    """
    num_files = sess.run(count_num_files)
    coord = tf.train.Coordinator()
    # NOTE(review): the queue-runner threads are left running on return, as
    # before; stopping them here would presumably close the queue and break
    # a second call in the same session -- confirm before changing.
    threads = tf.train.start_queue_runners(coord=coord)

    rows = []
    for _ in range(num_files):
        chromo_data = get_next_chromogram(sess)
        rows.append(np.matrix([extract_feature_vector(sess, chromo_data)]))

    if not rows:
        return rows
    # Stack once at the end instead of re-copying the whole matrix per file.
    return np.vstack(rows)
下面的代码围绕两个质心对数据进行聚类:
# Number of clusters (centroids) used by the k-means code below.
k = 2
# Upper bound on the number of assign/recompute iterations.
max_iterations = 100
def initial_cluster_centroids(X, k):
    """Pick the first `k` rows of `X` as the starting centroids.

    Args:
        X: 2-D array or matrix with one sample per row.
        k: number of centroids wanted.

    Returns:
        The slice `X[:k, :]`; it has fewer than `k` rows when `X` itself has
        fewer than `k` rows.
    """
    return X[:k, :]
def assign_cluster(X, centroids):
    """Return an op giving, for every row of `X`, its nearest centroid index.

    Args:
        X: samples, one per row.
        centroids: current centroids, one per row.

    Returns:
        A tensor of centroid indices, one per row of `X`.
    """
    # Add a leading axis to X and a middle axis to the centroids so the
    # subtraction broadcasts to every (centroid, sample) pair.
    expanded_vectors = tf.expand_dims(X, 0)
    expanded_centroids = tf.expand_dims(centroids, 1)
    squared_diffs = tf.square(tf.subtract(expanded_vectors, expanded_centroids))
    # Sum over the feature axis: squared distance per (centroid, sample).
    distances = tf.reduce_sum(squared_diffs, 2)
    # Smallest distance along the centroid axis.
    return tf.argmin(distances, 0)
def recompute_centroids(X, Y):
    """Return an op computing each cluster's mean from the assignments `Y`.

    Args:
        X: samples, one per row.
        Y: cluster index of every row of `X`.

    Returns:
        A tensor with one row per cluster (`k` rows, using the module-level
        `k`) holding the mean of the rows assigned to it.
    """
    # Per-cluster sum of the rows, and per-cluster count of the rows
    # (summing ones of the same shape as X).
    sums = tf.unsorted_segment_sum(X, Y, k)
    counts = tf.unsorted_segment_sum(tf.ones_like(X), Y, k)
    # NOTE(review): a cluster with no assigned rows divides 0 by 0 here,
    # presumably yielding NaN -- confirm and handle if that can occur.
    return sums / counts
# Run k-means on the audio feature matrix and print the final centroids.
with tf.Session() as sess:
    sess.run(init)
    X = get_dataset(sess)
    centroids = initial_cluster_centroids(X, k)
    i, converged = 0, False
    while not converged and i < max_iterations:
        i += 1
        Y = assign_cluster(X, centroids)
        new_centroids = sess.run(recompute_centroids(X, Y))
        # Previously `converged` was never updated, so the loop always ran
        # max_iterations times. Exact equality is used on purpose: once the
        # centroids stop changing, further iterations would reproduce them,
        # so stopping early leaves the printed result unchanged.
        converged = np.array_equal(new_centroids, centroids)
        centroids = new_centroids
    print(centroids)
但是我得到以下回溯:
Traceback (most recent call last):
File "components.py", line 776, in <module>
X = get_dataset(sess)
File "ccomponents.py", line 745, in get_dataset
chromo_data = get_next_chromogram(sess)
File "coffee_components.py", line 728, in get_next_chromogram
F = Chromagram(audio_file, nfft=16384, wfft=8192, nhop=2205)
File "/Volumes/Dados/Documents/Education/Programming/Machine Learning/Manning/book/BregmanToolkit-master/bregman/features.py", line 143, in __init__
Features.__init__(self, arg, feature_params)
File "/Volumes/Dados/Documents/Education/Programming/Machine Learning/Manning/book/BregmanToolkit-master/bregman/features_base.py", line 70, in __init__
self.extract()
File "/Volumes/Dados/Documents/Education/Programming/Machine Learning/Manning/book/BregmanToolkit-master/bregman/features_base.py", line 213, in extract
self.extract_funs.get(f, self._extract_error)()
File "/Volumes/Dados/Documents/Education/Programming/Machine Learning/Manning/book/BregmanToolkit-master/bregman/features_base.py", line 711, in _chroma
if not self._cqft():
File "/Volumes/Dados/Documents/Education/Programming/Machine Learning/Manning/book/BregmanToolkit-master/bregman/features_base.py", line 588, in _cqft
self._make_log_freq_map()
File "/Volumes/Dados/Documents/Education/Programming/Machine Learning/Manning/book/BregmanToolkit-master/bregman/features_base.py", line 353, in _make_log_freq_map
mxnorm = P.empty(self._cqtN) # Normalization coefficients
TypeError: 'float' object cannot be interpreted as an index
据我所知,传给 `range` 的是 `int`,而不是 `float`。
有人可以帮我指出错误在哪里吗?
'range'在哪里?它不在堆栈跟踪中。这似乎是抱怨'X = get_dataset(sess)'行。 – Antimony
是的,'get_dataset(sess)'是一个函数(见上文),它使用'range()'进行迭代。通常这个错误意味着把'float'传给了'range()',但在这里我不确定。 – outkast
也许你可以检查'get_next_chromogram()'中'audio_file'的值?这是唯一传递给'Chromagram()'的非整数。 – Antimony