-1
我已经为2维创建了我的k均值算法。我想修改它的8个维度,即数据点可以取8维值,最后返回8维质心值。如何修改我的K-Means聚类算法以将维度增加到8?
的代码如下:
import random
import math
# Input varibles
#k = 3
#Threshold = 1
DATA = [[2, 1, 1, 2, 1, 1, 1, 5], [ 6, 8, 1, 3, 4, 3, 7, 1],[4, 1, 3, 2, 1, 3, 1, 1],[3, 1, 1, 2, 1, 2, 1, 1],[3 ,1 ,1 ,1, 1, 2, 1, 1],[6, 1, 1, 1, 1, 7, 1, 1],[6, 10, 2, 8, 10, 7, 3, 3]]
BIG_NUMBER = math.pow(10, 10)
data = []
centroids = []
class DataPoint:
def __init__(self, x, y):
self.x = x
self.y = y
def set_x(self, x):
self.x = x
def get_x(self):
return self.x
def set_y(self, y):
self.y = y
def get_y(self):
return self.y
def set_cluster(self, clusterNumber):
self.clusterNumber = clusterNumber
def get_cluster(self):
return self.clusterNumber
class Centroid:
def __init__(self, x, y):
self.x = x
self.y = y
def set_x(self, x):
self.x = x
def get_x(self):
return self.x
def set_y(self, y):
self.y = y
def get_y(self):
return self.y
# Initializing The Centroids
def initialize_centroids(k,DATA):
#find data range in x and y
max_x = max(x for x,y in DATA)
max_y = max(y for x,y in DATA)
min_x = min(x for x,y in DATA)
min_y = min(y for x,y in DATA)
#chosse random x and y between this data range
#assign to centroids
for j in range(k):
#x = random.choice(DATA)
random_x = random.uniform(min_x,max_x)
random_y = random.uniform(min_y,max_y)
centroids.append(Centroid(random_x, random_y))
#print("(", centroids[j].get_x(), ",", centroids[j].get_y(), ")")
return centroids
# Assigning Datapoints to nearest Centroids
def initialize_datapoints(k,DATA):
for i in range(len(DATA)):
newpoint = DataPoint(DATA[i][0], DATA[i][1])
bestMinimum = BIG_NUMBER
data.append(newpoint)
for j in range(k):
distance = get_distance(newpoint.get_x(), newpoint.get_y(), centroids[j].get_x(), centroids[j].get_y())
if(distance < bestMinimum):
bestMinimum = distance
newpoint.set_cluster(j)
return
# Calculating Euclidean distance
def get_distance(dataPointX, dataPointY, centroidX, centroidY):
return math.sqrt(math.pow((centroidY - dataPointY), 2) + math.pow((centroidX - dataPointX), 2))
# Updating Centroid and Clusters till the threshold is met
def update_centroids_n_clusters(k,DATA,Threshold):
dist = 0.0
#print ("a")
for j in range(k):
prev_x = centroids[j].get_x()
prev_y = centroids[j].get_y()
totalX = 0
totalY = 0
totalInCluster = 0
for z in range(len(data)):
if (data[z].get_cluster() == j):
totalX += data[z].get_x()
totalY += data[z].get_y()
totalInCluster += 1
if (totalInCluster > 0):
s_x = (totalX/totalInCluster)
s_y = (totalY/totalInCluster)
centroids[j].set_x(s_x)
centroids[j].set_y(s_y)
x1 = centroids[j].get_x()
y1 = centroids[j].get_y()
x2 = prev_x
y2 = prev_y
dist += get_distance(x1,y1,x2,y2)
conv_val = (1/k)*dist
if(conv_val >= Threshold):
for i in range(len(DATA)):
bestMinimum = BIG_NUMBER
currentCluster = 0
for j in range(k):
distance = get_distance(data[i].get_x(), data[i].get_y(), centroids[j].get_x(), centroids[j].get_y())
if (distance < bestMinimum):
bestMinimum = distance
currentCluster = j
data[i].set_cluster(currentCluster)
update_centroids_n_clusters(k, DATA, Threshold)
return
# Performing K_Means
def Kmeans(k, DATA, Threshold):
initialize_centroids(k,DATA)
initialize_datapoints(k, DATA)
update_centroids_n_clusters(k, DATA, Threshold)
for i in range(k):
p = 0
print()
print("Centroid ", i, " is at")
print("(",centroids[i].get_x(), ",", centroids[i].get_y(), ")")
print("Cluster ", i, " includes:")
for j in range(len(DATA)):
if (data[j].get_cluster() == i):
#print("(", data[j].get_x(), ", ", data[j].get_y(), ")")
p += 1
print(p,"points")
return
Kmeans(3,DATA,0.1)
我应该如何修改我的类质心和类数据点在这个代码?谢谢!!
注:该代码是在的Python 3