11 from mpi4py import MPI
26 ostr = '#### Configuration ###########################\n'
28 ostr += 'Grid size = %d\n' % (self.grid_size)
29 ostr += 'Dimension = %d\n' % (self.dim)
30 ostr += 'Number of clusters = %d\n' % (self.n_means)
31 ostr += 'Number of points = %d\n' % (self.n_points)
32 ostr += '##############################################'
39 def init_points(self):
43 def _fill_randoms(self, tlst, count):
44 for _ in range(0, count):
46 for _ in range(0, self.dim):
47 lst.append(random.randint(0, self.grid_size - 1))
50 def calc_sq_dist(v1, v2):
52 for (x, y) in zip(v1, v2):
53 sum_ += (x - y) * (x - y)
57 def load_points(kv, kvi, kvo, i):
59 for (idp, point) in enumerate(kmeans.points):
63 def calc_cluster(kv, kvi, kvo, i):
67 min_dst = kmeans.grid_size * kmeans.grid_size
68 for (idm, mean) in enumerate(kmeans.means):
69 dst = calc_sq_dist(v, mean)
76 def copy_center(kv, kvi, kvo, i):
83 def update_cluster(kvvec, kvi, kvo):
86 for d in range(0, kmeans.dim):
89 for d in range(0, kmeans.dim):
91 avg = [x / (len(kvvec)) for x in sum_]
92 kvo.add_kv(kvvec[0][0], avg)
102 print('Number of processes = %d' % (comm.size))
105 kmeans.means = comm.bcast(kmeans.means, root=0)
108 for _ in range(0, kmeans.n_iteration):
109 kvs0 = kmr.emptykvs.map_once(False, load_points, key="integer")
110 kvs1 = kvs0.map(calc_cluster, key="integer")
111 kvs2 = kvs1.shuffle()
112 kvs3 = kvs2.reduce(update_cluster, key="integer")
113 kvs4 = kvs3.replicate()
114 kvs4.map(copy_center)
117 print('Cluster coordinates')
118 for m in kmeans.means:
def _fill_randoms(self, tlst, count)