Module src.app.Clustering.ClusterSizeReduction
Source code
from src.app.Module import Module
import numpy as np
class ClusterSizeReduction(Module):
    """Reduces number of images in a cluster.

    For every cluster label present in the previous module's results, only
    the samples whose feature vectors lie closest (Euclidean distance) to
    that cluster's center are kept.

    Attributes:
        _num_elements_per_cluster: Number of elements per cluster to reduce
            to (int)
    """

    def __init__(self, prev_module, num_elements_per_cluster):
        """Stores the per-cluster limit and registers the module.

        Args:
            prev_module: Module whose results are reduced; forwarded to the
                base class constructor.
            num_elements_per_cluster: Maximum number of elements kept for
                each cluster (int).
        """
        super().__init__('ClusterSizeReduction', prev_module)
        self._num_elements_per_cluster = num_elements_per_cluster

    def run(self):
        """Keeps at most `_num_elements_per_cluster` samples per cluster.

        Reads 'images', 'features', 'labels', 'centers' and 'kmeans' from
        the previous module's results and fills `self._result` with the
        selected images/features/labels, grouped by ascending label and
        ordered by increasing distance to the cluster center. 'centers' and
        'kmeans' are passed through unchanged.
        """
        super().run()
        self._data = self._prev_model.get_module_results()
        data = self._data
        features = data['features']
        labels = data['labels']
        centers = data['centers']

        self._result = {
            'images': [],
            'features': [],
            'labels': [],
            'centers': centers,
            'kmeans': data['kmeans'],
        }

        # One row per sample: (label, original index, Euclidean distance to
        # the center of the cluster the sample is assigned to).
        distances = np.empty((len(features), 3))
        for i, feature in enumerate(features):
            label = labels[i]
            distances[i, 0] = label
            distances[i, 1] = i
            distances[i, 2] = np.linalg.norm(centers[label] - np.array(feature))

        # Sort all rows by distance. A stable sort keeps equally distant
        # samples in their original order, so the selection is deterministic.
        distances = distances[distances[:, 2].argsort(kind='stable')]

        # Iterate over the label values that actually occur rather than
        # range(number of unique labels): when a cluster id is missing from
        # the labels, counting from 0 would skip the highest label(s).
        for cluster_label in np.unique(labels):
            nearest = distances[distances[:, 0] == cluster_label]
            # Slicing already copes with clusters smaller than the limit.
            for row in nearest[:self._num_elements_per_cluster]:
                index = int(row[1])
                self._result['images'].append(data['images'][index])
                self._result['features'].append(features[index])
                self._result['labels'].append(labels[index])
Classes
class ClusterSizeReduction (prev_module, num_elements_per_cluster)
-
Reduces number of images in a cluster.
This class reduces the number of images in a cluster to the specified amount.
Attributes
_num_elements_per_cluster
- Number of elements per cluster to reduce to (int)
Source code
class ClusterSizeReduction(Module):
    """Reduces number of images in a cluster.

    For every cluster label present in the previous module's results, only
    the samples whose feature vectors lie closest (Euclidean distance) to
    that cluster's center are kept.

    Attributes:
        _num_elements_per_cluster: Number of elements per cluster to reduce
            to (int)
    """

    def __init__(self, prev_module, num_elements_per_cluster):
        """Stores the per-cluster limit and registers the module.

        Args:
            prev_module: Module whose results are reduced; forwarded to the
                base class constructor.
            num_elements_per_cluster: Maximum number of elements kept for
                each cluster (int).
        """
        super().__init__('ClusterSizeReduction', prev_module)
        self._num_elements_per_cluster = num_elements_per_cluster

    def run(self):
        """Keeps at most `_num_elements_per_cluster` samples per cluster.

        Reads 'images', 'features', 'labels', 'centers' and 'kmeans' from
        the previous module's results and fills `self._result` with the
        selected images/features/labels, grouped by ascending label and
        ordered by increasing distance to the cluster center. 'centers' and
        'kmeans' are passed through unchanged.
        """
        super().run()
        self._data = self._prev_model.get_module_results()
        data = self._data
        features = data['features']
        labels = data['labels']
        centers = data['centers']

        self._result = {
            'images': [],
            'features': [],
            'labels': [],
            'centers': centers,
            'kmeans': data['kmeans'],
        }

        # One row per sample: (label, original index, Euclidean distance to
        # the center of the cluster the sample is assigned to).
        distances = np.empty((len(features), 3))
        for i, feature in enumerate(features):
            label = labels[i]
            distances[i, 0] = label
            distances[i, 1] = i
            distances[i, 2] = np.linalg.norm(centers[label] - np.array(feature))

        # Sort all rows by distance. A stable sort keeps equally distant
        # samples in their original order, so the selection is deterministic.
        distances = distances[distances[:, 2].argsort(kind='stable')]

        # Iterate over the label values that actually occur rather than
        # range(number of unique labels): when a cluster id is missing from
        # the labels, counting from 0 would skip the highest label(s).
        for cluster_label in np.unique(labels):
            nearest = distances[distances[:, 0] == cluster_label]
            # Slicing already copes with clusters smaller than the limit.
            for row in nearest[:self._num_elements_per_cluster]:
                index = int(row[1])
                self._result['images'].append(data['images'][index])
                self._result['features'].append(features[index])
                self._result['labels'].append(labels[index])
Ancestors
Inherited members