% The farthest point clustering scheme tends to grab points that are evenly
% spaced over the support of the dataset.  When there are lots of "outliers"
% there may be very few points where most of the data are.
%
% This demo draws a 2-D dataset made of a standard-normal bulk plus a
% heavy-tailed (log-normal) group shifted to (4,4), then compares
%   left panel  - K points picked uniformly at random from the dataset
%   right panel - the K centers chosen by fpc, with each point coloured
%                 by the cluster label fpc returns for it.

K     = 30;   % Grab K points
D     = 2;    % in D dimensions, out of
N     = 1e3;  % a dataset of size N
alpha = 0.8;  % fraction of the data drawn from the Gaussian bulk

% Number of bulk points; the remaining N - nInliers are the outliers.
nInliers  = ceil(N*alpha);
nOutliers = N - nInliers;

% Columns are data points.  Outliers are 3*exp(randn) shifted by [4;4];
% the shift vector has two rows, so this construction assumes D == 2.
xx = [randn(D, nInliers), ...
      bsxfun(@plus, [4;4], 3*exp(randn(D, nOutliers)))];

% Random baseline: K *distinct* indices, so the left panel always shows
% exactly K markers.  randperm(N) followed by (1:K) is used instead of
% randperm(N,K) so the script also runs on older releases.
perm = randperm(N);
idx  = perm(1:K);

% fpc is defined elsewhere.  From its use below, idx2 holds the column
% indices of the chosen centers and yy holds a label per data point,
% compared against 1..K - NOTE(review): confirm against fpc itself.
[idx2, yy] = fpc(xx, K);

clf;

% ---- Left panel: uniformly random subset --------------------------------
subplot(1,2,1);
hold on;
plot(xx(1,:), xx(2,:), '+');
plot(xx(1,idx), xx(2,idx), 'rx', 'MarkerSize', 10, 'LineWidth', 2);
axis equal

% ---- Right panel: farthest-point clustering -----------------------------
subplot(1,2,2);
hold on;
for kk = 1:K
  clusterColor = rand(1,3);     % one random colour per cluster
  members      = (yy == kk);    % points labelled with cluster kk

  plot(xx(1,members), xx(2,members), '+', 'Color', clusterColor);

  % Draw the center twice: a thick black cross underneath and a thinner
  % coloured cross on top, so the center stands out against its members.
  cx = xx(1,idx2(kk));
  cy = xx(2,idx2(kk));
  plot(cx, cy, 'kx', 'MarkerSize', 12, 'LineWidth', 4);
  plot(cx, cy, 'x',  'MarkerSize', 8,  'LineWidth', 2, 'Color', clusterColor);
end
axis equal