% -*- texinfo -*- % @deftypefn {Function File} {@var{y} =} linkage (@var{d}) % @deftypefnx {Function File} {@var{y} =} linkage (@var{d}, @var{method}) % @deftypefnx {Function File} @ % {@var{y} =} linkage (@var{x}, @var{method}, @var{metric}) % @deftypefnx {Function File} @ % {@var{y} =} linkage (@var{x}, @var{method}, @var{arglist}) % % Produce a hierarchical clustering dendrogram. % % @var{d} is the dissimilarity matrix relative to @var{n} observations, % formatted as a @math{(n-1)*n/2}x1 vector as produced by @code{pdist}. % Alternatively, @var{x} contains data formatted for input to % @code{pdist}, @var{metric} is a metric for @code{pdist} and % @var{arglist} is a cell array containing arguments that are passed to % @code{pdist}. % % @code{linkage} starts by putting each observation into a singleton % cluster and numbering those from 1 to @var{n}. Then it merges two % clusters, chosen according to @var{method}, to create a new cluster % numbered @var{n+1}, and so on until all observations are grouped into % a single cluster numbered @var{2*n-1}. Row @var{m} of the % @math{(n-1)}x3 output matrix relates to cluster @math{n+m}: the first % two columns are the numbers of the two component clusters and column % 3 contains their distance. % % @var{method} defines the way the distance between two clusters is % computed and how it is recomputed when two clusters are merged: % % @table @samp % @item 'single' (default) % Distance between two clusters is the minimum distance between two % elements, one from each cluster. Produces a cluster tree % known as a minimum spanning tree. % % @item 'complete' % Furthest distance between two elements, one from each cluster. % % @item 'average' % Unweighted pair group method with averaging (UPGMA). % The mean distance between all pairs of elements, one from each % cluster. % % @item 'weighted' % Weighted pair group method with averaging (WPGMA).
% When two clusters A and B are joined together, the new distance to a % cluster C is the mean of the distances A-C and B-C. % % @item 'centroid' % Unweighted Pair-Group Method using Centroids (UPGMC). % Assumes Euclidean metric. The distance between cluster centroids, % each centroid being the center of mass of a cluster. % % @item 'median' % Weighted pair-group method using centroids (WPGMC). % Assumes Euclidean metric. Distance between cluster centroids. When % two clusters are joined together, the new centroid is the midpoint % between the joined centroids. % % @item 'ward' % Ward's sum of squared deviations about the group mean (ESS). % Also known as minimum variance or inner squared distance. % Assumes Euclidean metric. The distance is how much the moment of % inertia of the merged cluster exceeds the sum of those of the % individual clusters. % @end table % % @strong{Reference} % Ward, J. H. Hierarchical Grouping to Optimize an Objective Function. % J. Am. Statist. Assoc. 1963, 58, 236-244, % @url{http://iv.slis.indiana.edu/sw/data/ward.pdf}. % @end deftypefn % % @seealso{pdist, squareform}