Skip to content

TSNE with correlation metric: ValueError: Distance matrix 'X' must be symmetric #4475

@cel4

Description

@cel4
from sklearn.manifold import TSNE
import numpy as np
np.random.seed(42)

data = np.random.rand(10, 3)
data[-1, :] = 0

model = TSNE(metric="correlation")
model.fit_transform(data)

TSNE raises an obscure error when the data set contains rows with a standard deviation of 0, for which the correlation is undefined:

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-20-658142c1e315> in <module>()
      1 model = TSNE(metric="correlation")
----> 2 res = model.fit_transform(data)
      3 ran = model.fit_transform(ran_data)

/Users/ch/miniconda/envs/sci34/lib/python3.4/site-packages/sklearn/manifold/t_sne.py in fit_transform(self, X, y)
    522             Embedding of the training data in low-dimensional space.
    523         """
--> 524         self.fit(X)
    525         return self.embedding_

/Users/ch/miniconda/envs/sci34/lib/python3.4/site-packages/sklearn/manifold/t_sne.py in fit(self, X, y)
    447         self.training_data_ = X
    448 
--> 449         P = _joint_probabilities(distances, self.perplexity, self.verbose)
    450         if self.init == 'pca':
    451             pca = RandomizedPCA(n_components=self.n_components,

/Users/ch/miniconda/envs/sci34/lib/python3.4/site-packages/sklearn/manifold/t_sne.py in _joint_probabilities(distances, desired_perplexity, verbose)
     52     P = conditional_P + conditional_P.T
     53     sum_P = np.maximum(np.sum(P), MACHINE_EPSILON)
---> 54     P = np.maximum(squareform(P) / sum_P, MACHINE_EPSILON)
     55     return P
     56 

/Users/ch/miniconda/envs/sci34/lib/python3.4/site-packages/scipy/spatial/distance.py in squareform(X, force, checks)
   1479             raise ValueError('The matrix argument must be square.')
   1480         if checks:
-> 1481             is_valid_dm(X, throw=True, name='X')
   1482 
   1483         # One-side of the dimensions is set here.

/Users/ch/miniconda/envs/sci34/lib/python3.4/site-packages/scipy/spatial/distance.py in is_valid_dm(D, tol, throw, name, warning)
   1562                 if name:
   1563                     raise ValueError(('Distance matrix \'%s\' must be '
-> 1564                                      'symmetric.') % name)
   1565                 else:
   1566                     raise ValueError('Distance matrix must be symmetric.')

ValueError: Distance matrix 'X' must be symmetric

Metadata

Metadata

Assignees

No one assigned

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions