-
-
Notifications
You must be signed in to change notification settings - Fork 26.9k
birch clustering does not work with version 1.1.1 #23988
Copy link
Copy link
Closed
Labels
Description
Describe the bug
Birch clustering does not work in version 1.1.1 but does work in version 0.24.1. This issue was also documented on StackOverflow here.
Fitting a pandas DataFrame in version 1.1.1 raises the error `ValueError: ndarray is not C-contiguous`. The same code runs without error in version 0.24.1.
Steps/Code to Reproduce
from sklearn.datasets import load_iris
from sklearn.cluster import Birch
X, y = load_iris(return_X_y=True, as_frame=True)
birch = Birch(n_clusters=3)
birch.fit_predict(X)
Expected Results
No error raised.
Actual Results
from sklearn.datasets import load_iris
from sklearn.cluster import Birch
X, y = load_iris(return_X_y=True, as_frame=True)
birch = Birch(n_clusters=3)
birch.fit_predict(X)
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Input In [4], in <cell line: 6>()
4 X, y = load_iris(return_X_y=True, as_frame=True)
5 birch = Birch(n_clusters=3)
----> 6 birch.fit_predict(X)
File ~/Documents/packages/scikit-learn/sklearn/base.py:716, in ClusterMixin.fit_predict(self, X, y)
698 """
699 Perform clustering on `X` and returns cluster labels.
700
(...)
712 Cluster labels.
713 """
714 # non-optimized default implementation; override when a better
715 # method is possible for a given clustering algorithm
--> 716 self.fit(X)
717 return self.labels_
File ~/Documents/packages/scikit-learn/sklearn/cluster/_birch.py:545, in Birch.fit(self, X, y)
543 # TODO: Remove deprecated flags in 1.2
544 self._deprecated_fit, self._deprecated_partial_fit = True, False
--> 545 return self._fit(X, partial=False)
File ~/Documents/packages/scikit-learn/sklearn/cluster/_birch.py:615, in Birch._fit(self, X, partial)
612 self.subcluster_centers_ = centroids
613 self._n_features_out = self.subcluster_centers_.shape[0]
--> 615 self._global_clustering(X)
616 return self
File ~/Documents/packages/scikit-learn/sklearn/cluster/_birch.py:762, in Birch._global_clustering(self, X)
759 self.subcluster_labels_ = clusterer.fit_predict(self.subcluster_centers_)
761 if compute_labels:
--> 762 self.labels_ = self._predict(X)
File ~/Documents/packages/scikit-learn/sklearn/cluster/_birch.py:700, in Birch._predict(self, X)
697 kwargs = {"Y_norm_squared": self._subcluster_norms}
699 with config_context(assume_finite=True):
--> 700 argmin = pairwise_distances_argmin(
701 X, self.subcluster_centers_, metric_kwargs=kwargs
702 )
703 return self.subcluster_labels_[argmin]
File ~/Documents/packages/scikit-learn/sklearn/metrics/pairwise.py:791, in pairwise_distances_argmin(X, Y, axis, metric, metric_kwargs)
788 metric = "sqeuclidean"
789 metric_kwargs = {}
--> 791 indices = PairwiseDistancesArgKmin.compute(
792 X=X,
793 Y=Y,
794 k=1,
795 metric=metric,
796 metric_kwargs=metric_kwargs,
797 strategy="auto",
798 return_distance=False,
799 )
800 indices = indices.flatten()
801 else:
802 # TODO: once PairwiseDistancesArgKmin supports sparse input matrices and 32 bit,
803 # we won't need to fallback to pairwise_distances_chunked anymore.
804
805 # Turn off check for finiteness because this is costly and because arrays
806 # have already been validated.
File ~/Documents/packages/scikit-learn/sklearn/metrics/_pairwise_distances_reduction/_dispatcher.py:233, in PairwiseDistancesArgKmin.compute(cls, X, Y, k, metric, chunk_size, metric_kwargs, strategy, return_distance)
227 # Note (jjerphan): Some design thoughts for future extensions.
228 # This factory comes to handle specialisations for the given arguments.
229 # For future work, this might can be an entrypoint to specialise operations
230 # for various backend and/or hardware and/or datatypes, and/or fused
231 # {sparse, dense}-datasetspair etc.
232 if X.dtype == Y.dtype == np.float64:
--> 233 return PairwiseDistancesArgKmin64.compute(
234 X=X,
235 Y=Y,
236 k=k,
237 metric=metric,
238 chunk_size=chunk_size,
239 metric_kwargs=metric_kwargs,
240 strategy=strategy,
241 return_distance=return_distance,
242 )
243 raise ValueError(
244 "Only 64bit float datasets are supported at this time, "
245 f"got: X.dtype={X.dtype} and Y.dtype={Y.dtype}."
246 )
File ~/Documents/packages/scikit-learn/sklearn/metrics/_pairwise_distances_reduction/_argkmin.pyx:74, in sklearn.metrics._pairwise_distances_reduction._argkmin.PairwiseDistancesArgKmin64.compute()
File ~/Documents/packages/scikit-learn/sklearn/metrics/_pairwise_distances_reduction/_argkmin.pyx:343, in sklearn.metrics._pairwise_distances_reduction._argkmin.FastEuclideanPairwiseDistancesArgKmin64.__init__()
File ~/Documents/packages/scikit-learn/sklearn/metrics/_pairwise_distances_reduction/_datasets_pair.pyx:98, in sklearn.metrics._pairwise_distances_reduction._datasets_pair.DatasetsPair.get_for()
File ~/Documents/packages/scikit-learn/sklearn/metrics/_pairwise_distances_reduction/_datasets_pair.pyx:146, in sklearn.metrics._pairwise_distances_reduction._datasets_pair.DenseDenseDatasetsPair.__init__()
File stringsource:658, in View.MemoryView.memoryview_cwrapper()
File stringsource:349, in View.MemoryView.memoryview.__cinit__()
ValueError: ndarray is not C-contiguous
Versions
System:
python: 3.10.4 (main, Apr 2 2022, 09:04:19) [GCC 11.2.0]
executable: /home/creeg/projects/capstone/venv/bin/python
machine: Linux-5.10.16.3-microsoft-standard-WSL2-x86_64-with-glibc2.35
Python dependencies:
pip: 22.0.2
setuptools: 59.6.0
sklearn: 0.24.1
numpy: 1.23.1
scipy: 1.8.1
Cython: None
pandas: 1.4.2
matplotlib: 3.5.2
joblib: 1.1.0
threadpoolctl: 3.1.0
Built with OpenMP: True
Reactions are currently unavailable