-
-
Notifications
You must be signed in to change notification settings - Fork 260
type error when fitting X,y #453
Copy link
Copy link
Closed
Description
Hi All,
I have created a training set for machine learning, and when I try to fit the model it raises a TypeError.
The code and error are as follows.
Code:
search.fit(train_final[X_cols_train], train_final['target'])
output:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-105-9dea7c488bbf> in <module>
----> 1 search.fit(train_final[X_cols_train], train_final['target'])
~/env/lib/python3.5/site-packages/dask_ml/model_selection/_incremental.py in fit(self, X, y, **fit_params)
572 Additional partial fit keyword arguments for the estimator.
573 """
--> 574 return default_client().sync(self._fit, X, y, **fit_params)
575
576 @if_delegate_has_method(delegate=("best_estimator_", "estimator"))
~/env/lib/python3.5/site-packages/distributed/client.py in sync(self, func, *args, **kwargs)
671 return future
672 else:
--> 673 return sync(self.loop, func, *args, **kwargs)
674
675 def __repr__(self):
~/env/lib/python3.5/site-packages/distributed/utils.py in sync(loop, func, *args, **kwargs)
275 e.wait(10)
276 if error[0]:
--> 277 six.reraise(*error[0])
278 else:
279 return result[0]
~/env/lib/python3.5/site-packages/six.py in reraise(tp, value, tb)
691 if value.__traceback__ is not tb:
692 raise value.with_traceback(tb)
--> 693 raise value
694 finally:
695 value = None
~/env/lib/python3.5/site-packages/distributed/utils.py in f()
260 if timeout is not None:
261 future = gen.with_timeout(timedelta(seconds=timeout), future)
--> 262 result[0] = yield future
263 except Exception as exc:
264 error[0] = sys.exc_info()
~/env/lib/python3.5/site-packages/tornado/gen.py in run(self)
1131
1132 try:
-> 1133 value = future.result()
1134 except Exception:
1135 self.had_exception = True
/usr/lib/python3.5/asyncio/futures.py in result(self)
291 self._tb_logger = None
292 if self._exception is not None:
--> 293 raise self._exception
294 return self._result
295
~/env/lib/python3.5/site-packages/tornado/gen.py in wrapper(*args, **kwargs)
324 try:
325 orig_stack_contexts = stack_context._state.contexts
--> 326 yielded = next(result)
327 if stack_context._state.contexts is not orig_stack_contexts:
328 yielded = _create_future()
~/env/lib/python3.5/site-packages/dask_ml/model_selection/_incremental.py in _fit(self, X, y, **fit_params)
522 @gen.coroutine
523 def _fit(self, X, y, **fit_params):
--> 524 X, y = self._check_array(X, y)
525
526 X_train, X_test, y_train, y_test = self._get_train_test_split(X, y)
~/env/lib/python3.5/site-packages/dask_ml/model_selection/_incremental.py in _check_array(self, X, y, **kwargs)
437 if isinstance(y, np.ndarray):
438 y = da.from_array(y, y.shape)
--> 439 X = check_array(X, **kwargs)
440 kwargs["ensure_2d"] = False
441 y = check_array(y, **kwargs)
~/env/lib/python3.5/site-packages/dask_ml/utils.py in check_array(array, *args, **kwargs)
149 elif isinstance(array, dd.DataFrame):
150 if not accept_dask_dataframe:
--> 151 raise TypeError("This estimator does not support dask dataframes.")
152 # TODO: sample?
153 return array
TypeError: This estimator does not support dask dataframes.
Also, I would like to know whether, when fitting data, we need to re-code string values in categorical columns as numerical data. For example, if a column has the categories a, b, c, do we have to re-code them as, say, 1, 2, 3? Furthermore, if we need to do so, how do we make sure that the test set is re-coded with the same mapping (for example, 1 for a and 2 for b)?
Thank you
Michael
Reactions are currently unavailable
Metadata
Metadata
Assignees
Labels
No labels