-
Notifications
You must be signed in to change notification settings - Fork 674
Closed
Labels
CI · Dask ⚡ (Issues related to the Dask engine) · P0 (Highest priority tasks requiring immediate fix) · Testing 📈 (Issues related to testing)
Description
Example failure: https://github.com/modin-project/modin/actions/runs/3322991945/jobs/5551188570
This workflow run passed before the commit was merged, but failed when I ran it just now. I think bokeh is a dependency of dask.
Stack trace:
RuntimeError Traceback (most recent call last)
Cell In [2], line 13
10 url_path = "https://modin-test.s3.us-west-1.amazonaws.com/yellow_tripdata_2015-01.csv"
11 urllib.request.urlretrieve(url_path, "taxi.csv")
---> 13 modin_df = pd.read_csv(s3_path,parse_dates=["tpep_pickup_datetime","tpep_dropoff_datetime"],quoting=3,nrows=1000)
File ~/work/modin/modin/modin/logging/logger_decorator.py:128, in enable_logging.<locals>.decorator.<locals>.run_and_log(*args, **kwargs)
113 """
114 Compute function with logging if Modin logging is enabled.
115
(...)
125 Any
126 """
127 if LogMode.get() == "disable":
--> 128 return obj(*args,**kwargs)
130 logger = get_logger()
131 logger_level = getattr(logger, log_level)
File ~/work/modin/modin/modin/_compat/pandas_api/latest/io.py:156, in read_csv(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, escapechar, comment, encoding, encoding_errors, dialect, error_bad_lines, warn_bad_lines, on_bad_lines, skipfooter, doublequote, delim_whitespace, low_memory, memory_map, float_precision, storage_options)
154 f_locals.pop("mangle_dupe_cols", None)
155 kwargs = {k: v for k, v in f_locals.items() if k in _pd_read_csv_signature}
--> 156 return _read(**kwargs)
File ~/work/modin/modin/modin/_compat/pandas_api/common/io.py:35, in _read(**kwargs)
22 def _read(**kwargs):
23 """
24 Read csv file from local disk.
25
(...)
33 modin.pandas.DataFrame
34 """
---> 35 Engine.subscribe(_update_engine)
36 from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher
38 squeeze = kwargs.pop("squeeze", False)
File ~/work/modin/modin/modin/config/pubsub.py:217, in Parameter.subscribe(cls, callback)
208 """
209 Add `callback` to the `_subs` list and then execute it.
210
(...)
214 Callable to execute.
215 """
216 cls._subs.append(callback)
--> 217 callback(cls)
File ~/work/modin/modin/modin/pandas/__init__.py:161, in _update_engine(publisher)
158 if _is_first_update.get("Dask", True):
159 from modin.core.execution.dask.common import initialize_dask
--> 161 initialize_dask()
162 elif publisher.get() == "Cloudray":
163 from modin.experimental.cloud import get_connection
File ~/work/modin/modin/modin/core/execution/dask/common/utils.py:47, in initialize_dask()
45 memory_limit = Memory.get()
46 worker_memory_limit = memory_limit // num_cpus if memory_limit else "auto"
---> 47 client = Client(n_workers=num_cpus,memory_limit=worker_memory_limit)
49 num_cpus = len(client.ncores())
50 NPartitions._put(num_cpus)
File /opt/hostedtoolcache/Python/3.8.14/x64/lib/python3.8/site-packages/distributed/client.py:982, in Client.__init__(self, address, loop, timeout, set_as_default, scheduler_file, security, asynchronous, name, heartbeat_interval, serializers, deserializers, extensions, direct_to_workers, connection_limit, **kwargs)
979 preload_argv = dask.config.get("distributed.client.preload-argv")
980 self.preloads = preloading.process_preloads(self, preload, preload_argv)
--> 982 self.start(timeout=timeout)
983 Client._instances.add(self)
985 from distributed.recreate_tasks import ReplayTaskClient
File /opt/hostedtoolcache/Python/3.8.14/x64/lib/python3.8/site-packages/distributed/client.py:1172, in Client.start(self, **kwargs)
1170 self._started = asyncio.ensure_future(self._start(**kwargs))
1171 else:
-> 1172 sync(self.loop,self._start,**kwargs)
File /opt/hostedtoolcache/Python/3.8.14/x64/lib/python3.8/site-packages/distributed/utils.py:406, in sync(loop, func, callback_timeout, *args, **kwargs)
404 if error:
405 typ, exc, tb = error
--> 406 raise exc.with_traceback(tb)
407 else:
408 return result
File /opt/hostedtoolcache/Python/3.8.14/x64/lib/python3.8/site-packages/distributed/utils.py:379, in sync.<locals>.f()
377 future = asyncio.wait_for(future, callback_timeout)
378 future = asyncio.ensure_future(future)
--> 379 result = yield future
380 except Exception:
381 error = sys.exc_info()
File /opt/hostedtoolcache/Python/3.8.14/x64/lib/python3.8/site-packages/tornado/gen.py:762, in Runner.run(self)
759 exc_info = None
761 try:
--> 762 value = future.result()
763 except Exception:
764 exc_info = sys.exc_info()
File /opt/hostedtoolcache/Python/3.8.14/x64/lib/python3.8/site-packages/distributed/client.py:1238, in Client._start(self, timeout, **kwargs)
1235 elif self._start_arg is None:
1236 from distributed.deploy import LocalCluster
-> 1238 self.cluster = await LocalCluster(
1239 loop=self.loop,
1240 asynchronous=self._asynchronous,
1241 **self._startup_kwargs,
1242 )
1243 address = self.cluster.scheduler_address
1245 self._gather_semaphore = asyncio.Semaphore(5)
File /opt/hostedtoolcache/Python/3.8.14/x64/lib/python3.8/site-packages/distributed/deploy/spec.py:398, in SpecCluster.__await__.<locals>._()
396 async def _():
397 if self.status == Status.created:
--> 398 await self._start()
399 await self.scheduler
400 await self._correct_state()
File /opt/hostedtoolcache/Python/3.8.14/x64/lib/python3.8/site-packages/distributed/deploy/spec.py:319, in SpecCluster._start(self)
317 self.status = Status.failed
318 await self._close()
--> 319 raise RuntimeError(f"Cluster failed to start: {e}") from e
RuntimeError: Cluster failed to start: No module named 'bokeh'
RuntimeError: Cluster failed to start: No module named 'bokeh'
============================== 4 failed in 58.87s ==============================
Reactions are currently unavailable
Metadata
Metadata
Assignees
Labels
CI · Dask ⚡ (Issues related to the Dask engine) · P0 (Highest priority tasks requiring immediate fix) · Testing 📈 (Issues related to testing)