-
Notifications
You must be signed in to change notification settings - Fork 243
Closed
Labels
bug: Something isn't working
Milestone
Description
Description of the problem
Running pygmt.info on a pandas.Series object with a pd.NA value (that uses pandas' nullable integer datatype) raises an error like ValueError: Converting an integer to a NumPy datetime requires a specified unit. This is caused by the _check_dtype_and_dim function here
Lines 789 to 844 in 7aac7fd
| def _check_dtype_and_dim(self, array, ndim): | |
| """ | |
| Check that a numpy array has the given number of dimensions and is a | |
| valid data type. | |
| Parameters | |
| ---------- | |
| array : numpy.ndarray | |
| The array to be tested. | |
| ndim : int | |
| The desired number of array dimensions. | |
| Returns | |
| ------- | |
| gmt_type : int | |
| The GMT constant value representing this data type. | |
| Raises | |
| ------ | |
| GMTInvalidInput | |
| If the array has the wrong number of dimensions or | |
| is an unsupported data type. | |
| Examples | |
| -------- | |
| >>> import numpy as np | |
| >>> data = np.array([1, 2, 3], dtype="float64") | |
| >>> with Session() as ses: | |
| ... gmttype = ses._check_dtype_and_dim(data, ndim=1) | |
| ... gmttype == ses["GMT_DOUBLE"] | |
| ... | |
| True | |
| >>> data = np.ones((5, 2), dtype="float32") | |
| >>> with Session() as ses: | |
| ... gmttype = ses._check_dtype_and_dim(data, ndim=2) | |
| ... gmttype == ses["GMT_FLOAT"] | |
| ... | |
| True | |
| """ | |
| # Check that the array has the given number of dimensions | |
| if array.ndim != ndim: | |
| raise GMTInvalidInput( | |
| f"Expected a numpy {ndim}-D array, got {array.ndim}-D." | |
| ) | |
| # Check that the array has a valid/known data type | |
| if array.dtype.type not in DTYPES: | |
| try: | |
| # Try to convert any unknown numpy data types to np.datetime64 | |
| array = array_to_datetime(array) | |
| except ValueError as e: | |
| raise GMTInvalidInput( | |
| f"Unsupported numpy data type '{array.dtype.type}'." | |
| ) from e | |
| return self[DTYPES[array.dtype.type]] |
Maybe we need to update the dictionary here
Lines 75 to 88 in 7aac7fd
| DTYPES = { | |
| np.int8: "GMT_CHAR", | |
| np.int16: "GMT_SHORT", | |
| np.int32: "GMT_INT", | |
| np.int64: "GMT_LONG", | |
| np.uint8: "GMT_UCHAR", | |
| np.uint16: "GMT_USHORT", | |
| np.uint32: "GMT_UINT", | |
| np.uint64: "GMT_ULONG", | |
| np.float32: "GMT_FLOAT", | |
| np.float64: "GMT_DOUBLE", | |
| np.str_: "GMT_TEXT", | |
| np.datetime64: "GMT_DATETIME", | |
| } |
to include pd.Int32Dtype() and pd.Int64Dtype()? But what would we map these nullable dtypes to in GMT?
Minimal Complete Verifiable Example
import pandas as pd
import pygmt
series = pd.Series(data=[0, 4, pd.NA, 8, 6], dtype=pd.Int32Dtype())
output = pygmt.info(data=series)
Full error message
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
File ~/Documents/pygmt/pygmt/clib/session.py:839, in Session._check_dtype_and_dim(self, array, ndim)
837 try:
838 # Try to convert any unknown numpy data types to np.datetime64
--> 839 array = array_to_datetime(array)
840 except ValueError as e:
File ~/Documents/pygmt/pygmt/clib/conversion.py:327, in array_to_datetime(array)
253 """
254 Convert a 1-D datetime array from various types into numpy.datetime64.
255
(...)
325 '2018-01-01T00:00:00.000000'], dtype='datetime64[us]')
326 """
--> 327 return np.asarray(array, dtype=np.datetime64)
ValueError: Converting an integer to a NumPy datetime requires a specified unit
The above exception was the direct cause of the following exception:
GMTInvalidInput Traceback (most recent call last)
Cell In[4], line 1
----> 1 output = pygmt.info(data=series)
File ~/Documents/pygmt/pygmt/helpers/decorators.py:600, in use_alias.<locals>.alias_decorator.<locals>.new_module(*args, **kwargs)
593 msg = (
594 "Parameters 'Y' and 'yshift' are deprecated since v0.8.0. "
595 "and will be removed in v0.12.0. "
596 "Use Figure.shift_origin(yshift=...) instead."
597 )
598 warnings.warn(msg, category=SyntaxWarning, stacklevel=2)
--> 600 return module_func(*args, **kwargs)
File ~/Documents/pygmt/pygmt/helpers/decorators.py:740, in kwargs_to_strings.<locals>.converter.<locals>.new_module(*args, **kwargs)
738 kwargs[arg] = separators[fmt].join(f"{item}" for item in value)
739 # Execute the original function and return its output
--> 740 return module_func(*args, **kwargs)
File ~/Documents/pygmt/pygmt/src/info.py:85, in info(data, **kwargs)
83 file_context = lib.virtualfile_from_data(check_kind="vector", data=data)
84 with GMTTempFile() as tmpfile:
---> 85 with file_context as fname:
86 lib.call_module(
87 module="info",
88 args=build_arg_string(kwargs, infile=fname, outfile=tmpfile.name),
89 )
90 result = tmpfile.read()
File ~/mambaforge/envs/pygmt/lib/python3.12/contextlib.py:137, in _GeneratorContextManager.__enter__(self)
135 del self.args, self.kwds, self.func
136 try:
--> 137 return next(self.gen)
138 except StopIteration:
139 raise RuntimeError("generator didn't yield") from None
File ~/Documents/pygmt/pygmt/clib/session.py:1276, in Session.virtualfile_from_vectors(self, *vectors)
1274 # Use put_vector for columns with numerical type data
1275 for col, array in enumerate(arrays[:columns]):
-> 1276 self.put_vector(dataset, column=col, vector=array)
1278 # Use put_strings for last column(s) with string type data
1279 # Have to use modifier "GMT_IS_DUPLICATE" to duplicate the strings
1280 string_arrays = arrays[columns:]
File ~/Documents/pygmt/pygmt/clib/session.py:888, in Session.put_vector(self, dataset, column, vector)
847 r"""
848 Attach a numpy 1-D array as a column on a GMT dataset.
849
(...)
880 status != 0.
881 """
882 c_put_vector = self.get_libgmt_func(
883 "GMT_Put_Vector",
884 argtypes=[ctp.c_void_p, ctp.c_void_p, ctp.c_uint, ctp.c_uint, ctp.c_void_p],
885 restype=ctp.c_int,
886 )
--> 888 gmt_type = self._check_dtype_and_dim(vector, ndim=1)
889 if gmt_type in (self["GMT_TEXT"], self["GMT_DATETIME"]):
890 vector_pointer = (ctp.c_char_p * len(vector))()
File ~/Documents/pygmt/pygmt/clib/session.py:841, in Session._check_dtype_and_dim(self, array, ndim)
839 array = array_to_datetime(array)
840 except ValueError as e:
--> 841 raise GMTInvalidInput(
842 f"Unsupported numpy data type '{array.dtype.type}'."
843 ) from e
844 return self[DTYPES[array.dtype.type]]
GMTInvalidInput: Unsupported numpy data type '<class 'numpy.object_'>'.
System information
PyGMT information:
version: v0.10.1.dev114+g88ce36d61.d20231203
System information:
python: 3.12.0 | packaged by conda-forge | (main, Oct 3 2023, 08:43:22) [GCC 12.3.0]
executable: /home/user/mambaforge/envs/pygmt/bin/python
machine: Linux-6.5.0-4-amd64-x86_64-with-glibc2.37
Dependency information:
numpy: 1.26.2
pandas: 2.1.3
xarray: 2023.11.0
netCDF4: 1.6.5
packaging: 23.2
contextily: 1.4.0
geopandas: 0.14.1
ipython: None
rioxarray: 0.15.0
ghostscript: 9.54.0
GMT library information:
binary version: 6.4.0
cores: 16
grid layout: rows
image layout:
library path: /home/user/mambaforge/envs/pygmt/lib/libgmt.so
padding: 2
plugin dir: /home/user/mambaforge/envs/pygmt/lib/gmt/plugins
share dir: /home/user/mambaforge/envs/pygmt/share/gmt
version: 6.4.0
Reactions are currently unavailable
Metadata
Metadata
Assignees
Labels
bug: Something isn't working