-
Notifications
You must be signed in to change notification settings - Fork 4k
Description
I came across this when debugging #6780.
If pyarrow is installed, but cffi is not, this import
LightGBM/python-package/lightgbm/compat.py
Line 292 in e0c34e7
| from pyarrow.cffi import ffi as arrow_cffi |
raises, such that
pa_Table is a dummy class and the comparison isinstance(data, pa_Table) returns False, and so does _is_pyarrow_table. If fitting on a lgbm.Dataset(data=pyarrow.Table), we thus don't evaluate __init_from_pyarrow_table here: LightGBM/python-package/lightgbm/basic.py
Lines 2186 to 2188 in e0c34e7
| elif _is_pyarrow_table(data): | |
| self.__init_from_pyarrow_table(data, params_str, ref_dataset) | |
| feature_name = data.column_names |
but rather jump into the
try / except here: LightGBM/python-package/lightgbm/basic.py
Lines 2202 to 2206 in e0c34e7
| try: | |
| csr = scipy.sparse.csr_matrix(data) | |
| self.__init_from_csr(csr, params_str, ref_dataset) | |
| except BaseException as err: | |
| raise TypeError(f"Cannot initialize Dataset from {type(data).__name__}") from err |
This works, but I am not sure whether this is intended behaviour.
One approach to solve this would be to separate the pyarrow and cffi imports in compat.py. However, then, jumping into self.__init_from_pyarrow_table(data, params_str, ref_dataset) fails, as this requires cffi to be installed.
The underlying problem here is that both cffi and pyarrow need to be installed to "natively" fit from a pyarrow.Table. However, this is not communicated to the user. One approach would be to separate the imports as below, add an `and CFFI_INSTALLED` condition to the `elif _is_pyarrow_table(data):` line, and raise a warning if pyarrow is installed but cffi is not.
Details
"""pyarrow"""
# pyarrow is an optional dependency. When it can be imported, the real objects
# are bound to the ``pa_*`` / ``arrow_is_*`` names and PYARROW_INSTALLED is
# True. Otherwise the same names are bound to inert placeholders so that
# module-level references (e.g. ``isinstance(data, pa_Table)``) still resolve.
# This block deliberately does NOT touch ``pyarrow.cffi``: that import has its
# own guard so a missing ``cffi`` package does not hide an installed pyarrow.
try:
    import pyarrow.compute as pa_compute
    from pyarrow import Array as pa_Array
    from pyarrow import ChunkedArray as pa_ChunkedArray
    from pyarrow import Table as pa_Table
    from pyarrow import chunked_array as pa_chunked_array
    from pyarrow.types import is_boolean as arrow_is_boolean
    from pyarrow.types import is_floating as arrow_is_floating
    from pyarrow.types import is_integer as arrow_is_integer

    PYARROW_INSTALLED = True
except ImportError:
    PYARROW_INSTALLED = False

    class pa_Array:  # type: ignore
        """Dummy class for pa.Array."""

        def __init__(self, *args: Any, **kwargs: Any):
            pass

    class pa_ChunkedArray:  # type: ignore
        """Dummy class for pa.ChunkedArray."""

        def __init__(self, *args: Any, **kwargs: Any):
            pass

    class pa_Table:  # type: ignore
        """Dummy class for pa.Table."""

        def __init__(self, *args: Any, **kwargs: Any):
            pass

    class pa_compute:  # type: ignore
        """Dummy class for pyarrow.compute."""

        # Placeholders for the two attributes declared by the original stub.
        all = None
        equal = None

    # Function-like names have no meaningful stand-in; they are set to None.
    pa_chunked_array = None
    arrow_is_boolean = None
    arrow_is_integer = None
    arrow_is_floating = None
"""cffi"""
# Guarded separately from the pyarrow imports: ``pyarrow.cffi`` fails to import
# when pyarrow is installed but the ``cffi`` package is not. Because the import
# goes through pyarrow, CFFI_INSTALLED is also False when pyarrow itself is
# missing.
try:
    from pyarrow.cffi import ffi as arrow_cffi

    CFFI_INSTALLED = True
except ImportError:
    CFFI_INSTALLED = False

    class arrow_cffi:  # type: ignore
        """Dummy class for pyarrow.cffi.ffi."""

        # Placeholders for the attributes declared by the original stub.
        CData = None
        addressof = None
        cast = None
        new = None

        def __init__(self, *args: Any, **kwargs: Any):
            pass