Updated script that can be controlled by a Node.js web app
This commit is contained in:
@@ -0,0 +1,85 @@
|
||||
from pandas.core.internals.api import make_block # 2023-09-18 pyarrow uses this
|
||||
from pandas.core.internals.array_manager import (
|
||||
ArrayManager,
|
||||
SingleArrayManager,
|
||||
)
|
||||
from pandas.core.internals.base import (
|
||||
DataManager,
|
||||
SingleDataManager,
|
||||
)
|
||||
from pandas.core.internals.concat import concatenate_managers
|
||||
from pandas.core.internals.managers import (
|
||||
BlockManager,
|
||||
SingleBlockManager,
|
||||
)
|
||||
|
||||
# Public names of the package.  "Block", "DatetimeTZBlock" and
# "ExtensionBlock" are not imported at module import time; they are resolved
# lazily by the module-level ``__getattr__`` below, hence the pylint disables.
__all__ = [
    "Block",  # pylint: disable=undefined-all-variable
    "DatetimeTZBlock",  # pylint: disable=undefined-all-variable
    "ExtensionBlock",  # pylint: disable=undefined-all-variable
    "make_block",
    "DataManager",
    "ArrayManager",
    "BlockManager",
    "SingleDataManager",
    "SingleBlockManager",
    "SingleArrayManager",
    "concatenate_managers",
]
|
||||
|
||||
|
||||
def __getattr__(name: str):
    """
    Lazily resolve deprecated attributes of ``pandas.core.internals``.

    ``create_block_manager_from_blocks`` and the Block classes listed below
    are still reachable, but every access emits a DeprecationWarning.  Any
    other name raises AttributeError.
    """
    # GH#55139
    import warnings

    deprecated_blocks = (
        "NumericBlock",
        "ObjectBlock",
        "Block",
        "ExtensionBlock",
        "DatetimeTZBlock",
    )
    wants_manager_ctor = name == "create_block_manager_from_blocks"

    if not wants_manager_ctor and name not in deprecated_blocks:
        raise AttributeError(
            f"module 'pandas.core.internals' has no attribute '{name}'"
        )

    # GH#33892
    warnings.warn(
        f"{name} is deprecated and will be removed in a future version. "
        "Use public APIs instead.",
        DeprecationWarning,
        # https://github.com/pandas-dev/pandas/pull/55139#pullrequestreview-1720690758
        # on hard-coding stacklevel
        stacklevel=2,
    )

    if wants_manager_ctor:
        from pandas.core.internals.managers import create_block_manager_from_blocks

        return create_block_manager_from_blocks

    # All remaining deprecated names are classes defined in the blocks module.
    import pandas.core.internals.blocks as blocks_module

    return getattr(blocks_module, name)
|
||||
BIN
Binary file not shown.
Binary file not shown.
BIN
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,156 @@
|
||||
"""
|
||||
This is a pseudo-public API for downstream libraries. We ask that downstream
|
||||
authors
|
||||
|
||||
1) Try to avoid using internals directly altogether, and failing that,
|
||||
2) Use only functions exposed here (or in core.internals)
|
||||
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
import numpy as np
|
||||
|
||||
from pandas._libs.internals import BlockPlacement
|
||||
|
||||
from pandas.core.dtypes.common import pandas_dtype
|
||||
from pandas.core.dtypes.dtypes import (
|
||||
DatetimeTZDtype,
|
||||
PeriodDtype,
|
||||
)
|
||||
|
||||
from pandas.core.arrays import DatetimeArray
|
||||
from pandas.core.construction import extract_array
|
||||
from pandas.core.internals.blocks import (
|
||||
check_ndim,
|
||||
ensure_block_shape,
|
||||
extract_pandas_array,
|
||||
get_block_type,
|
||||
maybe_coerce_values,
|
||||
)
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from pandas._typing import Dtype
|
||||
|
||||
from pandas.core.internals.blocks import Block
|
||||
|
||||
|
||||
def make_block(
    values, placement, klass=None, ndim=None, dtype: Dtype | None = None
) -> Block:
    """
    Construct a Block; the pseudo-public counterpart of blocks.new_block.

    Downstream libraries are asked to call this instead of any
    fully-internal API, including but not limited to:

    - core.internals.blocks.make_block
    - Block.make_block
    - Block.make_block_same_class
    - Block.__init__
    """
    if dtype is not None:
        dtype = pandas_dtype(dtype)

    values, dtype = extract_pandas_array(values, dtype, ndim)

    from pandas.core.internals.blocks import (
        DatetimeTZBlock,
        ExtensionBlock,
    )

    if klass is ExtensionBlock and isinstance(values.dtype, PeriodDtype):
        # GH-44681 changed PeriodArray to be stored in the 2D
        # NDArrayBackedExtensionBlock instead of ExtensionBlock
        # -> still allow ExtensionBlock to be passed in this case for back compat
        klass = None

    if klass is None:
        # No class requested: pick it from the explicit dtype, falling back
        # to the dtype carried by the values.
        klass = get_block_type(dtype or values.dtype)

    elif klass is DatetimeTZBlock and not isinstance(values.dtype, DatetimeTZDtype):
        # pyarrow calls get here
        # error: Argument "dtype" to "_simple_new" of "DatetimeArray" has
        # incompatible type "Union[ExtensionDtype, dtype[Any], None]";
        # expected "Union[dtype[datetime64], DatetimeTZDtype]"
        values = DatetimeArray._simple_new(
            values,
            dtype=dtype,  # type: ignore[arg-type]
        )

    block_placement = (
        placement
        if isinstance(placement, BlockPlacement)
        else BlockPlacement(placement)
    )

    ndim = maybe_infer_ndim(values, block_placement, ndim)
    if isinstance(values.dtype, (PeriodDtype, DatetimeTZDtype)):
        # GH#41168 ensure we can pass 1D dt64tz values
        # More generally, any EA dtype that isn't is_1d_only_ea_dtype
        unwrapped = extract_array(values, extract_numpy=True)
        values = ensure_block_shape(unwrapped, ndim)

    check_ndim(values, block_placement, ndim)
    coerced = maybe_coerce_values(values)
    return klass(coerced, ndim=ndim, placement=block_placement)
|
||||
|
||||
|
||||
def maybe_infer_ndim(values, placement: BlockPlacement, ndim: int | None) -> int:
    """
    Return ``ndim`` unchanged when given, otherwise infer it.

    For values whose dtype is a numpy dtype the array's own ``ndim`` is used.
    For any other dtype the result is 2 when ``placement`` has exactly one
    entry and 1 otherwise.
    """
    if ndim is not None:
        return ndim

    # GH#38134 Block constructor now assumes ndim is not None
    if isinstance(values.dtype, np.dtype):
        return values.ndim

    return 2 if len(placement) == 1 else 1
|
||||
|
||||
|
||||
def __getattr__(name: str):
    """
    Lazily resolve deprecated attributes of ``pandas.core.internals.api``.

    The handled names still resolve, but each access emits a
    DeprecationWarning; every other name raises AttributeError.
    """
    # GH#55139
    import warnings

    deprecated_blocks = ("Block", "ExtensionBlock", "DatetimeTZBlock")
    wants_manager_ctor = name == "create_block_manager_from_blocks"

    if not wants_manager_ctor and name not in deprecated_blocks:
        raise AttributeError(
            f"module 'pandas.core.internals.api' has no attribute '{name}'"
        )

    # GH#33892
    warnings.warn(
        f"{name} is deprecated and will be removed in a future version. "
        "Use public APIs instead.",
        DeprecationWarning,
        # https://github.com/pandas-dev/pandas/pull/55139#pullrequestreview-1720690758
        # on hard-coding stacklevel
        stacklevel=2,
    )

    if wants_manager_ctor:
        from pandas.core.internals.managers import create_block_manager_from_blocks

        return create_block_manager_from_blocks

    # The remaining deprecated names are classes defined in the blocks module.
    import pandas.core.internals.blocks as blocks_module

    return getattr(blocks_module, name)
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,407 @@
|
||||
"""
|
||||
Base class for the internal managers. Both BlockManager and ArrayManager
|
||||
inherit from this class.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import (
|
||||
TYPE_CHECKING,
|
||||
Any,
|
||||
Literal,
|
||||
cast,
|
||||
final,
|
||||
)
|
||||
|
||||
import numpy as np
|
||||
|
||||
from pandas._config import (
|
||||
using_copy_on_write,
|
||||
warn_copy_on_write,
|
||||
)
|
||||
|
||||
from pandas._libs import (
|
||||
algos as libalgos,
|
||||
lib,
|
||||
)
|
||||
from pandas.errors import AbstractMethodError
|
||||
from pandas.util._validators import validate_bool_kwarg
|
||||
|
||||
from pandas.core.dtypes.cast import (
|
||||
find_common_type,
|
||||
np_can_hold_element,
|
||||
)
|
||||
from pandas.core.dtypes.dtypes import (
|
||||
ExtensionDtype,
|
||||
SparseDtype,
|
||||
)
|
||||
|
||||
from pandas.core.base import PandasObject
|
||||
from pandas.core.construction import extract_array
|
||||
from pandas.core.indexes.api import (
|
||||
Index,
|
||||
default_index,
|
||||
)
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from pandas._typing import (
|
||||
ArrayLike,
|
||||
AxisInt,
|
||||
DtypeObj,
|
||||
Self,
|
||||
Shape,
|
||||
)
|
||||
|
||||
|
||||
class _AlreadyWarned:
    """
    Mutable flag passed from the manager level down to the block level so
    that a warning is emitted only once.

    A block method can flip ``warned_already`` without having to return a
    value, which keeps the block interface consistent.  This is only a
    temporary solution for CoW warnings.
    """

    def __init__(self):
        self.warned_already = False
|
||||
|
||||
|
||||
class DataManager(PandasObject):
    """
    Shared base for the internal data managers.

    ``items``, ``reindex_indexer``, ``_equal_values``, ``apply`` and
    ``apply_with_block`` raise AbstractMethodError here and must be supplied
    by subclasses.  Most other methods dispatch a named operation through
    ``apply`` / ``apply_with_block``.
    """

    # TODO share more methods/attributes

    axes: list[Index]

    @property
    def items(self) -> Index:
        raise AbstractMethodError(self)

    @final
    def __len__(self) -> int:
        return len(self.items)

    @property
    def ndim(self) -> int:
        # One dimension per axis.
        return len(self.axes)

    @property
    def shape(self) -> Shape:
        return tuple(len(ax) for ax in self.axes)

    @final
    def _validate_set_axis(self, axis: AxisInt, new_labels: Index) -> None:
        """
        Raise ValueError if ``new_labels`` has a different length than the
        current axis, except for axis 1 when ``items`` is empty.
        """
        # Caller is responsible for ensuring we have an Index object.
        old_len = len(self.axes[axis])
        new_len = len(new_labels)

        if axis == 1 and len(self.items) == 0:
            # If we are setting the index on a DataFrame with no columns,
            # it is OK to change the length.
            pass

        elif new_len != old_len:
            raise ValueError(
                f"Length mismatch: Expected axis has {old_len} elements, new "
                f"values have {new_len} elements"
            )

    def reindex_indexer(
        self,
        new_axis,
        indexer,
        axis: AxisInt,
        fill_value=None,
        allow_dups: bool = False,
        copy: bool = True,
        only_slice: bool = False,
    ) -> Self:
        raise AbstractMethodError(self)

    @final
    def reindex_axis(
        self,
        new_index: Index,
        axis: AxisInt,
        fill_value=None,
        only_slice: bool = False,
    ) -> Self:
        """
        Conform data manager to new index.
        """
        # Index.reindex gives back the (possibly converted) target index and
        # the indexer; the actual work is delegated to reindex_indexer.
        new_index, indexer = self.axes[axis].reindex(new_index)

        return self.reindex_indexer(
            new_index,
            indexer,
            axis=axis,
            fill_value=fill_value,
            copy=False,
            only_slice=only_slice,
        )

    def _equal_values(self, other: Self) -> bool:
        """
        To be implemented by the subclasses. Only check the column values
        assuming shape and indexes have already been checked.
        """
        raise AbstractMethodError(self)

    @final
    def equals(self, other: object) -> bool:
        """
        Implementation for DataFrame.equals
        """
        if not isinstance(other, type(self)):
            return False

        # Compare the axes first; values are only compared when they match.
        self_axes, other_axes = self.axes, other.axes
        if len(self_axes) != len(other_axes):
            return False
        if not all(ax1.equals(ax2) for ax1, ax2 in zip(self_axes, other_axes)):
            return False

        return self._equal_values(other)

    def apply(
        self,
        f,
        align_keys: list[str] | None = None,
        **kwargs,
    ) -> Self:
        raise AbstractMethodError(self)

    def apply_with_block(
        self,
        f,
        align_keys: list[str] | None = None,
        **kwargs,
    ) -> Self:
        raise AbstractMethodError(self)

    @final
    def isna(self, func) -> Self:
        # Dispatches the operation named "apply" with ``func`` as keyword.
        return self.apply("apply", func=func)

    @final
    def fillna(self, value, limit: int | None, inplace: bool, downcast) -> Self:
        if limit is not None:
            # Do this validation even if we go through one of the no-op paths
            limit = libalgos.validate_limit(None, limit=limit)

        return self.apply_with_block(
            "fillna",
            value=value,
            limit=limit,
            inplace=inplace,
            downcast=downcast,
            using_cow=using_copy_on_write(),
            already_warned=_AlreadyWarned(),
        )

    @final
    def where(self, other, cond, align: bool) -> Self:
        if align:
            align_keys = ["other", "cond"]
        else:
            # ``other`` is not aligned, so it is unwrapped to its array here.
            align_keys = ["cond"]
            other = extract_array(other, extract_numpy=True)

        return self.apply_with_block(
            "where",
            align_keys=align_keys,
            other=other,
            cond=cond,
            using_cow=using_copy_on_write(),
        )

    @final
    def putmask(self, mask, new, align: bool = True, warn: bool = True) -> Self:
        if align:
            align_keys = ["new", "mask"]
        else:
            # ``new`` is not aligned, so it is unwrapped to its array here.
            align_keys = ["mask"]
            new = extract_array(new, extract_numpy=True)

        # The warned-once tracker is only created when warn_copy_on_write()
        # is truthy; with ``warn=False`` it starts out already marked as
        # warned.
        already_warned = None
        if warn_copy_on_write():
            already_warned = _AlreadyWarned()
            if not warn:
                already_warned.warned_already = True

        return self.apply_with_block(
            "putmask",
            align_keys=align_keys,
            mask=mask,
            new=new,
            using_cow=using_copy_on_write(),
            already_warned=already_warned,
        )

    @final
    def round(self, decimals: int, using_cow: bool = False) -> Self:
        return self.apply_with_block(
            "round",
            decimals=decimals,
            using_cow=using_cow,
        )

    @final
    def replace(self, to_replace, value, inplace: bool) -> Self:
        inplace = validate_bool_kwarg(inplace, "inplace")
        # NDFrame.replace ensures the not-is_list_likes here
        assert not lib.is_list_like(to_replace)
        assert not lib.is_list_like(value)
        return self.apply_with_block(
            "replace",
            to_replace=to_replace,
            value=value,
            inplace=inplace,
            using_cow=using_copy_on_write(),
            already_warned=_AlreadyWarned(),
        )

    @final
    def replace_regex(self, **kwargs) -> Self:
        return self.apply_with_block(
            "_replace_regex",
            **kwargs,
            using_cow=using_copy_on_write(),
            already_warned=_AlreadyWarned(),
        )

    @final
    def replace_list(
        self,
        src_list: list[Any],
        dest_list: list[Any],
        inplace: bool = False,
        regex: bool = False,
    ) -> Self:
        """do a list replace"""
        inplace = validate_bool_kwarg(inplace, "inplace")

        bm = self.apply_with_block(
            "replace_list",
            src_list=src_list,
            dest_list=dest_list,
            inplace=inplace,
            regex=regex,
            using_cow=using_copy_on_write(),
            already_warned=_AlreadyWarned(),
        )
        # The resulting manager is consolidated in place before it is returned.
        bm._consolidate_inplace()
        return bm

    def interpolate(self, inplace: bool, **kwargs) -> Self:
        return self.apply_with_block(
            "interpolate",
            inplace=inplace,
            **kwargs,
            using_cow=using_copy_on_write(),
            already_warned=_AlreadyWarned(),
        )

    def pad_or_backfill(self, inplace: bool, **kwargs) -> Self:
        return self.apply_with_block(
            "pad_or_backfill",
            inplace=inplace,
            **kwargs,
            using_cow=using_copy_on_write(),
            already_warned=_AlreadyWarned(),
        )

    def shift(self, periods: int, fill_value) -> Self:
        # The ``lib.no_default`` sentinel is translated to None before
        # dispatching.
        if fill_value is lib.no_default:
            fill_value = None

        return self.apply_with_block("shift", periods=periods, fill_value=fill_value)

    # --------------------------------------------------------------------
    # Consolidation: No-ops for all but BlockManager

    def is_consolidated(self) -> bool:
        return True

    def consolidate(self) -> Self:
        return self

    def _consolidate_inplace(self) -> None:
        return
|
||||
|
||||
|
||||
class SingleDataManager(DataManager):
    """Data manager with exactly one dimension."""

    @property
    def ndim(self) -> Literal[1]:
        return 1

    @final
    @property
    def array(self) -> ArrayLike:
        """
        Quick access to the backing array of the Block or SingleArrayManager.
        """
        # error: "SingleDataManager" has no attribute "arrays"; maybe "array"
        return self.arrays[0]  # type: ignore[attr-defined]

    def setitem_inplace(self, indexer, value, warn: bool = True) -> None:
        """
        Assign ``value`` at ``indexer`` directly on the backing array.

        For Single[Block/Array]Manager, this backs s[indexer] = value

        Unlike `setitem()`, the manager/values are mutated in place: no new
        Manager (and Block) is returned, and thus the dtype never changes.
        """
        target = self.array

        # EAs will do this validation in their own __setitem__ methods.
        if isinstance(target, np.ndarray):
            # Note: checking for ndarray instead of np.dtype means we exclude
            # dt64/td64, which do their own validation.
            value = np_can_hold_element(target.dtype, value)

        is_length_one_vector = (
            isinstance(value, np.ndarray) and value.ndim == 1 and len(value) == 1
        )
        if is_length_one_vector:
            # NumPy 1.25 deprecation: https://github.com/numpy/numpy/pull/10615
            value = value[0, ...]

        target[indexer] = value

    def grouped_reduce(self, func):
        """Apply ``func`` to the backing array and wrap the result in a new
        manager of the same type with a default index."""
        reduced = func(self.array)
        return type(self).from_array(reduced, default_index(len(reduced)))

    @classmethod
    def from_array(cls, arr: ArrayLike, index: Index):
        raise AbstractMethodError(cls)
|
||||
|
||||
|
||||
def interleaved_dtype(dtypes: list[DtypeObj]) -> DtypeObj | None:
    """
    Find the common dtype for the given `dtypes`.

    Parameters
    ----------
    dtypes : List[DtypeObj]

    Returns
    -------
    dtype : np.dtype, ExtensionDtype, or None
        None is returned when `dtypes` is empty.
    """
    return find_common_type(dtypes) if len(dtypes) else None
|
||||
|
||||
|
||||
def ensure_np_dtype(dtype: DtypeObj) -> np.dtype:
|
||||
# TODO: https://github.com/pandas-dev/pandas/issues/22791
|
||||
# Give EAs some input on what happens here. Sparse needs this.
|
||||
if isinstance(dtype, SparseDtype):
|
||||
dtype = dtype.subtype
|
||||
dtype = cast(np.dtype, dtype)
|
||||
elif isinstance(dtype, ExtensionDtype):
|
||||
dtype = np.dtype("object")
|
||||
elif dtype == np.dtype(str):
|
||||
dtype = np.dtype("object")
|
||||
return dtype
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,598 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import (
|
||||
TYPE_CHECKING,
|
||||
cast,
|
||||
)
|
||||
import warnings
|
||||
|
||||
import numpy as np
|
||||
|
||||
from pandas._libs import (
|
||||
NaT,
|
||||
algos as libalgos,
|
||||
internals as libinternals,
|
||||
lib,
|
||||
)
|
||||
from pandas._libs.missing import NA
|
||||
from pandas.util._decorators import cache_readonly
|
||||
from pandas.util._exceptions import find_stack_level
|
||||
|
||||
from pandas.core.dtypes.cast import (
|
||||
ensure_dtype_can_hold_na,
|
||||
find_common_type,
|
||||
)
|
||||
from pandas.core.dtypes.common import (
|
||||
is_1d_only_ea_dtype,
|
||||
is_scalar,
|
||||
needs_i8_conversion,
|
||||
)
|
||||
from pandas.core.dtypes.concat import concat_compat
|
||||
from pandas.core.dtypes.dtypes import (
|
||||
ExtensionDtype,
|
||||
SparseDtype,
|
||||
)
|
||||
from pandas.core.dtypes.missing import (
|
||||
is_valid_na_for_dtype,
|
||||
isna,
|
||||
isna_all,
|
||||
)
|
||||
|
||||
from pandas.core.construction import ensure_wrapped_if_datetimelike
|
||||
from pandas.core.internals.array_manager import ArrayManager
|
||||
from pandas.core.internals.blocks import (
|
||||
ensure_block_shape,
|
||||
new_block_2d,
|
||||
)
|
||||
from pandas.core.internals.managers import (
|
||||
BlockManager,
|
||||
make_na_array,
|
||||
)
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from collections.abc import Sequence
|
||||
|
||||
from pandas._typing import (
|
||||
ArrayLike,
|
||||
AxisInt,
|
||||
DtypeObj,
|
||||
Manager2D,
|
||||
Shape,
|
||||
)
|
||||
|
||||
from pandas import Index
|
||||
from pandas.core.internals.blocks import (
|
||||
Block,
|
||||
BlockPlacement,
|
||||
)
|
||||
|
||||
|
||||
def _concatenate_array_managers(
    mgrs: list[ArrayManager], axes: list[Index], concat_axis: AxisInt
) -> Manager2D:
    """
    Concatenate array managers into one.

    Parameters
    ----------
    mgrs : list of ArrayManager
    axes : list of Index
    concat_axis : int

    Returns
    -------
    ArrayManager
    """
    first = mgrs[0]
    if concat_axis == 1:
        return first.concat_vertical(mgrs, axes)

    # concatting along the columns -> combine reindexed arrays in a single manager
    assert concat_axis == 0
    return first.concat_horizontal(mgrs, axes)
|
||||
|
||||
|
||||
def concatenate_managers(
    mgrs_indexers, axes: list[Index], concat_axis: AxisInt, copy: bool
) -> Manager2D:
    """
    Concatenate block managers into one.

    Parameters
    ----------
    mgrs_indexers : list of (BlockManager, {axis: indexer,...}) tuples
    axes : list of Index
    concat_axis : int
    copy : bool

    Returns
    -------
    BlockManager
    """

    # A copy is only requested from the reindexing helper when concatenating
    # along axis 0.
    needs_copy = copy and concat_axis == 0

    # TODO(ArrayManager) this assumes that all managers are of the same type
    if isinstance(mgrs_indexers[0][0], ArrayManager):
        mgrs = _maybe_reindex_columns_na_proxy(axes, mgrs_indexers, needs_copy)
        # error: Argument 1 to "_concatenate_array_managers" has incompatible
        # type "List[BlockManager]"; expected "List[Union[ArrayManager,
        # SingleArrayManager, BlockManager, SingleBlockManager]]"
        return _concatenate_array_managers(
            mgrs, axes, concat_axis  # type: ignore[arg-type]
        )

    # Assertions disabled for performance
    # for tup in mgrs_indexers:
    #    # caller is responsible for ensuring this
    #    indexers = tup[1]
    #    assert concat_axis not in indexers

    if concat_axis == 0:
        mgrs = _maybe_reindex_columns_na_proxy(axes, mgrs_indexers, needs_copy)
        return mgrs[0].concat_horizontal(mgrs, axes)

    # Fast path: more than one manager, each holding a single block of the
    # same float64/float32 dtype (see _is_homogeneous_mgr).
    if len(mgrs_indexers) > 0 and mgrs_indexers[0][0].nblocks > 0:
        first_dtype = mgrs_indexers[0][0].blocks[0].dtype
        if first_dtype in [np.float64, np.float32]:
            # TODO: support more dtypes here.  This will be simpler once
            # JoinUnit.is_na behavior is deprecated.
            if (
                all(_is_homogeneous_mgr(mgr, first_dtype) for mgr, _ in mgrs_indexers)
                and len(mgrs_indexers) > 1
            ):
                # Fastpath!
                # Length restriction is just to avoid having to worry about 'copy'
                shape = tuple(len(x) for x in axes)
                nb = _concat_homogeneous_fastpath(mgrs_indexers, shape, first_dtype)
                return BlockManager((nb,), axes)

    mgrs = _maybe_reindex_columns_na_proxy(axes, mgrs_indexers, needs_copy)

    if len(mgrs) == 1:
        # Single manager: return a shallow copy carrying the requested axes.
        mgr = mgrs[0]
        out = mgr.copy(deep=False)
        out.axes = axes
        return out

    concat_plan = _get_combined_plan(mgrs)

    blocks = []
    values: ArrayLike

    # Build one output block per (placement, join units) entry of the plan.
    for placement, join_units in concat_plan:
        unit = join_units[0]
        blk = unit.block

        if _is_uniform_join_units(join_units):
            vals = [ju.block.values for ju in join_units]

            if not blk.is_extension:
                # _is_uniform_join_units ensures a single dtype, so
                # we can use np.concatenate, which is more performant
                # than concat_compat
                # error: Argument 1 to "concatenate" has incompatible type
                # "List[Union[ndarray[Any, Any], ExtensionArray]]";
                # expected "Union[_SupportsArray[dtype[Any]],
                # _NestedSequence[_SupportsArray[dtype[Any]]]]"
                values = np.concatenate(vals, axis=1)  # type: ignore[arg-type]
            elif is_1d_only_ea_dtype(blk.dtype):
                # TODO(EA2D): special-casing not needed with 2D EAs
                values = concat_compat(vals, axis=0, ea_compat_axis=True)
                values = ensure_block_shape(values, ndim=2)
            else:
                values = concat_compat(vals, axis=1)

            values = ensure_wrapped_if_datetimelike(values)

            # The first block's class is reused only if the dtype is unchanged.
            fastpath = blk.values.dtype == values.dtype
        else:
            values = _concatenate_join_units(join_units, copy=copy)
            fastpath = False

        if fastpath:
            b = blk.make_block_same_class(values, placement=placement)
        else:
            b = new_block_2d(values, placement=placement)

        blocks.append(b)

    return BlockManager(tuple(blocks), axes)
|
||||
|
||||
|
||||
def _maybe_reindex_columns_na_proxy(
    axes: list[Index],
    mgrs_indexers: list[tuple[BlockManager, dict[int, np.ndarray]]],
    needs_copy: bool,
) -> list[BlockManager]:
    """
    Reindex along columns so that all of the BlockManagers being concatenated
    have matching columns.

    Columns added in this reindexing have dtype=np.void, indicating they
    should be ignored when choosing a column's final dtype.

    Parameters
    ----------
    axes : list of Index
        Target axes; ``axes[i]`` is the new axis used for indexer key ``i``.
    mgrs_indexers : list of (BlockManager, {axis: indexer,...}) tuples
    needs_copy : bool
        If True, managers that have no indexers (and so were not reindexed)
        are copied.

    Returns
    -------
    list of BlockManager
        One manager per input pair, in input order.
    """
    new_mgrs = []

    for mgr, indexers in mgrs_indexers:
        # For axis=0 (i.e. columns) we use_na_proxy and only_slice, so this
        # is a cheap reindexing.
        for i, indexer in indexers.items():
            # Use the indexer bound by the loop instead of looking it up again.
            mgr = mgr.reindex_indexer(
                axes[i],
                indexer,
                axis=i,
                copy=False,
                only_slice=True,  # only relevant for i==0
                allow_dups=True,
                use_na_proxy=True,  # only relevant for i==0
            )
        if needs_copy and not indexers:
            mgr = mgr.copy()

        new_mgrs.append(mgr)
    return new_mgrs
|
||||
|
||||
|
||||
def _is_homogeneous_mgr(mgr: BlockManager, first_dtype: DtypeObj) -> bool:
    """
    Check if this Manager can be treated as a single ndarray.

    That requires exactly one block, whose placement is slice-like with
    step 1 and whose dtype equals ``first_dtype``.
    """
    if mgr.nblocks != 1:
        return False

    only_block = mgr.blocks[0]
    unit_step_slice = (
        only_block.mgr_locs.is_slice_like
        and only_block.mgr_locs.as_slice.step == 1
    )
    if not unit_step_slice:
        return False

    return only_block.dtype == first_dtype
|
||||
|
||||
|
||||
def _concat_homogeneous_fastpath(
    mgrs_indexers, shape: Shape, first_dtype: np.dtype
) -> Block:
    """
    With single-Block managers with homogeneous dtypes (that can already hold nan),
    we avoid [...]
    """
    # assumes
    #  all(_is_homogeneous_mgr(mgr, first_dtype) for mgr, _ in mgrs_indexers)

    if not any(indexers for _, indexers in mgrs_indexers):
        # Nothing needs reindexing: concatenate the transposed block values.
        # https://github.com/pandas-dev/pandas/pull/52685#issuecomment-1523287739
        transposed = [mgr.blocks[0].values.T for mgr, _ in mgrs_indexers]
        result = np.concatenate(transposed).T
    else:
        result = np.empty(shape, dtype=first_dtype)

        take_func = (
            libalgos.take_2d_axis0_float64_float64
            if first_dtype == np.float64
            else libalgos.take_2d_axis0_float32_float32
        )

        # Fill the output one manager at a time, left to right.
        offset = 0
        for mgr, indexers in mgrs_indexers:
            stop = offset + mgr.shape[1]

            if 0 in indexers:
                take_func(
                    mgr.blocks[0].values,
                    indexers[0],
                    result[:, offset:stop],
                )
            else:
                # No reindexing necessary, we can copy values directly
                result[:, offset:stop] = mgr.blocks[0].values

            offset = stop

    placement = libinternals.BlockPlacement(slice(shape[0]))
    return new_block_2d(result, placement)
|
||||
|
||||
|
||||
def _get_combined_plan(
    mgrs: list[BlockManager],
) -> list[tuple[BlockPlacement, list[JoinUnit]]]:
    """
    Build the concatenation plan for ``mgrs``.

    Each entry pairs a block placement with one JoinUnit per manager, in the
    same order as ``mgrs``.
    """
    max_len = mgrs[0].shape[0]

    pairs = libinternals.get_concat_blkno_indexers([mgr.blknos for mgr in mgrs])

    # assert bp.is_slice_like
    # assert len(bp) > 0
    return [
        (
            bp,
            [
                JoinUnit(
                    _get_block_for_concat_plan(mgr, bp, blknos[pos], max_len=max_len)
                )
                for pos, mgr in enumerate(mgrs)
            ],
        )
        for blknos, bp in pairs
    ]
|
||||
|
||||
|
||||
def _get_block_for_concat_plan(
|
||||
mgr: BlockManager, bp: BlockPlacement, blkno: int, *, max_len: int
|
||||
) -> Block:
|
||||
blk = mgr.blocks[blkno]
|
||||
# Assertions disabled for performance:
|
||||
# assert bp.is_slice_like
|
||||
# assert blkno != -1
|
||||
# assert (mgr.blknos[bp] == blkno).all()
|
||||
|
||||
if len(bp) == len(blk.mgr_locs) and (
|
||||
blk.mgr_locs.is_slice_like and blk.mgr_locs.as_slice.step == 1
|
||||
):
|
||||
nb = blk
|
||||
else:
|
||||
ax0_blk_indexer = mgr.blklocs[bp.indexer]
|
||||
|
||||
slc = lib.maybe_indices_to_slice(ax0_blk_indexer, max_len)
|
||||
# TODO: in all extant test cases 2023-04-08 we have a slice here.
|
||||
# Will this always be the case?
|
||||
if isinstance(slc, slice):
|
||||
nb = blk.slice_block_columns(slc)
|
||||
else:
|
||||
nb = blk.take_block_columns(slc)
|
||||
|
||||
# assert nb.shape == (len(bp), mgr.shape[1])
|
||||
return nb
|
||||
|
||||
|
||||
class JoinUnit:
    """
    Thin wrapper around one block that takes part in a concatenation.

    It answers NA-related questions about the wrapped block and hands out
    the values that block contributes to the concatenated result.
    """

    def __init__(self, block: Block) -> None:
        self.block = block

    def __repr__(self) -> str:
        return f"{type(self).__name__}({self.block!r})"

    def _is_valid_na_for(self, dtype: DtypeObj) -> bool:
        """
        Report whether this unit is all-NA *and* its NA values suit ``dtype``.

        Stricter than `self.is_na`: besides being all-NA, the kind of NA
        held by the block must be acceptable for ``dtype``.
        """
        if not self.is_na:
            return False

        block = self.block
        block_dtype = block.dtype

        if block_dtype.kind == "V":
            # void-dtype blocks are accepted for any target dtype
            return True

        if block_dtype == object:
            # object blocks may mix NA flavours, so check element by element
            flat = block.values.ravel(order="K")
            return all(is_valid_na_for_dtype(item, dtype) for item in flat)

        fill = block.fill_value

        if fill is NaT and block_dtype != dtype:
            # e.g. the block is dt64 while the target is td64: the fill values
            # match, but block.values should not be cast to dtype
            # TODO: this will need updating if we ever have non-nano dt64/td64
            return False

        if fill is NA and needs_i8_conversion(dtype):
            # FIXME: kludge; test_append_empty_frame_with_timedelta64ns_nat
            #  e.g. block dtype "Int64" with a td64 target must not be
            #  treated as matching
            return False

        # TODO: better to use can_hold_element?
        return is_valid_na_for_dtype(fill, dtype)

    @cache_readonly
    def is_na(self) -> bool:
        """
        Whether the wrapped block is treated as entirely NA (cached).

        Void-dtype and zero-size blocks count as NA; blocks that cannot hold
        NA and sparse-dtype blocks never do.
        """
        block = self.block
        if block.dtype.kind == "V":
            return True

        if not block._can_hold_na:
            return False

        values = block.values
        if values.size == 0:
            # GH#39122 this case will return False once deprecation is enforced
            return True

        if isinstance(values.dtype, SparseDtype):
            return False

        # TODO(EA2D): no need for the 1D special case with 2D EAs
        one_dim = values.ndim == 1
        first = values[0] if one_dim else values[0][0]

        if not (is_scalar(first) and isna(first)):
            # cheap short-circuit on the first element; ideally isna_all
            # would do this itself
            return False

        if one_dim:
            return isna_all(values)
        return all(isna_all(row) for row in values)

    @cache_readonly
    def is_na_after_size_and_isna_all_deprecation(self) -> bool:
        """
        Value `self.is_na` will have once the ``values.size == 0`` and
        ``isna_all`` deprecations are enforced: True only for void dtype.
        """
        return self.block.dtype.kind == "V"

    def get_reindexed_values(self, empty_dtype: DtypeObj, upcasted_na) -> ArrayLike:
        """
        Return the values this unit contributes to the concatenation.

        Parameters
        ----------
        empty_dtype : DtypeObj
            Target dtype of the concatenated result.
        upcasted_na : object
            NA value chosen for ``empty_dtype``; None means no upcasting.

        Returns
        -------
        ArrayLike
            The block's own values, or a freshly built all-NA array of
            ``empty_dtype`` when the block is a valid all-NA block for it.
        """
        block = self.block

        if upcasted_na is None and block.dtype.kind != "V":
            # No upcasting is necessary
            return block.values

        if not self._is_valid_na_for(empty_dtype):
            # note: never taken when block.dtype.kind == "V"
            return block.values

        fill_value = upcasted_na
        if block.dtype == np.dtype("object"):
            # keep None as the filler rather than np.nan when the block is
            # built from None; we already know every entry is null
            obj_values = cast(np.ndarray, block.values)
            if obj_values.size and obj_values[0, 0] is None:
                fill_value = None

        return make_na_array(empty_dtype, block.shape, fill_value)
|
||||
|
||||
|
||||
def _concatenate_join_units(join_units: list[JoinUnit], copy: bool) -> ArrayLike:
    """
    Build the concatenated values for a group of join units (axis=1).

    Parameters
    ----------
    join_units : list[JoinUnit]
        Units whose values are concatenated, in order.
    copy : bool
        Not used by this function's body.

    Returns
    -------
    ArrayLike
        The concatenated values.
    """
    empty_dtype, empty_dtype_future = _get_empty_dtype(join_units)

    has_none_blocks = any(ju.block.dtype.kind == "V" for ju in join_units)
    upcasted_na = _dtype_to_na_value(empty_dtype, has_none_blocks)

    pieces = [
        ju.get_reindexed_values(empty_dtype=empty_dtype, upcasted_na=upcasted_na)
        for ju in join_units
    ]

    if any(is_1d_only_ea_dtype(piece.dtype) for piece in pieces):
        # TODO(EA2D): special case not needed if all EAs used HybridBlocks
        # 1D-only pieces are used as they are; every other piece is reduced
        # to its first row so that all inputs are one-dimensional.
        rows = []
        for piece in pieces:
            if is_1d_only_ea_dtype(piece.dtype):
                rows.append(piece)
            else:
                # error: No overload variant of "__getitem__" of
                # "ExtensionArray" matches argument type "Tuple[int, slice]"
                rows.append(piece[0, :])  # type: ignore[call-overload]
        joined = concat_compat(rows, axis=0, ea_compat_axis=True)
        concat_values = ensure_block_shape(joined, 2)
    else:
        concat_values = concat_compat(pieces, axis=1)

    # GH#39122, GH#40893: warn when today's result dtype differs from the
    # one the future behavior would produce
    if empty_dtype != empty_dtype_future and empty_dtype == concat_values.dtype:
        warnings.warn(
            "The behavior of DataFrame concatenation with empty or all-NA "
            "entries is deprecated. In a future version, this will no longer "
            "exclude empty or all-NA columns when determining the result dtypes. "
            "To retain the old behavior, exclude the relevant entries before "
            "the concat operation.",
            FutureWarning,
            stacklevel=find_stack_level(),
        )
    return concat_values
|
||||
|
||||
|
||||
def _dtype_to_na_value(dtype: DtypeObj, has_none_blocks: bool):
|
||||
"""
|
||||
Find the NA value to go with this dtype.
|
||||
"""
|
||||
if isinstance(dtype, ExtensionDtype):
|
||||
return dtype.na_value
|
||||
elif dtype.kind in "mM":
|
||||
return dtype.type("NaT")
|
||||
elif dtype.kind in "fc":
|
||||
return dtype.type("NaN")
|
||||
elif dtype.kind == "b":
|
||||
# different from missing.na_value_for_dtype
|
||||
return None
|
||||
elif dtype.kind in "iu":
|
||||
if not has_none_blocks:
|
||||
# different from missing.na_value_for_dtype
|
||||
return None
|
||||
return np.nan
|
||||
elif dtype.kind == "O":
|
||||
return np.nan
|
||||
raise NotImplementedError
|
||||
|
||||
|
||||
def _get_empty_dtype(join_units: Sequence[JoinUnit]) -> tuple[DtypeObj, DtypeObj]:
|
||||
"""
|
||||
Return dtype and N/A values to use when concatenating specified units.
|
||||
|
||||
Returned N/A value may be None which means there was no casting involved.
|
||||
|
||||
Returns
|
||||
-------
|
||||
dtype
|
||||
"""
|
||||
if lib.dtypes_all_equal([ju.block.dtype for ju in join_units]):
|
||||
empty_dtype = join_units[0].block.dtype
|
||||
return empty_dtype, empty_dtype
|
||||
|
||||
has_none_blocks = any(unit.block.dtype.kind == "V" for unit in join_units)
|
||||
|
||||
dtypes = [unit.block.dtype for unit in join_units if not unit.is_na]
|
||||
if not len(dtypes):
|
||||
dtypes = [
|
||||
unit.block.dtype for unit in join_units if unit.block.dtype.kind != "V"
|
||||
]
|
||||
|
||||
dtype = find_common_type(dtypes)
|
||||
if has_none_blocks:
|
||||
dtype = ensure_dtype_can_hold_na(dtype)
|
||||
|
||||
dtype_future = dtype
|
||||
if len(dtypes) != len(join_units):
|
||||
dtypes_future = [
|
||||
unit.block.dtype
|
||||
for unit in join_units
|
||||
if not unit.is_na_after_size_and_isna_all_deprecation
|
||||
]
|
||||
if not len(dtypes_future):
|
||||
dtypes_future = [
|
||||
unit.block.dtype for unit in join_units if unit.block.dtype.kind != "V"
|
||||
]
|
||||
|
||||
if len(dtypes) != len(dtypes_future):
|
||||
dtype_future = find_common_type(dtypes_future)
|
||||
if has_none_blocks:
|
||||
dtype_future = ensure_dtype_can_hold_na(dtype_future)
|
||||
|
||||
return dtype, dtype_future
|
||||
|
||||
|
||||
def _is_uniform_join_units(join_units: list[JoinUnit]) -> bool:
|
||||
"""
|
||||
Check if the join units consist of blocks of uniform type that can
|
||||
be concatenated using Block.concat_same_type instead of the generic
|
||||
_concatenate_join_units (which uses `concat_compat`).
|
||||
|
||||
"""
|
||||
first = join_units[0].block
|
||||
if first.dtype.kind == "V":
|
||||
return False
|
||||
return (
|
||||
# exclude cases where a) ju.block is None or b) we have e.g. Int64+int64
|
||||
all(type(ju.block) is type(first) for ju in join_units)
|
||||
and
|
||||
# e.g. DatetimeLikeBlock can be dt64 or td64, but these are not uniform
|
||||
all(
|
||||
ju.block.dtype == first.dtype
|
||||
# GH#42092 we only want the dtype_equal check for non-numeric blocks
|
||||
# (for now, may change but that would need a deprecation)
|
||||
or ju.block.dtype.kind in "iub"
|
||||
for ju in join_units
|
||||
)
|
||||
and
|
||||
# no blocks that would get missing values (can lead to type upcasts)
|
||||
# unless we're an extension dtype.
|
||||
all(not ju.is_na or ju.block.is_extension for ju in join_units)
|
||||
)
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,154 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import (
|
||||
TYPE_CHECKING,
|
||||
NamedTuple,
|
||||
)
|
||||
|
||||
from pandas.core.dtypes.common import is_1d_only_ea_dtype
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from collections.abc import Iterator
|
||||
|
||||
from pandas._libs.internals import BlockPlacement
|
||||
from pandas._typing import ArrayLike
|
||||
|
||||
from pandas.core.internals.blocks import Block
|
||||
from pandas.core.internals.managers import BlockManager
|
||||
|
||||
|
||||
class BlockPairInfo(NamedTuple):
    """
    Record describing one pairing of a left block with a right block.
    """

    # values taken from the left block
    lvals: ArrayLike
    # values taken from the right block
    rvals: ArrayLike
    # block placement associated with the pair
    locs: BlockPlacement
    # flag for the left values
    left_ea: bool
    # flag for the right values
    right_ea: bool
    # the right-hand block itself
    rblk: Block
|
||||
|
||||
|
||||
def _iter_block_pairs(
|
||||
left: BlockManager, right: BlockManager
|
||||
) -> Iterator[BlockPairInfo]:
|
||||
# At this point we have already checked the parent DataFrames for
|
||||
# assert rframe._indexed_same(lframe)
|
||||
|
||||
for blk in left.blocks:
|
||||
locs = blk.mgr_locs
|
||||
blk_vals = blk.values
|
||||
|
||||
left_ea = blk_vals.ndim == 1
|
||||
|
||||
rblks = right._slice_take_blocks_ax0(locs.indexer, only_slice=True)
|
||||
|
||||
# Assertions are disabled for performance, but should hold:
|
||||
# if left_ea:
|
||||
# assert len(locs) == 1, locs
|
||||
# assert len(rblks) == 1, rblks
|
||||
# assert rblks[0].shape[0] == 1, rblks[0].shape
|
||||
|
||||
for rblk in rblks:
|
||||
right_ea = rblk.values.ndim == 1
|
||||
|
||||
lvals, rvals = _get_same_shape_values(blk, rblk, left_ea, right_ea)
|
||||
info = BlockPairInfo(lvals, rvals, locs, left_ea, right_ea, rblk)
|
||||
yield info
|
||||
|
||||
|
||||
def operate_blockwise(
    left: BlockManager, right: BlockManager, array_op
) -> BlockManager:
    """
    Apply ``array_op`` to each aligned pair of blocks and collect the results.

    Parameters
    ----------
    left, right : BlockManager
        Managers whose blocks are paired up by `_iter_block_pairs`.
    array_op : callable
        Called as ``array_op(lvals, rvals)`` for every block pair.

    Returns
    -------
    BlockManager
        New manager of ``type(right)`` built from the result blocks, using
        ``right.axes`` and skipping integrity verification.
    """
    # At this point we have already checked the parent DataFrames for
    #  assert rframe._indexed_same(lframe)

    result_blocks: list[Block] = []
    for pair in _iter_block_pairs(left, right):
        out = array_op(pair.lvals, pair.rvals)

        # 1D left values against a 2D right block: restore a single-row 2D
        # shape, unless the result cannot (or must not) be reshaped
        needs_2d = (
            pair.left_ea
            and not pair.right_ea
            and hasattr(out, "reshape")
            and not is_1d_only_ea_dtype(out.dtype)
        )
        if needs_2d:
            out = out.reshape(1, -1)

        new_blocks = pair.rblk._split_op_result(out)

        # Expected to hold (left unchecked for performance):
        #  a single new block if either side is "ea", otherwise
        #  out.shape == pair.lvals.shape

        _reset_block_mgr_locs(new_blocks, pair.locs)
        result_blocks.extend(new_blocks)

    # Expected to hold (left unchecked for performance):
    #  the mgr_locs of all result blocks are pairwise distinct and together
    #  cover exactly range(len(left.items))

    return type(right)(tuple(result_blocks), axes=right.axes, verify_integrity=False)
|
||||
|
||||
|
||||
def _reset_block_mgr_locs(nbs: list[Block], locs) -> None:
|
||||
"""
|
||||
Reset mgr_locs to correspond to our original DataFrame.
|
||||
"""
|
||||
for nb in nbs:
|
||||
nblocs = locs[nb.mgr_locs.indexer]
|
||||
nb.mgr_locs = nblocs
|
||||
# Assertions are disabled for performance, but should hold:
|
||||
# assert len(nblocs) == nb.shape[0], (len(nblocs), nb.shape)
|
||||
# assert all(x in locs.as_array for x in nb.mgr_locs.as_array)
|
||||
|
||||
|
||||
def _get_same_shape_values(
|
||||
lblk: Block, rblk: Block, left_ea: bool, right_ea: bool
|
||||
) -> tuple[ArrayLike, ArrayLike]:
|
||||
"""
|
||||
Slice lblk.values to align with rblk. Squeeze if we have EAs.
|
||||
"""
|
||||
lvals = lblk.values
|
||||
rvals = rblk.values
|
||||
|
||||
# Require that the indexing into lvals be slice-like
|
||||
assert rblk.mgr_locs.is_slice_like, rblk.mgr_locs
|
||||
|
||||
# TODO(EA2D): with 2D EAs only this first clause would be needed
|
||||
if not (left_ea or right_ea):
|
||||
# error: No overload variant of "__getitem__" of "ExtensionArray" matches
|
||||
# argument type "Tuple[Union[ndarray, slice], slice]"
|
||||
lvals = lvals[rblk.mgr_locs.indexer, :] # type: ignore[call-overload]
|
||||
assert lvals.shape == rvals.shape, (lvals.shape, rvals.shape)
|
||||
elif left_ea and right_ea:
|
||||
assert lvals.shape == rvals.shape, (lvals.shape, rvals.shape)
|
||||
elif right_ea:
|
||||
# lvals are 2D, rvals are 1D
|
||||
|
||||
# error: No overload variant of "__getitem__" of "ExtensionArray" matches
|
||||
# argument type "Tuple[Union[ndarray, slice], slice]"
|
||||
lvals = lvals[rblk.mgr_locs.indexer, :] # type: ignore[call-overload]
|
||||
assert lvals.shape[0] == 1, lvals.shape
|
||||
lvals = lvals[0, :]
|
||||
else:
|
||||
# lvals are 1D, rvals are 2D
|
||||
assert rvals.shape[0] == 1, rvals.shape
|
||||
# error: No overload variant of "__getitem__" of "ExtensionArray" matches
|
||||
# argument type "Tuple[int, slice]"
|
||||
rvals = rvals[0, :] # type: ignore[call-overload]
|
||||
|
||||
return lvals, rvals
|
||||
|
||||
|
||||
def blockwise_all(left: BlockManager, right: BlockManager, op) -> bool:
    """
    Blockwise `all` reduction.

    Calls ``op(lvals, rvals)`` on each aligned block pair and stops at the
    first falsy result.

    Returns
    -------
    bool
        True when every pair gives a truthy result (also when there are no
        pairs at all), False otherwise.
    """
    return all(
        op(pair.lvals, pair.rvals) for pair in _iter_block_pairs(left, right)
    )
|
||||
Reference in New Issue
Block a user