Files
MLPproject/.venv/lib/python3.12/site-packages/narwhals/_native.py
2025-10-23 15:44:32 +02:00

414 lines
15 KiB
Python

"""The home for *mostly* [structural] counterparts to [nominal] native types.
If you find yourself being yelled at by a typechecker and ended up here - **do not fear!**
We have 5 funky flavors, which tackle two different problem spaces.
How do we describe [Native types] when ...
- ... **wrapping in** a [Narwhals type]?
- ... **matching to** an [`Implementation`]?
## Wrapping in a Narwhals type
[//]: # (TODO @dangotbanned: Replace `Thing` with a better name)
The following examples use the placeholder type `Thing` which represents one of:
- `DataFrame`: (Eager) 2D data structure representing data as a table with rows and columns.
- `LazyFrame`: (Lazy) Computation graph/query against a DataFrame/database.
- `Series`: 1D data structure representing a single column.
Our goal is to **wrap** a *partially-unknown* native object **in** a [generic class]:
def wrapping_in_df(native: IntoDataFrameT) -> DataFrame[IntoDataFrameT]: ...
def wrapping_in_lf(native: IntoLazyFrameT) -> LazyFrame[IntoLazyFrameT]: ...
def wrapping_in_ser(native: IntoSeriesT) -> Series[IntoSeriesT]: ...
### (1) `Native<Thing>`
Minimal [`Protocol`]s that are [assignable to] *almost any* supported native type of that group:
class NativeThing(Protocol):
def something_common(self, *args: Any, **kwargs: Any) -> Any: ...
Note:
This group is primarily a building block for more useful types.
### (2) `Into<Thing>`
*Publicly* exported [`TypeAlias`]s of **(1)**:
IntoThing: TypeAlias = NativeThing
**But**, occasionally, there'll be an edge-case which we can spell like:
IntoThing: TypeAlias = Union[<type that does not fit the protocol>, NativeThing]
Tip:
Reach for these when there **isn't a need to preserve** the original native type.
### (3) `Into<Thing>T`
*Publicly* exported [`TypeVar`]s, bound to **(2)**:
IntoThingT = TypeVar("IntoThingT", bound=IntoThing)
Important:
In most situations, you'll want to use these as they **do preserve** the original native type.
Putting it all together, we can now add a *narwhals-level* wrapper:
class Thing(Generic[IntoThingT]):
def to_native(self) -> IntoThingT: ...
## Matching to an `Implementation`
This problem differs as we need to *create* a relationship between *otherwise-unrelated* types.
Comparing the problems side-by-side, we can more clearly see this difference:
def wrapping_in_df(native: IntoDataFrameT) -> DataFrame[IntoDataFrameT]: ...
def matching_to_polars(native: pl.DataFrame) -> Literal[Implementation.POLARS]: ...
### (4) `Native<Backend>`
If we want to describe a set of specific types and **match** them in [`@overload`s], then these are the tools we need.
For common and easily-installed backends, [`TypeAlias`]s are composed of the native type(s):
NativePolars: TypeAlias = pl.DataFrame | pl.LazyFrame | pl.Series
Otherwise, we need to define a [`Protocol`] which the native type(s) can **match** against *when* installed:
class NativeDask(NativeLazyFrame, Protocol):
_partition_type: type[pd.DataFrame]
Tip:
The goal is to be as minimal as possible, while still being *specific-enough* to **not match** something else.
Important:
See [ibis#9276 comment] for a more *in-depth* example that doesn't fit here 😄
### (5) `is_native_<backend>`
[Type guards] for **(4)**, *similar* to those found in `nw.dependencies`.
They differ by checking **all** native types/protocols in a single-call and using ``Native<Backend>`` aliases.
[structural]: https://typing.python.org/en/latest/spec/glossary.html#term-structural
[nominal]: https://typing.python.org/en/latest/spec/glossary.html#term-nominal
[Native types]: https://narwhals-dev.github.io/narwhals/how_it_works/#polars-and-other-implementations
[Narwhals type]: https://narwhals-dev.github.io/narwhals/api-reference/dataframe/
[`Implementation`]: https://narwhals-dev.github.io/narwhals/api-reference/implementation/
[`Protocol`]: https://typing.python.org/en/latest/spec/protocol.html
[assignable to]: https://typing.python.org/en/latest/spec/glossary.html#term-assignable
[`TypeAlias`]: https://mypy.readthedocs.io/en/stable/kinds_of_types.html#type-aliases
[`TypeVar`]: https://mypy.readthedocs.io/en/stable/generics.html#type-variables-with-upper-bounds
[generic class]: https://docs.python.org/3/library/typing.html#user-defined-generic-types
[`@overload`s]: https://typing.python.org/en/latest/spec/overload.html
[ibis#9276 comment]: https://github.com/ibis-project/ibis/issues/9276#issuecomment-3292016818
[Type guards]: https://typing.python.org/en/latest/spec/narrowing.html
"""
from __future__ import annotations
from collections.abc import Callable, Collection, Iterable, Sized
from typing import TYPE_CHECKING, Any, Protocol, TypeVar, Union, cast
from narwhals.dependencies import (
get_cudf,
get_modin,
get_pandas,
get_polars,
get_pyarrow,
is_dask_dataframe,
is_duckdb_relation,
is_ibis_table,
is_pyspark_connect_dataframe,
is_pyspark_dataframe,
is_sqlframe_dataframe,
)
# Imports and aliases that are only needed by static type checkers.
# NOTE(review): indentation was restored by hand - confirm that `T` and `_Guard`
# are meant to live inside this branch (they are only referenced from
# annotations and string `cast` targets below).
if TYPE_CHECKING:
    import duckdb
    import pandas as pd
    import polars as pl
    import pyarrow as pa
    from sqlframe.base.dataframe import BaseDataFrame as _BaseDataFrame
    from typing_extensions import Self, TypeAlias, TypeIs

    SQLFrameDataFrame = _BaseDataFrame[Any, Any, Any, Any, Any]
    T = TypeVar("T")
    # Signature shared by the `is_native_*` guards: `(obj) -> TypeIs[T]`.
    _Guard: TypeAlias = "Callable[[Any], TypeIs[T]]"

# Alias of `Any`, used in annotations where a precise type is deliberately
# not given (see `_BasePandasLike.rename`).
Incomplete: TypeAlias = Any
# Names exported by this module: the `Into*` aliases and TypeVars, the
# `Native*` protocols/aliases, and the `is_native_*` guards.
__all__ = [
    "IntoDataFrame",
    "IntoDataFrameT",
    "IntoFrame",
    "IntoFrameT",
    "IntoLazyFrame",
    "IntoLazyFrameT",
    "IntoSeries",
    "IntoSeriesT",
    "NativeAny",
    "NativeArrow",
    "NativeCuDF",
    "NativeDask",
    "NativeDataFrame",
    "NativeDuckDB",
    "NativeFrame",
    "NativeIbis",
    "NativeKnown",
    "NativeLazyFrame",
    "NativeModin",
    "NativePandas",
    "NativePandasLike",
    "NativePandasLikeDataFrame",
    "NativePandasLikeSeries",
    "NativePolars",
    "NativePySpark",
    "NativePySparkConnect",
    "NativeSQLFrame",
    "NativeSeries",
    "NativeSparkLike",
    "NativeUnknown",
    "is_native_arrow",
    "is_native_cudf",
    "is_native_dask",
    "is_native_duckdb",
    "is_native_ibis",
    "is_native_modin",
    "is_native_pandas",
    "is_native_pandas_like",
    "is_native_polars",
    "is_native_pyspark",
    "is_native_pyspark_connect",
    "is_native_spark_like",
    "is_native_sqlframe",
]
# All dataframes supported by Narwhals have a
# `columns` property. Their similarities don't extend
# _that_ much further unfortunately...
class NativeFrame(Protocol):
    """Minimal structural type for a native frame.

    Only a `columns` property and a `join` method are required.
    """

    def join(self, *args: Any, **kwargs: Any) -> Any:
        """Permissive member: any arguments are accepted and anything may be returned."""
        ...

    @property
    def columns(self) -> Any:
        """Permissive member: the concrete type of the value is not constrained."""
        ...
class NativeDataFrame(Sized, NativeFrame, Protocol):
    """A `NativeFrame` that is also `Sized` (supports `len()`)."""
class NativeLazyFrame(NativeFrame, Protocol):
    """A `NativeFrame` that additionally exposes an `explain` method."""

    def explain(self, *args: Any, **kwargs: Any) -> Any:
        """Permissive member: any arguments are accepted and anything may be returned."""
        ...
class NativeSeries(Sized, Iterable[Any], Protocol):
    """Minimal structural type for a native series.

    The object must be sized, iterable and expose a `filter` method.
    """

    def filter(self, *args: Any, **kwargs: Any) -> Any:
        """Permissive member: any arguments are accepted and anything may be returned."""
        ...
class _BasePandasLike(Sized, Protocol):
    """Members shared by the pandas-like dataframe and series protocols."""

    index: Any
    """`mypy` doesn't like the asymmetric `property` setter in `pandas`."""

    @property
    def loc(self) -> Any: ...

    @property
    def shape(self) -> tuple[int, ...]: ...

    def __getitem__(self, key: Any, /) -> Any: ...

    def __floordiv__(self, other: float | Collection[float] | Self, /) -> Self: ...

    def __mul__(self, other: float | Collection[float] | Self, /) -> Self: ...

    def copy(self, deep: bool = ...) -> Self: ...  # noqa: FBT001

    def set_axis(self, labels: Any, *, axis: Any = ..., copy: bool = ...) -> Self: ...

    def rename(self, *args: Any, **kwds: Any) -> Self | Incomplete:
        """`mypy` & `pyright` disagree on overloads.

        `Incomplete` used to fix [more important issue](https://github.com/narwhals-dev/narwhals/pull/3016#discussion_r2296139744).
        """
class _BasePandasLikeFrame(NativeDataFrame, _BasePandasLike, Protocol):
    """Combination of `NativeDataFrame` and `_BasePandasLike`; adds no members of its own."""
class _BasePandasLikeSeries(NativeSeries, _BasePandasLike, Protocol):
    """Combination of `NativeSeries` and `_BasePandasLike`, plus a `where` method."""

    def where(self, cond: Any, other: Any = ..., /) -> Self | Incomplete:
        """Positional-only `cond` and optional `other`; the return type is left loose."""
        ...
class NativeDask(NativeLazyFrame, Protocol):
    """A `NativeLazyFrame` that also carries a `_partition_type` attribute.

    The extra attribute keeps the protocol specific enough to avoid matching
    other lazy frames.
    """

    # Annotated as the `pandas.DataFrame` class itself, not an instance.
    _partition_type: type[pd.DataFrame]
class _CuDFDataFrame(_BasePandasLikeFrame, Protocol):
    """`_BasePandasLikeFrame` extended with a `to_pylibcudf` method."""

    def to_pylibcudf(self, *args: Any, **kwds: Any) -> Any:
        """Permissive member: any arguments are accepted and anything may be returned."""
        ...
class _CuDFSeries(_BasePandasLikeSeries, Protocol):
    """`_BasePandasLikeSeries` extended with a `to_pylibcudf` method."""

    def to_pylibcudf(self, *args: Any, **kwds: Any) -> Any:
        """Permissive member: any arguments are accepted and anything may be returned."""
        ...
class NativeIbis(NativeFrame, Protocol):
    """A `NativeFrame` exposing `sql` and the three `__*_result__` hooks.

    Every member is permissive: any arguments are accepted and anything may
    be returned.
    """

    def __pandas_result__(self, *args: Any, **kwds: Any) -> Any: ...

    def __polars_result__(self, *args: Any, **kwds: Any) -> Any: ...

    def __pyarrow_result__(self, *args: Any, **kwds: Any) -> Any: ...

    def sql(self, *args: Any, **kwds: Any) -> Any: ...
class _ModinDataFrame(_BasePandasLikeFrame, Protocol):
    """`_BasePandasLikeFrame` that also carries a `_pandas_class` attribute."""

    # Annotated as the `pandas.DataFrame` class itself, not an instance.
    _pandas_class: type[pd.DataFrame]
class _ModinSeries(_BasePandasLikeSeries, Protocol):
    """`_BasePandasLikeSeries` that also carries a `_pandas_class` attribute."""

    # Annotated as the `pandas.Series` class itself, not an instance.
    _pandas_class: type[pd.Series[Any]]
class _PySparkDataFrame(NativeLazyFrame, Protocol):
    """`NativeLazyFrame` extended with a `dropDuplicatesWithinWatermark` method.

    Both `NativePySpark` and `NativePySparkConnect` alias this protocol.
    """

    def dropDuplicatesWithinWatermark(  # noqa: N802
        self, *arg: Any, **kwargs: Any
    ) -> Any:
        """Permissive member: any arguments are accepted and anything may be returned."""
        ...
# --- `Native<Backend>` aliases -------------------------------------------
# Most aliases are string forward references, so the backend packages
# (imported only under `TYPE_CHECKING`) are never needed at runtime.

# Backends described by their concrete native classes.
NativePolars: TypeAlias = "pl.DataFrame | pl.LazyFrame | pl.Series"
NativeArrow: TypeAlias = "pa.Table | pa.ChunkedArray[Any]"
NativeDuckDB: TypeAlias = "duckdb.DuckDBPyRelation"
NativePandas: TypeAlias = "pd.DataFrame | pd.Series[Any]"
# Backends described by the protocols defined above.
NativeModin: TypeAlias = "_ModinDataFrame | _ModinSeries"
NativeCuDF: TypeAlias = "_CuDFDataFrame | _CuDFSeries"
# pandas-like groupings: per series, per dataframe, then both.
NativePandasLikeSeries: TypeAlias = "pd.Series[Any] | _CuDFSeries | _ModinSeries"
NativePandasLikeDataFrame: TypeAlias = "pd.DataFrame | _CuDFDataFrame | _ModinDataFrame"
NativePandasLike: TypeAlias = "NativePandasLikeDataFrame | NativePandasLikeSeries"
NativeSQLFrame: TypeAlias = "_BaseDataFrame[Any, Any, Any, Any, Any]"
# PySpark and PySpark Connect share one protocol.
NativePySpark: TypeAlias = _PySparkDataFrame
NativePySparkConnect: TypeAlias = _PySparkDataFrame
NativeSparkLike: TypeAlias = "NativeSQLFrame | NativePySpark | NativePySparkConnect"
# Union of every backend-specific alias above.
NativeKnown: TypeAlias = "NativePolars | NativeArrow | NativePandasLike | NativeSparkLike | NativeDuckDB | NativeDask | NativeIbis"
# Union of the generic structural protocols, for objects not tied to a backend alias.
NativeUnknown: TypeAlias = "NativeDataFrame | NativeSeries | NativeLazyFrame"
NativeAny: TypeAlias = "NativeKnown | NativeUnknown"
IntoDataFrame: TypeAlias = NativeDataFrame
"""Anything which can be converted to a Narwhals DataFrame.

Use this if your function accepts a narwhalifiable object but doesn't care about its backend.

Examples:
    >>> import narwhals as nw
    >>> from narwhals.typing import IntoDataFrame
    >>> def agnostic_shape(df_native: IntoDataFrame) -> tuple[int, int]:
    ...     df = nw.from_native(df_native, eager_only=True)
    ...     return df.shape
"""

IntoLazyFrame: TypeAlias = Union[NativeLazyFrame, NativeIbis]
"""Anything which can be converted to a Narwhals LazyFrame.

Use this if your function accepts a narwhalifiable lazy object but doesn't care about its backend.
"""

IntoFrame: TypeAlias = Union[IntoDataFrame, IntoLazyFrame]
"""Anything which can be converted to a Narwhals DataFrame or LazyFrame.

Use this if your function can accept an object which can be converted to either
`nw.DataFrame` or `nw.LazyFrame` and it doesn't care about its backend.

Examples:
    >>> import narwhals as nw
    >>> from narwhals.typing import IntoFrame
    >>> def agnostic_columns(df_native: IntoFrame) -> list[str]:
    ...     df = nw.from_native(df_native)
    ...     return df.collect_schema().names()
"""

IntoSeries: TypeAlias = NativeSeries
"""Anything which can be converted to a Narwhals Series.

Use this if your function can accept an object which can be converted to `nw.Series`
and it doesn't care about its backend.

Examples:
    >>> from typing import Any
    >>> import narwhals as nw
    >>> from narwhals.typing import IntoSeries
    >>> def agnostic_to_list(s_native: IntoSeries) -> list[Any]:
    ...     s = nw.from_native(s_native)
    ...     return s.to_list()
"""
IntoFrameT = TypeVar("IntoFrameT", bound=IntoFrame)
"""TypeVar bound to object convertible to Narwhals DataFrame or Narwhals LazyFrame.

Use this if your function accepts an object which is convertible to `nw.DataFrame`
or `nw.LazyFrame` and returns an object of the same type.

Examples:
    >>> import narwhals as nw
    >>> from narwhals.typing import IntoFrameT
    >>> def agnostic_func(df_native: IntoFrameT) -> IntoFrameT:
    ...     df = nw.from_native(df_native)
    ...     return df.with_columns(c=nw.col("a") + 1).to_native()
"""

IntoDataFrameT = TypeVar("IntoDataFrameT", bound=IntoDataFrame)
"""TypeVar bound to object convertible to Narwhals DataFrame.

Use this if your function accepts an object which can be converted to `nw.DataFrame`
and returns an object of the same class.

Examples:
    >>> import narwhals as nw
    >>> from narwhals.typing import IntoDataFrameT
    >>> def agnostic_func(df_native: IntoDataFrameT) -> IntoDataFrameT:
    ...     df = nw.from_native(df_native, eager_only=True)
    ...     return df.with_columns(c=df["a"] + 1).to_native()
"""

IntoLazyFrameT = TypeVar("IntoLazyFrameT", bound=IntoLazyFrame)
"""TypeVar bound to object convertible to Narwhals LazyFrame.

Use this if your function accepts an object which can be converted to `nw.LazyFrame`
and returns an object of the same class.
"""

IntoSeriesT = TypeVar("IntoSeriesT", bound=IntoSeries)
"""TypeVar bound to object convertible to Narwhals Series.

Use this if your function accepts an object which can be converted to `nw.Series`
and returns an object of the same class.

Examples:
    >>> import narwhals as nw
    >>> from narwhals.typing import IntoSeriesT
    >>> def agnostic_abs(s_native: IntoSeriesT) -> IntoSeriesT:
    ...     s = nw.from_native(s_native, series_only=True)
    ...     return s.abs().to_native()
"""
def is_native_polars(obj: Any) -> TypeIs[NativePolars]:
    """Return whether `obj` is a polars `DataFrame`, `Series` or `LazyFrame`.

    Evaluates to `False` when `get_polars()` returns `None`.
    """
    polars = get_polars()
    if polars is None:
        return False
    native_types = (polars.DataFrame, polars.Series, polars.LazyFrame)
    return isinstance(obj, native_types)
def is_native_arrow(obj: Any) -> TypeIs[NativeArrow]:
    """Return whether `obj` is a pyarrow `Table` or `ChunkedArray`.

    Evaluates to `False` when `get_pyarrow()` returns `None`.
    """
    pyarrow = get_pyarrow()
    if pyarrow is None:
        return False
    native_types = (pyarrow.Table, pyarrow.ChunkedArray)
    return isinstance(obj, native_types)
# The guards below are the checks imported from `narwhals.dependencies`,
# re-exposed under `is_native_*` names and typed as `_Guard[Native<Backend>]`.
# `cast` is used for the targets that are protocols defined in this module;
# the targets that are concrete classes take a plain annotation.
# NOTE(review): presumably the imported guards narrow to the nominal backend
# classes, which is why the protocol-typed ones need a cast - confirm.
is_native_dask = cast("_Guard[NativeDask]", is_dask_dataframe)
is_native_duckdb: _Guard[NativeDuckDB] = is_duckdb_relation
is_native_sqlframe: _Guard[NativeSQLFrame] = is_sqlframe_dataframe
is_native_pyspark = cast("_Guard[NativePySpark]", is_pyspark_dataframe)
is_native_pyspark_connect = cast(
    "_Guard[NativePySparkConnect]", is_pyspark_connect_dataframe
)
is_native_ibis = cast("_Guard[NativeIbis]", is_ibis_table)
def is_native_pandas(obj: Any) -> TypeIs[NativePandas]:
    """Return whether `obj` is a pandas `DataFrame` or `Series`.

    Evaluates to `False` when `get_pandas()` returns `None`.
    """
    pandas = get_pandas()
    if pandas is None:
        return False
    native_types = (pandas.DataFrame, pandas.Series)
    return isinstance(obj, native_types)
def is_native_modin(obj: Any) -> TypeIs[NativeModin]:
    """Return whether `obj` is a modin `DataFrame` or `Series`.

    Evaluates to `False` when `get_modin()` returns `None`.
    """
    modin = get_modin()
    if modin is None:
        return False
    native_types = (modin.DataFrame, modin.Series)
    return isinstance(obj, native_types)
def is_native_cudf(obj: Any) -> TypeIs[NativeCuDF]:  # pragma: no cover
    """Return whether `obj` is a cuDF `DataFrame` or `Series`.

    Evaluates to `False` when `get_cudf()` returns `None`.
    """
    cudf = get_cudf()
    if cudf is None:
        return False
    native_types = (cudf.DataFrame, cudf.Series)
    return isinstance(obj, native_types)
def is_native_pandas_like(obj: Any) -> TypeIs[NativePandasLike]:
    """Return whether `obj` is a native pandas, cuDF or modin object.

    The individual guards are tried in that order and evaluation stops at the
    first match.
    """
    if is_native_pandas(obj):
        return True
    if is_native_cudf(obj):
        return True
    return is_native_modin(obj)
def is_native_spark_like(obj: Any) -> TypeIs[NativeSparkLike]:
    """Return whether `obj` is a native SQLFrame, PySpark or PySpark Connect object.

    The individual guards are tried in that order and evaluation stops at the
    first match.
    """
    if is_native_sqlframe(obj):
        return True
    if is_native_pyspark(obj):
        return True
    return is_native_pyspark_connect(obj)