Deprecate passing an Expr as the `part` argument of the date-part helpers.

date_part/datepart/extract and date_trunc/datetrunc now route through the
private helpers _date_part/_date_trunc, which emit a DeprecationWarning when
`part` is an Expr and otherwise behave as before. The user guide and the
tests are updated to pass plain strings, and new tests pin the warning text.

diff --git a/docs/source/user-guide/common-operations/functions.rst b/docs/source/user-guide/common-operations/functions.rst
index ccb47a4e7..ed5a991d8 100644
--- a/docs/source/user-guide/common-operations/functions.rst
+++ b/docs/source/user-guide/common-operations/functions.rst
@@ -77,8 +77,8 @@ Extracting parts of a date using :py:func:`~datafusion.functions.date_part` (ali
 .. ipython:: python
 
     df.select(
-        f.date_part(literal("month"), f.to_timestamp(col('"Total"'))).alias("month"),
-        f.extract(literal("day"), f.to_timestamp(col('"Total"'))).alias("day")
+        f.date_part("month", f.to_timestamp(col('"Total"'))).alias("month"),
+        f.extract("day", f.to_timestamp(col('"Total"'))).alias("day")
     )
 
 String
diff --git a/python/datafusion/functions.py b/python/datafusion/functions.py
index c8f07497d..9158a7146 100644
--- a/python/datafusion/functions.py
+++ b/python/datafusion/functions.py
@@ -39,6 +39,7 @@
 from __future__ import annotations
 
 import inspect
+import warnings
 from typing import TYPE_CHECKING, Any
 
 import pyarrow as pa
@@ -60,6 +61,16 @@
     sort_or_default,
 )
 
+
+def _warn_expr_for_literal_arg(function_name: str, arg_name: str) -> None:
+    warnings.warn(
+        f"Passing Expr for {function_name}() argument {arg_name!r} is deprecated; "
+        "pass a Python literal instead.",
+        DeprecationWarning,
+        stacklevel=4,
+    )
+
+
 __all__ = [
     "abs",
     "acos",
@@ -2575,7 +2586,7 @@ def datepart(part: Expr | str, date: Expr) -> Expr:
     See Also:
         This is an alias for :py:func:`date_part`.
     """
-    return date_part(part, date)
+    return _date_part(part, date, "datepart")
 
 
 def date_part(part: Expr | str, date: Expr) -> Expr:
@@ -2595,6 +2606,12 @@ def date_part(part: Expr | str, date: Expr) -> Expr:
         >>> result.collect_column("y")[0].as_py()
         2021
     """
+    return _date_part(part, date, "date_part")
+
+
+def _date_part(part: Expr | str, date: Expr, function_name: str) -> Expr:
+    if isinstance(part, Expr):
+        _warn_expr_for_literal_arg(function_name, "part")
     part = coerce_to_expr(part)
     return Expr(f.date_part(part.expr, date.expr))
 
@@ -2605,7 +2622,7 @@ def extract(part: Expr | str, date: Expr) -> Expr:
     See Also:
         This is an alias for :py:func:`date_part`.
     """
-    return date_part(part, date)
+    return _date_part(part, date, "extract")
 
 
 def date_trunc(part: Expr | str, date: Expr) -> Expr:
@@ -2626,6 +2643,12 @@ def date_trunc(part: Expr | str, date: Expr) -> Expr:
         >>> str(result.collect_column("t")[0].as_py())
         '2021-07-01 00:00:00'
     """
+    return _date_trunc(part, date, "date_trunc")
+
+
+def _date_trunc(part: Expr | str, date: Expr, function_name: str) -> Expr:
+    if isinstance(part, Expr):
+        _warn_expr_for_literal_arg(function_name, "part")
     part = coerce_to_expr(part)
     return Expr(f.date_trunc(part.expr, date.expr))
 
@@ -2636,7 +2659,7 @@ def datetrunc(part: Expr | str, date: Expr) -> Expr:
     See Also:
         This is an alias for :py:func:`date_trunc`.
     """
-    return date_trunc(part, date)
+    return _date_trunc(part, date, "datetrunc")
 
 
 def date_bin(stride: Expr, source: Expr, origin: Expr) -> Expr:
diff --git a/python/tests/test_functions.py b/python/tests/test_functions.py
index 55d9c8ee8..d2abe4741 100644
--- a/python/tests/test_functions.py
+++ b/python/tests/test_functions.py
@@ -15,6 +15,7 @@
 # specific language governing permissions and limitations
 # under the License.
 import math
+import warnings
 from datetime import date, datetime, time, timezone
 
 import numpy as np
@@ -1086,10 +1087,10 @@ def test_hash_functions(df):
 
 def test_temporal_functions(df):
     df = df.select(
-        f.date_part(literal("month"), column("d")),
-        f.datepart(literal("year"), column("d")),
-        f.date_trunc(literal("month"), column("d")),
-        f.datetrunc(literal("day"), column("d")),
+        f.date_part("month", column("d")),
+        f.datepart("year", column("d")),
+        f.date_trunc("month", column("d")),
+        f.datetrunc("day", column("d")),
         f.date_bin(
             literal("15 minutes").cast(pa.string()),
             column("d"),
@@ -1100,7 +1101,7 @@ def test_temporal_functions(df):
         f.to_timestamp_seconds(literal("2023-09-07 05:06:14.523952")),
         f.to_timestamp_millis(literal("2023-09-07 05:06:14.523952")),
         f.to_timestamp_micros(literal("2023-09-07 05:06:14.523952")),
-        f.extract(literal("day"), column("d")),
+        f.extract("day", column("d")),
         f.to_timestamp(
             literal("2023-09-07 05:06:14.523952000"), literal("%Y-%m-%d %H:%M:%S.%f")
         ),
@@ -2160,16 +2161,51 @@ def test_date_part_native_str(self):
         ctx = SessionContext()
         df = ctx.from_pydict({"a": ["2021-07-15T00:00:00"]})
         df = df.select(f.to_timestamp(column("a")).alias("a"))
-        result = df.select(f.date_part("year", column("a")).alias("y")).collect()
+        with warnings.catch_warnings():
+            warnings.simplefilter("error", DeprecationWarning)
+            result = df.select(f.date_part("year", column("a")).alias("y")).collect()
         assert result[0].column(0)[0].as_py() == 2021
 
+    @pytest.mark.parametrize(
+        ("func", "name"),
+        [
+            pytest.param(f.date_part, "date_part", id="date_part"),
+            pytest.param(f.datepart, "datepart", id="datepart"),
+            pytest.param(f.extract, "extract", id="extract"),
+        ],
+    )
+    def test_date_part_expr_part_warns_deprecated(self, func, name):
+        with pytest.warns(
+            DeprecationWarning,
+            match=rf"Passing Expr for {name}\(\) argument 'part' is deprecated",
+        ):
+            expr = func(literal("year"), column("a"))
+        assert expr is not None
+
     def test_date_trunc_native_str(self):
         ctx = SessionContext()
         df = ctx.from_pydict({"a": ["2021-07-15T12:34:56"]})
         df = df.select(f.to_timestamp(column("a")).alias("a"))
-        result = df.select(f.date_trunc("month", column("a")).alias("t")).collect()
+        with warnings.catch_warnings():
+            warnings.simplefilter("error", DeprecationWarning)
+            result = df.select(f.date_trunc("month", column("a")).alias("t")).collect()
         assert str(result[0].column(0)[0].as_py()) == "2021-07-01 00:00:00"
 
+    @pytest.mark.parametrize(
+        ("func", "name"),
+        [
+            pytest.param(f.date_trunc, "date_trunc", id="date_trunc"),
+            pytest.param(f.datetrunc, "datetrunc", id="datetrunc"),
+        ],
+    )
+    def test_date_trunc_expr_part_warns_deprecated(self, func, name):
+        with pytest.warns(
+            DeprecationWarning,
+            match=rf"Passing Expr for {name}\(\) argument 'part' is deprecated",
+        ):
+            expr = func(literal("month"), column("a"))
+        assert expr is not None
+
     def test_left_native_int(self):
         ctx = SessionContext()
         df = ctx.from_pydict({"a": ["the cat"]})