Skip to content

Commit a4a016d

Browse files
ueshin authored and Yicong-Huang committed
[SPARK-55409][PS] Handle an unexpected keyword argument error from read_excel with pandas 3
### What changes were proposed in this pull request?

Handles an unexpected keyword argument error from `read_excel` with pandas 3.

### Why are the changes needed?

An argument was removed from `read_excel` in pandas 3:

- `date_parser`

### Does this PR introduce _any_ user-facing change?

Yes, it will behave more like pandas 3.

### How was this patch tested?

The existing tests.

### Was this patch authored or co-authored using generative AI tooling?

No.

Closes apache#54192 from ueshin/issues/SPARK-55409/read_excel.

Authored-by: Takuya Ueshin <ueshin@databricks.com>
Signed-off-by: Hyukjin Kwon <gurwls223@apache.org>
1 parent 4b1e50d commit a4a016d

File tree

1 file changed

+33
-21
lines changed

1 file changed

+33
-21
lines changed

python/pyspark/pandas/namespace.py

Lines changed: 33 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -52,6 +52,8 @@
5252
import pyarrow as pa
5353
import pyarrow.parquet as pq
5454

55+
from pyspark._globals import _NoValue, _NoValueType
56+
from pyspark.loose_version import LooseVersion
5557
from pyspark.sql import functions as F, Column as PySparkColumn
5658
from pyspark.sql.functions import pandas_udf
5759
from pyspark.sql.types import (
@@ -941,7 +943,7 @@ def read_excel(
941943
keep_default_na: bool = True,
942944
verbose: bool = False,
943945
parse_dates: Union[bool, List, Dict] = False,
944-
date_parser: Optional[Callable] = None,
946+
date_parser: Union[Optional[Callable], _NoValueType] = _NoValue,
945947
thousands: Optional[str] = None,
946948
comment: Optional[str] = None,
947949
skipfooter: int = 0,
@@ -1137,34 +1139,44 @@ def read_excel(
11371139
2 None NaN
11381140
"""
11391141

1142+
kwargs = dict(
1143+
header=header,
1144+
names=names,
1145+
index_col=index_col,
1146+
usecols=usecols,
1147+
dtype=dtype,
1148+
engine=engine,
1149+
converters=converters,
1150+
true_values=true_values,
1151+
false_values=false_values,
1152+
skiprows=skiprows,
1153+
na_values=na_values,
1154+
keep_default_na=keep_default_na,
1155+
verbose=verbose,
1156+
parse_dates=parse_dates,
1157+
thousands=thousands,
1158+
comment=comment,
1159+
skipfooter=skipfooter,
1160+
**kwds,
1161+
)
1162+
1163+
if LooseVersion(pd.__version__) < "3.0.0":
1164+
if date_parser is not _NoValue:
1165+
kwargs["date_parser"] = date_parser
1166+
else:
1167+
if date_parser is not _NoValue:
1168+
raise TypeError("The 'date_parser' keyword is not supported in pandas 3.0.0 and later.")
1169+
11401170
def pd_read_excel(
11411171
io_or_bin: Any,
11421172
sn: Union[str, int, List[Union[str, int]], None],
11431173
nr: Optional[int] = None,
11441174
) -> pd.DataFrame:
1145-
return pd.read_excel( # type: ignore[call-overload, misc]
1175+
return pd.read_excel( # type: ignore[return-value]
11461176
io=BytesIO(io_or_bin) if isinstance(io_or_bin, (bytes, bytearray)) else io_or_bin,
11471177
sheet_name=sn,
1148-
header=header,
1149-
names=names,
1150-
index_col=index_col,
1151-
usecols=usecols,
1152-
dtype=dtype,
1153-
engine=engine,
1154-
converters=converters,
1155-
true_values=true_values,
1156-
false_values=false_values,
1157-
skiprows=skiprows,
11581178
nrows=nr,
1159-
na_values=na_values,
1160-
keep_default_na=keep_default_na,
1161-
verbose=verbose,
1162-
parse_dates=parse_dates,
1163-
date_parser=date_parser,
1164-
thousands=thousands,
1165-
comment=comment,
1166-
skipfooter=skipfooter,
1167-
**kwds,
1179+
**kwargs,
11681180
)
11691181

11701182
if not isinstance(io, str):

0 commit comments

Comments
 (0)