Skip to content

Commit 8ffd150

Browse files
ueshin authored and HyukjinKwon committed
[SPARK-55293][PS][TESTS][FOLLOW-UP] Avoid more old offset aliases
### What changes were proposed in this pull request?

This is a follow-up of #54077. Avoids more old offset aliases.

### Why are the changes needed?

There are still some places using the old offset aliases.

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

Fixed the related tests.

### Was this patch authored or co-authored using generative AI tooling?

No.

Closes #54095 from ueshin/issues/SPARK-55293/ye.

Authored-by: Takuya Ueshin <[email protected]>
Signed-off-by: Hyukjin Kwon <[email protected]>
1 parent 30ace9f commit 8ffd150

File tree

8 files changed

+17
-17
lines changed

8 files changed

+17
-17
lines changed

python/pyspark/pandas/datetimes.py

Lines changed: 4 additions & 4 deletions
Original file line number · Diff line number · Diff line change
@@ -517,7 +517,7 @@ def is_leap_year(self) -> "ps.Series":
517517
This method is available on Series with datetime values under
518518
the ``.dt`` accessor.
519519
520-
>>> dates_series = ps.Series(pd.date_range("2012-01-01", "2015-01-01", freq="Y"))
520+
>>> dates_series = ps.Series(pd.date_range("2012-01-01", "2015-01-01", freq="YE"))
521521
>>> dates_series
522522
0 2012-12-31
523523
1 2013-12-31
@@ -700,7 +700,7 @@ def round(self, freq: Union[str, DateOffset], *args: Any, **kwargs: Any) -> "ps.
700700
2 2018-01-01 12:01:00
701701
dtype: datetime64[ns]
702702
703-
>>> series.dt.round("H")
703+
>>> series.dt.round("h")
704704
0 2018-01-01 12:00:00
705705
1 2018-01-01 12:00:00
706706
2 2018-01-01 12:00:00
@@ -755,7 +755,7 @@ def floor(self, freq: Union[str, DateOffset], *args: Any, **kwargs: Any) -> "ps.
755755
2 2018-01-01 12:01:00
756756
dtype: datetime64[ns]
757757
758-
>>> series.dt.floor("H")
758+
>>> series.dt.floor("h")
759759
0 2018-01-01 11:00:00
760760
1 2018-01-01 12:00:00
761761
2 2018-01-01 12:00:00
@@ -810,7 +810,7 @@ def ceil(self, freq: Union[str, DateOffset], *args: Any, **kwargs: Any) -> "ps.S
810810
2 2018-01-01 12:01:00
811811
dtype: datetime64[ns]
812812
813-
>>> series.dt.ceil("H")
813+
>>> series.dt.ceil("h")
814814
0 2018-01-01 12:00:00
815815
1 2018-01-01 12:00:00
816816
2 2018-01-01 13:00:00

python/pyspark/pandas/indexes/datetimes.py

Lines changed: 1 addition & 1 deletion
Original file line number · Diff line number · Diff line change
@@ -505,7 +505,7 @@ def is_leap_year(self) -> Index:
505505
506506
Examples
507507
--------
508-
>>> idx = ps.date_range("2012-01-01", "2015-01-01", freq="Y") # doctest: +SKIP
508+
>>> idx = ps.date_range("2012-01-01", "2015-01-01", freq="YE") # doctest: +SKIP
509509
>>> idx.is_leap_year # doctest: +SKIP
510510
Index([True, False, False], dtype='bool')
511511
"""

python/pyspark/pandas/namespace.py

Lines changed: 2 additions & 2 deletions
Original file line number · Diff line number · Diff line change
@@ -1879,7 +1879,7 @@ def date_range(
18791879
18801880
Multiples are allowed
18811881
1882-
>>> ps.date_range(start='1/1/2018', periods=5, freq='3M') # doctest: +SKIP
1882+
>>> ps.date_range(start='1/1/2018', periods=5, freq='3ME') # doctest: +SKIP
18831883
DatetimeIndex(['2018-01-31', '2018-04-30', '2018-07-31', '2018-10-31',
18841884
'2019-01-31'],
18851885
dtype='datetime64[ns]', freq=None)
@@ -2087,7 +2087,7 @@ def timedelta_range(
20872087
The freq parameter specifies the frequency of the TimedeltaIndex.
20882088
Only fixed frequencies can be passed, non-fixed frequencies such as ‘M’ (month end) will raise.
20892089
2090-
>>> ps.timedelta_range(start='1 day', end='2 days', freq='6H')
2090+
>>> ps.timedelta_range(start='1 day', end='2 days', freq='6h')
20912091
... # doctest: +NORMALIZE_WHITESPACE
20922092
TimedeltaIndex(['1 days 00:00:00', '1 days 06:00:00', '1 days 12:00:00',
20932093
'1 days 18:00:00', '2 days 00:00:00'],

python/pyspark/pandas/resample.py

Lines changed: 1 addition & 1 deletion
Original file line number · Diff line number · Diff line change
@@ -324,7 +324,7 @@ def _downsample(self, f: str) -> DataFrame:
324324
# ]
325325
# index = pd.DatetimeIndex(dates)
326326
# pdf = pd.DataFrame(np.array([1,2,3]), index=index, columns=['A'])
327-
# pdf.resample('3Y').max()
327+
# pdf.resample('3YE').max()
328328
# A
329329
# 2012-12-31 2.0
330330
# 2015-12-31 NaN

python/pyspark/pandas/series.py

Lines changed: 4 additions & 4 deletions
Original file line number · Diff line number · Diff line change
@@ -7223,7 +7223,7 @@ def resample(
72237223
Downsample the series into 3 minute bins and sum the values
72247224
of the timestamps falling into a bin.
72257225
7226-
>>> series.resample('3T').sum().sort_index()
7226+
>>> series.resample('3min').sum().sort_index()
72277227
2000-01-01 00:00:00 3.0
72287228
2000-01-01 00:03:00 12.0
72297229
2000-01-01 00:06:00 21.0
@@ -7239,7 +7239,7 @@ def resample(
72397239
To include this value, close the right side of the bin interval as
72407240
illustrated in the example below this one.
72417241
7242-
>>> series.resample('3T', label='right').sum().sort_index()
7242+
>>> series.resample('3min', label='right').sum().sort_index()
72437243
2000-01-01 00:03:00 3.0
72447244
2000-01-01 00:06:00 12.0
72457245
2000-01-01 00:09:00 21.0
@@ -7248,7 +7248,7 @@ def resample(
72487248
Downsample the series into 3 minute bins as above, but close the right
72497249
side of the bin interval.
72507250
7251-
>>> series.resample('3T', label='right', closed='right').sum().sort_index()
7251+
>>> series.resample('3min', label='right', closed='right').sum().sort_index()
72527252
2000-01-01 00:00:00 0.0
72537253
2000-01-01 00:03:00 6.0
72547254
2000-01-01 00:06:00 15.0
@@ -7257,7 +7257,7 @@ def resample(
72577257
72587258
Upsample the series into 30 second bins.
72597259
7260-
>>> series.resample('30S').sum().sort_index()[0:5] # Select first 5 rows
7260+
>>> series.resample('30s').sum().sort_index()[0:5] # Select first 5 rows
72617261
2000-01-01 00:00:00 0.0
72627262
2000-01-01 00:00:30 0.0
72637263
2000-01-01 00:01:00 1.0

python/pyspark/pandas/tests/resample/test_error.py

Lines changed: 2 additions & 2 deletions
Original file line number · Diff line number · Diff line change
@@ -31,12 +31,12 @@ def test_resample_error(self):
3131
with self.assertRaisesRegex(
3232
NotImplementedError, "resample currently works only for DatetimeIndex"
3333
):
34-
psdf.resample("3Y").sum()
34+
psdf.resample("3YE").sum()
3535

3636
with self.assertRaisesRegex(
3737
NotImplementedError, "resample currently works only for DatetimeIndex"
3838
):
39-
psdf.id.resample("3Y").sum()
39+
psdf.id.resample("3YE").sum()
4040

4141
dates = [
4242
datetime.datetime(2012, 1, 2),

python/pyspark/pandas/tests/resample/test_frame.py

Lines changed: 2 additions & 2 deletions
Original file line number · Diff line number · Diff line change
@@ -132,9 +132,9 @@ def test_dataframe_resample(self):
132132
self._test_resample(self.pdf6, self.psdf6, ["29s", "10min", "3h"], "left", "right", "var")
133133

134134
with self.assertRaisesRegex(ValueError, "rule code YE-DEC is not supported"):
135-
self._test_resample(self.pdf2, self.psdf2, ["3A", "11ME", "D"], None, "left", "max")
135+
self._test_resample(self.pdf2, self.psdf2, ["3YE", "11ME", "D"], None, "left", "max")
136136
with self.assertRaisesRegex(ValueError, "rule code YE-DEC is not supported"):
137-
self._test_resample(self.pdf1, self.psdf1, ["3Y", "9ME", "17D"], None, None, "min")
137+
self._test_resample(self.pdf1, self.psdf1, ["3YE", "9ME", "17D"], None, None, "min")
138138

139139

140140
class ResampleFrameTests(ResampleFrameMixin, PandasOnSparkTestCase, TestUtils):

python/pyspark/pandas/tests/resample/test_series.py

Lines changed: 1 addition & 1 deletion
Original file line number · Diff line number · Diff line change
@@ -133,7 +133,7 @@ def test_series_resample(self):
133133
self._test_resample(self.pdf6.A, self.psdf6.A, ["111s"], "right", "right", "std")
134134

135135
with self.assertRaisesRegex(ValueError, "rule code YE-DEC is not supported"):
136-
self._test_resample(self.pdf1.A, self.psdf1.A, ["4Y"], "right", None, "min")
136+
self._test_resample(self.pdf1.A, self.psdf1.A, ["4YE"], "right", None, "min")
137137

138138

139139
class ResampleSeriesTests(ResampleSeriesMixin, PandasOnSparkTestCase, TestUtils):

0 commit comments

Comments (0)