@@ -1498,6 +1498,12 @@ class _BaseRandomSizedCrop(DualTransform):
14981498 for image resizing. Default: cv2.INTER_LINEAR.
14991499 mask_interpolation (OpenCV flag): Flag that is used to specify the interpolation
15001500 algorithm for mask resizing. Default: cv2.INTER_NEAREST.
1501+ area_for_downscale (Literal[None, "image", "image_mask"]): Controls automatic use of INTER_AREA interpolation
1502+ for downscaling. Options:
1503+ - None: No automatic interpolation selection, always use the specified interpolation method
1504+ - "image": Use INTER_AREA when downscaling images, retain specified interpolation for upscaling and masks
1505+ - "image_mask": Use INTER_AREA when downscaling both images and masks
1506+ Default: None.
15011507 p (float): Probability of applying the transform. Default: 1.0.
15021508
15031509 Targets:
@@ -1510,6 +1516,8 @@ class _BaseRandomSizedCrop(DualTransform):
15101516 This class is not meant to be used directly. Instead, use derived transforms
15111517 like RandomSizedCrop or RandomResizedCrop that implement specific crop selection
15121518 strategies.
1519+ When area_for_downscale is set, INTER_AREA interpolation will be used automatically for downscaling
1520+ (when the crop is taller or wider than the target size), which provides better quality for size reduction.
15131521
15141522 Examples:
15151523 >>> import numpy as np
@@ -1524,12 +1532,14 @@ class _BaseRandomSizedCrop(DualTransform):
15241532 ... custom_parameter=0.5,
15251533 ... interpolation=cv2.INTER_LINEAR,
15261534 ... mask_interpolation=cv2.INTER_NEAREST,
1535+ ... area_for_downscale="image",
15271536 ... p=1.0
15281537 ... ):
15291538 ... super().__init__(
15301539 ... size=size,
15311540 ... interpolation=interpolation,
15321541 ... mask_interpolation=mask_interpolation,
1542+ ... area_for_downscale=area_for_downscale,
15331543 ... p=p,
15341544 ... )
15351545 ... self.custom_parameter = custom_parameter
@@ -1560,7 +1570,7 @@ class _BaseRandomSizedCrop(DualTransform):
15601570 >>>
15611571 >>> # Create a pipeline with our custom transform
15621572 >>> transform = A.Compose(
1563- ... [CustomRandomCrop(size=(64, 64), custom_parameter=0.6)],
1573+ ... [CustomRandomCrop(size=(64, 64), custom_parameter=0.6, area_for_downscale="image" )],
15641574 ... bbox_params=A.BboxParams(format='pascal_voc', label_fields=['bbox_labels']),
15651575 ... keypoint_params=A.KeypointParams(format='xy', label_fields=['keypoint_labels'])
15661576 ... )
@@ -1604,6 +1614,7 @@ class InitSchema(BaseRandomSizedCropInitSchema):
16041614 cv2 .INTER_LANCZOS4 ,
16051615 cv2 .INTER_LINEAR_EXACT ,
16061616 ]
1617+ area_for_downscale : Literal [None , "image" , "image_mask" ]
16071618
16081619 def __init__ (
16091620 self ,
@@ -1626,12 +1637,39 @@ def __init__(
16261637 cv2 .INTER_LANCZOS4 ,
16271638 cv2 .INTER_LINEAR_EXACT ,
16281639 ] = cv2 .INTER_NEAREST ,
1640+ area_for_downscale : Literal [None , "image" , "image_mask" ] = None ,
16291641 p : float = 1.0 ,
16301642 ):
16311643 super ().__init__ (p = p )
16321644 self .size = size
16331645 self .interpolation = interpolation
16341646 self .mask_interpolation = mask_interpolation
1647+ self .area_for_downscale = area_for_downscale
1648+
def _get_interpolation_for_resize(self, crop_shape: tuple[int, int], target_type: str) -> int:
    """Select the interpolation flag used to resize a crop to ``self.size``.

    The configured ``interpolation`` (images) or ``mask_interpolation`` (masks)
    is returned unless the resize is a downscale and ``area_for_downscale``
    enables automatic ``cv2.INTER_AREA`` for the given target type.

    Args:
        crop_shape: Shape of the crop as (height, width).
        target_type: Either "image" or "mask"; selects the base interpolation.
            Any other value falls back to ``mask_interpolation``.

    Returns:
        OpenCV interpolation flag.

    """
    base_interpolation = self.interpolation if target_type == "image" else self.mask_interpolation

    crop_height, crop_width = crop_shape
    target_height, target_width = self.size

    # A resize counts as downscaling when the crop exceeds the target in either dimension.
    is_downscale = crop_height > target_height or crop_width > target_width
    if not is_downscale:
        # The downscale guard must apply to masks as well as images: the previous
        # condition `(is_downscale and image_case) or mask_case` let masks take
        # INTER_AREA even when upscaling.
        return base_interpolation

    if target_type == "image":
        use_area = self.area_for_downscale in ("image", "image_mask")
    else:
        use_area = target_type == "mask" and self.area_for_downscale == "image_mask"

    return cv2.INTER_AREA if use_area else base_interpolation
16351673
16361674 def apply (
16371675 self ,
@@ -1648,7 +1686,8 @@ def apply(
16481686
16491687 """
16501688 crop = fcrops .crop (img , * crop_coords )
1651- return fgeometric .resize (crop , self .size , self .interpolation )
1689+ interpolation = self ._get_interpolation_for_resize (crop .shape [:2 ], "image" )
1690+ return fgeometric .resize (crop , self .size , interpolation )
16521691
16531692 def apply_to_mask (
16541693 self ,
@@ -1665,7 +1704,8 @@ def apply_to_mask(
16651704
16661705 """
16671706 crop = fcrops .crop (mask , * crop_coords )
1668- return fgeometric .resize (crop , self .size , self .mask_interpolation )
1707+ interpolation = self ._get_interpolation_for_resize (crop .shape [:2 ], "mask" )
1708+ return fgeometric .resize (crop , self .size , interpolation )
16691709
16701710 def apply_to_bboxes (
16711711 self ,
@@ -1731,8 +1771,11 @@ def apply_to_images(
17311771 # First crop the volume using volume_crop_yx (reduces data size)
17321772 crop = fcrops .volume_crop_yx (images , * crop_coords )
17331773
1734- # Then resize the smaller cropped volume using decorated helper method
1735- return np .stack ([fgeometric .resize (crop [i ], self .size , self .interpolation ) for i in range (images .shape [0 ])])
1774+ # Get interpolation method based on crop dimensions
1775+ interpolation = self ._get_interpolation_for_resize (crop .shape [1 :3 ], "image" )
1776+
1777+ # Then resize the smaller cropped volume using the selected interpolation
1778+ return np .stack ([fgeometric .resize (crop [i ], self .size , interpolation ) for i in range (images .shape [0 ])])
17361779
17371780 def apply_to_volume (
17381781 self ,
@@ -1783,6 +1826,12 @@ class RandomSizedCrop(_BaseRandomSizedCrop):
17831826 mask_interpolation (OpenCV flag): Flag that is used to specify the interpolation algorithm for mask.
17841827 Should be one of: cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
17851828 Default: cv2.INTER_NEAREST.
1829+ area_for_downscale (Literal[None, "image", "image_mask"]): Controls automatic use of INTER_AREA interpolation
1830+ for downscaling. Options:
1831+ - None: No automatic interpolation selection, always use the specified interpolation method
1832+ - "image": Use INTER_AREA when downscaling images, retain specified interpolation for upscaling and masks
1833+ - "image_mask": Use INTER_AREA when downscaling both images and masks
1834+ Default: None.
17861835 p (float): Probability of applying the transform. Default: 1.0
17871836
17881837 Targets:
@@ -1799,6 +1848,8 @@ class RandomSizedCrop(_BaseRandomSizedCrop):
17991848 - Keypoints that end up outside the cropped area will be removed.
18001849 - This transform differs from RandomResizedCrop in that it allows more control over the crop size
18011850 through the 'min_max_height' parameter, rather than using a scale parameter.
1851+ - When area_for_downscale is set, INTER_AREA interpolation will be used automatically for downscaling
1852+ (when the crop is taller or wider than the target size), which provides better quality for size reduction.
18021853
18031854 Mathematical Details:
18041855 1. A random crop height h is sampled from the range [min_max_height[0], min_max_height[1]].
@@ -1828,6 +1879,7 @@ class RandomSizedCrop(_BaseRandomSizedCrop):
18281879 ... w2h_ratio=1.0,
18291880 ... interpolation=cv2.INTER_LINEAR,
18301881 ... mask_interpolation=cv2.INTER_NEAREST,
1882+ ... area_for_downscale="image", # Use INTER_AREA for image downscaling
18311883 ... p=1.0
18321884 ... ),
18331885 ... ], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['bbox_labels']),
@@ -1877,6 +1929,7 @@ class InitSchema(BaseTransformInitSchema):
18771929 min_max_height : OnePlusIntRangeType
18781930 w2h_ratio : Annotated [float , Field (gt = 0 )]
18791931 size : Annotated [tuple [int , int ], AfterValidator (check_range_bounds (1 , None ))]
1932+ area_for_downscale : Literal [None , "image" , "image_mask" ]
18801933
18811934 def __init__ (
18821935 self ,
@@ -1901,12 +1954,14 @@ def __init__(
19011954 cv2 .INTER_LANCZOS4 ,
19021955 cv2 .INTER_LINEAR_EXACT ,
19031956 ] = cv2 .INTER_NEAREST ,
1957+ area_for_downscale : Literal [None , "image" , "image_mask" ] = None ,
19041958 p : float = 1.0 ,
19051959 ):
19061960 super ().__init__ (
19071961 size = size ,
19081962 interpolation = interpolation ,
19091963 mask_interpolation = mask_interpolation ,
1964+ area_for_downscale = area_for_downscale ,
19101965 p = p ,
19111966 )
19121967 self .min_max_height = min_max_height
@@ -1960,6 +2015,12 @@ class RandomResizedCrop(_BaseRandomSizedCrop):
19602015 mask_interpolation (OpenCV flag): Flag that is used to specify the interpolation algorithm for mask.
19612016 Should be one of: cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
19622017 Default: cv2.INTER_NEAREST
2018+ area_for_downscale (Literal[None, "image", "image_mask"]): Controls automatic use of INTER_AREA interpolation
2019+ for downscaling. Options:
2020+ - None: No automatic interpolation selection, always use the specified interpolation method
2021+ - "image": Use INTER_AREA when downscaling images, retain specified interpolation for upscaling and masks
2022+ - "image_mask": Use INTER_AREA when downscaling both images and masks
2023+ Default: None.
19632024 p (float): Probability of applying the transform. Default: 1.0
19642025
19652026 Targets:
@@ -1976,6 +2037,8 @@ class RandomResizedCrop(_BaseRandomSizedCrop):
19762037 - Bounding boxes that end up fully outside the cropped area will be removed.
19772038 - Keypoints that end up outside the cropped area will be removed.
19782039 - After cropping, the result is resized to the specified size.
2040+ - When area_for_downscale is set, INTER_AREA interpolation will be used automatically for downscaling
2041+ (when the crop is taller or wider than the target size), which provides better quality for size reduction.
19792042
19802043 Mathematical Details:
19812044 1. A target area A is sampled from the range [scale[0] * input_area, scale[1] * input_area].
@@ -2009,6 +2072,7 @@ class RandomResizedCrop(_BaseRandomSizedCrop):
20092072 ... ratio=(0.75, 1.33), # Aspect ratio will vary from 3:4 to 4:3
20102073 ... interpolation=cv2.INTER_LINEAR,
20112074 ... mask_interpolation=cv2.INTER_NEAREST,
2075+ ... area_for_downscale="image", # Use INTER_AREA for image downscaling
20122076 ... p=1.0
20132077 ... ),
20142078 ... ], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['bbox_labels']),
@@ -2062,6 +2126,7 @@ class InitSchema(BaseTransformInitSchema):
20622126 cv2 .INTER_LANCZOS4 ,
20632127 cv2 .INTER_LINEAR_EXACT ,
20642128 ]
2129+ area_for_downscale : Literal [None , "image" , "image_mask" ]
20652130
20662131 def __init__ (
20672132 self ,
@@ -2086,12 +2151,14 @@ def __init__(
20862151 cv2 .INTER_LANCZOS4 ,
20872152 cv2 .INTER_LINEAR_EXACT ,
20882153 ] = cv2 .INTER_NEAREST ,
2154+ area_for_downscale : Literal [None , "image" , "image_mask" ] = None ,
20892155 p : float = 1.0 ,
20902156 ):
20912157 super ().__init__ (
20922158 size = size ,
20932159 interpolation = interpolation ,
20942160 mask_interpolation = mask_interpolation ,
2161+ area_for_downscale = area_for_downscale ,
20952162 p = p ,
20962163 )
20972164 self .scale = scale
0 commit comments