Add IOU, CIOU and minor fixes to bounding boxes (#20635)
* Add compute affine matrix method and reformat some of the bounding box arguments

* Add rotation for boxes

* proper reshape of the rotation matrix

* iou and random rotation using affine

* bounding boxes iou

* - add encode and decode to deltas for bounding boxes
- add iou and ciou methods

* add api points for encode and decode methods of bounding boxes

* fix arg name and proper args for test_affine

* correct dtype mul
sineeli authored Dec 12, 2024
1 parent 5b2ba9a commit eec1fbd
Showing 14 changed files with 1,211 additions and 187 deletions.
12 changes: 12 additions & 0 deletions keras/api/_tf_keras/keras/utils/bounding_boxes/__init__.py
@@ -16,6 +12,18 @@
from keras.src.layers.preprocessing.image_preprocessing.bounding_boxes.converters import (
crop,
)
from keras.src.layers.preprocessing.image_preprocessing.bounding_boxes.converters import (
decode_deltas_to_boxes,
)
from keras.src.layers.preprocessing.image_preprocessing.bounding_boxes.converters import (
encode_box_to_deltas,
)
from keras.src.layers.preprocessing.image_preprocessing.bounding_boxes.converters import (
pad,
)
from keras.src.layers.preprocessing.image_preprocessing.bounding_boxes.iou import (
compute_ciou,
)
from keras.src.layers.preprocessing.image_preprocessing.bounding_boxes.iou import (
compute_iou,
)
12 changes: 12 additions & 0 deletions keras/api/utils/bounding_boxes/__init__.py
@@ -16,6 +12,18 @@
from keras.src.layers.preprocessing.image_preprocessing.bounding_boxes.converters import (
crop,
)
from keras.src.layers.preprocessing.image_preprocessing.bounding_boxes.converters import (
decode_deltas_to_boxes,
)
from keras.src.layers.preprocessing.image_preprocessing.bounding_boxes.converters import (
encode_box_to_deltas,
)
from keras.src.layers.preprocessing.image_preprocessing.bounding_boxes.converters import (
pad,
)
from keras.src.layers.preprocessing.image_preprocessing.bounding_boxes.iou import (
compute_ciou,
)
from keras.src.layers.preprocessing.image_preprocessing.bounding_boxes.iou import (
compute_iou,
)
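
Both `__init__.py` stubs above export the new `compute_iou` and `compute_ciou` utilities (alongside `encode_box_to_deltas` / `decode_deltas_to_boxes`). As a reference for what plain IoU measures, here is a small standalone NumPy sketch for `xyxy` boxes; it only illustrates the math and is not the Keras implementation (function and variable names are illustrative). CIoU additionally penalizes the distance between box centers and the aspect-ratio mismatch.

```python
import numpy as np

def iou_xyxy(boxes1, boxes2):
    """Pairwise IoU between two sets of boxes in [x1, y1, x2, y2] format."""
    # Intersection corners: broadcast (N, 1, 2) against (1, M, 2).
    lt = np.maximum(boxes1[:, None, :2], boxes2[None, :, :2])  # top-left
    rb = np.minimum(boxes1[:, None, 2:], boxes2[None, :, 2:])  # bottom-right
    wh = np.clip(rb - lt, 0.0, None)                           # clamp negative overlap to 0
    inter = wh[..., 0] * wh[..., 1]

    area1 = (boxes1[:, 2] - boxes1[:, 0]) * (boxes1[:, 3] - boxes1[:, 1])
    area2 = (boxes2[:, 2] - boxes2[:, 0]) * (boxes2[:, 3] - boxes2[:, 1])
    union = area1[:, None] + area2[None, :] - inter
    return inter / np.maximum(union, 1e-9)

boxes_a = np.array([[0.0, 0.0, 10.0, 10.0]])
boxes_b = np.array([[5.0, 5.0, 15.0, 15.0]])
print(iou_xyxy(boxes_a, boxes_b))  # ~0.1429 (intersection 25 / union 175)
```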
@@ -1,3 +1,5 @@
import math

from keras.src.backend import config as backend_config
from keras.src.layers.preprocessing.image_preprocessing.bounding_boxes.validation import ( # noqa: E501
densify_bounding_boxes,
@@ -314,3 +316,70 @@ def _unwrap_value_range(self, value_range, dtype="float32"):
min_value = self.backend.cast(min_value, dtype=dtype)
max_value = self.backend.cast(max_value, dtype=dtype)
return min_value, max_value

def _compute_affine_matrix(
self,
center_x,
center_y,
angle,
translate_x,
translate_y,
scale,
shear_x,
shear_y,
height,
width,
):
"""
# Scaling Shear Rotation
# [sx 0 0] [1 shx 0] [cos(θ) -sin(θ) 0]
# M = [0 sy 0] * [shy 1 0] * [sin(θ) cos(θ) 0]
# [0 0 1] [0 0 1] [0 0 1]
# a0 = sx * (cos(θ) + shx * sin(θ))
# a1 = sx * (-sin(θ) + shx * cos(θ))
# a2 = tx + cx - cx * a0 - cy * a1
# b0 = sy * (shy * cos(θ) + sin(θ))
# b1 = sy * (shy * -sin(θ) + cos(θ))
# b2 = ty + cy - cx * b0 - cy * b1
"""
ops = self.backend

degree_to_radian_factor = ops.convert_to_tensor(math.pi / 180.0)

angle = angle * degree_to_radian_factor
shear_x = shear_x * degree_to_radian_factor
shear_y = shear_y * degree_to_radian_factor

batch_size = ops.shape(angle)[0]
dtype = angle.dtype
width = ops.cast(width, dtype)
height = ops.cast(height, dtype)
cx = center_x * (width - 1)
cy = center_y * (height - 1)

cos_theta = ops.numpy.cos(angle)
sin_theta = ops.numpy.sin(angle)
shear_x = ops.numpy.tan(shear_x)
shear_y = ops.numpy.tan(shear_y)

a0 = scale * (cos_theta + shear_x * sin_theta)
a1 = scale * (-sin_theta + shear_x * cos_theta)
a2 = translate_x + cx - cx * a0 - cy * a1
b0 = scale * (shear_y * cos_theta + sin_theta)
b1 = scale * (shear_y * -sin_theta + cos_theta)
b2 = translate_y + cy - cx * b0 - cy * b1
affine_matrix = ops.numpy.concatenate(
[
a0[:, None],
a1[:, None],
a2[:, None],
b0[:, None],
b1[:, None],
b2[:, None],
ops.numpy.zeros((batch_size, 2)),
],
axis=1,
)

return affine_matrix
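
The docstring above gives closed-form coefficients for the composed Scaling * Shear * Rotation matrix. Below is a quick standalone NumPy check of those formulas; the parameter values are made up, and `sx`, `sy`, `shx`, `shy` are generic scale/shear terms for the check rather than the layer's actual arguments.

```python
import numpy as np

theta = np.deg2rad(30.0)                 # rotation angle
sx, sy = 1.2, 0.8                        # scale factors
shx, shy = 0.1, -0.2                     # shear terms (already tan(angle) in the diff)
cx, cy, tx, ty = 50.0, 40.0, 5.0, -3.0   # center and translation in pixels

scale = np.array([[sx, 0, 0], [0, sy, 0], [0, 0, 1]])
shear = np.array([[1, shx, 0], [shy, 1, 0], [0, 0, 1]])
rot = np.array([[np.cos(theta), -np.sin(theta), 0],
                [np.sin(theta),  np.cos(theta), 0],
                [0, 0, 1]])
rss = scale @ shear @ rot                # Scaling * Shear * Rotation, as in the docstring

# Full affine: translate to the center, apply RSS, translate back, then shift by (tx, ty).
t_neg = np.array([[1, 0, -cx], [0, 1, -cy], [0, 0, 1]])
t_pos = np.array([[1, 0, cx + tx], [0, 1, cy + ty], [0, 0, 1]])
full = t_pos @ rss @ t_neg

# Closed-form coefficients from the docstring.
a0 = sx * (np.cos(theta) + shx * np.sin(theta))
a1 = sx * (-np.sin(theta) + shx * np.cos(theta))
a2 = tx + cx - cx * a0 - cy * a1
b0 = sy * (shy * np.cos(theta) + np.sin(theta))
b1 = sy * (shy * -np.sin(theta) + np.cos(theta))
b2 = ty + cy - cx * b0 - cy * b1
closed_form = np.array([[a0, a1, a2], [b0, b1, b2], [0, 0, 1]])

assert np.allclose(full, closed_form)
print("closed-form coefficients match the composed matrices")
```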
@@ -2,6 +2,18 @@

from keras.src.utils import backend_utils

SUPPORTED_FORMATS = (
"xyxy",
"yxyx",
"xywh",
"center_xywh",
"center_yxhw",
"rel_xyxy",
"rel_yxyx",
"rel_xywh",
"rel_center_xywh",
)


class BoundingBox:
def __init__(self):
@@ -16,80 +28,6 @@ def convert_format(
width=None,
dtype="float32",
):
"""Converts `boxes` from one format to another.
Supported formats are:
- `"xyxy"`, also known as `corners` format. In this format the first
four axes represent `[left, top, right, bottom]` in that order.
- `"rel_xyxy"`. In this format, the axes are the same as `"xyxy"` but
the x coordinates are normalized using the image width, and the y
axes the image height. All values in `rel_xyxy` are in the range
`(0, 1)`.
- `"xywh"`. In this format the first four axes represent
`[left, top, width, height]`.
- `"rel_xywh". In this format the first four axes represent
[left, top, width, height], just like `"xywh"`. Unlike `"xywh"`,
the values are in the range (0, 1) instead of absolute pixel values.
- `"center_xyWH"`. In this format the first two coordinates represent
the x and y coordinates of the center of the bounding box, while the
last two represent the width and height of the bounding box.
- `"center_yxHW"`. In this format the first two coordinates represent
the y and x coordinates of the center of the bounding box, while the
last two represent the height and width of the bounding box.
- `"yxyx"`. In this format the first four axes represent
[top, left, bottom, right] in that order.
- `"rel_yxyx"`. In this format, the axes are the same as `"yxyx"` but
the x coordinates are normalized using the image width, and the y
axes the image height. All values in `rel_yxyx` are in the range
(0, 1).
Formats are case insensitive. It is recommended that you capitalize
width and height to maximize the visual difference between `"xyWH"`
and `"xyxy"`.
Relative formats, abbreviated `rel`, make use of the shapes of the
`images` passed. In these formats, the coordinates, widths, and heights
are all specified as percentages of the host image.
Example:
```python
boxes = {
"boxes": [TODO],
"labels": [TODO],
}
boxes_in_xywh = keras.utils.bounding_boxes.convert_format(
boxes,
source='xyxy',
target='xyWH'
)
```
Args:
boxes: tensor representing bounding boxes in the format specified in
the `source` parameter. `boxes` can optionally have extra
dimensions stacked on the final axis to store metadata. boxes
should be a 3D tensor, with the shape
`[batch_size, num_boxes, 4]`. Alternatively, boxes can be a
dictionary with key 'boxes' containing a tensor matching the
aforementioned spec.
source: One of `"xyxy"`, `"yxyx"`, `"xywh"`, `"center_xywh"`,
`"center_yxhw"`, `"rel_xyxy"`, "rel_yxyx", "rel_xywh",
"rel_center_xywh". Used to specify the original format of the
`boxes` parameter.
target: One of `"xyxy"`, `"yxyx"`, `"xywh"`, `"center_xywh"`,
`"center_yxhw"`, `"rel_xyxy"`, "rel_yxyx", "rel_xywh",
"rel_center_xywh". Used to specify the destination format of
the `boxes` parameter.
images: (Optional) a batch of images aligned with `boxes` on the
first axis. Should be at least 3 dimensions, with the first 3
dimensions representing: `[batch_size, height, width]`. Used in
some converters to compute relative pixel values of the bounding
box dimensions. Required when transforming from a rel format to
a non-rel format.
dtype: the data type to use when transforming the boxes, defaults to
`"float32"`.
"""
if isinstance(boxes, dict):
boxes["boxes"] = self.convert_format(
boxes["boxes"],
@@ -133,23 +71,29 @@ def convert_format(
)
source = source.lower()
target = target.lower()
if source not in to_xyxy_converters.keys():
if source not in SUPPORTED_FORMATS or target not in SUPPORTED_FORMATS:
raise ValueError(
f"Available source: {list(to_xyxy_converters.keys())}. "
f"Received: source={source}"
f"Invalid source or target format. "
f"Supported formats: {SUPPORTED_FORMATS}"
)
if target not in from_xyxy_converters.keys():

if (source.startswith("rel_") or target.startswith("rel_")) and (
width is None or height is None
):
raise ValueError(
f"Available target: {list(from_xyxy_converters.keys())}. "
f"Received: target={target}"
"convert_format() must receive `height` and `width` "
"transforming between relative and absolute formats."
f"convert_format() received source=`{source}`, "
f"target=`{target}, "
f"but height={height} and width={width}."
)
boxes = ops.cast(boxes, dtype)
if source == target:
return boxes
if height is not None:
height = ops.cast(height, dtype)
if width is not None:
width = ops.cast(width, dtype)
if height is not None:
height = ops.cast(height, dtype)

if source.startswith("rel_") and target.startswith("rel_"):
source = source.replace("rel_", "", 1)
@@ -160,19 +104,27 @@
return from_xyxy_converter(in_xyxy_boxes, height, width)
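
The `convert_format` docstring removed above defines the relative formats as coordinates normalized by image width and height. A minimal NumPy illustration of two of the conversions the method performs, with made-up box values and independent of the Keras code path:

```python
import numpy as np

# One batch of two boxes in absolute "xyxy" pixel coordinates.
boxes_xyxy = np.array([[[10.0, 20.0, 110.0, 220.0],
                        [30.0, 40.0, 60.0, 90.0]]])
height, width = 400.0, 640.0

# "xyxy" -> "rel_xyxy": x coordinates are divided by the image width,
# y coordinates by the image height, so all values land in (0, 1).
scale = np.array([width, height, width, height])
boxes_rel_xyxy = boxes_xyxy / scale

# "xyxy" -> "center_xywh": center point plus width and height of each box.
x1, y1, x2, y2 = np.split(boxes_xyxy, 4, axis=-1)
boxes_center_xywh = np.concatenate(
    [(x1 + x2) / 2, (y1 + y2) / 2, x2 - x1, y2 - y1], axis=-1
)
```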

def clip_to_image_size(
self, bounding_boxes, height=None, width=None, format="xyxy"
self,
bounding_boxes,
height=None,
width=None,
bounding_box_format="xyxy",
):
if format not in ("xyxy", "rel_xyxy"):
if bounding_box_format not in ("xyxy", "rel_xyxy"):
raise NotImplementedError
if format == "xyxy" and (height is None or width is None):
if bounding_box_format == "xyxy" and (height is None or width is None):
raise ValueError(
"`height` and `width` must be set if `format='xyxy'`."
)

ops = self.backend
boxes, labels = bounding_boxes["boxes"], bounding_boxes["labels"]
if width is not None:
width = ops.cast(width, boxes.dtype)
if height is not None:
height = ops.cast(height, boxes.dtype)

if format == "xyxy":
if bounding_box_format == "xyxy":
x1, y1, x2, y2 = ops.numpy.split(boxes, 4, axis=-1)
x1 = ops.numpy.clip(x1, 0, width)
y1 = ops.numpy.clip(y1, 0, height)
@@ -183,7 +135,7 @@ def clip_to_image_size(
areas = self._compute_area(boxes)
areas = ops.numpy.squeeze(areas, axis=-1)
labels = ops.numpy.where(areas > 0, labels, -1)
elif format == "rel_xyxy":
elif bounding_box_format == "rel_xyxy":
x1, y1, x2, y2 = ops.numpy.split(boxes, 4, axis=-1)
x1 = ops.numpy.clip(x1, 0.0, 1.0)
y1 = ops.numpy.clip(y1, 0.0, 1.0)
@@ -223,7 +175,6 @@ def affine(
center_x = 0.5
if center_y is None:
center_y = 0.5

matrix = self._compute_inverse_affine_matrix(
center_x,
center_y,
@@ -236,6 +187,7 @@
height,
width,
)
boxes = ops.cast(boxes, dtype=matrix.dtype)
transposed_matrix = ops.numpy.transpose(matrix[:, :2, :], [0, 2, 1])
points = boxes # [B, N, 4]
points = ops.numpy.stack(
@@ -445,7 +397,6 @@ def _compute_area(self, boxes, format="xyxy"):
heights = y2 - y1
return widths * heights

# Affine
def _compute_inverse_affine_matrix(
self,
center_x,
@@ -463,18 +414,16 @@ def _compute_inverse_affine_matrix(
ops = self.backend
batch_size = ops.shape(angle)[0]
dtype = angle.dtype
width = ops.cast(width, dtype)
height = ops.cast(height, dtype)

angle = -angle
shear_x = -shear_x
shear_y = -shear_y

cx = center_x * width
cy = center_y * height
cx = center_x * (width - 1)
cy = center_y * (height - 1)
rot = ops.numpy.multiply(angle, 1.0 / 180.0 * math.pi)
tx = -translate_x * width
ty = -translate_y * height
tx = -translate_x * (width - 1)
ty = -translate_y * (height - 1)
sx = ops.numpy.multiply(shear_x, 1.0 / 180.0 * math.pi)
sy = ops.numpy.multiply(shear_y, 1.0 / 180.0 * math.pi)

@@ -487,8 +436,8 @@

# Rotate Scale Shear (RSS) without scaling
a = ops.numpy.cos(rot_minus_sy) / cos_sy
b = -(a * tan_sx + ops.numpy.sin(rot))
c = ops.numpy.sin(rot_minus_sy) / cos_sy
b = a * tan_sx + ops.numpy.sin(rot)
c = -ops.numpy.sin(rot_minus_sy) / cos_sy
d = ops.numpy.cos(rot) - c * tan_sx

# Inverted rotation matrix with scale and shear