revisit gainmapmath constants

- The luminance function for the P3 gamut was using the weights of DCI-P3 instead of Display P3. This is corrected, and the gamut conversions involving Display P3 are updated accordingly.
- Increased the precision of some constants used in the csc, oetf and eotf functions.
- Map +inf of HDR-intent half-precision floating-point values to 49.2 instead of zero.

Fixes google#177

Test: ./ultrahdr_unit_test
Change-Id: Iec77b3d24adb7ad887a8f53d805eecc0c0160f28
ittiam-systems · Nov 8, 2024 · 582fa7b · 582fa7b
1 parent fd67a9a
commit 582fa7b
Show file tree

Hide file tree

Showing 8 changed files with 329 additions and 187 deletions.
diff --git a/examples/ultrahdr_app.cpp b/examples/ultrahdr_app.cpp
@@ -32,16 +32,22 @@
 
 #include "ultrahdr_api.h"
 
-const float BT601YUVtoRGBMatrix[9] = {
-    1, 0, 1.402, 1, (-0.202008 / 0.587), (-0.419198 / 0.587), 1.0, 1.772, 0.0};
+const float DisplayP3YUVtoRGBMatrix[9] = {
+    1, 0, 1.542, 1, (-0.146023 / 0.6917), (-0.353118 / 0.6917), 1.0, 1.8414, 0.0};
 const float BT709YUVtoRGBMatrix[9] = {
     1, 0, 1.5748, 1, (-0.13397432 / 0.7152), (-0.33480248 / 0.7152), 1.0, 1.8556, 0.0};
 const float BT2020YUVtoRGBMatrix[9] = {
     1, 0, 1.4746, 1, (-0.11156702 / 0.6780), (-0.38737742 / 0.6780), 1, 1.8814, 0};
 
-const float BT601RGBtoYUVMatrix[9] = {
-    0.299,           0.587, 0.114, (-0.299 / 1.772), (-0.587 / 1.772), 0.5, 0.5, (-0.587 / 1.402),
-    (-0.114 / 1.402)};
+const float DisplayP3RGBtoYUVMatrix[9] = {0.229,
+                                          0.6917,
+                                          0.0793,
+                                          (-0.229 / 1.8414),
+                                          (-0.6917 / 1.8414),
+                                          0.5,
+                                          0.5,
+                                          (-0.6917 / 1.542),
+                                          (-0.0793 / 1.542)};
 const float BT709RGBtoYUVMatrix[9] = {0.2126,
                                       0.7152,
                                       0.0722,
@@ -849,7 +855,7 @@ bool UltraHdrAppInput::convertP010ToRGBImage() {
   } else if (mHdrCg == UHDR_CG_BT_2100) {
     coeffs = BT2020YUVtoRGBMatrix;
   } else if (mHdrCg == UHDR_CG_DISPLAY_P3) {
-    coeffs = BT601YUVtoRGBMatrix;
+    coeffs = DisplayP3YUVtoRGBMatrix;
   } else {
     std::cerr << "color matrix not present for gamut " << mHdrCg << " using BT2020Matrix"
               << std::endl;
@@ -939,13 +945,13 @@ bool UltraHdrAppInput::convertYuv420ToRGBImage() {
   uint8_t* u = static_cast<uint8_t*>(mRawYuv420Image.planes[UHDR_PLANE_U]);
   uint8_t* v = static_cast<uint8_t*>(mRawYuv420Image.planes[UHDR_PLANE_V]);
 
-  const float* coeffs = BT601YUVtoRGBMatrix;
+  const float* coeffs = DisplayP3YUVtoRGBMatrix;
   if (mSdrCg == UHDR_CG_BT_709) {
     coeffs = BT709YUVtoRGBMatrix;
   } else if (mSdrCg == UHDR_CG_BT_2100) {
     coeffs = BT2020YUVtoRGBMatrix;
   } else if (mSdrCg == UHDR_CG_DISPLAY_P3) {
-    coeffs = BT601YUVtoRGBMatrix;
+    coeffs = DisplayP3YUVtoRGBMatrix;
   } else {
     std::cerr << "color matrix not present for gamut " << mSdrCg << " using BT601Matrix"
               << std::endl;
@@ -1009,13 +1015,13 @@ bool UltraHdrAppInput::convertRgba8888ToYUV444Image() {
   uint8_t* uData = static_cast<uint8_t*>(mDecodedUhdrYuv444Image.planes[UHDR_PLANE_U]);
   uint8_t* vData = static_cast<uint8_t*>(mDecodedUhdrYuv444Image.planes[UHDR_PLANE_V]);
 
-  const float* coeffs = BT601RGBtoYUVMatrix;
+  const float* coeffs = DisplayP3RGBtoYUVMatrix;
   if (mDecodedUhdrRgbImage.cg == UHDR_CG_BT_709) {
     coeffs = BT709RGBtoYUVMatrix;
   } else if (mDecodedUhdrRgbImage.cg == UHDR_CG_BT_2100) {
     coeffs = BT2020RGBtoYUVMatrix;
   } else if (mDecodedUhdrRgbImage.cg == UHDR_CG_DISPLAY_P3) {
-    coeffs = BT601RGBtoYUVMatrix;
+    coeffs = DisplayP3RGBtoYUVMatrix;
   } else {
     std::cerr << "color matrix not present for gamut " << mDecodedUhdrRgbImage.cg
               << " using BT601Matrix" << std::endl;
@@ -1063,7 +1069,7 @@ bool UltraHdrAppInput::convertRgba1010102ToYUV444Image() {
   } else if (mDecodedUhdrRgbImage.cg == UHDR_CG_BT_2100) {
     coeffs = BT2020RGBtoYUVMatrix;
   } else if (mDecodedUhdrRgbImage.cg == UHDR_CG_DISPLAY_P3) {
-    coeffs = BT601RGBtoYUVMatrix;
+    coeffs = DisplayP3RGBtoYUVMatrix;
   } else {
     std::cerr << "color matrix not present for gamut " << mDecodedUhdrRgbImage.cg
               << " using BT2020Matrix" << std::endl;

diff --git a/lib/include/ultrahdr/gainmapmath.h b/lib/include/ultrahdr/gainmapmath.h
@@ -347,6 +347,13 @@ Color pqInvOetfLUT(Color e_gamma);
 constexpr int32_t kPqInvOETFPrecision = 12;
 constexpr int32_t kPqInvOETFNumEntries = 1 << kPqInvOETFPrecision;
 
+////////////////////////////////////////////////////////////////////////////////
+// BT.601 transformations
+
+// BT.601 rgb <-> yuv  conversion
+Color Bt601RgbToYuv(Color e_gamma);
+Color Bt601YuvToRgb(Color e_gamma);
+
 // util class to prepare look up tables for oetf/eotf functions
 class LookUpTable {
  public:
@@ -408,20 +415,26 @@ Color bt2100ToP3(Color e);
 
 // convert between yuv encodings
 extern const std::array<float, 9> kYuvBt709ToBt601;
+extern const std::array<float, 9> kYuvBt709ToDisplayP3;
 extern const std::array<float, 9> kYuvBt709ToBt2100;
-extern const std::array<float, 9> kYuvBt601ToBt709;
-extern const std::array<float, 9> kYuvBt601ToBt2100;
-extern const std::array<float, 9> kYuvBt2100ToBt709;
+extern const std::array<float, 9> kYuvDisplayP3ToBt601;
+extern const std::array<float, 9> kYuvDisplayP3ToBt709;
+extern const std::array<float, 9> kYuvDisplayP3ToBt2100;
 extern const std::array<float, 9> kYuvBt2100ToBt601;
+extern const std::array<float, 9> kYuvBt2100ToBt709;
+extern const std::array<float, 9> kYuvBt2100ToDisplayP3;
 
 #if (defined(UHDR_ENABLE_INTRINSICS) && (defined(__ARM_NEON__) || defined(__ARM_NEON)))
 
 extern const int16_t kYuv709To601_coeffs_neon[8];
+extern const int16_t kYuv709ToP3_coeffs_neon[8];
 extern const int16_t kYuv709To2100_coeffs_neon[8];
-extern const int16_t kYuv601To709_coeffs_neon[8];
-extern const int16_t kYuv601To2100_coeffs_neon[8];
-extern const int16_t kYuv2100To709_coeffs_neon[8];
+extern const int16_t kYuvP3To601_coeffs_neon[8];
+extern const int16_t kYuvP3To709_coeffs_neon[8];
+extern const int16_t kYuvP3To2100_coeffs_neon[8];
 extern const int16_t kYuv2100To601_coeffs_neon[8];
+extern const int16_t kYuv2100To709_coeffs_neon[8];
+extern const int16_t kYuv2100ToP3_coeffs_neon[8];
 
 /*
  * The Y values are provided at half the width of U & V values to allow use of the widening
@@ -559,10 +572,18 @@ static inline Color clampPixelFloatLinear(Color e) {
   return {{{clampPixelFloatLinear(e.r), clampPixelFloatLinear(e.g), clampPixelFloatLinear(e.b)}}};
 }
 
+static float mapNonFiniteFloats(float val) {
+  if (std::isinf(val)) {
+    return val > 0 ? kMaxPixelFloatHdrLinear : 0.0f;
+  }
+  // nan
+  return 0.0f;
+}
+
 static inline Color sanitizePixel(Color e) {
-  float r = std::isfinite(e.r) ? clampPixelFloatLinear(e.r) : 0.0f;
-  float g = std::isfinite(e.g) ? clampPixelFloatLinear(e.g) : 0.0f;
-  float b = std::isfinite(e.b) ? clampPixelFloatLinear(e.b) : 0.0f;
+  float r = std::isfinite(e.r) ? clampPixelFloatLinear(e.r) : mapNonFiniteFloats(e.r);
+  float g = std::isfinite(e.g) ? clampPixelFloatLinear(e.g) : mapNonFiniteFloats(e.g);
+  float b = std::isfinite(e.b) ? clampPixelFloatLinear(e.b) : mapNonFiniteFloats(e.b);
   return {{{r, g, b}}};
 }
 

diff --git a/lib/include/ultrahdr/ultrahdrcommon.h b/lib/include/ultrahdr/ultrahdrcommon.h
@@ -159,6 +159,8 @@
 
 static const uhdr_error_info_t g_no_error = {UHDR_CODEC_OK, 0, ""};
 
+static const int UHDR_CG_BT_601 = 3; /**< BT.601 */
+
 namespace ultrahdr {
 
 // ===============================================================================================

diff --git a/lib/src/dsp/arm/gainmapmath_neon.cpp b/lib/src/dsp/arm/gainmapmath_neon.cpp
@@ -35,47 +35,68 @@ namespace ultrahdr {
 // {Y1, Y2, U1, U2, V1, V2, 0, 0}
 
 // Yuv Bt709 -> Yuv Bt601
-// Y' = (1.0f * Y) + ( 0.101579f * U) + ( 0.196076f * V)
-// U' = (0.0f * Y) + ( 0.989854f * U) + (-0.110653f * V)
-// V' = (0.0f * Y) + (-0.072453f * U) + ( 0.983398f * V)
+// Y' = (1.0 * Y) + ( 0.101579 * U) + ( 0.196076 * V)
+// U' = (0.0 * Y) + ( 0.989854 * U) + (-0.110653 * V)
+// V' = (0.0 * Y) + (-0.072453 * U) + ( 0.983398 * V)
 ALIGNED(16)
 const int16_t kYuv709To601_coeffs_neon[8] = {1664, 3213, 16218, -1813, -1187, 16112, 0, 0};
 
+// Yuv Bt709 -> Display P3
+// Y' = (1.0 * Y) + ( 0.017545 * U) + ( 0.03677 * V)
+// U' = (0.0 * Y) + ( 0.998169 * U) + (-0.019968 * V)
+// V' = (0.0 * Y) + (-0.011378 * U) + ( 0.997393 * V)
+ALIGNED(16)
+const int16_t kYuv709ToP3_coeffs_neon[8] = {287, 602, 16354, -327, -186, 16341, 0, 0};
+
 // Yuv Bt709 -> Yuv Bt2100
 // Y' = (1.0f * Y) + (-0.016969f * U) + ( 0.096312f * V)
 // U' = (0.0f * Y) + ( 0.995306f * U) + (-0.051192f * V)
 // V' = (0.0f * Y) + ( 0.011507f * U) + ( 1.002637f * V)
 ALIGNED(16)
 const int16_t kYuv709To2100_coeffs_neon[8] = {-278, 1578, 16307, -839, 189, 16427, 0, 0};
 
-// Yuv Bt601 -> Yuv Bt709
-// Y' = (1.0f * Y) + (-0.118188f * U) + (-0.212685f * V),
-// U' = (0.0f * Y) + ( 1.018640f * U) + ( 0.114618f * V),
-// V' = (0.0f * Y) + ( 0.075049f * U) + ( 1.025327f * V);
+// Yuv Display P3 -> Yuv Bt601
+// Y' = (1.0 * Y) + ( 0.086028 * U) + ( 0.161445 * V)
+// U' = (0.0 * Y) + ( 0.990631 * U) + (-0.091109 * V)
+// V' = (0.0 * Y) + (-0.061361 * U) + ( 0.98474 * V)
 ALIGNED(16)
-const int16_t kYuv601To709_coeffs_neon[8] = {-1936, -3485, 16689, 1878, 1230, 16799, 0, 0};
+const int16_t kYuvP3To601_coeffs_neon[8] = {1409, 2645, 16230, -1493, -1005, 16134, 0, 0};
 
-// Yuv Bt601 -> Yuv Bt2100
-// Y' = (1.0f * Y) + (-0.128245f * U) + (-0.115879f * V)
-// U' = (0.0f * Y) + ( 1.010016f * U) + ( 0.061592f * V)
-// V' = (0.0f * Y) + ( 0.086969f * U) + ( 1.029350f * V)
+// Yuv Display P3 -> Yuv Bt709
+// Y' = (1.0 * Y) + (-0.018002 * U) + (-0.037226 * V)
+// U' = (0.0 * Y) + ( 1.002063 * U) + ( 0.020061 * V)
+// V' = (0.0 * Y) + ( 0.011431 * U) + ( 1.002843 * V)
 ALIGNED(16)
-const int16_t kYuv601To2100_coeffs_neon[8] = {-2101, -1899, 16548, 1009, 1425, 16865, 0, 0};
+const int16_t kYuvP3To709_coeffs_neon[8] = {-295, -610, 16418, 329, 187, 16431, 0, 0};
 
-// Yuv Bt2100 -> Yuv Bt709
-// Y' = (1.0f * Y) + ( 0.018149f * U) + (-0.095132f * V)
-// U' = (0.0f * Y) + ( 1.004123f * U) + ( 0.051267f * V)
-// V' = (0.0f * Y) + (-0.011524f * U) + ( 0.996782f * V)
+// Yuv Display P3 -> Yuv Bt2100
+// Y' = (1.0 * Y) + (-0.033905 * U) + ( 0.059019 * V)
+// U' = (0.0 * Y) + ( 0.996774 * U) + ( -0.03137 * V)
+// V' = (0.0 * Y) + ( 0.022992 * U) + ( 1.005718 * V)
 ALIGNED(16)
-const int16_t kYuv2100To709_coeffs_neon[8] = {297, -1559, 16452, 840, -189, 16331, 0, 0};
+const int16_t kYuvP3To2100_coeffs_neon[8] = {-555, 967, 16331, -514, 377, 16478, 0, 0};
 
 // Yuv Bt2100 -> Yuv Bt601
-// Y' = (1.0f * Y) + ( 0.117887f * U) + ( 0.105521f * V)
-// U' = (0.0f * Y) + ( 0.995211f * U) + (-0.059549f * V)
-// V' = (0.0f * Y) + (-0.084085f * U) + ( 0.976518f * V)
+// Y' = (1.0 * Y) + ( 0.117887 * U) + ( 0.105521 * V)
+// U' = (0.0 * Y) + ( 0.995211 * U) + (-0.059549 * V)
+// V' = (0.0 * Y) + (-0.084085 * U) + ( 0.976518 * V)
 ALIGNED(16)
 const int16_t kYuv2100To601_coeffs_neon[8] = {1931, 1729, 16306, -976, -1378, 15999, 0, 0};
 
+// Yuv Bt2100 -> Yuv Bt709
+// Y' = (1.0 * Y) + ( 0.018149 * U) + (-0.095132 * V)
+// U' = (0.0 * Y) + ( 1.004123 * U) + ( 0.051267 * V)
+// V' = (0.0 * Y) + (-0.011524 * U) + ( 0.996782 * V)
+ALIGNED(16)
+const int16_t kYuv2100To709_coeffs_neon[8] = {297, -1559, 16452, 840, -189, 16331, 0, 0};
+
+// Yuv Bt2100 -> Yuv Display P3
+// Y' = (1.0 * Y) + ( 0.035343 * U) + ( -0.057581 * V)
+// U' = (0.0 * Y) + ( 1.002515 * U) + ( 0.03127 * V)
+// V' = (0.0 * Y) + (-0.022919 * U) + ( 0.9936 * V)
+ALIGNED(16)
+const int16_t kYuv2100ToP3_coeffs_neon[8] = {579, -943, 16425, 512, -376, 16279, 0, 0};
+
 static inline int16x8_t yConversion_neon(uint8x8_t y, int16x8_t u, int16x8_t v, int16x8_t coeffs) {
   int32x4_t lo = vmull_lane_s16(vget_low_s16(u), vget_low_s16(coeffs), 0);
   int32x4_t hi = vmull_lane_s16(vget_high_s16(u), vget_low_s16(coeffs), 0);
@@ -240,11 +261,14 @@ uhdr_error_info_t convertYuv_neon(uhdr_raw_image_t* image, uhdr_color_gamut_t sr
 
   switch (src_encoding) {
     case UHDR_CG_BT_709:
-      switch (dst_encoding) {
+      switch ((int)dst_encoding) {
+        case UHDR_CG_BT_601:
+          coeffs = kYuv709To601_coeffs_neon;
+          break;
         case UHDR_CG_BT_709:
           return status;
         case UHDR_CG_DISPLAY_P3:
-          coeffs = kYuv709To601_coeffs_neon;
+          coeffs = kYuv709ToP3_coeffs_neon;
           break;
         case UHDR_CG_BT_2100:
           coeffs = kYuv709To2100_coeffs_neon;
@@ -258,14 +282,17 @@ uhdr_error_info_t convertYuv_neon(uhdr_raw_image_t* image, uhdr_color_gamut_t sr
       }
       break;
     case UHDR_CG_DISPLAY_P3:
-      switch (dst_encoding) {
+      switch ((int)dst_encoding) {
+        case UHDR_CG_BT_601:
+          coeffs = kYuvP3To601_coeffs_neon;
+          break;
         case UHDR_CG_BT_709:
-          coeffs = kYuv601To709_coeffs_neon;
+          coeffs = kYuvP3To709_coeffs_neon;
           break;
         case UHDR_CG_DISPLAY_P3:
           return status;
         case UHDR_CG_BT_2100:
-          coeffs = kYuv601To2100_coeffs_neon;
+          coeffs = kYuvP3To2100_coeffs_neon;
           break;
         default:
           status.error_code = UHDR_CODEC_INVALID_PARAM;
@@ -276,12 +303,15 @@ uhdr_error_info_t convertYuv_neon(uhdr_raw_image_t* image, uhdr_color_gamut_t sr
       }
       break;
     case UHDR_CG_BT_2100:
-      switch (dst_encoding) {
+      switch ((int)dst_encoding) {
+        case UHDR_CG_BT_601:
+          coeffs = kYuv2100To601_coeffs_neon;
+          break;
         case UHDR_CG_BT_709:
           coeffs = kYuv2100To709_coeffs_neon;
           break;
         case UHDR_CG_DISPLAY_P3:
-          coeffs = kYuv2100To601_coeffs_neon;
+          coeffs = kYuv2100ToP3_coeffs_neon;
           break;
         case UHDR_CG_BT_2100:
           return status;