Updated blend function to an optimized 8-bit calculation

- Efficient color blend calculation in as few operations as possible.
- Omitting the min / max checks makes it faster on average.
- Using 8 bits for the "blend" variable does not significantly influence the resulting color; the transition points are shifted slightly, but the results are very good (and better than the original 16-bit version, which used the old FastLED math with improper rounding).
- Updated drawCircle and drawLine to use 8-bit values directly instead of 16-bit values with a shift.
DedeHai · Nov 21, 2024 · 5069e50 · 5069e50
1 parent 633d0c6
commit 5069e50
Show file tree

Hide file tree

Showing 3 changed files with 34 additions and 43 deletions.
diff --git a/wled00/FX_2Dfcn.cpp b/wled00/FX_2Dfcn.cpp
@@ -513,25 +513,25 @@ void Segment::drawCircle(uint16_t cx, uint16_t cy, uint8_t radius, uint32_t col,
     unsigned oldFade = 0;
     while (x < y) {
       float yf = sqrtf(float(rsq - x*x)); // needs to be floating point
-      uint16_t fade = float(0xFFFF) * (ceilf(yf) - yf); // how much color to keep
+      uint16_t fade = float(0xFF) * (ceilf(yf) - yf); // how much color to keep
       if (oldFade > fade) y--;
       oldFade = fade;
-      setPixelColorXY(cx+x, cy+y, color_blend16(col, getPixelColorXY(cx+x, cy+y), fade));
-      setPixelColorXY(cx-x, cy+y, color_blend16(col, getPixelColorXY(cx-x, cy+y), fade));
-      setPixelColorXY(cx+x, cy-y, color_blend16(col, getPixelColorXY(cx+x, cy-y), fade));
-      setPixelColorXY(cx-x, cy-y, color_blend16(col, getPixelColorXY(cx-x, cy-y), fade));
-      setPixelColorXY(cx+y, cy+x, color_blend16(col, getPixelColorXY(cx+y, cy+x), fade));
-      setPixelColorXY(cx-y, cy+x, color_blend16(col, getPixelColorXY(cx-y, cy+x), fade));
-      setPixelColorXY(cx+y, cy-x, color_blend16(col, getPixelColorXY(cx+y, cy-x), fade));
-      setPixelColorXY(cx-y, cy-x, color_blend16(col, getPixelColorXY(cx-y, cy-x), fade));
-      setPixelColorXY(cx+x, cy+y-1, color_blend16(getPixelColorXY(cx+x, cy+y-1), col, fade));
-      setPixelColorXY(cx-x, cy+y-1, color_blend16(getPixelColorXY(cx-x, cy+y-1), col, fade));
-      setPixelColorXY(cx+x, cy-y+1, color_blend16(getPixelColorXY(cx+x, cy-y+1), col, fade));
-      setPixelColorXY(cx-x, cy-y+1, color_blend16(getPixelColorXY(cx-x, cy-y+1), col, fade));
-      setPixelColorXY(cx+y-1, cy+x, color_blend16(getPixelColorXY(cx+y-1, cy+x), col, fade));
-      setPixelColorXY(cx-y+1, cy+x, color_blend16(getPixelColorXY(cx-y+1, cy+x), col, fade));
-      setPixelColorXY(cx+y-1, cy-x, color_blend16(getPixelColorXY(cx+y-1, cy-x), col, fade));
-      setPixelColorXY(cx-y+1, cy-x, color_blend16(getPixelColorXY(cx-y+1, cy-x), col, fade));
+      setPixelColorXY(cx+x, cy+y, color_blend(col, getPixelColorXY(cx+x, cy+y), fade));
+      setPixelColorXY(cx-x, cy+y, color_blend(col, getPixelColorXY(cx-x, cy+y), fade));
+      setPixelColorXY(cx+x, cy-y, color_blend(col, getPixelColorXY(cx+x, cy-y), fade));
+      setPixelColorXY(cx-x, cy-y, color_blend(col, getPixelColorXY(cx-x, cy-y), fade));
+      setPixelColorXY(cx+y, cy+x, color_blend(col, getPixelColorXY(cx+y, cy+x), fade));
+      setPixelColorXY(cx-y, cy+x, color_blend(col, getPixelColorXY(cx-y, cy+x), fade));
+      setPixelColorXY(cx+y, cy-x, color_blend(col, getPixelColorXY(cx+y, cy-x), fade));
+      setPixelColorXY(cx-y, cy-x, color_blend(col, getPixelColorXY(cx-y, cy-x), fade));
+      setPixelColorXY(cx+x, cy+y-1, color_blend(getPixelColorXY(cx+x, cy+y-1), col, fade));
+      setPixelColorXY(cx-x, cy+y-1, color_blend(getPixelColorXY(cx-x, cy+y-1), col, fade));
+      setPixelColorXY(cx+x, cy-y+1, color_blend(getPixelColorXY(cx+x, cy-y+1), col, fade));
+      setPixelColorXY(cx-x, cy-y+1, color_blend(getPixelColorXY(cx-x, cy-y+1), col, fade));
+      setPixelColorXY(cx+y-1, cy+x, color_blend(getPixelColorXY(cx+y-1, cy+x), col, fade));
+      setPixelColorXY(cx-y+1, cy+x, color_blend(getPixelColorXY(cx-y+1, cy+x), col, fade));
+      setPixelColorXY(cx+y-1, cy-x, color_blend(getPixelColorXY(cx+y-1, cy-x), col, fade));
+      setPixelColorXY(cx-y+1, cy-x, color_blend(getPixelColorXY(cx-y+1, cy-x), col, fade));
       x++;
     }
   } else {
@@ -608,13 +608,13 @@ void Segment::drawLine(uint16_t x0, uint16_t y0, uint16_t x1, uint16_t y1, uint3
     float gradient = x1-x0 == 0 ? 1.0f : float(y1-y0) / float(x1-x0);
     float intersectY = y0;
     for (int x = x0; x <= x1; x++) {
-      uint16_t keep = float(0xFFFF) * (intersectY-int(intersectY)); // how much color to keep
-      uint16_t seep = 0xFFFF - keep; // how much background to keep
+      uint8_t keep = float(0xFF) * (intersectY-int(intersectY)); // how much color to keep
+      uint8_t seep = 0xFF - keep; // how much background to keep
       int y = int(intersectY);
       if (steep) std::swap(x,y);  // temporaryly swap if steep
       // pixel coverage is determined by fractional part of y co-ordinate
-      setPixelColorXY(x, y, color_blend16(c, getPixelColorXY(x, y), keep));
-      setPixelColorXY(x+int(steep), y+int(!steep), color_blend16(c, getPixelColorXY(x+int(steep), y+int(!steep)), seep));
+      setPixelColorXY(x, y, color_blend(c, getPixelColorXY(x, y), keep));
+      setPixelColorXY(x+int(steep), y+int(!steep), color_blend(c, getPixelColorXY(x+int(steep), y+int(!steep)), seep));
       intersectY += gradient;
       if (steep) std::swap(x,y);  // restore if steep
     }

diff --git a/wled00/colors.cpp b/wled00/colors.cpp
@@ -5,26 +5,18 @@
  */
 
 /*
- * base color blend function, used for 8bit and 16bit
+ * color blend function, based on FastLED blend function
+ * the calculation for each color is: result = (A*(amountOfA) + A + B*(amountOfB) + B) / 256 with amountOfA = 255 - amountOfB
  */
-uint32_t color_blend_base(uint32_t color1, uint32_t color2, uint16_t blend) {
-  if(blend == 0) return color1;
-  uint32_t w1 = W(color1);
-  uint32_t r1 = R(color1);
-  uint32_t g1 = G(color1);
-  uint32_t b1 = B(color1);
-
-  uint32_t w2 = W(color2);
-  uint32_t r2 = R(color2);
-  uint32_t g2 = G(color2);
-  uint32_t b2 = B(color2);
-
-  uint32_t w3 = ((w2 * blend) + (w1 * (0xFFFF - blend))) >> 16;
-  uint32_t r3 = ((r2 * blend) + (r1 * (0xFFFF - blend))) >> 16;
-  uint32_t g3 = ((g2 * blend) + (g1 * (0xFFFF - blend))) >> 16;
-  uint32_t b3 = ((b2 * blend) + (b1 * (0xFFFF - blend))) >> 16;
-
-  return RGBW32(r3, g3, b3, w3);
+uint32_t color_blend(uint32_t color1, uint32_t color2, uint8_t blend) {
+  // min / max blend checking is omitted: calls with 0 or 255 are rare, checking lowers overall performance
+  uint32_t rb1 = color1 & 0x00FF00FF;
+  uint32_t wg1 = (color1>>8) & 0x00FF00FF;
+  uint32_t rb2 = color2 & 0x00FF00FF;
+  uint32_t wg2 = (color2>>8) & 0x00FF00FF;
+  uint32_t rb3 = ((((rb1 << 8) | rb2) + (rb2 * blend) - (rb1 * blend)) >> 8) & 0x00FF00FF;
+  uint32_t wg3 = ((((wg1 << 8) | wg2) + (wg2 * blend) - (wg1 * blend))) & 0xFF00FF00;
+  return rb3 | wg3;
 }
 
 /*

diff --git a/wled00/fcn_declare.h b/wled00/fcn_declare.h
@@ -78,9 +78,8 @@ class NeoGammaWLEDMethod {
 };
 #define gamma32(c) NeoGammaWLEDMethod::Correct32(c)
 #define gamma8(c)  NeoGammaWLEDMethod::rawGamma8(c)
-[[gnu::hot]] uint32_t color_blend_base(uint32_t c1, uint32_t c2 , uint16_t blend);
-inline uint32_t color_blend16(uint32_t c1, uint32_t c2, uint16_t b) { if (b == 0xFFFF) return c2; return color_blend_base(c1, c2, b); };
-inline uint32_t color_blend(uint32_t c1, uint32_t c2, uint8_t b) { if (b == 0xFF) return c2; return color_blend_base(c1, c2, (((uint16_t)b << 8))); };
+[[gnu::hot]] uint32_t color_blend(uint32_t c1, uint32_t c2 , uint8_t blend);
+inline uint32_t color_blend16(uint32_t c1, uint32_t c2, uint16_t b) { return color_blend(c1, c2, b >> 8); };
 [[gnu::hot]] uint32_t color_add(uint32_t,uint32_t, bool fast=false);
 [[gnu::hot]] uint32_t color_fade(uint32_t c1, uint8_t amount, bool video=false);
 CRGBPalette16 generateHarmonicRandomPalette(CRGBPalette16 &basepalette);