Skip to content

Commit

Permalink
updated blend function to optimized 8bit calculation
Browse files Browse the repository at this point in the history
- efficient color blend calculation in fews operations possible
- omitting min / max checks makes it faster on average
- using 8bit for "blend" variable does not significantly influence the resulting color, just transition points are slightly shifted but yield very good results (and better than the original 16bit version using the old fastled math with improper rounding)
- updated drawCircle and drawLine to use 8bit directly instead of 16bit with a shift
  • Loading branch information
DedeHai committed Nov 21, 2024
1 parent 633d0c6 commit 5069e50
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 43 deletions.
42 changes: 21 additions & 21 deletions wled00/FX_2Dfcn.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -513,25 +513,25 @@ void Segment::drawCircle(uint16_t cx, uint16_t cy, uint8_t radius, uint32_t col,
unsigned oldFade = 0;
while (x < y) {
float yf = sqrtf(float(rsq - x*x)); // needs to be floating point
uint16_t fade = float(0xFFFF) * (ceilf(yf) - yf); // how much color to keep
uint16_t fade = float(0xFF) * (ceilf(yf) - yf); // how much color to keep
if (oldFade > fade) y--;
oldFade = fade;
setPixelColorXY(cx+x, cy+y, color_blend16(col, getPixelColorXY(cx+x, cy+y), fade));
setPixelColorXY(cx-x, cy+y, color_blend16(col, getPixelColorXY(cx-x, cy+y), fade));
setPixelColorXY(cx+x, cy-y, color_blend16(col, getPixelColorXY(cx+x, cy-y), fade));
setPixelColorXY(cx-x, cy-y, color_blend16(col, getPixelColorXY(cx-x, cy-y), fade));
setPixelColorXY(cx+y, cy+x, color_blend16(col, getPixelColorXY(cx+y, cy+x), fade));
setPixelColorXY(cx-y, cy+x, color_blend16(col, getPixelColorXY(cx-y, cy+x), fade));
setPixelColorXY(cx+y, cy-x, color_blend16(col, getPixelColorXY(cx+y, cy-x), fade));
setPixelColorXY(cx-y, cy-x, color_blend16(col, getPixelColorXY(cx-y, cy-x), fade));
setPixelColorXY(cx+x, cy+y-1, color_blend16(getPixelColorXY(cx+x, cy+y-1), col, fade));
setPixelColorXY(cx-x, cy+y-1, color_blend16(getPixelColorXY(cx-x, cy+y-1), col, fade));
setPixelColorXY(cx+x, cy-y+1, color_blend16(getPixelColorXY(cx+x, cy-y+1), col, fade));
setPixelColorXY(cx-x, cy-y+1, color_blend16(getPixelColorXY(cx-x, cy-y+1), col, fade));
setPixelColorXY(cx+y-1, cy+x, color_blend16(getPixelColorXY(cx+y-1, cy+x), col, fade));
setPixelColorXY(cx-y+1, cy+x, color_blend16(getPixelColorXY(cx-y+1, cy+x), col, fade));
setPixelColorXY(cx+y-1, cy-x, color_blend16(getPixelColorXY(cx+y-1, cy-x), col, fade));
setPixelColorXY(cx-y+1, cy-x, color_blend16(getPixelColorXY(cx-y+1, cy-x), col, fade));
setPixelColorXY(cx+x, cy+y, color_blend(col, getPixelColorXY(cx+x, cy+y), fade));
setPixelColorXY(cx-x, cy+y, color_blend(col, getPixelColorXY(cx-x, cy+y), fade));
setPixelColorXY(cx+x, cy-y, color_blend(col, getPixelColorXY(cx+x, cy-y), fade));
setPixelColorXY(cx-x, cy-y, color_blend(col, getPixelColorXY(cx-x, cy-y), fade));
setPixelColorXY(cx+y, cy+x, color_blend(col, getPixelColorXY(cx+y, cy+x), fade));
setPixelColorXY(cx-y, cy+x, color_blend(col, getPixelColorXY(cx-y, cy+x), fade));
setPixelColorXY(cx+y, cy-x, color_blend(col, getPixelColorXY(cx+y, cy-x), fade));
setPixelColorXY(cx-y, cy-x, color_blend(col, getPixelColorXY(cx-y, cy-x), fade));
setPixelColorXY(cx+x, cy+y-1, color_blend(getPixelColorXY(cx+x, cy+y-1), col, fade));
setPixelColorXY(cx-x, cy+y-1, color_blend(getPixelColorXY(cx-x, cy+y-1), col, fade));
setPixelColorXY(cx+x, cy-y+1, color_blend(getPixelColorXY(cx+x, cy-y+1), col, fade));
setPixelColorXY(cx-x, cy-y+1, color_blend(getPixelColorXY(cx-x, cy-y+1), col, fade));
setPixelColorXY(cx+y-1, cy+x, color_blend(getPixelColorXY(cx+y-1, cy+x), col, fade));
setPixelColorXY(cx-y+1, cy+x, color_blend(getPixelColorXY(cx-y+1, cy+x), col, fade));
setPixelColorXY(cx+y-1, cy-x, color_blend(getPixelColorXY(cx+y-1, cy-x), col, fade));
setPixelColorXY(cx-y+1, cy-x, color_blend(getPixelColorXY(cx-y+1, cy-x), col, fade));
x++;
}
} else {
Expand Down Expand Up @@ -608,13 +608,13 @@ void Segment::drawLine(uint16_t x0, uint16_t y0, uint16_t x1, uint16_t y1, uint3
float gradient = x1-x0 == 0 ? 1.0f : float(y1-y0) / float(x1-x0);
float intersectY = y0;
for (int x = x0; x <= x1; x++) {
uint16_t keep = float(0xFFFF) * (intersectY-int(intersectY)); // how much color to keep
uint16_t seep = 0xFFFF - keep; // how much background to keep
uint8_t keep = float(0xFF) * (intersectY-int(intersectY)); // how much color to keep
uint8_t seep = 0xFF - keep; // how much background to keep
int y = int(intersectY);
if (steep) std::swap(x,y); // temporaryly swap if steep
// pixel coverage is determined by fractional part of y co-ordinate
setPixelColorXY(x, y, color_blend16(c, getPixelColorXY(x, y), keep));
setPixelColorXY(x+int(steep), y+int(!steep), color_blend16(c, getPixelColorXY(x+int(steep), y+int(!steep)), seep));
setPixelColorXY(x, y, color_blend(c, getPixelColorXY(x, y), keep));
setPixelColorXY(x+int(steep), y+int(!steep), color_blend(c, getPixelColorXY(x+int(steep), y+int(!steep)), seep));
intersectY += gradient;
if (steep) std::swap(x,y); // restore if steep
}
Expand Down
30 changes: 11 additions & 19 deletions wled00/colors.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,26 +5,18 @@
*/

/*
* base color blend function, used for 8bit and 16bit
* color blend function, based on FastLED blend function
* the calculation for each color is: result = (A*(amountOfA) + A + B*(amountOfB) + B) / 256 with amountOfA = 255 - amountOfB
*/
uint32_t color_blend_base(uint32_t color1, uint32_t color2, uint16_t blend) {
if(blend == 0) return color1;
uint32_t w1 = W(color1);
uint32_t r1 = R(color1);
uint32_t g1 = G(color1);
uint32_t b1 = B(color1);

uint32_t w2 = W(color2);
uint32_t r2 = R(color2);
uint32_t g2 = G(color2);
uint32_t b2 = B(color2);

uint32_t w3 = ((w2 * blend) + (w1 * (0xFFFF - blend))) >> 16;
uint32_t r3 = ((r2 * blend) + (r1 * (0xFFFF - blend))) >> 16;
uint32_t g3 = ((g2 * blend) + (g1 * (0xFFFF - blend))) >> 16;
uint32_t b3 = ((b2 * blend) + (b1 * (0xFFFF - blend))) >> 16;

return RGBW32(r3, g3, b3, w3);
uint32_t color_blend(uint32_t color1, uint32_t color2, uint8_t blend) {
// min / max blend checking is omitted: calls with 0 or 255 are rare, checking lowers overall performance
uint32_t rb1 = color1 & 0x00FF00FF;
uint32_t wg1 = (color1>>8) & 0x00FF00FF;
uint32_t rb2 = color2 & 0x00FF00FF;
uint32_t wg2 = (color2>>8) & 0x00FF00FF;
uint32_t rb3 = ((((rb1 << 8) | rb2) + (rb2 * blend) - (rb1 * blend)) >> 8) & 0x00FF00FF;
uint32_t wg3 = ((((wg1 << 8) | wg2) + (wg2 * blend) - (wg1 * blend))) & 0xFF00FF00;
return rb3 | wg3;
}

/*
Expand Down
5 changes: 2 additions & 3 deletions wled00/fcn_declare.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,9 +78,8 @@ class NeoGammaWLEDMethod {
};
#define gamma32(c) NeoGammaWLEDMethod::Correct32(c)
#define gamma8(c) NeoGammaWLEDMethod::rawGamma8(c)
[[gnu::hot]] uint32_t color_blend_base(uint32_t c1, uint32_t c2 , uint16_t blend);
inline uint32_t color_blend16(uint32_t c1, uint32_t c2, uint16_t b) { if (b == 0xFFFF) return c2; return color_blend_base(c1, c2, b); };
inline uint32_t color_blend(uint32_t c1, uint32_t c2, uint8_t b) { if (b == 0xFF) return c2; return color_blend_base(c1, c2, (((uint16_t)b << 8))); };
[[gnu::hot]] uint32_t color_blend(uint32_t c1, uint32_t c2 , uint8_t blend);
inline uint32_t color_blend16(uint32_t c1, uint32_t c2, uint16_t b) { return color_blend(c1, c2, b >> 8); };
[[gnu::hot]] uint32_t color_add(uint32_t,uint32_t, bool fast=false);
[[gnu::hot]] uint32_t color_fade(uint32_t c1, uint8_t amount, bool video=false);
CRGBPalette16 generateHarmonicRandomPalette(CRGBPalette16 &basepalette);
Expand Down

0 comments on commit 5069e50

Please sign in to comment.