fast alpha blending with 565RGB

G

Gernot Frisch

Hi,

I have this code that blends 2 pixels, but it's not really fast. Can
someone help me speed it up?



/* Expand one channel of a packed RGB565 pixel to the 0..255 range.
 * Red is read from bits 11..15, green from bits 5..10, blue from bits 0..4;
 * each field is multiplied by 255 and divided (truncating) by its maximum. */
#define GETR(a) (unsigned char)(((((a) >> 11) & 0x1F) * 0xFF) / 0x1F)
#define GETG(a) (unsigned char)(((((a) >>  5) & 0x3F) * 0xFF) / 0x3F)
#define GETB(a) (unsigned char)(((((a)      ) & 0x1F) * 0xFF) / 0x1F)

/*
 * Pack three colour components into one RGB565 pixel.
 *
 * r, g, b: channel intensities, expected in 0..255. The low 3 bits of r and
 *          b and the low 2 bits of g are discarded.
 * Returns the pixel with red in bits 11..15, green in bits 5..10 and blue
 * in bits 0..4.
 *
 * Declared `static inline`: a plain `inline` definition provides no external
 * definition in C99/C11, so any call the compiler chooses not to inline
 * would fail at link time.
 */
static inline unsigned short PackColor(unsigned int r, unsigned int g,
                                       unsigned int b)
{
    return (unsigned short)((b >> 3) + ((g >> 2) << 5) + ((r >> 3) << 11));
}


// Trying to simulate
// glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
//
// basically, for every channel, it's just
// red_destination =
// min(0xff,
// alpha*red_source
// + (1-alpha)*red_destination
// );
//
// pDest = pointer to destination pixel (RGB565), overwritten with the result
// colsrc = color of source pixel (RGB565)
// alpha = alpha value (0...255 instead of 0.0f...1.0f); it is not a
//         parameter of this function and has to be visible at this point

static void BlendPixelSub(unsigned short* pDest, unsigned short
colsrc)
{
    // Blend weights in 1/256 units. They add up to exactly 256 so that the
    // ">> 8" below normalises exactly: alpha == 0 leaves the destination
    // unchanged and blending two equal colours does not darken them.
    const unsigned int src_weight = (unsigned int)alpha;
    const unsigned int dst_weight = 256u - src_weight;

    // Expand both pixels to 8 bits per channel.
    const unsigned int dst_r = GETR(*pDest);
    const unsigned int dst_g = GETG(*pDest);
    const unsigned int dst_b = GETB(*pDest);
    const unsigned int src_r = GETR(colsrc);
    const unsigned int src_g = GETG(colsrc);
    const unsigned int src_b = GETB(colsrc);

    // Weighted sum per channel; the products are added before the shift so
    // the fractional parts of both terms are kept until the end.
    unsigned int r = (dst_r * dst_weight + src_r * src_weight) >> 8;
    unsigned int g = (dst_g * dst_weight + src_g * src_weight) >> 8;
    unsigned int b = (dst_b * dst_weight + src_b * src_weight) >> 8;

    // Limit to 0xff. For alpha in 0...255 the sums cannot exceed it; this
    // only guards against an out-of-range alpha.
    if (r > 0xff) { r = 0xff; }
    if (g > 0xff) { g = 0xff; }
    if (b > 0xff) { b = 0xff; }

    // Put back.
    *pDest = PackColor(r, g, b);
}



--
-Gernot
// Signature gag: prints an obfuscated "mailto:" address followed by a newline.
// The text is assembled from string fragments and character codes
// (58 is ':', 64 is '@', 46 is '.', 10 is '\n').
int main(int argc, char** argv) {printf
("%silto%c%cf%cgl%ssic%ccom%c", "ma", 58, 'g', 64, "ba", 46, 10);}

________________________________________
Looking for a good game? Do it yourself!
GLBasic - you can do
www.GLBasic.com
 
?

=?iso-8859-1?q?Erik_Wikstr=F6m?=

Hi,

I have this code that blends 2 pixels, but it's not really fast. Can
someone help me speed it up?

/* Expand one channel of a packed RGB565 pixel to the 0..255 range.
 * Red is read from bits 11..15, green from bits 5..10, blue from bits 0..4;
 * each field is multiplied by 255 and divided (truncating) by its maximum. */
#define GETR(a) (unsigned char)(((((a) >> 11) & 0x1F) * 0xFF) / 0x1F)
#define GETG(a) (unsigned char)(((((a) >>  5) & 0x3F) * 0xFF) / 0x3F)
#define GETB(a) (unsigned char)(((((a)      ) & 0x1F) * 0xFF) / 0x1F)

Can't help you much, but I'd say that it's the multiplications by 255 and
the divisions by 31 or 63 that take the time. The multiplication could
probably be made faster by shifting the value left by 8 bits and then
subtracting the original value (x*255 == (x<<8) - x). Perhaps one could do
something similar with the divisions too.

Oh, and instead of using macros, use inline functions (don't know if I
got this one right):

// Extracts the 5-bit red field (bits 11..15) of an RGB565 pixel and scales
// it to the 0..255 range.
unsigned char getR(unsigned short a)
{
    const unsigned int red5 = (a >> 11) & 31;

    // red5 * 255, written as a shift and a subtraction.
    const unsigned int scaled = (red5 << 8) - red5;

    // Divide by 31, the largest 5-bit value, so that full red maps to 255.
    return static_cast<unsigned char>(scaled / 31);
}
 
V

Victor Bazarov

Erik said:
Can't help you much, but I'd say that it's the multiplications by 255 and
the divisions by 31 or 63 that take the time. The multiplication could
probably be made faster by shifting the value left by 8 bits and then
subtracting the original value (x*255 == (x<<8) - x). Perhaps one could do
something similar with the divisions too.

Oh, and instead of using macros, use inline functions (don't know if I
got this one right):

// Extracts the 5-bit red field (bits 11..15) of an RGB565 pixel and scales
// it to the 0..255 range.
unsigned char getR(unsigned short a)
{
    const unsigned int red5 = (a >> 11) & 31;

    // red5 * 255, written as a shift and a subtraction.
    const unsigned int scaled = (red5 << 8) - red5;

    // Divide by 31, the largest 5-bit value, so that full red maps to 255.
    return static_cast<unsigned char>(scaled / 31);
}

Or it could all just be done in the FP unit... Impossible to tell for
sure without measuring. Also, possibly getting all three values at once
can end up being faster than getting them all separately...

V
 
G

Gernot Frisch

Also, possibly getting all three values at once
can end up being faster than getting them all separately...


That was it! Thank you.

/* In-place channel masks for a packed 16-bit pixel. Each mask is an 8-bit
 * maximum narrowed to the channel width and shifted to the channel position.
 * SHIFTRED and LEFTGREEN are not defined in this excerpt; presumably 11 and
 * 2 for RGB565 -- confirm against their definitions. */
#define RED   (0x1F << SHIFTRED)
#define GREEN ((0xFF >> LEFTGREEN) << 5)
#define BLUE  (0x1F)

// -------------------------------------------------------- //
// BlendPixel - Add
// -------------------------------------------------------- //
/*
 * Saturating add for one colour channel that is kept at its in-pixel bit
 * position.
 *
 * a, b: the two channel values to add.
 * lim:  the channel's bit mask, which is also its largest value.
 * Returns lim when a + b exceeds it; otherwise the sum with every bit
 * outside lim cleared.
 *
 * Declared `static inline`: a plain `inline` definition provides no external
 * definition in C99/C11, so any call the compiler chooses not to inline
 * would fail at link time.
 */
static inline unsigned int _blend_mix(unsigned int a, unsigned int b,
                                      unsigned int lim)
{
    const unsigned int sum = a + b;

    return (sum > lim) ? lim : (sum & lim);
}

// Additive blend: each source channel is scaled by alpha_blending / 256 and
// added to the matching destination channel, saturating at the channel
// maximum. The channels are processed at their in-pixel bit positions.
//
// pDest  = pointer to the destination pixel, overwritten with the result
// colsrc = color of the source pixel
static void BlendPixelAdd(unsigned short* pDest, unsigned short colsrc)
{
    // alpha_blending = 1 .. 255
    const unsigned int weight = (unsigned int)alpha_blending;
    const unsigned int dst = *pDest;
    const unsigned int src = colsrc;

    // Scaled source contribution per channel. The shift can leave bits
    // below the channel mask; _blend_mix masks them off again.
    const unsigned int add_r = ((src & RED)   * weight) >> 8;
    const unsigned int add_g = ((src & GREEN) * weight) >> 8;
    const unsigned int add_b = ((src & BLUE)  * weight) >> 8;

    const unsigned int out_r = _blend_mix(dst & RED,   add_r, RED);
    const unsigned int out_g = _blend_mix(dst & GREEN, add_g, GREEN);
    const unsigned int out_b = _blend_mix(dst & BLUE,  add_b, BLUE);

    *pDest = (unsigned short)(out_r | out_g | out_b);
}

// -------------------------------------------------------- //
// BlendPixel - Sub
// -------------------------------------------------------- //
// Weighted blend: every channel becomes
//   (dest * (256 + alpha_blending) + source * (-alpha_blending)) / 256,
// with each product shifted down by 8 before the two are added. The channels
// are processed at their in-pixel bit positions and masked afterwards.
//
// pDest  = pointer to the destination pixel, overwritten with the result
// colsrc = color of the source pixel
static void BlendPixelSub(unsigned short* pDest, unsigned short colsrc)
{
    // alpha_blending = -1 .. -255
    const unsigned int dst_weight = 256+alpha_blending;
    const unsigned int src_weight = -alpha_blending;
    const unsigned int dst = *pDest;
    const unsigned int src = colsrc;

    const unsigned int out_r =
        ((((dst & RED)   * dst_weight) >> 8) + (((src & RED)   * src_weight) >> 8)) & RED;
    const unsigned int out_g =
        ((((dst & GREEN) * dst_weight) >> 8) + (((src & GREEN) * src_weight) >> 8)) & GREEN;
    const unsigned int out_b =
        ((((dst & BLUE)  * dst_weight) >> 8) + (((src & BLUE)  * src_weight) >> 8)) & BLUE;

    *pDest = (unsigned short)(out_r | out_g | out_b);
}
 

Ask a Question

Want to reply to this thread or ask your own question?

You'll need to choose a username for the site, which only takes a couple of moments. After that, you can post your question and our members will help you out.

Ask a Question

Members online

Forum statistics

Threads
473,755
Messages
2,569,536
Members
45,007
Latest member
obedient dusk

Latest Threads

Top