Assembler Question
Hi guys, I wrote this C code to lighten or darken a
surface. However, it runs too slowly. I
tried writing an assembler routine to speed it up,
but the assembler is slower than the
C code on an optimised build. If anyone can suggest how I can make it faster, I'd appreciate it.
// Copies `num` 15-bit pixels from `source` to `dest`, adding a fixed
// brightness offset to each colour channel.
//
// Pixel layout used by the code below: blue in bits 0-4, green in bits 5-9,
// red in bits 10-14. Each 5-bit channel is widened to 8 bits (value * 8),
// offset by `amount`, clamped to [0, 255] and narrowed back to 5 bits.
// A positive `amount` lightens, a negative one darkens. Bit 15 of every
// written pixel is 0.
//
//   dest   - destination pixels, advanced one element per pixel written
//   source - source pixels, advanced one element per pixel read
//   num    - number of pixels to process; values below 1 do nothing
//
// NOTE(review): U16ptr is presumably a pointer to unsigned 16-bit pixels
// (the asm variant below steps both pointers by 2 bytes) - confirm against
// the project's typedef.
void __fastcall Memcpy15BitLightened ( U16ptr dest, U16ptr source, int num )
{
    if (num < 1) return;

    const int amount = 30;  // brightness offset, in 8-bit channel units

    for (int i = 0; i < num; i++)
    {
        // Read the source pixel once instead of once per channel.
        const int pixel = *source;

        // Convert 5 bit vals to 8 bit vals.
        int b = ((pixel << 3) & 248);
        int g = ((pixel >> 2) & 248);
        int r = ((pixel >> 7) & 248);

        // Increment and clamp.
        b += amount; if (b > 255) b = 255; if (b < 0) b = 0;
        g += amount; if (g > 255) g = 255; if (g < 0) g = 0;
        r += amount; if (r > 255) r = 255; if (r < 0) r = 0;

        // Convert 8 bit vals to 5 bit vals.
        *dest = (((r & 248) << 7) + ((g & 248) << 2) + (b >> 3));

        source++;
        dest++;
    }

    /*
    Alternative MSVC inline-assembly implementation, kept for reference.

    NOTE(review):
      - "mov eax, [esi]" is a 32-bit load of a 16-bit pixel, so on the last
        pixel it reads 2 bytes past the end of the source buffer.
      - Each pixel goes through up to six conditional jumps plus several
        unconditional ones (some of them jump to the very next label);
        this is a likely reason it does not beat the optimised C build.

    if (num < 1) return;
    int amount = 30;
    _asm
    {
        mov ecx, num        // How many copies
        mov edi, dest       // To where
        mov esi, source     // Set source
    light:
        mov eax, [esi]      // Get src
        // BLUE
        mov edx, eax        // Put src into dest
        and edx, 0x1F       // Clear the red and green bits.
        shl edx, 3          // Convert 5 bit blue to 8 bit blue
        add edx, amount     // Increase blue
        cmp edx, 255        // Check its upper limit
        jg max_blue
        cmp edx, 0          // Check its lower limit
        jl min_blue
        shr edx, 3          // Convert 8 bit blue to 5 bit blue
        jmp end_blue
    max_blue:
        mov edx, 0x1F       // Limit it
        jmp end_blue
    min_blue:
        mov edx, 0x0        // Limit it
        jmp end_blue
    end_blue:
        // GREEN
        mov ebx, eax        // Get a copy of src
        and ebx, 0x3E0      // Clear the red and blue bits.
        shr ebx, 2          // Convert 5 bit green to 8 bit green
        add ebx, amount     // Increase green
        cmp ebx, 255        // Check its upper limit
        jg max_green
        cmp ebx, 0          // Check its lower limit
        jle min_green
        shl ebx, 2          // Convert 8 bit green to 5 bit green
        and ebx, 0x3E0      // Clear the unused bits.
        or edx, ebx         // Put green component into dest
        jmp end_green
    max_green:
        or edx, 0x3E0       // Limit it
        jmp end_green
    min_green:
        or edx, 0x0         // Limit it
        jmp end_green
    end_green:
        // RED
        and eax, 0x7C00     // Clear the green and blue bits.
        shr eax, 7          // Convert 5 bit red to 8 bit red
        add eax, amount     // Increase red
        cmp eax, 255        // Check its upper limit
        jg max_red
        cmp eax, 0          // Check its lower limit
        jle min_red
        shl eax, 7          // Convert 8 bit red to 5 bit red
        and eax, 0x7C00     // Clear the unused bits.
        or edx, eax         // Put red component into dest
        jmp end_red
    max_red:
        or edx, 0x7C00      // Limit it
        jmp end_red
    min_red:
        or edx, 0x0         // Limit it
        jmp end_red
    end_red:
        mov [edi], dx       // Move completed rgb value into dest
        add edi, 2          // Incr dest ptr
        add esi, 2          // Incr src ptr
        dec ecx             // Counter stores flag
        jg light
    }
    */
}
"I am a pitbull on the pantleg of opportunity."George W. Bush
Perhaps make r, g, and b register variables instead of i, since they are used much more often.
Also, just a little suggestion to reduce amount of adding:
take out dest++;
(*dest = (((r & 248)<<7) +((g &248 )<< 2) + (b & 248)))++;
All I really did was take out a line of code; it may help a little, probably not. That's all I can think of; I'm not that great with assembly optimization.
-----------------------------
A wise man once said "A person with half a clue is more dangerous than a person with or without one."
Also, just a little suggestion to reduce amount of adding:
take out dest++;
(*dest = (((r & 248)<<7) +((g &248 )<< 2) + (b & 248)))++;
All I really did was take out a line of code; it may help a little, probably not. That's all I can think of; I'm not that great with assembly optimization.
-----------------------------
A wise man once said "A person with half a clue is more dangerous than a person with or without one."
-----------------------------
A wise man once said "A person with half a clue is more dangerous than a person with or without one."
The Micro$haft BSOD T-Shirt
This topic is closed to new replies.
Advertisement
Popular Topics
Advertisement
Recommended Tutorials
Advertisement