Tack vare moderna processorers brutala kraft funkar det drägligt ändå, men jag skulle vilja ha dessa 6 rader optimerade som inline-assembler.
Jag skulle nog kunna krafsa ihop något själv, men hur får jag fatt i ingångsvärdena och pekaren för resultatet? De ser ut att ligga som registervariabler, och det är ju bra, men jag kan inget om inline asm i gcc.
Kod: Markera allt
for (y = rows; y > 0; y--) { //rows
for (x = colw; x > 0; x--) { //columns
mask=*fc_p++; //get pixel from cache
(*p)&=~mask; //punch a hole in bkgnd
(*p++)|=mask&fg; //fill with fg and nxt column
};
p+=pmx-colw; //nxt row
};
370: **** for (y = rows; y > 0; y--) { //rows
993 .loc 1 370 9
994 0868 8B45AC movl -84(%rbp), %eax
995 086b 4898 cltq
996 086d 488945F0 movq %rax, -16(%rbp)
997 .loc 1 370 2
998 0871 EB71 jmp .L38
999 .L41:
371: **** for (x = colw; x > 0; x--) { //columns
1000 .loc 1 371 11
1001 0873 8B45A8 movl -88(%rbp), %eax
1002 0876 4898 cltq
1003 0878 488945F8 movq %rax, -8(%rbp)
1004 .loc 1 371 4
1005 087c EB47 jmp .L39
1006 .L40:
372: **** mask=*fc_p++; //fc_p++; //get pixel from cache
1007 .loc 1 372 16 discriminator 3
1008 087e 488B45C8 movq -56(%rbp), %rax
1009 0882 488D5004 leaq 4(%rax), %rdx
1010 0886 488955C8 movq %rdx, -56(%rbp)
1011 .loc 1 372 11 discriminator 3
1012 088a 8B00 movl (%rax), %eax
1013 .loc 1 372 10 discriminator 3
1014 088c 89459C movl %eax, -100(%rbp)
373: **** (*p)&=~mask; //punch a hole in bkgnd
1015 .loc 1 373 10 discriminator 3
1016 088f 488B45D0 movq -48(%rbp), %rax
1017 0893 8B00 movl (%rax), %eax
1018 0895 89C2 movl %eax, %edx
1019 .loc 1 373 12 discriminator 3
1020 0897 8B459C movl -100(%rbp), %eax
1021 089a F7D0 notl %eax
1022 .loc 1 373 10 discriminator 3
1023 089c 21D0 andl %edx, %eax
1024 089e 89C2 movl %eax, %edx
1025 08a0 488B45D0 movq -48(%rbp), %rax
1026 08a4 8910 movl %edx, (%rax)
374: **** (*p++)|=mask&fg; //put fg and nxt column
1027 .loc 1 374 9 discriminator 3
1028 08a6 488B45D0 movq -48(%rbp), %rax
1029 08aa 488D5004 leaq 4(%rax), %rdx
1030 08ae 488955D0 movq %rdx, -48(%rbp)
1031 .loc 1 374 12 discriminator 3
1032 08b2 8B10 movl (%rax), %edx
1033 08b4 89D1 movl %edx, %ecx
1034 .loc 1 374 18 discriminator 3
1035 08b6 8B55B4 movl -76(%rbp), %edx
1036 08b9 23559C andl -100(%rbp), %edx
1037 .loc 1 374 12 discriminator 3
1038 08bc 09CA orl %ecx, %edx
1039 08be 8910 movl %edx, (%rax)
371: **** for (x = colw; x > 0; x--) { //columns
1040 .loc 1 371 27 discriminator 3
1041 08c0 48836DF8 subq $1, -8(%rbp)
1041 01
1042 .L39:
371: **** for (x = colw; x > 0; x--) { //columns
1043 .loc 1 371 4 discriminator 1
1044 08c5 48837DF8 cmpq $0, -8(%rbp)
1044 00
1045 08ca 75B2 jne .L40
375: **** };
376: **** p+=pmx-colw; //nxt row
1046 .loc 1 376 10 discriminator 2
1047 08cc 8B050000 movl pmx(%rip), %eax
1047 0000
1048 08d2 2B45A8 subl -88(%rbp), %eax
1049 08d5 4898 cltq
1050 .loc 1 376 5 discriminator 2
1051 08d7 48C1E002 salq $2, %rax
1052 08db 480145D0 addq %rax, -48(%rbp)
370: **** for (y = rows; y > 0; y--) { //rows
1053 .loc 1 370 25 discriminator 2
1054 08df 48836DF0 subq $1, -16(%rbp)
1054 01
1055 .L38:
370: **** for (y = rows; y > 0; y--) { //rows
1056 .loc 1 370 2 discriminator 1
1057 08e4 48837DF0 cmpq $0, -16(%rbp)
1057 00
1058 08e9 7588 jne .L41
336: **** //yes
1059 .loc 1 336 10
1060 08eb E9FE0300 jmp .L55
1060 00
1061 .L37:
377: **** };