@Leffmann, phx
If my calculations are right, then the bcc version is 2 cycles slower
(on the 68000, of course).
Let's see:
Code:
; strlen via a DBEQ scan loop (68000 cycle counts in the comments).
;   In:  a0 = start of the zero-terminated string
;   Out: d0 = length, a0 = restored to the start of the string
;   x   = number of bytes tested, terminator included (x = length + 1)
; DBcc only counts in the low word of d0, so the result is valid for
; lengths that fit in 16 bits.
; NOTE(review): the 68000 timing tables appear to list NOT.L Dn as 6c,
; not 4c - verify before relying on the total.
        moveq   #-1,d0          ; 4c                d0 = $FFFFFFFF, stepped down once per non-zero byte
.scan:
        tst.b   (a0)+           ; 8c*x
        dbeq    d0,.scan        ; 10c*(x-1) + 12c   falls through once a zero byte has been tested
        adda.l  d0,a0           ; 8c                d0 = -x here, so a0 is rewound to the start
        not.l   d0              ; 4c                ~(-x) = x-1 = length
; total: 4c + 8c*x + 10c*(x-1) + 12c + 8c + 4c = 28c + 18c*x - 10c
;      = 18c + 18c*x
Code:
; strlen via a plain TST/BNE scan loop (68000 cycle counts in the comments).
;   In:  a0 = start of the zero-terminated string
;   Out: d0 = length, a0 = restored to the start of the string
;   x   = number of bytes tested, terminator included (x = length + 1)
; The 8c fall-through figure for BNE assumes the assembler emits the
; short branch form.
; NOTE(review): the 68000 timing tables appear to list SUBQ.L #n,Dn as 8c,
; not 4c - verify before relying on the total.
        move.l  a0,d0           ; 4c                remember the start address
.scan:
        tst.b   (a0)+           ; 8c*x
        bne     .scan           ; 10c*(x-1) + 8c
        suba.l  d0,a0           ; 8c                a0 = end - start = x
        exg     a0,d0           ; 6c                d0 = x, a0 = start address again
        subq.l  #1,d0           ; 4c                do not count the terminator: d0 = length
; total: 4c + 8c*x + 10c*x - 10c + 8c + 8c + 6c + 4c
;      = 20c + 18c*x
The fastest way I know of is to scan through a second address register (Ax), like this:
Code:
; strlen that scans through a scratch address register, so a0 never has
; to be rewound (68000 cycle counts in the comments).
;   In:  a0 = start of the zero-terminated string
;   Out: d0 = length, a0 = untouched
;   Clobbers: a1 (left pointing one byte past the terminator)
;   x   = number of bytes tested, terminator included (x = length + 1)
; DBcc only counts in the low word of d0, so the result is valid for
; lengths that fit in 16 bits.
; NOTE(review): the 68000 timing tables appear to list NOT.L Dn as 6c,
; not 4c - verify before relying on the total.
        movea.l a0,a1           ; 4c                scan with a copy, keep a0 intact
        moveq   #-1,d0          ; 4c                d0 = $FFFFFFFF, stepped down once per non-zero byte
.loop:
        tst.b   (a1)+           ; 8c*x
        dbeq    d0,.loop        ; 10c*(x-1) + 12c   DBcc takes a counter register and no size suffix
        not.l   d0              ; 4c                ~(-x) = x-1 = length
; total: 8c + 18c*x - 10c + 12c + 4c
;      = 14c + 18c*x