Registered User
Join Date: Jan 2008
Location: Warsaw/Poland
Age: 55
Posts: 1,975
|
Added more a/b optimisations. The GitHub version used DCB.B; I have been cleaning up the code step by step, and it now uses ds.b.
Code:
; Library vector offsets, used as "jsr <offset>(a6)".
; In this program OldOpenLibrary, CloseLibrary, Forbid, Permit, AddIntServer
; and RemIntServer are called through the base pointer read from address 4;
; Output, Input, Write and Read are called through the base returned by
; OldOpenLibrary for "dos.library".
OldOpenLibrary = -408
CloseLibrary = -414
Output = -60
Input = -54
Write = -48
Read = -42
Forbid = -132
Permit = -138
AddIntServer = -168
RemIntServer = -174
; Byte offset from the base in a5; the byte there is compared against 50 to
; choose between the 50 Hz and 60 Hz tick-to-time conversion.
VBlankFrequency = 530
INTB_VERTB = 5 ;for vblank interrupt
NT_INTERRUPT = 2 ;node type
; Work array size: N = 7*D/2 words for D digits, e.g. N = 350 for 100 digits.
;N = 7*D/2 ;D digits, e.g., N = 350 for 100 digits
; Program entry: opens dos.library, fetches the stdout handle, prints the
; banner and computes the largest digit count the work array can hold.
; Register roles established here and relied on by the rest of the program:
;   a5   = base pointer read from address 4 (used for the exec-style calls)
;   a6   = dos.library base (swapped with a5 around those calls)
;   a4   = address of cout; messages and buf are addressed relative to it
;   d7.w = maxn, the upper limit offered in the prompt
; The result of OldOpenLibrary is used without a NULL check.
start
lea libname(pc),a1 ;open the dos library
move.l 4.W,a5 ; base pointer stored at absolute address 4
move.l a5,a6
jsr OldOpenLibrary(a6)
move.l d0,a6 ; a6 = dos.library base from here on
jsr Output(a6) ;get stdout
lea cout(PC),A4
move.l d0,(A4) ;cout
move.l d0,d1 ;call Write(stdout,buff,size)
; The message address is built as a4 + small offset so that moveq can be used;
; every <label>-cout offset therefore has to stay within -128..127.
moveq #msg1-cout,D2 ; must be checked if in moveq range, the longest text can be moved at end
add.l A4,D2
moveq #msg4-msg1,d3 ; banner length including its trailing newline
jsr Write(a6)
; maxn: bytes from ra up to 64 KiB past start, at 7 bytes of array per digit,
; rounded down to a multiple of 4 (digits are produced in groups of four).
move.l #$10000-(ra-start),D7
divu.w #7*4,D7 ; quotient in the low word, remainder in the high word
lsl.l #2,D7 ; d7.w=maxn
; Prompt loop: prints "number of digits (up to <maxn>)? ", reads the answer
; with getnum and validates it.  Repeats until 0 < digits <= maxn.
;
; In:   d7.w = maxn, a4 = address of cout, a6 = dos.library base
; Out:  d6.w = digit count rounded up to a multiple of 4 (consumed at .l7)
; Uses: d0-d3, d5, a0, a1 (via Write, getnum and PR0000)
;
; If the count had to be rounded up, or the first five input bytes did not
; end in a newline, the effective count is echoed followed by
; " digits will be printed".
.l20
        move.l  (A4),D1                 ; cout
        moveq   #msg4-cout,D2
        add.l   A4,D2
        moveq   #msg5-msg4,d3
        jsr     Write(a6)               ; "number of digits (up to "
        move.l  d7,d5
        bsr.w   PR0000                  ; print maxn as four decimal digits
        move.l  (A4),D1                 ; cout
        moveq   #msg5-cout,D2
        add.l   A4,D2
        moveq   #msg3-msg5,d3
        jsr     Write(a6)               ; ")? "
        bsr.w   getnum                  ; d5 = entered value (0 on error), a0 -> first unparsed byte
        cmp.w   d7,d5
        bhi.b   .l20                    ; larger than maxn: ask again
        move.w  d5,d1                   ; keep the value as typed
        beq.b   .l20                    ; zero or invalid: ask again
        addq.w  #3,d5
        and.w   #$fffc,d5               ; round up to a multiple of 4
        ; d6 has to be loaded before the newline test: PR0000 destroys d5, and
        ; .l21 is reached directly from the bne below as well as by falling
        ; through.  Loading it only on the newline path would let over-long
        ; input reach .l7 with getnum's last digit value still in d6.
        move.w  d5,d6
        cmp.b   #10,(a0)                ; did the input end with a newline?
        bne.b   .l21                    ; no: echo the count that will be used
        cmp.w   d1,d5
        beq.b   .l7                     ; already a multiple of 4: nothing to echo
.l21
        bsr.w   PR0000                  ; print the effective digit count
        move.l  (A4),D1                 ; cout
        moveq   #msg3-cout,D2
        add.l   A4,D2
        moveq   #msg2-msg3+1,d3         ; message plus the newline stored at msg2
        jsr     Write(a6)               ; " digits will be printed"
; Set-up for the computation.
; In:  d6.w = digit count (multiple of 4), a4 = address of cout,
;      a5 = base pointer from address 4, a6 = dos.library base
; Out: d6 = array length in longwords (7*digits/4), a3 = ra,
;      a2 = 10000, d2/d3 = buffer address/length for PR000N,
;      a zero longword ("cv") pushed on the stack.
.l7
mulu.w #7,d6 ;kv = d6
lsr.l #2,D6 ; /4
move.l d6,d7 ; d7 = fill counter
lea ra(pc),a3
exg a5,a6 ; a6 = base from address 4 for the next two calls
jsr Forbid(a6)
moveq #INTB_VERTB,d0
lea VBlankServer(pc),a1
jsr AddIntServer(a6) ; rasteri now counts vertical blanks into time
exg a5,a6 ; a6 = dos.library base again
;move.w #$4000,$dff096 ;DMA off
; Initialise every word of the array to 2000, two words per store.
; This overwrites getnum, libname and the messages, which live inside ra.
move.l #2000*65537,d0
move.l a3,a0
.fill move.l d0,(a0)+
subq.l #1,D7
bne.b .fill
; d7 is zero here; the pushed longword is the carry cell read and written
; by the main loop (its upper word is what "add.w (SP)" picks up).
move.l D7,-(SP) ; cv
lea 10000.W,A2 ; constant 10000, reloaded into d1 at the top of each pass
moveq #4,D3 ; PR000N writes four characters
moveq #buf-cout,D2
add.l A4,D2 ; buf
; Main spigot loop: each outer pass emits four decimal digits.
; Registers inside the loop:
;   d6 = remaining array length in longwords (reduced by 7 per pass)
;   d4 = divisor b = 2*i-1, stepping down by 2 to 1
;   d5 = running sum d
;   d1 = 10000 (reloaded from a2 because PR000N changes d1)
;   a3 = array cursor, walked from the end of the active part down to ra
;   d0/d7 = quotient and remainder of d/b (their upper words are kept clear)
.l0 moveq #0,D5 ;d <- 0
move.l d6,d4 ;i <- kv, i <- i*2
lsl.l #2,D4 ; *4
adda.l d4,a3 ; a3 = end of the active part of the array (d4 is its size in bytes)
subq.l #1,d4 ;b <- 2*i-1
move.l A2,D1 ; d1 = 10000
bra.b .l4
; Slow path, taken when d/b does not fit in 16 bits: two chained 16-bit
; divisions (high word first, its remainder carried into the low word).
; Leaves the 32-bit quotient in d0 and the remainder in d7, i.e. swapped
; relative to the fast path; .l2 subtracts both, so the order does not matter.
.longdiv
swap d0
move.w d0,d7 ; high word of d (d7 upper word is zero)
divu.w d4,d7
swap d7
move.w d7,d0 ; remainder of the high-word division
swap d0
divu.w d4,d0
move.w d0,d7 ; d7 = full 32-bit quotient
exg d0,d7
clr.w d7
swap d7 ; d7 = remainder with a clear upper word
move.w d7,(a3) ;r[i] <- d%b
bra.b .enddiv
; d <- (d - quotient - remainder)/2 = quotient*(b-1)/2 = quotient*(i-1)
.l2 sub.l d0,d5
sub.l d7,d5
lsr.l #1,d5
.l4
move -(a3),d0 ; r[i]
mulu.w d1,d0 ;r[i]*10000
add.l d0,d5 ;d += r[i]*10000
move.l d5,d0
divu.w d4,d0
bvs.s .longdiv ; quotient overflowed 16 bits
move.w d0,d7 ; d7 = quotient
clr.w d0
swap d0 ; d0 = remainder
move.w d0,(a3) ;r[i] <- d%b
.enddiv
subq.l #2,d4 ;i <- i - 1
bcc.b .l2 ;the main loop
; Reached once b = 1 has been processed; d5 still holds the final sum d.
divu.w d1,d5 ; d5 = (d mod 10000) : (d / 10000)
; The word at (SP) is the upper word of the saved longword, i.e. the
; remainder stored by the previous pass (zero on the first pass).
add.w (SP),D5 ; cv
move.l D5,(SP) ; cv
bsr.w PR000N ; print d5.w as four digits (d2/d3 were set up before the loop)
subq.l #7,d6 ;kv
bne.b .l0
addq.l #4,SP ; restore stack
; Stop timing and convert the tick count to hundredths of a second in d5.
move.l time(pc),d5 ; ticks counted by rasteri
;move.w #$c000,$dff096 ;DMA on
exg a5,a6 ; a6 = base from address 4
moveq #INTB_VERTB,d0
lea VBlankServer(pc),a1
jsr RemIntServer(a6)
jsr Permit(a6) ; pairs with the Forbid issued before the computation
exg a5,a6 ; a6 = dos.library base
moveq #1,d3
; move.l cout(pc),d1
move.l (A4),D1 ; cout
move.l #msgx,d2
jsr Write(a6) ;space
move.l d5,d3 ; keep the raw tick count
lsl.l #1,d5 ; ticks*2 = hundredths of a second at 50 Hz
cmp.b #50,VBlankFrequency(a5)
beq .l8
; Any other value is treated as 60 Hz: hundredths = ticks*5/3, rounded.
; NOTE(review): divu.w leaves d5 unchanged on overflow, i.e. once ticks*5/3
; exceeds 65535; that case is not handled here.
lsl.l #1,d5 ;60 Hz
add.l d3,d5 ; ticks*4 + ticks
divu.w #3,d5
swap d5
lsr.w #2,d5 ; remainder is 0..2: the word becomes 0 and X = 1 only for remainder 2
swap d5 ; d5 = quotient with a clear upper word
negx.l d5
neg.l d5 ; net effect: d5 = quotient + X
; Print d5 (hundredths of a second) as "<seconds>.<hh>" plus a newline, then
; close dos.library and leave the program.
; Digits are generated least significant first as raw values 0..9 into the
; scratch area "string" (which aliases msg1); the separator is stored as
; '.'-'0' so the print loop can add '0' to every byte uniformly. The bytes
; are then written back to front, one Write call per character.
.l8 lea string(pc),a3
moveq.l #10,d4
move.l d5,d6
;div32x16 macro ;D7=D6/D4, D6=D6%D4
; moveq #0,d7 ; not necessary D7 highword is already cleared
divu.w d4,d6
bvc.b .div32no
; Quotient did not fit in 16 bits: divide high word, then low word.
swap d6
move.w d6,d7
divu.w d4,d7
swap d7
move d7,d6
swap d6
divu.w d4,d6
.div32no
move.w d6,d7 ; d7 = quotient
; clr.w d6 ;not necessary
swap d6
move.b d6,(a3)+ ; hundredths digit
divu.w d4,d7
swap d7
move.b d7,(a3)+ ; tenths digit
clr.w d7
swap d7 ; d7 = whole seconds
move.b #'.'-'0',(a3)+
.l12 tst.w d7
beq .l11
divu.w d4,d7
swap d7
move.b d7,(a3)+ ; next digit of the seconds part
clr.w d7
swap d7
bra .l12
; Output loop: step back one byte, convert it to ASCII and write it, until
; the start of the scratch area has been written.
.l11 add.b #'0',-(a3)
moveq #1,d3
; move.l cout(pc),d1
move.l (A4),D1 ; cout
move.l a3,d2
jsr Write(a6)
cmp.l #string,a3
bne .l11
; move.l cout(pc),d1
move.l (A4),D1 ; cout
move.l #msgx+1,d2 ; d3 is still 1
jsr Write(a6) ;newline
move.l a6,a1 ; a1 = dos.library base, the library to close
move.l a5,a6
jmp CloseLibrary(a6) ; jump instead of jsr: the call's rts ends the program
; PR0000 / PR000N: write d5.w to stdout as four ASCII decimal digits.
; In:   d5.w = value, a4 = address of cout, a6 = dos.library base
;       PR000N additionally expects d2 = address of buf and d3 = 4
;       (PR0000 sets both up itself).
; Out:  four characters stored in buf and passed to Write.
; Uses: d0, d1, d5, a0, plus whatever Write changes.
; Method: d0 starts as the characters "/:/" "/" pattern $2f3a2f2f, each digit
; position sitting one below its final value (or, for the units position, ten
; above '0'); repeated subtraction counts each digit directly into its byte.
PR0000 ;prints d5, uses a0,a1(scratch),d0,d1,d2,d3
moveq #4,D3
moveq #buf-cout,D2
add.l A4,D2 ; buf
PR000N
move.w #$0100,a0 ; increment for the upper byte of a word
move.l #$2f3a2f2f,d0
move.w #1000,d1
.b1000 add.w a0,d0 ; thousands digit: upper byte of the low word
sub.w d1,d5
bcc.b .b1000
add.w d1,d5 ; undo the last subtraction
moveq #100,d1
.b100 addq.b #1,d0 ; hundreds digit: low byte
sub.w d1,d5
bcc.b .b100
add.w d1,d5
swap d0 ; thousands/hundreds move to the upper word
moveq #10,d1
.b10 add.w a0,d0 ; tens digit: upper byte of the low word
sub.w d1,d5
bcc.b .b10
; d5 is now units-10; the low byte was preset to '0'+10, so no add-back is needed.
add.b d5,d0
move.l D0,4(A4) ; buf
move.l (A4),D1 ; cout
jmp Write(A6) ;call Write(stdout,buff,size)
; Interrupt server code registered through VBlankServer (is_Code).
; In: a1 = is_Data from that structure, i.e. the address of "time".
; Increments the longword tick counter and returns with d0 = 0 / Z set.
rasteri
addq.l #1,(a1)
;If you set your interrupt to priority 10 or higher then a0 must point at $dff000 on exit
moveq #0,d0 ; must set Z flag on exit!
rts
; Interrupt server structure passed to AddIntServer/RemIntServer: a list node
; (type NT_INTERRUPT, priority 0, no name) followed by the data pointer handed
; to the handler and the handler address.
VBlankServer:
dc.l 0,0 ;ln_Succ,ln_Pred
dc.b NT_INTERRUPT,0 ;ln_Type,ln_Pri
dc.l 0 ;ln_Name
dc.l time,rasteri ;is_Data,is_Code
msgx dc.b 32,10 ; space (msgx) and newline (msgx+1), each written separately
cnop 0,4
time dc.l 0 ; tick counter incremented by rasteri
cout dc.l 0 ; stdout handle; a4 points here for the whole program
; buf must stay directly after cout: PR000N stores to it as 4(A4).
buf ds.b 4
; Overwritten code/data start here.
; "ra" is the start of the work array. Everything from this label onwards
; (getnum, libname, the messages) is overwritten by the fill loop once the
; input has been accepted, so getnum can only be used before that point.
ra
; getnum: read up to 5 bytes from stdin into msg1 and parse a decimal number.
; In:   a4 = address of cout, a6 = dos.library base
; Out:  d5 = parsed value, or 0 if a non-digit was met
;       a0 = address of the first byte that was not parsed
; Uses: d0-d3, d6, a1 (d6 ends up holding the last digit value examined)
; All bytes except the last one returned by Read are parsed as digits; the
; caller inspects the remaining byte at (a0) for a newline.
; NOTE(review): if Read returns 0 or a negative value, the subq/beq test does
; not stop at once; parsing then runs over whatever the buffer already holds
; until a non-digit byte ends it with d5 = 0.
getnum jsr Input(a6) ;get stdin
; move.l #string,d2 ;set by previous call
moveq #msg1-cout,D2 ; input buffer = msg1 (the banner text is no longer needed)
add.l A4,D2
move.l d0,d1
moveq #5,d3 ;+ newline
jsr Read(a6)
move.l d2,a0
moveq #0,d5
.loop subq.w #1,d0
beq.b .done
move.w #256-'0',d6
add.b (a0)+,d6 ; byte add: d6.w = (char - '0') mod 256
cmp.w #9,d6
bhi.b .error
mulu.w #10,d5
add.w d6,d5
bra.b .loop
.error moveq #0,d5
.done rts
; Scratch area for the elapsed-time digits; it reuses the banner text.
string = msg1
libname dc.b "dos.library",0
; Message lengths are computed from label differences (msg4-msg1, msg5-msg4,
; msg3-msg5, msg2-msg3+1), so the order of these lines must not change.
msg1 dc.b 'number pi calculator v13'
dc.b 10
msg4 dc.b 'number of digits (up to '
msg5 dc.b ')? '
msg3 dc.b ' digits will be printed'
msg2 dc.b 10
; Reserve the rest of the 64 KiB measured from start; this is the room the
; work array at ra grows into (maxn is derived from the same 64 KiB figure).
Buffy
ds.b 65536-(Buffy-start)
Last edited by Don_Adan; 10 June 2021 at 23:00.
|