English Amiga Board


Go Back   English Amiga Board > Coders > Coders. General

 
 
Thread Tools
Old 12 June 2021, 05:13   #301
Don_Adan
Registered User
 
Join Date: Jan 2008
Location: Warsaw/Poland
Age: 53
Posts: 1,327
Small size optimisations. Still to do: the end part, i.e. the routine that converts VBI ticks to time.

Code:
; ---- library vector offsets used with ExecBase in a6 ----
OldOpenLibrary  equ -408
CloseLibrary    equ -414
Forbid          equ -132
Permit          equ -138
AddIntServer    equ -168
RemIntServer    equ -174

; ---- library vector offsets used with the dos.library base in a6 ----
Output          equ -60
Input           equ -54
Write           equ -48
Read            equ -42

; ---- ExecBase field offset and system constants ----
VBlankFrequency equ 530         ;byte field, compared against 50 by the timing code
INTB_VERTB      equ 5           ;interrupt number of the vertical-blank interrupt
NT_INTERRUPT    equ 2           ;ln_Type value for an interrupt node

;N = 7*D/2 ;D digits, e.g., N = 350 for 100 digits

; start -- program entry point; all .lXX/.fill/.longdiv labels below are local to it.
; Flow as visible in this listing:
;   1. open dos.library, fetch the stdout handle, print the banner (msg1);
;   2. prompt until getnum returns a digit count in 1..maxn (maxn kept in d7);
;   3. round the count up to a multiple of 4 and size the work array at ra;
;   4. run the pi spigot loop, printing 4 digits per outer pass through PR000N,
;      while a vertical-blank interrupt server counts ticks in `time`;
;   5. convert the tick count to hundredths of a second and print it as s.hh;
;   6. close dos.library with a tail jump.
; Register roles: a4 = address of cout (base for the moveq-relative data offsets),
; a6 = dos.library base, a5 = ExecBase; the two are swapped with exg around exec calls.
start
         lea libname(pc),a1         ;open the dos library
         move.l 4.W,a5              ;a5 = longword stored at absolute address 4 (used as ExecBase)
         move.l a5,a6
         jsr OldOpenLibrary(a6)
; NOTE(review): d0 is used as the library base without a zero check -- confirm acceptable.
         move.l d0,a6
         jsr Output(a6)          ;get stdout
         lea cout(PC),A4
         move.l d0,(A4)            ;cout
         move.l d0,d1                   ;call Write(stdout,buff,size)
         moveq #msg1-cout,D2 ; must be checked if in moveq range, the longest text can be moved at end
         add.l A4,D2                ;d2 = address of msg1
         moveq #msg4-msg1,d3        ;d3 = length of the banner line
         jsr Write(a6)
; Former run-time computation of maxn, replaced by the assemble-time expression below:
 ;        move.l #$10000-(ra-start),d7
 ;        divu.w #7*4,D7
 ;        lsl.l #2,D7    ; d7.w=maxn
; maxn = bytes left in the 64 KiB image after ra, divided by 28 and multiplied by 4,
; i.e. rounded down to a multiple of 4.
	move.l	#((65536-(ra-start))/(7<<2))<<2,D7	; d7=maxn

; Prompt: prints msg4, then maxn as four digits, then msg5, and reads the answer.
.l20 
         move.l (A4),D1    ; cout
         moveq #msg4-cout,D2
         add.l A4,D2
         moveq #msg5-msg4,d3
         jsr Write(a6)
         move.l d7,d5               ;PR0000 consumes d5, so print a copy of maxn
         bsr.w PR0000
         move.l (A4),D1 ; cout
         moveq #msg5-cout,D2
         add.l A4,D2
         moveq #msg3-msg5,d3
         jsr Write(a6)
         bsr.w getnum               ;d5 = parsed number (0 on error), a0 = last byte read
         cmp.w d7,d5
         bhi.b .l20                 ;above maxn (unsigned): ask again

         move.w d5,d1               ;d1 = number exactly as typed
         beq.b .l20                 ;zero or parse error: ask again

         addq.w #3,d5
         and.w #$fffc,d5            ;round up to a multiple of 4
         cmp.b #10,(a0)             ;was the last byte read a newline?
; NOTE(review): on this branch d6 has not been loaded from d5 (getnum leaves its
; last digit value in d6) -- confirm the intended handling of input without newline.
         bne.b .l21

         move.w d5,d6               ;d6 = rounded digit count
         cmp.w d1,d5
         beq.b .l7                  ;no rounding happened: skip the notice

; Prints the rounded count followed by msg3 and one byte of msg2 (the newline).
.l21
         bsr.w PR0000
         move.l (A4),D1 ; cout
         moveq #msg3-cout,D2
         add.l A4,D2
         moveq #msg2-msg3+1,d3
         jsr Write(a6)

; d6 = 7*digits/4 = number of longwords in the work array (kv is 7*digits/2 words).
.l7 
         mulu.w #7,d6          ;kv = d6
         lsr.l #2,D6               ; /4
         move.l d6,d7
         lea ra(pc),a3

; Install the tick counter: a6 = ExecBase while the exec calls are made.
; NOTE(review): PR000N calls dos Write between this Forbid and the later Permit --
; confirm that this is intended.
         exg a5,a6
         jsr Forbid(a6)
         moveq #INTB_VERTB,d0
         lea VBlankServer(pc),a1
         jsr AddIntServer(a6)
         exg a5,a6
         ;move.w #$4000,$dff096    ;DMA off
 
; Fill the array with the word value 2000 (two words per longword store).
         move.l #2000*65537,d0
         move.l a3,a0
.fill    move.l d0,(a0)+
         subq.l #1,D7
         bne.b .fill

; d7 is 0 here, so cv starts at 0. cv lives on the stack as a longword whose
; high word is what the later add.w (SP) reads.
         move.l D7,-(SP)    ; cv
         lea 10000.W,A2             ;a2 = constant 10000, reloaded into d1 on every outer pass
         moveq #4,D3                ;Write length used by PR000N
         moveq #buf-cout,D2
         add.l  A4,D2 ; buf

; Outer loop: one pass produces four decimal digits.
.l0      moveq #0,D5       ;d <- 0
         move.l d6,d4     ;i <- kv, i <- i*2
         lsl.l #2,D4           ; *4
         adda.l d4,a3               ;a3 = one past the last array word still in use
         subq.l #1,d4     ;b <- 2*i-1
         move.l A2,D1               ;d1 = 10000 (PR000N overwrote d1)
         bra.b .l4

; 32-bit by 16-bit division with a 32-bit quotient, used when divu.w overflows.
; On exit d0 = quotient and d7 = remainder (high word clear).
.longdiv
         swap d0
         move.w d0,d7
         divu.w d4,d7
         swap d7
         move.w d7,d0
         swap d0
         divu.w d4,d0

         move.w d0,d7
         exg d0,d7
         clr.w d7
         swap d7
         move.w d7,(a3)     ;r[i] <- d%b
         bra.b .enddiv

; d <- (d - remainder - quotient)/2; d0 and d7 hold quotient and remainder in either order.
.l2
         sub.l d0,d5
         sub.l d7,d5
         lsr.l #1,d5
.l4
         move -(a3),d0      ; r[i]
         mulu.w d1,d0       ;r[i]*10000
         add.l d0,d5       ;d += r[i]*10000
         move.l d5,d0
         divu.w d4,d0
         bvs.s .longdiv             ;quotient does not fit in 16 bits

         move.w d0,d7               ;d7 = quotient (high word of d7 is already clear)
         clr.w d0
         swap d0                    ;d0 = remainder
         move.w d0,(a3)     ;r[i] <- d%b
.enddiv
         subq.l #2,d4    ;i <- i - 1
         bcc.b .l2       ;the main loop
; d5 low word = d/10000, high word = d%10000.
         divu.w d1,d5      ;removed with MULU optimization
 
; Add the remainder saved by the previous pass, then store the whole d5 so that
; this pass's remainder becomes the next pass's cv.
         add.w (SP),D5 ; cv
         move.l D5,(SP) ; cv
         bsr.w PR000N

         subq.l #7,d6   ;kv
         bne.b .l0
         addq.l #4,SP ;  restore stack


; Read the tick count, then remove the interrupt server and undo the Forbid.
         move.l time(pc),d5
         ;move.w #$c000,$dff096    ;DMA on
         exg a5,a6
         moveq #INTB_VERTB,d0
         lea VBlankServer(pc),a1
         jsr RemIntServer(a6)
         jsr Permit(a6)
         exg a5,a6

         moveq #1,d3
         move.l (A4),D1 ; cout
;         move.l #msgx,d2

         moveq #msgx-cout,d2
         add.l  A4,D2
         jsr Write(a6)  ;space

; Convert ticks to hundredths of a second: ticks*2 when VBlankFrequency is 50,
; otherwise ticks*5/3 rounded (d3 is only a scratch copy of the tick count here).
         move.l d5,d3
         lsl.l #1,d5
         cmp.b #50,VBlankFrequency(a5)
         beq .l8

         lsl.l #1,d5      ;60 Hz
         add.l d3,d5
; NOTE(review): divu.w leaves d5 unchanged and sets V if the quotient exceeds
; 16 bits; that case is not handled here.
         divu.w #3,d5
         swap d5
         lsr.w #2,d5                ;X = 1 only when the remainder was 2; low word becomes 0
         swap d5
         negx.l d5
         neg.l d5                   ;net effect: d5 = quotient + X (round to nearest)

; Build the decimal digits, least significant first, as raw values 0..9 in `string`.
.l8      lea string(pc),a3
         moveq.l #10,d4
         move.l d5,d6

;div32x16 macro    ;D7=D6/D4, D6=D6%D4
 
;     moveq #0,d7    ; not necessary D7 highword is already cleared
     divu.w d4,d6
     bvc.b .div32no

     swap d6
     move.w d6,d7
     divu.w d4,d7
     swap d7
     move d7,d6
     swap d6
     divu.w d4,d6
.div32no
     move.w d6,d7
;     clr.w d6 ;not necessary
     swap d6

         move.b d6,(a3)+            ;hundredths digit
         divu.w d4,d7
         swap d7
         move.b d7,(a3)+            ;tenths digit
         clr.w d7
         swap d7
         move.b #'.'-'0',(a3)+      ;stored minus '0' because .l11 adds '0' to every byte
.l12     tst.w d7
         beq .l11

         divu.w d4,d7
         swap d7
         move.b d7,(a3)+
         clr.w d7
         swap d7
         bra .l12

; Emit the bytes in reverse order, one Write call per character.
.l11     add.b #'0',-(a3)
         moveq #1,d3
 ;        move.l cout(pc),d1

        move.l (A4),D1 ; cout
         move.l a3,d2
         jsr Write(a6)
         cmp.l #string,a3
         bne .l11

;         move.l cout(pc),d1

          move.l (A4),D1 ; cout
;         move.l #msgx+1,d2
         moveq #msgx+1-cout,d2
         add.l A4,D2
         jsr Write(a6)  ;newline

; Tail call: CloseLibrary(dos base) returns straight to this program's caller.
         move.l a6,a1
         move.l a5,a6
         jmp CloseLibrary(a6)

; PR0000 / PR000N -- write the low word of d5 to stdout as four ASCII decimal digits.
;   PR0000: sets d3 = 4 and d2 = address of buf itself, then falls into PR000N.
;   PR000N: entry for callers that already hold d2 = buf and d3 = 4.
; In:    d5.w = value to print, a4 = address of cout, a6 = dos.library base
; Out:   the four digits are stored in buf (at 4(a4)) and passed to Write
; Clobb: d0, d1, d5, a0 here, plus whatever Write itself changes
; Method: d0 starts as the bytes '/', ':', '/', '/' ('/' = '0'-1, ':' = '0'+10).
; Each subtract loop bumps one byte once per subtraction, including the one that
; underflows, which is why the digits start one below '0'.
PR0000     ;prints d5, uses a0,a1(scratch),d0,d1,d2,d3
      moveq #4,D3
      moveq #buf-cout,D2
      add.l  A4,D2 ; buf
PR000N
        move.w	#$0100,a0           ;adding a0 bumps the upper byte of d0's low word
	move.l	#$2f3a2f2f,d0
	move.w	#1000,d1
.b1000	add.w	a0,d0               ;thousands digit
	sub.w	d1,d5
	bcc.b	.b1000
	add.w	d1,d5               ;undo the subtraction that underflowed

	moveq	#100,d1
.b100	addq.b	#1,d0               ;hundreds digit (lowest byte)
	sub.w	d1,d5
	bcc.b	.b100
	add.w	d1,d5

	swap	d0                  ;thousands/hundreds to the high word, '/' ':' to the low word
	moveq	#10,d1
.b10	add.w	a0,d0               ;tens digit
	sub.w	d1,d5
	bcc.b	.b10
	add.b	d5,d0               ;d5 = units-10 here, so ':' + d5 gives '0' + units

        move.l D0,4(A4) ; buf
        move.l (A4),D1    ; cout
        jmp Write(A6) ;call Write(stdout,buff,size)

; rasteri -- code entry of the VBlankServer interrupt structure (its is_Code field).
; Adds 1 to the longword a1 points at; VBlankServer's is_Data field is `time`,
; so a1 is presumably that pointer on entry -- confirm against the exec interrupt docs.
rasteri
      addq.l #1,(a1)
;If you set your interrupt to priority 10 or higher then a0 must point at $dff000 on exit
      moveq #0,d0  ; must set Z flag on exit!
      rts

; VBlankServer -- interrupt structure passed in a1 to AddIntServer and RemIntServer
; together with d0 = INTB_VERTB. Priority is 0, no name, data pointer = `time`,
; code pointer = `rasteri`.
VBlankServer:
      dc.l  0,0                   ;ln_Succ,ln_Pred
      dc.b  NT_INTERRUPT,0        ;ln_Type,ln_Pri
      dc.l  0                     ;ln_Name
      dc.l  time,rasteri          ;is_Data,is_Code

 ; msgx: a space followed by a newline; written one byte at a time around the timing output.
 msgx dc.b 32,10

 ; Longword alignment for the variables below.
 cnop 0,4

 ; time: tick counter incremented by rasteri. cout: stdout file handle.
 ; buf: 4-byte digit buffer; it must stay directly after cout because PR000N
 ; stores to 4(A4) with A4 = address of cout.
 time dc.l 0
 cout dc.l 0
 buf ds.b 4

; Overwritten code/data start here.
; ra is the base of the spigot work array (start fills it from here), so the
; library name, the messages and getnum are lost once the calculation begins.
; msg1 is also reused as scratch: getnum reads the typed number into it and
; `string` (same address) receives the digits of the elapsed time.
ra
string = msg1
libname  dc.b "dos.library",0
msg1  dc.b 'number pi calculator v13',10
msg4 dc.b 'number of digits (up to '
msg5 dc.b ')? '
msg3 dc.b ' digits will be printed'
msg2 dc.b 10,0
      even

; getnum -- read up to 5 bytes from stdin into msg1 and parse them as unsigned decimal.
; In:    a4 = address of cout, a6 = dos.library base
; Out:   d5 = parsed value, or 0 if a non-digit was met among the parsed bytes
;        a0 = address of the first byte not parsed (the last byte read when all
;             preceding bytes were digits); the caller tests it for newline
; Clobb: d0, d1, d2, d3, d6
; Only (bytes read - 1) characters are parsed, because the final byte is
; expected to be the newline.
; NOTE(review): the parse relies on d2 still holding the buffer address after
; Read, and a Read result of 0 or -1 is not tested separately -- confirm.
getnum
        jsr Input(a6)          ;get stdin
        moveq #msg1-cout,D2
        add.l A4,D2
        move.l d0,d1
        moveq #5,d3     ;+ newline
        jsr Read(a6)
 
        move.l	d2,a0
	moveq	#0,d5
.loop	subq.w	#1,d0
	beq.b	.done
	move.w	#256-'0',d6         ;byte add below wraps, leaving d6.w = character - '0' for digits
	add.b	(a0)+,d6
	cmp.w	#9,d6
	bhi.b	.error              ;not '0'..'9'
	mulu.w	#10,d5
	add.w	d6,d5
	bra.b	.loop
.error	moveq	#0,d5
.done	rts

; Buffy -- reserves the rest of the image so that start..end spans exactly 65536 bytes,
; matching the 65536-(ra-start) space that the maxn expression in start assumes after ra.
Buffy
     ds.b 65536-(Buffy-start)

Last edited by Don_Adan; 12 June 2021 at 12:38.
Don_Adan is offline  
Old 12 June 2021, 12:18   #302
a/b
Registered User

 
Join Date: Jun 2016
Location: europe
Posts: 385
This is not equivalent:
Code:
         ; Assemble-time form being criticised: a plain /7 with no rounding down to a
         ; multiple of 4, unlike the commented-out run-time sequence (divide by 7*4, shift left 2).
         move.l #(65536-(ra-start))/7,D7 ; D7=maxn
 ;        move.l #$10000-(ra-start),d7
 ;        divu.w #7*4,D7
 ;        lsl.l #2,D7    ; d7.w=maxn
Also missing ext.l between div and lsl.

8/7 = 1
(8/28)<<2 = 0
7777/7 = 1111
(7777/28)<<2 = 1108

It should be written as either of these:
Code:
	; Two alternatives, use one: both yield the free byte count divided by 7 and rounded down to a multiple of 4.
	move.l	#((65536-(ra-start))/7)&(~3),D7		; d7=maxn
	move.l	#((65536-(ra-start))/(7<<2))<<2,D7	; d7=maxn
a/b is offline  
Old 12 June 2021, 12:21   #303
Bruce Abbott
Registered User

Bruce Abbott's Avatar
 
Join Date: Mar 2018
Location: Hastings, New Zealand
Posts: 675
Quote:
Originally Posted by meynaf View Post
But this is bringing us quite far from the original topic of 32-bit division...
This thread is supposed to be about division on 68020/030 only, but discussing differences between them and other CPUs is not totally out of place. Also the OP was interested in performance on 'real iron', so the effect of different types of memory etc. could be relevant (e.g. a base model A1200 vs. one with FastRAM or an accelerator card).

Here's a few more interesting timings. First a straight copy from FastRAM to ChipRAM, which took 46 clock cycles per loop.

Code:
  ; Timing test 1: copy 1000 longwords with a single memory-to-memory move per pass.
  lea     fastram,a0      ; a0 = pointer to fastram
  lea     chipram,a1      ; a1 = pointer to chipram
  move.w  #1000-1,d5      ; repeat inner loop code 1000 times
; -- inner loop --
loop:
   move.l  (a0)+,(a1)+        ; copy longword from fastram to chipram
  dbf     d5,loop
That's about 4.3MB per second, which isn't particularly impressive. Curiously however, copying the data through a register was just as fast despite having an extra instruction...
Code:
; Timing test 2: same copy, split into a load into d0 and a store from d0.
loop:
  move.l  (a0)+,d0        ; read longword from next fastram address
  move.l  d0,(a1)+        ; write longword to next chipram address
  dbf     d5,loop
So how many instructions can we add without increasing the copy time? The answer is, a lot!
Code:
; Timing test 3: the load/store copy padded with 13 register-to-register moves
; (3 between the load and the store, 10 after the store) that do no useful work.
loop:
  move.l  (a0)+,d0        ; read longword from next fastram address
  move.l  d2,d2
  move.l  d2,d2
  move.l  d2,d2
  move.l  d0,(a1)+        ; write longword to next chipram address
  move.l  d2,d2
  move.l  d2,d2
  move.l  d2,d2
  move.l  d2,d2
  move.l  d2,d2
  move.l  d2,d2
  move.l  d2,d2
  move.l  d2,d2
  move.l  d2,d2
  move.l  d2,d2
  dbf     d5,loop
That's 13 'free' instructions that could be used to manipulate the data while copying it, or for some other purpose.

I don't know where this effect is coming from, but it certainly could be useful. 4.3MB/s may not be so much of a bottleneck if you can combine it with some other processing.

Maybe this analysis is a bit off topic, but it shows that when dealing with slow memory it pays to interleave data memory accesses with internal operations. The pi-spigot code has mostly register to register instructions and no consecutive data memory accesses in its inner loop, so it (fortunately?) has nothing to gain from this principle.
Bruce Abbott is offline  
Old 12 June 2021, 12:41   #304
Don_Adan
Registered User
 
Join Date: Jan 2008
Location: Warsaw/Poland
Age: 53
Posts: 1,327
Quote:
Originally Posted by a/b View Post
This is not equivalent:
Code:
         ; Quoted snippet: plain /7 form, not rounded down to a multiple of 4.
         move.l #(65536-(ra-start))/7,D7 ; D7=maxn
 ;        move.l #$10000-(ra-start),d7
 ;        divu.w #7*4,D7
 ;        lsl.l #2,D7    ; d7.w=maxn
Also missing ext.l between div and lsl.

8/7 = 1
(8/28)<<2 = 0
7777/7 = 1111
(7777/28)<<2 = 1108

It should be written as either of these:
Code:
	; Quoted snippet: two alternative forms, both rounded down to a multiple of 4.
	move.l	#((65536-(ra-start))/7)&(~3),D7		; d7=maxn
	move.l	#((65536-(ra-start))/(7<<2))<<2,D7	; d7=maxn
Ok, fixed, thanks.
Ext.l is not necessary for this version, because D7 (later D5) is handled as a word only.
Ext.l is only necessary for litwr's version of the PR0000 routine, which uses divu.w; for the sub.w version it can be omitted.
Don_Adan is offline  
Old 12 June 2021, 12:45   #305
meynaf
son of 68k
meynaf's Avatar
 
Join Date: Nov 2007
Location: Lyon / France
Age: 48
Posts: 4,315
Quote:
Originally Posted by Bruce Abbott View Post
This thread is supposed to be about division on 68020/030 only, but discussing differences between them other CPUs is not totally out of place. Also the OP was interested in performance on 'real iron', so the effect of different types of memory etc. could be relevant (eg. base model A1200 vs with FastRAM or accelerator card).
Ok then. Let's go for it.


Quote:
Originally Posted by Bruce Abbott View Post
I don't know where this effect is coming from, but it certainly could be useful. 4.3MB/s may not be so much of a bottleneck if you can combine it with some other processing.
I suppose the effect for the reads is the one mentioned by grond. It doesn't explain everything but it exists.
For the writes, we know why already.

Now perhaps it's possible to do better. What about :
Code:
; Proposed variant: three consecutive loads into d0-d2 followed by three
; consecutive stores, so each pass copies three longwords.
loop
 move.l (a0)+,d0
 move.l (a0)+,d1
 move.l (a0)+,d2
 move.l d0,(a1)+
 move.l d1,(a1)+
 move.l d2,(a1)+
 dbf d5,loop
You could also attempt to enable/disable data burst, to see if this has a significant impact.
meynaf is offline  
Old 12 June 2021, 14:31   #306
SpeedGeek
Moderator
SpeedGeek's Avatar
 
Join Date: Dec 2010
Location: Wisconsin USA
Age: 57
Posts: 592
@Thread

I've edited the thread title so the 040 and 060 can be included as on-topic here. As you can see, the topic diversity is something to think about when the thread is created.
SpeedGeek is offline  
Old Yesterday, 20:21   #307
litwr
Registered User

 
Join Date: Mar 2016
Location: Ozherele
Posts: 229
First, thanks to the people who helped to optimize my code. I have just made a commit with some of Don_Adan's suggestions. However, I must note that I was invited to start this thread by meynaf.

Second, something weird is going on. An Amiga enthusiast was banned when he was just discussing how to write better Amiga code. No reason for what triggered this ban was provided. So it looks like overt bullying.

Third, BippyM, if you are a decent person you should apologize for your strange behavior. BTW there is no argument between EAB and me; we had quite fruitful discussions here. Everyone, including you, received some new information from them. Sometimes, however, a few people allowed themselves to be less than entirely polite. It's sad, but it wasn't me. Or did I miss something? BTW I sent a PM to you and you ignored it. So I have no option but to start a public talk with you. I have just started a new thread for this.
litwr is offline  
Old Yesterday, 20:49   #308
alkis
Registered User

 
Join Date: Dec 2010
Location: Athens/Greece
Age: 50
Posts: 582
Quote:
Originally Posted by litwr View Post
Third, BippyM, if you are a decent person you should apologize for your strange behavior.
I got to give it to him though, quite a creative way to ask for reban.
alkis is offline  
Old Yesterday, 20:51   #309
kriz
Junior Member
kriz's Avatar
 
Join Date: Sep 2001
Location: No(R)Way
Age: 39
Posts: 2,780
He is not banned anymore, so now he can report why he got banned
kriz is offline  
Old Yesterday, 20:54   #310
BippyM
Global Moderator

BippyM's Avatar
 
Join Date: Nov 2001
Location: Derby, UK
Age: 45
Posts: 9,248
Quote:
Originally Posted by litwr View Post
First, thanks to people who helped to optimize my code. I have just made a commit with some Don_Adan's suggestions. However I must notice that I was invited to start this thread by meynaf.

Second, something weird is going on. An Amiga enthusiast was banned when he was just discussing how to make better Amiga coding. No reason for what triggered this ban was provided. So it looks like an overt bullying.

Third, BippyM, if you are a decent person you should apologize for your strange behavior. BTW there is no argument between EAB and me, we had quite fruitful discussions here. Everyone, including you, received some new information from them. Sometimes, however, few people allowed themselves to be not entirely polite. It's sad but it wasn't me. Or did I miss something? BTW I sent a pm to you and you ignored it. So I have no options but to start a public talk with you. I have just started a new thread for this.

Excuse me?



Again you are proving the point. You bait people and then make out they are in the wrong, that they are acting strangely. I am sorry, but bans are discussed between the global mods. Numerous members have reported you and your posts, and as such you were warned, ignored the warnings and were banned. You then come back and make accusations of bullying; clearly you do not listen and you do not learn.



I am sorry, but neither I nor any other moderator has a personal issue with you. Your behaviour is why you were banned.


This is what is going to happen. If you continue with this ridiculous behaviour you will be banned permanently. The EAB doesn't need members like you: members who ask for help and advice and then proceed to dismiss advice, argue about advice, say it is wrong and then say the members giving advice are strange. This is your very last warning. Go back to discussing coding, discussing the Amiga vs PC, and just follow the rules and be respectful.
BippyM is offline  
 


Currently Active Users Viewing This Thread: 3 (0 members and 3 guests)
 
Thread Tools

Similar Threads
Thread Thread Starter Forum Replies Last Post
68060 64-bit integer math BSzili Coders. Asm / Hardware 7 25 January 2021 21:18
68020 Bit Field Instructions mcgeezer Coders. Asm / Hardware 7 07 February 2019 14:59
Discovery: Math Audio Snow request.Old Rare Games 30 20 August 2018 12:17
Math apps mtb support.Apps 1 08 September 2002 18:59

Posting Rules
You may not post new threads
You may not post replies
You may not post attachments
You may not edit your posts

BB code is On
Smilies are On
[IMG] code is On
HTML code is Off

Forum Jump


All times are GMT +2. The time now is 06:31.


Powered by vBulletin® Version 3.8.11
Copyright ©2000 - 2021, vBulletin Solutions Inc.
Page generated in 0.09527 seconds with 15 queries