English Amiga Board


Go Back   English Amiga Board > Coders > Coders. Asm / Hardware

 
 
Thread Tools
Old 30 December 2013, 01:43   #1
Thorham
Computer Nerd
 
Thorham's Avatar
 
Join Date: Sep 2007
Location: Rotterdam/Netherlands
Age: 48
Posts: 3,835
Optimizing SHA-256 crypto hash.

Hi,

Here's an implementation of SHA-256 that I've written. Can anyone find some optimizations? The code is tested and works properly.

Source: sha256.s

Code:
sha256
    lea     w+(16*4),a0
    lea     -02*4(a0),a2
    lea     -16*4(a0),a1
    lea     -07*4(a0),a6

    moveq   #48-1,d7
.loop1

; s1 := (w[i-2] rightrotate 17) xor (w[i-2] rightrotate 19) xor (w[i-2] rightshift 10)

    move.l  (a2)+,d3
    move.l  d3,d6
    moveq   #10,d0
    lsr.l   d0,d6
    swap    d3
    ror.l   #1,d3
    eor.l   d3,d6
    ror.l   #2,d3
    eor.l   d3,d6

; s0 := (w[i-15] rightrotate 7) xor (w[i-15] rightrotate 18) xor (w[i-15] rightshift 3)

    add.l   (a1)+,d6

    move.l  (a1),d0
    move.l  d0,d3
    lsr.l   #3,d3
    ror.l   #7,d0
    eor.l   d0,d3
    swap    d0
    rol.l   #5,d0
    eor.l   d3,d0

; w[i] := w[i-16] + s0 + w[i-7] + s1

    add.l   (a6)+,d0
    add.l   d6,d0
    move.l  d0,(a0)+

    dbra    d7,.loop1

; work variables

    lea     h,a0

    move.l  (a0)+,a3    ; a
    move.l  (a0)+,d1    ; b
    move.l  (a0)+,d2    ; c
    move.l  (a0)+,a1    ; d
    move.l  (a0)+,a4    ; e
    move.l  (a0)+,d4    ; f
    move.l  (a0)+,d5    ; g
    move.l  (a0)+,a5    ; h

    lea     k,a0
    lea     w,a2

    move.l  #64,a6

.loop2
;
; temp 0
;
; S0 := (a rightrotate 2) xor (a rightrotate 13) xor (a rightrotate 22)

    move.l  a3,d3
    ror.l   #2,d3
    move.l  d3,d6
    swap    d6
    move.l  d6,d7
    rol.l   #5,d6
    eor.l   d6,d3
    ror.l   #4,d7
    eor.l   d7,d3

; maj := (a and b) xor (a and c) xor (b and c)

    move.l  a3,d6
    and.l   d1,d6
    move.l  a3,d7
    and.l   d2,d7
    eor.l   d7,d6
    move.l  d1,d7
    and.l   d2,d7
    eor.l   d7,d6
    add.l   d6,d3   ; d3 = temp 0
;
; temp 1
;
; S1 := (e rightrotate 6) xor (e rightrotate 11) xor (e rightrotate 25)

    move.l  a4,d0
    move.l  d0,d6
    ror.l   #6,d0
    move.l  d0,d7
    ror.l   #5,d7
    eor.l   d7,d0
    move.l  d6,d7
    rol.l   #7,d6
    eor.l   d6,d0

; ch := (e and f) xor ((not e) and g)

    move.l  a4,d6
    and.l   d4,d6
    move.l  a4,d7
    not.l   d7
    and.l   d5,d7
    eor.l   d7,d6


; d0 = temp1 := h + S1 + ch + k[i] + w[i]

    add.l   a5,d0
    add.l   d6,d0
    add.l   (a0)+,d0
    add.l   (a2)+,d0

; part 2

    add.l   d0,d3
    move.l  d5,a5
    move.l  d4,d5
    move.l  a4,d4
    add.l   a1,d0
    move.l  d0,a4
    move.l  d2,a1
    move.l  d1,d2
    move.l  a3,d1
    move.l  d3,a3

; next
    subq.l  #1,a6
    tst.l   a6
    bne     .loop2

; merge work variables with hash: hash += a..h

    lea     h,a2

    add.l   (a2),a3
    move.l  a3,(a2)+
    add.l   (a2),d1
    move.l  d1,(a2)+
    add.l   (a2),d2
    move.l  d2,(a2)+
    add.l   (a2),a1
    move.l  a1,(a2)+
    add.l   (a2),a4
    move.l  a4,(a2)+
    add.l   (a2),d4
    move.l  d4,(a2)+
    add.l   (a2),d5
    move.l  d5,(a2)+
    add.l   (a2),a5
    move.l  a5,(a2)+

    rts

k
    dc.l $428a2f98,$71374491,$b5c0fbcf,$e9b5dba5,$3956c25b,$59f111f1,$923f82a4,$ab1c5ed5
    dc.l $d807aa98,$12835b01,$243185be,$550c7dc3,$72be5d74,$80deb1fe,$9bdc06a7,$c19bf174
    dc.l $e49b69c1,$efbe4786,$0fc19dc6,$240ca1cc,$2de92c6f,$4a7484aa,$5cb0a9dc,$76f988da
    dc.l $983e5152,$a831c66d,$b00327c8,$bf597fc7,$c6e00bf3,$d5a79147,$06ca6351,$14292967

    dc.l $27b70a85,$2e1b2138,$4d2c6dfc,$53380d13,$650a7354,$766a0abb,$81c2c92e,$92722c85
    dc.l $a2bfe8a1,$a81a664b,$c24b8b70,$c76c51a3,$d192e819,$d6990624,$f40e3585,$106aa070
    dc.l $19a4c116,$1e376c08,$2748774c,$34b0bcb5,$391c0cb3,$4ed8aa4a,$5b9cca4f,$682e6ff3
    dc.l $748f82ee,$78a5636f,$84c87814,$8cc70208,$90befffa,$a4506ceb,$bef9a3f7,$c67178f2
w
    dc.b    "abc"       ; message, maximum size 55 bytes
wEnd
wSize=wEnd-w
    dc.b    %10000000
    dcb.b   64-8-1-wSize
    dc.l    0
    dc.l    wSize*8
wExtend
    dcb.l   48
h
    dc.l $6a09e667,$bb67ae85,$3c6ef372,$a54ff53a,$510e527f,$9b05688c,$1f83d9ab,$5be0cd19

pad ; for hex viewing h, not used by the code
    dcb.b   1024
Thorham is offline  
Old 30 December 2013, 13:43   #2
Don_Adan
Registered User
 
Join Date: Jan 2008
Location: Warsaw/Poland
Age: 56
Posts: 2,033
Quote:
Originally Posted by Thorham View Post
Hi,

Here's an implementation of SHA-256 that I've written. Can anyone find some optimizations? The code is tested and works properly.

Source: Attachment 38458

Code:
sha256
    lea     w+(16*4),a0
    lea     -02*4(a0),a2
    lea     -16*4(a0),a1
    lea     -07*4(a0),a6
 
    moveq #10,D1
    moveq   #48-1,d7
.loop1
 
; s1 := (w[i-2] rightrotate 17) xor (w[i-2] rightrotate 19) xor (w[i-2] rightshift 10)
 
    move.l  (a2)+,d3
    move.l  d3,d6
;    moveq   #10,d0
    lsr.l   D1,d6
    swap    d3
    ror.l   #1,d3
    eor.l   d3,d6
    ror.l   #2,d3
    eor.l   d3,d6
 
; s0 := (w[i-15] rightrotate 7) xor (w[i-15] rightrotate 18) xor (w[i-15] rightshift 3)
 
    add.l   (a1)+,d6
 
    move.l  (a1),d0
    move.l  d0,d3
    lsr.l   #3,d3
    ror.l   #7,d0
    eor.l   d0,d3
    swap    d0
    rol.l   #5,d0
    eor.l   d3,d0
 
; w[i] := w[i-16] + s0 + w[i-7] + s1
 
    add.l   (a6)+,d0
    add.l   d6,d0
    move.l  d0,(a0)+
 
    dbra    d7,.loop1
 
; work variables
 
    lea     h,a0
 
    move.l  (a0)+,a3    ; a
    move.l  (a0)+,d1    ; b
    move.l  (a0)+,d2    ; c
    move.l  (a0)+,a1    ; d
    move.l  (a0)+,a4    ; e
    move.l  (a0)+,d4    ; f
    move.l  (a0)+,d5    ; g
    move.l  (a0)+,a5    ; h
 
    lea     k,a0
    lea     w,a2
 
;    move.l  #64,a6
    lea 64*4(A2),A6
.loop2
;
; temp 0
;
; S0 := (a rightrotate 2) xor (a rightrotate 13) xor (a rightrotate 22)
 
    move.l  a3,d3
    ror.l   #2,d3
    move.l  d3,d6
    swap    d6
    move.l  d6,d7
    rol.l   #5,d6
    eor.l   d6,d3
    ror.l   #4,d7
    eor.l   d7,d3
 
; maj := (a and b) xor (a and c) xor (b and c)
 
    move.l  a3,d6
    and.l   d1,d6
    move.l  a3,d7
    and.l   d2,d7
    eor.l   d7,d6
    move.l  d1,d7
    and.l   d2,d7
    eor.l   d7,d6
    add.l   d6,d3   ; d3 = temp 0
;
; temp 1
;
; S1 := (e rightrotate 6) xor (e rightrotate 11) xor (e rightrotate 25)
 
    move.l  a4,d0
    move.l  d0,d6
    ror.l   #6,d0
    move.l  d0,d7
    ror.l   #5,d7
    eor.l   d7,d0
    move.l  d6,d7
    rol.l   #7,d6
    eor.l   d6,d0
 
; ch := (e and f) xor ((not e) and g)
 
    move.l  a4,d6
    and.l   d4,d6
    move.l  a4,d7
    not.l   d7
    and.l   d5,d7
    eor.l   d7,d6
 
 
; d0 = temp1 := h + S1 + ch + k[i] + w[i]
 
    add.l   a5,d0
    add.l   d6,d0
    add.l   (a0)+,d0
    add.l   (a2)+,d0
 
; part 2
 
    add.l   d0,d3
    move.l  d5,a5
    move.l  d4,d5
    move.l  a4,d4
    add.l   a1,d0
    move.l  d0,a4
    move.l  d2,a1
    move.l  d1,d2
    move.l  a3,d1
    move.l  d3,a3
 
; next
 ;   subq.l  #1,a6
 ;   tst.l   a6
  cmp.l A2,A6 
   bne     .loop2
 
; merge work variables with hash: hash += a..h
 
    lea     h,a2
 
    add.l   (a2),a3
    move.l  a3,(a2)+
    add.l   (a2),d1
    move.l  d1,(a2)+
    add.l   (a2),d2
    move.l  d2,(a2)+
    add.l   (a2),a1
    move.l  a1,(a2)+
    add.l   (a2),a4
    move.l  a4,(a2)+
    add.l   (a2),d4
    move.l  d4,(a2)+
    add.l   (a2),d5
    move.l  d5,(a2)+
    add.l   (a2),a5
    move.l  a5,(a2)+
 
    rts
 
k
    dc.l $428a2f98,$71374491,$b5c0fbcf,$e9b5dba5,$3956c25b,$59f111f1,$923f82a4,$ab1c5ed5
    dc.l $d807aa98,$12835b01,$243185be,$550c7dc3,$72be5d74,$80deb1fe,$9bdc06a7,$c19bf174
    dc.l $e49b69c1,$efbe4786,$0fc19dc6,$240ca1cc,$2de92c6f,$4a7484aa,$5cb0a9dc,$76f988da
    dc.l $983e5152,$a831c66d,$b00327c8,$bf597fc7,$c6e00bf3,$d5a79147,$06ca6351,$14292967
 
    dc.l $27b70a85,$2e1b2138,$4d2c6dfc,$53380d13,$650a7354,$766a0abb,$81c2c92e,$92722c85
    dc.l $a2bfe8a1,$a81a664b,$c24b8b70,$c76c51a3,$d192e819,$d6990624,$f40e3585,$106aa070
    dc.l $19a4c116,$1e376c08,$2748774c,$34b0bcb5,$391c0cb3,$4ed8aa4a,$5b9cca4f,$682e6ff3
    dc.l $748f82ee,$78a5636f,$84c87814,$8cc70208,$90befffa,$a4506ceb,$bef9a3f7,$c67178f2
w
    dc.b    "abc"       ; message, maximum size 55 bytes
wEnd
wSize=wEnd-w
    dc.b    %10000000
    dcb.b   64-8-1-wSize
    dc.l    0
    dc.l    wSize*8
wExtend
    dcb.l   48
h
    dc.l $6a09e667,$bb67ae85,$3c6ef372,$a54ff53a,$510e527f,$9b05688c,$1f83d9ab,$5be0cd19
 
pad ; for hex viewing h, not used by the code
    dcb.b   1024
Some small changes. Perhaps all move.l (A0)+,R commands can be replaced with one movem.l h,D/A but it needs some changes inside code or h data.
Don_Adan is offline  
Old 30 December 2013, 14:15   #3
Thorham
Computer Nerd
 
Thorham's Avatar
 
Join Date: Sep 2007
Location: Rotterdam/Netherlands
Age: 48
Posts: 3,835
Nice I especially like the loop counter solution The movem idea is certainly possible. As you said, only requires re-ordering table h. Should be fine for fixed size messages as in this implementation. Also, movem can be used at the end, because for this implementation the hash long words don't have to be in the right order, as long as they're correct.

The moveq #10,d1 outside the loop at the beginning is a real I was going to extend the code to handle messages larger than 55 bytes, and then you have to loop back and execute that code again, hence the register choices (you have to preserve the contents of the move.l (a0)+,reg registers so you don't have to re-read them), but I don't need that now.

Any more?

Last edited by Thorham; 30 December 2013 at 14:23.
Thorham is offline  
Old 03 January 2014, 06:21   #4
modrobert
old bearded fool
 
modrobert's Avatar
 
Join Date: Jan 2010
Location: Bangkok
Age: 56
Posts: 779
Bitcoin miner for Amiga?
modrobert is offline  
Old 07 January 2014, 15:36   #5
Thorham
Computer Nerd
 
Thorham's Avatar
 
Join Date: Sep 2007
Location: Rotterdam/Netherlands
Age: 48
Posts: 3,835
Quote:
Originally Posted by modrobert View Post
Bitcoin miner for Amiga?


It's for a random number generator that takes the hardware state and the system time and hashes it. The hardware state and system time are utterly crap for random number generation by them selves, so some bit mixing is needed. Seeing how SHA256 even generates good PRNG data in counter mode (32 bit counter no less), it seems to be the perfect choice for this.
Thorham is offline  
Old 07 January 2014, 16:57   #6
Leffmann
 
Join Date: Jul 2008
Location: Sweden
Posts: 2,269
I don't know about perfect though, doing SHA-256 hashes is a horrendously slow way of generating random numbers. Try the Xorshift algorithms, they are a hundred times faster at least, and pass all tests of randomness.
Leffmann is offline  
Old 07 January 2014, 18:05   #7
Thorham
Computer Nerd
 
Thorham's Avatar
 
Join Date: Sep 2007
Location: Rotterdam/Netherlands
Age: 48
Posts: 3,835
Yep, it's slow alright, especially when combined with the way in which I obtain data from the hardware state (eight registers XORed together into a byte, and then generate 64 bytes for hashing that way). But it's fine. It's only supposed to be used for seeding or secure/unpredictable numbers (cases where you don't need many numbers).

Xorshift is good and ridiculously simple, but it's not a good way to mix data from an entropy source, especially not the Amiga's hardware state. This really needs to be hashed.
Thorham is offline  
Old 20 June 2015, 22:38   #8
daxb
Registered User
 
Join Date: Oct 2009
Location: Germany
Posts: 3,307
Thorham, you aren`t the author of sha.lha from Aminet? I ask because I get a "LONG READ from 00000000" when "S5" (512-bit hash) option is used. Everything else works without hits. Unfortunately, I haven`t a working email address.
daxb is offline  
Old 21 June 2015, 08:11   #9
Thorham
Computer Nerd
 
Thorham's Avatar
 
Join Date: Sep 2007
Location: Rotterdam/Netherlands
Age: 48
Posts: 3,835
Quote:
Originally Posted by daxb View Post
Thorham, you aren`t the author of sha.lha from Aminet?
Nope
Thorham is offline  
 


Currently Active Users Viewing This Thread: 1 (0 members and 1 guests)
 
Thread Tools

Similar Threads
Thread Thread Starter Forum Replies Last Post
NetSurf AGA optimizing arti Coders. Asm / Hardware 199 10 November 2013 14:36
Layered tile engine optimizing. Thorham Coders. General 0 30 September 2011 20:43
Search Software from Hash Inc AndreasM request.Apps 3 03 April 2011 15:23
KS3.1 hash orange support.Apps 3 11 January 2011 19:15
CRC32 , MD5 , SHA - 1 Curiosity Kyon support.Other 38 09 June 2009 10:08

Posting Rules
You may not post new threads
You may not post replies
You may not post attachments
You may not edit your posts

BB code is On
Smilies are On
[IMG] code is On
HTML code is Off

Forum Jump


All times are GMT +2. The time now is 15:41.

Top

Powered by vBulletin® Version 3.8.11
Copyright ©2000 - 2024, vBulletin Solutions Inc.
Page generated in 0.09434 seconds with 14 queries