English Amiga Board


Go Back   English Amiga Board > Coders > Coders. Asm / Hardware

 
 
Thread Tools
Old 15 October 2017, 15:39   #1
Thorham
Computer Nerd
 
Thorham's Avatar
 
Join Date: Sep 2007
Location: Rotterdam/Netherlands
Age: 47
Posts: 3,751
Sha256 stand-alone hash function.

Here's a stand-alone, optimized (more so than my previous effort) sha256 hash function that you can drop into just about any code that needs it: sha256.s. Related: if anyone wants an optimized AES encrypt and decrypt, let me know in this thread.

Updated version (compress loop optimizations from Wikipedia): sha256standAlone.s

Example usage (try it in AsmPro, don't forget the path!):

Code:
; Minimal test program: hashes the string at 'message' into 'digest'.
start
    lea     message,a1                  ; a1 = message
    move.l  #messageEnd-message,d0      ; d0 = message length in bytes
    lea     context,a0                  ; a0 = sha256 object (work memory)
    lea     digest,a2                   ; a2 = output array
    bsr     sha256.computeHash

    rts

    include "680x0:sha256.s"

; work memory for the hash routine
context
    dcb.b   sizeof.sha256

; receives the 32-byte digest
digest
    dcb.b   32

; the data to hash; messageEnd marks its end so the length can be computed
message
    dc.b    "Boring test message."
messageEnd
Sha256 code:

Code:
                rsset 0         ; layout of the sha256 object (work memory supplied by the caller)
sha256.hash     rs.b 32         ; running hash: 8 longs kept in b c f g d a e h order
sha256.schedule rs.b 256        ; message schedule: 64 longs (16 copied from the block + 48 computed)
sha256.padding  rs.b 128        ; padded tail of the message: room for 2 blocks of 64 bytes
sizeof.sha256   rs.b 0          ; total object size in bytes (32 + 256 + 128 = 416)

;
; sha256.computeHash - compute the SHA-256 digest of a whole message.
;
; a0 = sha256 object
; a1 = message
; a2 = output array
; d0 = message length
;
; d1/d7/a1/a3/a4 are saved on entry and restored on exit.
;
sha256.computeHash
    movem.l d1/d7/a1/a3-a4,-(sp)

; initialise the running hash from the constant table, two longs per pass

    lea     sha256.hash(a0),a3
    lea     sha256.hashConstants,a4

    moveq   #4-1,d7
.seed
    move.l  (a4)+,(a3)+
    move.l  (a4)+,(a3)+
    dbra    d7,.seed

; hash the complete 64-byte blocks, if there are any

    move.l  d0,d1
    lsr.l   #6,d1 ; d1 = number of complete blocks
    beq     .tail ; none: hashBlocks is only called with a non-zero count
    bsr     sha256.hashBlocks ; leaves a1 at the unhashed remainder
.tail

; pad the remainder into the object and hash the padded block(s)

    bsr     sha256.padMessage ; d1 <- number of blocks in the padding buffer
    lea     sha256.padding(a0),a1
    bsr     sha256.hashBlocks

; store the digest: the object keeps it as b c f g d a e h,
; the output array receives it as a b c d e f g h

    lea     sha256.hash(a0),a3

    move.l  0*4(a3),1*4(a2) ; b
    move.l  1*4(a3),2*4(a2) ; c
    move.l  2*4(a3),5*4(a2) ; f
    move.l  3*4(a3),6*4(a2) ; g
    move.l  4*4(a3),3*4(a2) ; d
    move.l  5*4(a3),0*4(a2) ; a
    move.l  6*4(a3),4*4(a2) ; e
    move.l  7*4(a3),7*4(a2) ; h

    movem.l (sp)+,d1/d7/a1/a3-a4
    rts
;
; sha256.hashBlocks - run the SHA-256 compression over d1 consecutive
; 64-byte blocks and add the result into the running hash.
;
; a0 = sha256 object
; a1 = message blocks
; d1 = number of message blocks (must not be 0: the counter is decremented
;      before it is tested, so 0 would wrap around instead of stopping)
;
; a1 <- pointer to remaining message block
;
; Every register except a1 is preserved.
;
; Work variables stay in registers for the whole call:
;   a3 = a   d1 = b   d2 = c   a2 = d   a4 = e   d4 = f   d5 = g   a5 = h
; Scratch: d0 d3 d6 d7 a0 a1 a6
;
; NOTE(review): each block is fetched with move.l, so a1 presumably has to
; be word-aligned on a 68000/68010 - confirm for the target CPU.
;
sha256.hashBlocks
    movem.l d0-a0/a2-a6,-(sp) ; d0-d7/a0/a2-a6; a1 is the return value

; stack variables (the numbers are the offsets from sp once all four are pushed)

    lea     sha256.hash(a0),a2
    lea     sha256.schedule(a0),a3

    move.l  a1,-(sp) ; 12 message
    move.l  a2,-(sp) ; 8 hash
    move.l  a3,-(sp) ; 4 schedule
    move.l  d1,-(sp) ; 0 counter

; setup work variables and hash
; movem loads in register-number order, so memory order is b c f g d a e h

    lea     sha256.hash(a0),a1
    movem.l (a1),d1/d2/d4/d5/a2/a3/a4/a5

.loop

; copy block to schedule: 4 passes of 4 longs = w[0..15]

    move.l  12(sp),a0 ; message
    move.l  4(sp),a1 ; schedule

    moveq   #4-1,d7

.loopCopy
    move.l  (a0)+,(a1)+
    move.l  (a0)+,(a1)+
    move.l  (a0)+,(a1)+
    move.l  (a0)+,(a1)+

    dbra    d7,.loopCopy

    move.l  a0,12(sp) ; message (now points at the next block)

; schedule block: compute w[16..63]
; a1 -> w[i] (slot being written), a0 -> w[i-2], a6 -> w[i-16]

    lea     -2*4(a1),a0
    lea     -16*4(a1),a6

    moveq   #48-1,d7

.loopSchedule
    move.l  (a0)+,d3 ; d3 = w[i-2]
    move.l  d3,d6
    lsr.l   #8,d6
    lsr.l   #2,d6 ; d6 = w[i-2] >> 10 (immediate shift counts stop at 8)
    swap    d3 ; ror 16
    ror.l   #1,d3 ; ror 17
    eor.l   d3,d6
    ror.l   #2,d3 ; ror 19
    eor.l   d3,d6 ; d6 = s1 = ror17 ^ ror19 ^ shr10

    add.l   (a6)+,d6 ; d6 = s1 + w[i-16]; a6 -> w[i-15]
    move.l  (a6),d0 ; d0 = w[i-15]
    move.l  d0,d3
    lsr.l   #3,d3 ; w[i-15] >> 3
    ror.l   #7,d0 ; ror 7
    eor.l   d0,d3
    swap    d0 ; ror 23
    rol.l   #5,d0 ; ror 18
    eor.l   d3,d0 ; d0 = s0 = ror7 ^ ror18 ^ shr3

    add.l   -7*4(a1),d0 ; + w[i-7]
    add.l   d6,d0 ; + s1 + w[i-16]
    move.l  d0,(a1)+ ; store w[i]

    dbra    d7,.loopSchedule

; compress block
; a0 walks w[0..63], a6 walks the round constants,
; a1 (end of the schedule) is the end marker for the loop

    lea     -256(a1),a0
    lea     sha256.constants,a6

; main hash loop

.loopCompress
    move.l  a3,d3
    ror.l   #2,d3 ; a ror 2
    move.l  d3,d6
    swap    d6 ; a ror 18
    move.l  d6,d7
    rol.l   #5,d6 ; a ror 13
    eor.l   d6,d3
    ror.l   #4,d7 ; a ror 22
    eor.l   d7,d3 ; d3 = S0 = ror2 ^ ror13 ^ ror22

    move.l  a3,d6
    and.l   d1,d6 ; a & b
    move.l  a3,d7
    and.l   d2,d7 ; a & c
    eor.l   d7,d6
    move.l  d1,d7
    and.l   d2,d7 ; b & c
    eor.l   d7,d6 ; d6 = maj = (a & b) ^ (a & c) ^ (b & c)
    add.l   d6,d3 ; d3 = S0 + maj

    move.l  a4,d0
    ror.l   #6,d0 ; e ror 6
    move.l  d0,d6
    ror.l   #5,d6 ; e ror 11
    eor.l   d6,d0
    move.l  a4,d6
    rol.l   #7,d6 ; e ror 25
    eor.l   d6,d0 ; d0 = S1 = ror6 ^ ror11 ^ ror25

    move.l  a4,d6
    and.l   d4,d6 ; e & f
    move.l  a4,d7
    not.l   d7
    and.l   d5,d7 ; ~e & g
    eor.l   d7,d6 ; d6 = ch = (e & f) ^ (~e & g)

    add.l   a5,d0 ; + h
    add.l   d6,d0 ; + ch
    add.l   (a6)+,d0 ; + round constant
    add.l   (a0)+,d0 ; + w[i]; d0 = h + S1 + ch + k + w

    add.l   d0,d3 ; d3 = new a
    add.l   d0,a2 ; a2 = d + d0 = new e

; rotate the work variables one position

    move.l  d5,a5 ; h = g
    move.l  d4,d5 ; g = f
    move.l  a4,d4 ; f = e
    move.l  a2,a4 ; e = d + d0
    move.l  d2,a2 ; d = c
    move.l  d1,d2 ; c = b
    move.l  a3,d1 ; b = a
    move.l  d3,a3 ; a = new a

    cmp.l   a1,a0 ; all 64 schedule words consumed?
    bne     .loopCompress

; merge work variables with hash: hash += a..h
; the sums stay in the registers and are the work variables of the next block

    move.l  8(sp),a0 ; hash

    add.l   (a0)+,d1 ; b
    add.l   (a0)+,d2 ; c
    add.l   (a0)+,d4 ; f
    add.l   (a0)+,d5 ; g
    add.l   (a0)+,a2 ; d
    add.l   (a0)+,a3 ; a
    add.l   (a0)+,a4 ; e
    add.l   (a0)+,a5 ; h

; a0 is now hash+32; the predecrement movem writes the eight sums back over
; the same 32 bytes, keeping the b c f g d a e h memory order

    movem.l d1/d2/d4/d5/a2/a3/a4/a5,-(a0)

; next message block

    subq.l  #1,(sp) ; counter
    bne     .loop

; point return register to remaining block

    move.l  12(sp),a1

; deallocate stack variables and exit

    add.w   #16,sp ; drop the four longs pushed at entry

    movem.l (sp)+,d0-a0/a2-a6
    rts
;
; sha256.padMessage - build the padded final block(s) in sha256.padding.
;
; a0 = sha object
; a1 = remaining message block
; d0 = full message length
;
; d1 <- number of remaining blocks
;
; d0/d2/d7/a1/a2 are saved on entry and restored on exit.
;
sha256.padMessage
    movem.l d0/d2/d7/a1-a2,-(sp)

    lea     sha256.padding(a0),a2

; d1 = message bytes left over after the complete 64-byte blocks

    moveq   #63,d1
    and.l   d0,d1
    beq     .tailDone

; move those bytes to the start of the padding buffer

    moveq   #-1,d7
    add.w   d1,d7 ; dbra count = bytes - 1

.tailCopy
    move.b  (a1)+,(a2)+
    dbra    d7,.tailCopy

.tailDone

; mark the end of the message with a single 1 bit

    move.b  #$80,(a2)+

; zero-fill up to the 8-byte length field that ends the last block

    moveq   #8,d2
    add.w   d1,d2
    and.w   #63,d2 ; (left-over bytes + 8) mod 64
    moveq   #62,d7
    sub.w   d2,d7 ; number of zero bytes - 1
    blt     .fillDone ; negative: the length field follows directly

    moveq   #0,d2
.fill
    move.b  d2,(a2)+
    dbra    d7,.fill

.fillDone

; append the message length in bits as two longs, high long first

    moveq   #7,d2
    rol.l   #3,d0 ; length * 8, top 3 bits wrap into bits 0-2
    and.l   d0,d2 ; d2 = high long
    eor.l   d2,d0 ; d0 = low long

    move.l  d2,(a2)+
    move.l  d0,(a2)

; d1 = 1 when the left-over bytes are fewer than 56, otherwise 2

    sub.b   #56,d1 ; negative when fewer than 56
    asr.b   #7,d1 ; -1 when negative, else 0
    addq.b  #2,d1

    movem.l (sp)+,d0/d2/d7/a1-a2
    rts

; sha constants

    cnop 0,4 ; align the tables to a 4-byte boundary

; hash constants are rearranged: b c f g d a e h
; These are the SHA-256 initial hash values H1 H2 H5 H6 H3 H0 H4 H7, stored
; in the order the work registers are loaded by movem in sha256.hashBlocks.

sha256.hashConstants

    dc.l $bb67ae85,$3c6ef372,$9b05688c,$1f83d9ab,$a54ff53a,$6a09e667,$510e527f,$5be0cd19

; The 64 SHA-256 round constants, one per compress round, in round order.
; The compress loop reads them sequentially with (a6)+.

sha256.constants

    dc.l $428a2f98,$71374491,$b5c0fbcf,$e9b5dba5,$3956c25b,$59f111f1,$923f82a4,$ab1c5ed5
    dc.l $d807aa98,$12835b01,$243185be,$550c7dc3,$72be5d74,$80deb1fe,$9bdc06a7,$c19bf174
    dc.l $e49b69c1,$efbe4786,$0fc19dc6,$240ca1cc,$2de92c6f,$4a7484aa,$5cb0a9dc,$76f988da
    dc.l $983e5152,$a831c66d,$b00327c8,$bf597fc7,$c6e00bf3,$d5a79147,$06ca6351,$14292967
    dc.l $27b70a85,$2e1b2138,$4d2c6dfc,$53380d13,$650a7354,$766a0abb,$81c2c92e,$92722c85
    dc.l $a2bfe8a1,$a81a664b,$c24b8b70,$c76c51a3,$d192e819,$d6990624,$f40e3585,$106aa070
    dc.l $19a4c116,$1e376c08,$2748774c,$34b0bcb5,$391c0cb3,$4ed8aa4a,$5b9cca4f,$682e6ff3
    dc.l $748f82ee,$78a5636f,$84c87814,$8cc70208,$90befffa,$a4506ceb,$bef9a3f7,$c67178f2
Updated version (compress loop optimizations from Wikipedia):

Code:
                rsset 0         ; layout of the sha256 object (work memory supplied by the caller)
sha256.hash     rs.b 32         ; running hash: 8 longs kept in b c f g d a e h order
sha256.schedule rs.b 256        ; message schedule: 64 longs (16 copied from the block + 48 computed)
sha256.padding  rs.b 128        ; padded tail of the message: room for 2 blocks of 64 bytes
sizeof.sha256   rs.b 0          ; total object size in bytes (32 + 256 + 128 = 416)

;
; sha256.computeHash - compute the SHA-256 digest of a whole message.
;
; a0 = sha256 object
; a1 = message
; a2 = output array
; d0 = message length
;
; d1/d7/a1/a3/a4 are saved on entry and restored on exit.
;
sha256.computeHash
	movem.l	d1/d7/a1/a3-a4,-(sp)

; initialise the running hash from the constant table, two longs per pass

	lea		sha256.hash(a0),a3
	lea		sha256.hashConstants,a4

	moveq	#4-1,d7
.seed
	move.l	(a4)+,(a3)+
	move.l	(a4)+,(a3)+
	dbra	d7,.seed

; hash the complete 64-byte blocks, if there are any

	move.l	d0,d1
	lsr.l	#6,d1 ; d1 = number of complete blocks
	beq		.tail ; none: hashBlocks is only called with a non-zero count
	bsr		sha256.hashBlocks ; leaves a1 at the unhashed remainder
.tail

; pad the remainder into the object and hash the padded block(s)

	bsr		sha256.padMessage ; d1 <- number of blocks in the padding buffer
	lea		sha256.padding(a0),a1
	bsr		sha256.hashBlocks

; store the digest: the object keeps it as b c f g d a e h,
; the output array receives it as a b c d e f g h

	lea		sha256.hash(a0),a3

	move.l	0*4(a3),1*4(a2) ; b
	move.l	1*4(a3),2*4(a2) ; c
	move.l	2*4(a3),5*4(a2) ; f
	move.l	3*4(a3),6*4(a2) ; g
	move.l	4*4(a3),3*4(a2) ; d
	move.l	5*4(a3),0*4(a2) ; a
	move.l	6*4(a3),4*4(a2) ; e
	move.l	7*4(a3),7*4(a2) ; h

	movem.l	(sp)+,d1/d7/a1/a3-a4
	rts
;
; sha256.hashBlocks - run the SHA-256 compression over d1 consecutive
; 64-byte blocks and add the result into the running hash.
;
; a0 = sha256 object
; a1 = message blocks
; d1 = number of message blocks (must not be 0: the counter is decremented
;      before it is tested, so 0 would wrap around instead of stopping)
;
; a1 <- pointer to remaining message block
;
; Every register except a1 is preserved.
;
; Work variables stay in registers for the whole call:
;   a3 = a   d1 = b   d2 = c   a2 = d   a4 = e   d4 = f   d5 = g   a5 = h
; Scratch: d0 d3 d6 d7 a0 a1 a6
;
; NOTE(review): each block is fetched with move.l, so a1 presumably has to
; be word-aligned on a 68000/68010 - confirm for the target CPU.
;
sha256.hashBlocks
	movem.l	d0-a0/a2-a6,-(sp) ; d0-d7/a0/a2-a6; a1 is the return value

; stack variables (the numbers are the offsets from sp once all four are pushed)

	lea		sha256.hash(a0),a2
	lea		sha256.schedule(a0),a3

	move.l	a1,-(sp) ; 12 message
	move.l	a2,-(sp) ; 8 hash
	move.l	a3,-(sp) ; 4 schedule
	move.l	d1,-(sp) ; 0 counter

; setup work variables and hash
; movem loads in register-number order, so memory order is b c f g d a e h

	lea		sha256.hash(a0),a1
	movem.l	(a1),d1/d2/d4/d5/a2/a3/a4/a5

.loop

; copy block to schedule: 4 passes of 4 longs = w[0..15]

	move.l	12(sp),a0 ; message
	move.l	4(sp),a1 ; schedule

	moveq	#4-1,d7

.loopCopy
	move.l	(a0)+,(a1)+
	move.l	(a0)+,(a1)+
	move.l	(a0)+,(a1)+
	move.l	(a0)+,(a1)+

	dbra	d7,.loopCopy

	move.l	a0,12(sp) ; message (now points at the next block)

; schedule block: compute w[16..63]
; a1 -> w[i] (slot being written), a0 -> w[i-2], a6 -> w[i-16]

	lea		-2*4(a1),a0
	lea		-16*4(a1),a6

	moveq	#48-1,d7

.loopSchedule
	move.l	(a0)+,d3 ; d3 = w[i-2]
	move.l	d3,d6
	lsr.l	#8,d6
	lsr.l	#2,d6 ; d6 = w[i-2] >> 10 (immediate shift counts stop at 8)
	swap	d3 ; ror 16
	ror.l	#1,d3 ; ror 17
	eor.l	d3,d6
	ror.l	#2,d3 ; ror 19
	eor.l	d3,d6 ; d6 = s1 = ror17 ^ ror19 ^ shr10

	add.l	(a6)+,d6 ; d6 = s1 + w[i-16]; a6 -> w[i-15]
	move.l	(a6),d0 ; d0 = w[i-15]
	move.l	d0,d3
	lsr.l	#3,d3 ; w[i-15] >> 3
	ror.l	#7,d0 ; ror 7
	eor.l	d0,d3
	swap	d0 ; ror 23
	rol.l	#5,d0 ; ror 18
	eor.l	d3,d0 ; d0 = s0 = ror7 ^ ror18 ^ shr3

	add.l	-7*4(a1),d0 ; + w[i-7]
	add.l	d6,d0 ; + s1 + w[i-16]
	move.l	d0,(a1)+ ; store w[i]

	dbra	d7,.loopSchedule

; compress block
; a0 walks w[0..63], a6 walks the round constants,
; a1 (end of the schedule) is the end marker for the loop

	lea		-256(a1),a0
	lea		sha256.constants,a6

; main hash loop

.loopCompress
	move.l	a3,d3
	ror.l	#2,d3 ; a ror 2
	move.l	d3,d6
	swap	d6 ; a ror 18
	move.l	d6,d7
	rol.l	#5,d6 ; a ror 13
	eor.l	d6,d3
	ror.l	#4,d7 ; a ror 22
	eor.l	d7,d3 ; d3 = S0 = ror2 ^ ror13 ^ ror22

	move.l	a3,d7
	eor.l	d1,d7 ; a ^ b
	and.l	d2,d7 ; (a ^ b) & c
	move.l	a3,d6
	and.l	d1,d6 ; a & b
	eor.l	d6,d7 ; d7 = maj = ((a ^ b) & c) ^ (a & b)
	add.l	d7,d3 ; d3 = S0 + maj

	move.l	a4,d0
	ror.l	#6,d0 ; e ror 6
	move.l	d0,d6
	ror.l	#5,d6 ; e ror 11
	eor.l	d6,d0
	move.l	a4,d6
	rol.l	#7,d6 ; e ror 25
	eor.l	d6,d0 ; d0 = S1 = ror6 ^ ror11 ^ ror25

	move.l	d4,d6
	eor.l	d5,d6 ; f ^ g
	move.l	a4,d7 ; and.l cannot take an address register as source
	and.l	d7,d6 ; (f ^ g) & e
	eor.l	d5,d6 ; d6 = ch = ((f ^ g) & e) ^ g

	add.l	a5,d0 ; + h
	add.l	d6,d0 ; + ch
	add.l	(a6)+,d0 ; + round constant
	add.l	(a0)+,d0 ; + w[i]; d0 = h + S1 + ch + k + w

	add.l	d0,d3 ; d3 = new a
	add.l	d0,a2 ; a2 = d + d0 = new e

; rotate the work variables one position

	move.l	d5,a5 ; h = g
	move.l	d4,d5 ; g = f
	move.l	a4,d4 ; f = e
	move.l	a2,a4 ; e = d + d0
	move.l	d2,a2 ; d = c
	move.l	d1,d2 ; c = b
	move.l	a3,d1 ; b = a
	move.l	d3,a3 ; a = new a

	cmp.l	a1,a0 ; all 64 schedule words consumed?
	bne		.loopCompress

; merge work variables with hash: hash += a..h
; the sums stay in the registers and are the work variables of the next block

	move.l	8(sp),a0 ; hash

	add.l	(a0)+,d1 ; b
	add.l	(a0)+,d2 ; c
	add.l	(a0)+,d4 ; f
	add.l	(a0)+,d5 ; g
	add.l	(a0)+,a2 ; d
	add.l	(a0)+,a3 ; a
	add.l	(a0)+,a4 ; e
	add.l	(a0)+,a5 ; h

; a0 is now hash+32; the predecrement movem writes the eight sums back over
; the same 32 bytes, keeping the b c f g d a e h memory order

	movem.l	d1/d2/d4/d5/a2/a3/a4/a5,-(a0)

; next message block

	subq.l	#1,(sp) ; counter
	bne		.loop

; point return register to remaining block

	move.l	12(sp),a1

; deallocate stack variables and exit

	add.w	#16,sp ; drop the four longs pushed at entry

	movem.l	(sp)+,d0-a0/a2-a6
	rts
;
; sha256.padMessage - build the padded final block(s) in sha256.padding.
;
; a0 = sha object
; a1 = remaining message block
; d0 = full message length
;
; d1 <- number of remaining blocks
;
; d0/d2/d7/a1/a2 are saved on entry and restored on exit.
;
sha256.padMessage
	movem.l	d0/d2/d7/a1-a2,-(sp)

	lea		sha256.padding(a0),a2

; d1 = message bytes left over after the complete 64-byte blocks

	moveq	#63,d1
	and.l	d0,d1
	beq		.tailDone

; move those bytes to the start of the padding buffer

	moveq	#-1,d7
	add.w	d1,d7 ; dbra count = bytes - 1

.tailCopy
	move.b	(a1)+,(a2)+
	dbra	d7,.tailCopy

.tailDone

; mark the end of the message with a single 1 bit

	move.b	#$80,(a2)+

; zero-fill up to the 8-byte length field that ends the last block

	moveq	#8,d2
	add.w	d1,d2
	and.w	#63,d2 ; (left-over bytes + 8) mod 64
	moveq	#62,d7
	sub.w	d2,d7 ; number of zero bytes - 1
	blt		.fillDone ; negative: the length field follows directly

	moveq	#0,d2
.fill
	move.b	d2,(a2)+
	dbra	d7,.fill

.fillDone

; append the message length in bits as two longs, high long first

	moveq	#7,d2
	rol.l	#3,d0 ; length * 8, top 3 bits wrap into bits 0-2
	and.l	d0,d2 ; d2 = high long
	eor.l	d2,d0 ; d0 = low long

	move.l	d2,(a2)+
	move.l	d0,(a2)

; d1 = 1 when the left-over bytes are fewer than 56, otherwise 2

	sub.b	#56,d1 ; negative when fewer than 56
	asr.b	#7,d1 ; -1 when negative, else 0
	addq.b	#2,d1

	movem.l	(sp)+,d0/d2/d7/a1-a2
	rts

; sha constants

	cnop 0,4 ; align the tables to a 4-byte boundary

; hash constants are rearranged: b c f g d a e h
; These are the SHA-256 initial hash values H1 H2 H5 H6 H3 H0 H4 H7, stored
; in the order the work registers are loaded by movem in sha256.hashBlocks.

sha256.hashConstants

	dc.l $bb67ae85,$3c6ef372,$9b05688c,$1f83d9ab,$a54ff53a,$6a09e667,$510e527f,$5be0cd19

; The 64 SHA-256 round constants, one per compress round, in round order.
; The compress loop reads them sequentially with (a6)+.

sha256.constants

	dc.l $428a2f98,$71374491,$b5c0fbcf,$e9b5dba5,$3956c25b,$59f111f1,$923f82a4,$ab1c5ed5
	dc.l $d807aa98,$12835b01,$243185be,$550c7dc3,$72be5d74,$80deb1fe,$9bdc06a7,$c19bf174
	dc.l $e49b69c1,$efbe4786,$0fc19dc6,$240ca1cc,$2de92c6f,$4a7484aa,$5cb0a9dc,$76f988da
	dc.l $983e5152,$a831c66d,$b00327c8,$bf597fc7,$c6e00bf3,$d5a79147,$06ca6351,$14292967
	dc.l $27b70a85,$2e1b2138,$4d2c6dfc,$53380d13,$650a7354,$766a0abb,$81c2c92e,$92722c85
	dc.l $a2bfe8a1,$a81a664b,$c24b8b70,$c76c51a3,$d192e819,$d6990624,$f40e3585,$106aa070
	dc.l $19a4c116,$1e376c08,$2748774c,$34b0bcb5,$391c0cb3,$4ed8aa4a,$5b9cca4f,$682e6ff3
	dc.l $748f82ee,$78a5636f,$84c87814,$8cc70208,$90befffa,$a4506ceb,$bef9a3f7,$c67178f2

Last edited by Thorham; 04 December 2017 at 11:35.
Thorham is online now  
Old 16 October 2017, 22:06   #2
Leffmann
 
Join Date: Jul 2008
Location: Sweden
Posts: 2,269
Cool! You could make it more user-friendly by putting the temporary data on the stack, instead of tasking the caller with allocating and freeing that memory.
Leffmann is offline  
Old 16 October 2017, 23:24   #3
Thorham
Computer Nerd
 
Thorham's Avatar
 
Join Date: Sep 2007
Location: Rotterdam/Netherlands
Age: 47
Posts: 3,751
Quote:
Originally Posted by Leffmann View Post
You could make it more user-friendly by putting the temporary data on the stack, instead of tasking the caller with allocating and freeing that memory.
It's that way because I prefer it, but using the stack is easy:

Code:
;
; sha256.computeHash - compute the SHA-256 digest of a whole message,
; keeping the sha256 object on the stack instead of in caller memory.
;
; a1 = message
; a2 = output array
; d0 = message length
;
; d1/d7/a0/a1/a3/a4 are saved on entry and restored on exit.
; Needs sizeof.sha256 bytes of stack for the object, on top of the
; saved registers.
;
sha256.computeHash
    movem.l d1/d7/a0-a1/a3-a4,-(sp)

; reserve the sha object on the stack and point a0 at it

    lea     -sizeof.sha256(sp),sp
    move.l  sp,a0

; initialise the running hash from the constant table, two longs per pass

    lea     sha256.hash(a0),a3
    lea     sha256.hashConstants,a4

    moveq   #4-1,d7
.seed
    move.l  (a4)+,(a3)+
    move.l  (a4)+,(a3)+
    dbra    d7,.seed

; hash the complete 64-byte blocks, if there are any

    move.l  d0,d1
    lsr.l   #6,d1 ; d1 = number of complete blocks
    beq     .tail ; none: hashBlocks is only called with a non-zero count
    bsr     sha256.hashBlocks ; leaves a1 at the unhashed remainder
.tail

; pad the remainder into the object and hash the padded block(s)

    bsr     sha256.padMessage ; d1 <- number of blocks in the padding buffer
    lea     sha256.padding(a0),a1
    bsr     sha256.hashBlocks

; store the digest: the object keeps it as b c f g d a e h,
; the output array receives it as a b c d e f g h

    lea     sha256.hash(a0),a3

    move.l  0*4(a3),1*4(a2) ; b
    move.l  1*4(a3),2*4(a2) ; c
    move.l  2*4(a3),5*4(a2) ; f
    move.l  3*4(a3),6*4(a2) ; g
    move.l  4*4(a3),3*4(a2) ; d
    move.l  5*4(a3),0*4(a2) ; a
    move.l  6*4(a3),4*4(a2) ; e
    move.l  7*4(a3),7*4(a2) ; h

; release the sha object

    lea     sizeof.sha256(sp),sp

    movem.l (sp)+,d1/d7/a0-a1/a3-a4
    rts

Last edited by Thorham; 16 October 2017 at 23:29.
Thorham is online now  
Old 17 October 2017, 15:12   #4
Megol
Registered User
 
Megol's Avatar
 
Join Date: May 2014
Location: inside the emulator
Posts: 377
Nice clean code, thanks!
Megol is offline  
Old 19 October 2017, 07:04   #5
Thorham
Computer Nerd
 
Thorham's Avatar
 
Join Date: Sep 2007
Location: Rotterdam/Netherlands
Age: 47
Posts: 3,751
Quote:
Originally Posted by Megol View Post
Nice clean code
Glad you think so, thanks.
Thorham is online now  
Old 02 December 2017, 14:16   #6
Thorham
Computer Nerd
 
Thorham's Avatar
 
Join Date: Sep 2007
Location: Rotterdam/Netherlands
Age: 47
Posts: 3,751
Anyone interested in standalone AES256? I almost have that ready for posting.
Thorham is online now  
Old 04 December 2017, 11:36   #7
Thorham
Computer Nerd
 
Thorham's Avatar
 
Join Date: Sep 2007
Location: Rotterdam/Netherlands
Age: 47
Posts: 3,751
Updated the code with some optimizations from Wikipedia that didn't seem like they would work, see first post.
Thorham is online now  
 


Currently Active Users Viewing This Thread: 1 (0 members and 1 guests)
 
Thread Tools

Similar Threads
Thread Thread Starter Forum Replies Last Post
Optimizing SHA-256 crypto hash. Thorham Coders. Asm / Hardware 8 21 June 2015 08:11
Search Software from Hash Inc AndreasM request.Apps 3 03 April 2011 15:23
KS3.1 hash orange support.Apps 3 11 January 2011 19:15
One on One stand alone version... scifi request.Old Rare Games 3 30 April 2006 19:42

Posting Rules
You may not post new threads
You may not post replies
You may not post attachments
You may not edit your posts

BB code is On
Smilies are On
[IMG] code is On
HTML code is Off

Forum Jump


All times are GMT +2. The time now is 23:38.

Top

Powered by vBulletin® Version 3.8.11
Copyright ©2000 - 2024, vBulletin Solutions Inc.
Page generated in 0.07611 seconds with 14 queries