User Tools

Site Tools


base:fastest_multiplication

This is an old revision of the document!


Requires tables or a generator routine such as table_generator_routine_for_fast_8_bit_mul_table

;World's fastest 16x16 unsigned mult for 6502
;you can go faster, but not without more code and/or data
;and being less elegant and harder to follow.
;by Repose 2017

;tables of squares
;sqr(x)=x^2/4
;negsqr(x)=(255-x)^2/4
sqrlo=$c000;511 bytes
sqrhi=$c200;511 bytes
negsqrlo=$c400;511 bytes
negsqrhi=$c600;511 bytes

;pointers to square tables above
p_sqr_lo=$8b;2 bytes
p_sqr_hi=$8d;2 bytes
p_invsqr_lo=$8f;2 bytes
p_invsqr_hi=$91;2 bytes

;the inputs and outputs
x0=$fb;multiplier, 2 bytes
x1=$fc
y0=$fd;multiplicand, 2 bytes
y1=$fe
z0=$80;product, 4 bytes
z1=$81
z2=$82
z3=$83

;not shown is a routine to make the tables
;also you need to init the pointers' high bytes to the tables

umult16:
;set multiplier as x0
lda x0
sta p_sqr_lo
sta p_sqr_hi
eor #$ff
sta p_invsqr_lo
sta p_invsqr_hi;17

ldy y0
sec
lda (p_sqr_lo),y
sbc (p_invsqr_lo),y;note these two lines taken as 11 total
sta z0;x0*y0l
lda (p_sqr_hi),y
sbc (p_invsqr_hi),y
sta c1a+1;x0*y0h;31
;c1a means column 1, row a (partial product to be added later)

ldy y1
;sec  ;notice that the high byte of sub above is always +ve
lda (p_sqr_lo),y
sbc (p_invsqr_lo),y
sta c1b+1;x0*y1l
lda (p_sqr_hi),y
sbc (p_invsqr_hi),y
sta c2a+1;x0*y1h;31

;set multiplier as x1
lda x1
sta p_sqr_lo
sta p_sqr_hi
eor #$ff
sta p_invsqr_lo
sta p_invsqr_hi;17

ldy y0
;sec
lda (p_sqr_lo),y
sbc (p_invsqr_lo),y
sta c1c+1;x1*y0l
lda (p_sqr_hi),y
sbc (p_invsqr_hi),y
sta c2b+1;x1*y1h;31

ldy y1
;sec
lda (p_sqr_lo),y
sbc (p_invsqr_lo),y
sta c2c+1;x1*y1l
lda (p_sqr_hi),y
sbc (p_invsqr_hi),y
sta z3;x1*y1h;31

;4*31+2*17 so far=158
;add partials
;-add first two numbers in column 1
;jmp do_adds;put in zp to save 3 cycles :)
do_adds:
clc
c1a lda #0 
c1b adc #0;add first two rows of column 1
sta z1;9
;-continue to first two numbers in column 2                   
c2a lda #0
c2b adc #0 
sta z2;7
bcc c1c;3 taken/9 not taken, avg 6 
inc z3
clc
;-add last number of column 1 (row c)
c1c lda #0 
adc z1
sta z1;8
;-add last number of column 2
c2c lda #0 
adc z2
sta z2;8
bcc fin;3/7 avg 5
inc z3
;9+7+6+8+8+5=43
fin rts
base/fastest_multiplication.1492423921.txt.gz · Last modified: 2017-04-17 12:12 by repose