User Tools

Site Tools


base:fastest_multiplication

Differences

This shows you the differences between two versions of the page.

Link to this comparison view

Both sides previous revision | Previous revision
Next revision | Both sides next revision
base:fastest_multiplication [2021-02-23 04:49] – fix typo in example usage strobe | base:fastest_multiplication [2023-08-21 02:45] repose
Line 9: Line 9:
 Mine: 196 zp variation: 192 \\ Mine: 196 zp variation: 192 \\
 Times above need to add 12 for jsr/rts \\ Times above need to add 12 for jsr/rts \\
 +Note: updated 2023; corrected typos and timings \\
  
 <code> <code>
Line 22: Line 23:
 ;do_add: 30 bytes in zp, if used ;do_add: 30 bytes in zp, if used
 ;time: 196 cycles, option for 192 if you use 30 more zp bytes for do_add ;time: 196 cycles, option for 192 if you use 30 more zp bytes for do_add
-;measurement method: branches/page boundary crossings were averaged+;measurement method: average timings over all input combinations
  
 ;How to use: ;How to use:
Line 48: Line 49:
 z0=$80;product, 2 bytes z0=$80;product, 2 bytes
 z1=$81 z1=$81
-;z2=$82 returned in X reg +z2=$82 ;returned in X reg 
-;z3=$83 returned in Y reg+z3=$83 ;returned in Y reg
  
 ;Example showing use ;Example showing use
Line 59: Line 60:
 jsr makesqrtables jsr makesqrtables
 jsr umult16 jsr umult16
 +stx z2
 +sty z3
 ;result should be $fffe0001, e.g. as viewed with a typical m 0080 monitor command: ;result should be $fffe0001, e.g. as viewed with a typical m 0080 monitor command:
 ;0080 01 00 fe ff ;0080 01 00 fe ff
Line 121: Line 124:
 sta p_invsqr_hi;17 sta p_invsqr_hi;17
  
-ldy y0 
 sec sec
 +ldy y0
 lda (p_sqr_lo),y lda (p_sqr_lo),y
-sbc (p_invsqr_lo),y;note these two lines taken as 11 total+sbc (p_invsqr_lo),y;note these two lines taken as 10.996 total or 10+65280/65536
 sta z0;x0*y0l sta z0;x0*y0l
 lda (p_sqr_hi),y lda (p_sqr_hi),y
 sbc (p_invsqr_hi),y sbc (p_invsqr_hi),y
-sta c1a+1;x0*y0h;31+sta c1a+1;x0*y0h; 2+3+10.996+3+10.996+4=33.992
 ;c1a means column 1, row a (partial product to be added later) ;c1a means column 1, row a (partial product to be added later)
  
 ldy y1 ldy y1
-;sec  ;notice that the high byte of sub above is always +ve+;sec  ;notice that the high byte of subtraction above is always positive, leaving Carry set
 lda (p_sqr_lo),y lda (p_sqr_lo),y
 sbc (p_invsqr_lo),y sbc (p_invsqr_lo),y
Line 138: Line 141:
 lda (p_sqr_hi),y lda (p_sqr_hi),y
 sbc (p_invsqr_hi),y sbc (p_invsqr_hi),y
-sta c2a+1;x0*y1h;31+sta c2a+1;x0*y1h; 3+10.996+4+10.996+4=32.992
  
 ;set multiplier as x1 ;set multiplier as x1
Line 155: Line 158:
 lda (p_sqr_hi),y lda (p_sqr_hi),y
 sbc (p_invsqr_hi),y sbc (p_invsqr_hi),y
-sta c2b+1;x1*y0h;31+sta c2b+1;x1*y0h;32.992
  
 ldy y1 ldy y1
Line 164: Line 167:
 lda (p_sqr_hi),y lda (p_sqr_hi),y
 sbc (p_invsqr_hi),y sbc (p_invsqr_hi),y
-tay;x1*y1h;Y=z3, 30 cycles +tay;x1*y1h;Y=z3, 30.992 cycles 
-;17+33+31+17+31+30=159 cycles for main multiply part+;17+33.992+32.992+17+32.992+30.992=164.97 cycles for main multiply part (minimum=157, maximum=173)
  
 ;jmp do_adds; can put do_adds in zp for a slight speed increase ;jmp do_adds; can put do_adds in zp for a slight speed increase
base/fastest_multiplication.txt · Last modified: 2024-02-13 08:24 by repose