; Integer Square Roots in PDP-11 machine code
; ===========================================
; A simple, but inefficient way of calculating integer square roots is to
; count how many times increasing odd numbers can be subtracted from the
; starting value. For example:
; 
; 32-1=31 -> 31-3=28 -> 28-5=23 -> 23-7=16 -> 16-9=7 -> 7-11<0
;         1          2          3          4          5
; 
; -> SQR(30)=5, remainder 7
;
; This can be done in PDP-11 machine code as follows.
;
; Calculate 16-bit square root
; ----------------------------
; On entry  r1 = input value
; On exit   r0 = root
;           r1 = remainder
;           r2 = corrupted
; Size      22 bytes
;
.sqr16
mov #1,r2    ; Initialise to first subtrand
clr r0       ; Initial root=0
.sqr_loop    ; Subtract increasing odd numbers until r1<0
sub r2,r1    ; r1=r1-subtrand
bcs sqr_done ; r1<0, all done
inc r0       ; Increase root
inc r2       ;
inc r2       ; step subtrand +2 to next odd number
br  sqr_loop ; Loop to subtract next odd number
.sqr_done
add r2,r1    ; add subtrand back to r1 to get remainder
rts pc
;
; You can test this in BBC BASIC with:
; B%=1:REPEAT:PRINT B%,~USR sqr16:B%=B%+1:UNTIL B%>65535
;
;
; An alternative is to notice that the odd numbers are always twice the root plus
; one, so we can use the subtractand to give the root by dividing it by two at the
; end of the loop. This also reduces the number of registers used, but does not
; give the remainder.
; Also, as the root of a 32-bit number is a 16-bit number, 32-bit roots can be
; calculated with the addition of a SBC instruction to subtract the carry from
; the top 16 bits.
; As the working subtrand is twice the root, the maximum subtrand before overflows
; is &7FFF which means the maximum root is &3FFF, so the maximum input value is
; &0FFFFFFF, so this code actually calculates 28-bit square roots.
; As this uses a simple subtraction loop, it will naturally get slower and slower
; for larger and larger numbers.
;
; Calculate 28-bit square root
; ----------------------------
; On entry  r1 = input value b0-b15
;           r2 = input value b16-b31
; On exit   r0 = root
;           r1 = corrupted
;           r2 = corrupted
; Size      18 bytes
;
.sqr28
mov #-1,r0   ; Initial root will be 1 after adding 2 to it
.sqr_loop
add #2,r0    ; Step to next odd number
sub r0,r1    ; Subtract increasing odd numbers from b0-b15
sbc r2       ; Subtract from b16-b31 - omit this to do 16-bit roots
bcc sqr_loop ; Loop to subtract next odd number
asr r0       ; Divide subtrand by 2 to get root
rts pc
;
; You can test this in BBC BASIC with:
; B%=1:REPEAT:C%=B% DIV 65536:PRINT B%,(USR sqr28) AND &FFFF:B%=B%+1:UNTIL B%>&3FFFFFF
;
;
; To get a full 32-bit square root the subtractand needs to be increments in steps
; of one, so needs to be subtracted twice from the input value in each pass of
; the loop. This is actually a 31-bit square root as the 32-bit integer has to
; be treated as a signed value to pass around the loop. The maximum input value
; is &7FFFFFFF. Calling with a negative input value happens to return zero.
;
; Calculate 32-bit square root
; ----------------------------
; On entry  r1 = input value b0-b15
;           r2 = input value b16-b31
; On exit   r0 = root
;           r1 = corrupted
;           r2 = corrupted
; Size      24 bytes
;
.sqr32
mov #-1,r0   ; Initial root will be 0*2+1 after adding 1 to it
.sqr_loop
inc r0       ; Step to next root, next odd number is 2*r0+1
sub r0,r1    ; r1=r1-r0
sbc r2
sub r0,r1    ; r1=r1-(2*r0)
sbc r2
sub #1,r1    ; r1=r1-(2*r0+1)
sbc r2
bpl sqr_loop ; Loop to subtract next odd number
rts pc
;
; You can test this in BBC BASIC with:
; B%=1:REPEAT:C%=B% DIV 65536:PRINT B%,(USR sqr32) AND &FFFF:B%=B%+1:UNTIL B%=0
;