;   Copyright (C) 2016 Free Software Foundation, Inc.
;   Contributed by Alex Panek.
;
; This file is free software; you can redistribute it and/or modify it
; under the terms of the GNU General Public License as published by the
; Free Software Foundation; either version 3, or (at your option) any
; later version.
;
; This file is distributed in the hope that it will be useful, but
; WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
; General Public License for more details.
;
; Under Section 7 of GPL version 3, you are granted additional
; permissions described in the GCC Runtime Library Exception, version
; 3.1, as published by the Free Software Foundation.
;
; You should have received a copy of the GNU General Public License and
; a copy of the GCC Runtime Library Exception along with this program;
; see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
; <http://www.gnu.org/licenses/>.


#include "vregs.h"

    .text

; The 16x16->16 and unsigned 16x16->32 multiplications are used as primitives.
; When __RL78_MUL_G14__ is defined both map to the mulhu instruction (the G14
; has a dedicated instruction for that); otherwise (G10/G13) they map to calls
; to ___mulhi3 and ___umulhisi3, respectively.
; The macros below use both primitives the same way: the operands are placed
; in AX and BC, the low word of the product is read back from AX and, for the
; 16x16->32 form, the high word is read back from BC.
#ifdef __RL78_MUL_G14__
#define mulhi   mulhu
#define mulhisi mulhu
#else
#define mulhi   call !!___mulhi3
#define mulhisi call !!___umulhisi3
#endif

; the 16-bit words composing the two operands on the stack (0-least..3-most significant)
; The first group of four names belongs to the 1st operand, the second group
; to the 2nd operand.  The offsets are relative to sp as it is in the body of
; ___muldi3; the local helpers reached through a call never reference them.
; NOTE(review): the 1st operand starting at sp+4 presumably skips the return
; address of the caller - confirm against the ABI.
#define A0 [sp+4]
#define A1 [sp+6]
#define A2 [sp+8]
#define A3 [sp+10]
#define B0 [sp+12]
#define B1 [sp+14]
#define B2 [sp+16]
#define B3 [sp+18]

; LOAD X, Y: load a pair of 16-bit operands for the multiply primitives.
; On exit BC = X and AX = Y; X passes through AX on its way to BC.
.macro LOAD X, Y
    movw  ax, \X
    movw  bc, ax
    movw  ax, \Y
.endm

; MUL X, Y: 16x16->16 multiplication.
; Loads the operands with LOAD and applies the 16x16->16 primitive; the users
; of this macro read the low 16 bits of X x Y from AX.
.macro MUL X, Y
    LOAD \X, \Y
    mulhi
.endm

; MULX X, Y: unsigned 16x16->32 multiplication.
; Loads the operands with LOAD and applies the 16x16->32 primitive; the users
; of this macro read the low word of X x Y from AX and the high word from BC.
.macro MULX X, Y
    LOAD \X, \Y
    mulhisi
.endm

; MULX_ST0 X, Y: compute the 32-bit product X x Y and store it as the initial
; value of words 0-1 of the result: r8 = low word (from AX), r10 = high word
; (from BC, copied through AX).
.macro MULX_ST0 X, Y
    MULX  \X, \Y
    movw  r8, ax
    movw  ax, bc
    movw  r10, ax
.endm

; MULX_ST2 X, Y: compute the 32-bit product X x Y and store it as the initial
; value of words 2-3 of the result: r12 = low word (from AX), HL = high word.
; The multiply leaves the high word in BC, and HL is loaded from r_2.
; NOTE(review): r_2 is not defined in this file; presumably it comes from
; vregs.h and names the memory-mapped location of BC in the active register
; bank - confirm, since this copy is only correct if r_2 aliases BC.
.macro MULX_ST2 X, Y
    MULX  \X, \Y
    movw  r12, ax
    movw  hl, r_2
.endm

; MULX_ADD X, Y, lbl: compute the 32-bit product X x Y and call the local
; helper lbl to accumulate it into the running result.  The helper receives
; the low word in AX and the high word in BC.  In ___muldi3 the helper is
; either 3f (add at words 1-2, carry into word 3) or 2f (add at words 2-3).
.macro MULX_ADD X, Y, lbl
    MULX  \X, \Y
    call  $!\lbl
.endm

; MUL_ADD X, Y: add the low 16 bits of X x Y to HL, which holds word 3 of the
; running result in ___muldi3.  The carry out of this addition is not used:
; it would land above bit 63.  HL has to survive the 16x16->16 primitive for
; this to work.
.macro MUL_ADD X, Y
    MUL   \X, \Y
    addw  ax, hl
    movw  hl, ax
.endm

; ___muldi3: multiply two 64-bit integers and return the low 64 bits of the
; product.  The operands are split into 16-bit words Ai and Bj (0 = least
; significant); the result is the sum of all partial products Ai x Bj with
; i+j <= 3, each one added at word position i+j.  Partial products with
; i+j <= 2 are computed in full (32 bits); for i+j == 3 only the low 16 bits
; are computed because anything above bit 63 is discarded.
;
; The running result lives in r8 (word 0), r10 (word 1), r12 (word 2) and
; HL (word 3); HL is copied to r14 just before returning.
; NOTE(review): this requires the multiply primitives to preserve HL and
; r8-r12 - confirm for ___mulhi3 and ___umulhisi3.
;
; inputs:   [sp+10]..[sp+4]  = 1st operand (64-bit un/signed)
;           [sp+18]..[sp+12] = 2nd operand (64-bit un/signed)
; output:   r14:r12:r10:r8   = the lower 64-bits of the product of the two values
; clobbers: AX, BC, DE, HL
;           (DE is not written by the code in this file; presumably it is
;           listed on behalf of the called multiply routines - confirm)
START_FUNC ___muldi3

    ; seed the accumulators with the two products that do not overlap
    MULX_ST0 A0, B0        ; compute A0xB0 and store the result in r8-r10
    MULX_ST2 A1, B1        ; compute A1xB1 and store the result in r12 and HL

    ; products at word position 1 (i+j == 1)
    MULX_ADD A0, B1, 3f    ; compute A0xB1 and add it to words 1-3 of the result
    MULX_ADD A1, B0, 3f    ; compute A1xB0 and add it to words 1-3 of the result

    ; remaining products at word position 2 (i+j == 2)
    MULX_ADD A0, B2, 2f    ; compute A0xB2 and add it to words 2-3 of the result
    MULX_ADD A2, B0, 2f    ; compute A2xB0 and add it to words 2-3 of the result

    ; products at word position 3 (i+j == 3): only their low 16 bits matter
    MUL_ADD  A0, B3        ; compute A0xB3 and add it to word 3 of the result
    MUL_ADD  A3, B0        ; compute A3xB0 and add it to word 3 of the result
    MUL_ADD  A1, B2        ; compute A1xB2 and add it to word 3 of the result
    MUL_ADD  A2, B1        ; compute A2xB1 and add it to word 3 of the result

    movw   ax, hl          ; get the value of word 3 (bits 48-63) of the result
    movw   r14, ax         ; and store it to r14
    ret

; local helper, reached via MULX_ADD: add a 32-bit partial product
; (AX = low word, BC = high word) at word position 2, i.e. to r12 and HL.
; The carry out of word 3 is dropped.
2:  addw   ax, r12         ; add to the 32-47 bits of the result (may generate a carry)
    movw   r12, ax
    movw   ax, bc
    sknc                   ; skip the incw unless the addw above set the carry (the movw in between are relied on to leave it alone)
    incw   ax              ; add the carry bit to the upper 16 bits of the result
                           ; (cannot wrap: the high word of a 16x16 product is at most 0xFFFE)
    addw   ax, hl          ; plus the higher 16 bits of the multiplication result
    movw   hl, ax
    ret

; local helper, reached via MULX_ADD: add a 32-bit partial product
; (AX = low word, BC = high word) at word position 1, i.e. to r10 and r12,
; and propagate the carry out of word 2 into HL (word 3).
3:  addw   ax, r10         ; add to the 16-31 bits of the result (may generate a carry)
    movw   r10, ax
    movw   ax, bc
    sknc                   ; skip the incw unless the addw above set the carry
    incw   ax              ; add the carry bit to the upper 16 bits of the product
                           ; (cannot wrap: the high word of a 16x16 product is at most 0xFFFE)
    addw   ax, r12         ; add to the 32-47 bits of the result (may generate a carry)
    movw   r12, ax
    sknc                   ; skip the incw unless the addition into word 2 set the carry
    incw   hl              ; add the carry bit to the upper 16 bits of the result
    ret

END_FUNC ___muldi3
