;   Copyright (C) 2016, 2017 Free Software Foundation, Inc.
;   Contributed by Alex Panek.
;
; This file is free software; you can redistribute it and/or modify it
; under the terms of the GNU General Public License as published by the
; Free Software Foundation; either version 3, or (at your option) any
; later version.
;
; This file is distributed in the hope that it will be useful, but
; WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
; General Public License for more details.
;
; Under Section 7 of GPL version 3, you are granted additional
; permissions described in the GCC Runtime Library Exception, version
; 3.1, as published by the Free Software Foundation.
;
; You should have received a copy of the GNU General Public License and
; a copy of the GCC Runtime Library Exception along with this program;
; see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
; <http://www.gnu.org/licenses/>.


#include "vregs.h"

    .text

    ; ___umulhisi3 is the 16x16 -> 32 bit unsigned multiply helper that the
    ; __RL78_MUL_NONE__ variant of ___umulsi3_highpart calls; it is not
    ; defined in the part of the file visible here.
    .global ___umulhisi3

#ifdef __RL78_MUL_NONE__

; ___umulsi3_highpart, variant built when __RL78_MUL_NONE__ is defined.
;
; Returns the upper 32 bits of the 64 bit product of two unsigned 32 bit
; values.  The product is assembled from four 16x16 -> 32 bit products,
; each obtained with a call to ___umulhisi3.  The register operand is
; split into the words A (upper, in BC) and B (lower, in AX); the stack
; operand is split into the words C (upper) and D (lower), so that
;
;   product = AxC * 2^32 + (AxD + BxC) * 2^16 + BxD
;
; inputs:   BC:AX           = unsigned 32 bit value
;           [sp+4]..[sp+7]  = unsigned 32 bit value
; output:   BC:AX           = the higher 32 bits of the product of the two inputs
; clobbers: DE
;
; Frame layout after the prologue (HL is set equal to SP):
;   [hl+0]   bits 16-31 of the running sum (kept only to produce carries)
;   [hl+2]   bits 32-47 of the running sum
;   [hl+4]   bits 48-63 of the running sum
;   [hl+6]   B  (the pushed AX)
;   [hl+8]   A  (the pushed BC)
;   [hl+10]  the pushed HL
;   [hl+16]  D  (was [sp+4] on entry; 12 bytes have been pushed/reserved)
;   [hl+18]  C  (was [sp+6] on entry)
START_FUNC ___umulsi3_highpart

    push   hl              ; HL should be preserved
    push   bc              ; save A on the stack
    push   ax              ; save B on the stack

    subw   sp, #6          ; make room for r10-r15 from ___umulsidi3 (three scratch words)
    movw   hl, sp          ; HL is the frame base for every [hl+N] access below
                           ; NOTE(review): HL is reused after each call, so this
                           ; assumes ___umulhisi3 leaves HL intact - confirm

    ; --- BxD: only its upper word can reach the upper 32 bits of the product
    movw   bc, ax          ; B
    movw   ax, [hl+16]     ; D
    call   !!___umulhisi3  ; BxD in BC:AX
    movw   ax, bc
    movw   [hl], ax        ; store the word that we need from the BxD result

    ; --- AxC: forms the initial value of bits 32-63
    movw   ax, [hl+8]      ; A
    movw   bc, ax
    movw   ax, [hl+18]     ; C
    call   !!___umulhisi3  ; AxC in BC:AX
    movw   [hl+2], ax
    movw   ax, bc
    movw   [hl+4], ax      ; store AxC in the upper 32-bits of the (provisional) result

    ; --- AxD: added in at bit 16, carries rippled upwards
    movw   ax, [hl+8]      ; A
    movw   bc, ax
    movw   ax, [hl+16]     ; D
    call   !!___umulhisi3  ; AxD in BC:AX

    addw   ax, [hl]        ; add to the 16-31 bits of the result (may generate a carry)
    movw   [hl], ax
    movw   ax, bc          ; the sknc below still tests the carry of the addw above,
                           ; so the two movw in between must leave CY unchanged
    sknc
    incw   ax              ; add the carry bit to the upper 16 bits of the product
    addw   ax, [hl+2]      ; add to the 32-47 bits of the result (may generate a carry)
    movw   [hl+2], ax
    sknc
    incw   [hl+4]          ; add the carry bit to the upper 16 bits of the result

    ; --- BxC: added in at bit 16 as well; this time the sums stay in registers
    movw   ax, [hl+6]      ; B
    movw   bc, ax
    movw   ax, [hl+18]     ; C
    call   !!___umulhisi3  ; BxC in BC:AX

    addw   ax, [hl]        ; add to the 16-31 bits of the result (may generate a carry)
                           ; (the sum itself is dropped, only its carry is used)
    movw   ax, bc
    sknc
    incw   ax              ; add the carry bit to the upper 16 bits of the product
    addw   ax, [hl+2]      ; add to the 32-47 bits of the result (may generate a carry)
    movw   bc, ax          ; BC = final bits 32-47
    movw   ax, [hl+4]
    sknc
    incw   ax              ; add the carry bit to the upper 16 bits of the result

    xchw   ax, bc          ; BC:AX - the upper 32 bits of the result

    addw   sp, #10         ; get rid of the stack temporaries
                           ; (6 scratch bytes plus the pushed AX and BC)
    pop    hl              ; restore the value of HL

    ret

END_FUNC ___umulsi3_highpart

#endif /* __RL78_MUL_NONE__ */

#ifdef __RL78_MUL_G13__

; ___umulsi3_highpart, variant built when __RL78_MUL_G13__ is defined.
;
; Returns the upper 32 bits of the 64 bit product of two unsigned 32 bit
; values, using the multiplier registers MDAL/MDAH (operands), MDBL/MDBH
; (multiply result), MDCL/MDCH (accumulator) and MDUC (mode and status).
; The register operand is split into the words A (upper, in BC) and
; B (lower, in AX); the stack operand into C (upper) and D (lower):
;
;   product = AxC * 2^32 + (AxD + BxC) * 2^16 + BxD
;
; Steps:
;   1. plain multiply BxD, keep its upper word in DE
;   2. plain multiply AxC
;   3. switch to multiply-accumulate; seed the accumulator with
;      MDCL = upper word of BxD, MDCH = lower word of AxC; BC holds the
;      upper word of AxC
;   4. accumulate AxD, then BxC; after each one bit 2 of MDUC is copied
;      to CY and, if set, BC is incremented
;   5. the result is BC (bits 48-63) and MDCH (bits 32-47)
;
; The instruction order and the nop instructions are timing related
; (see the existing cycle comments); do not reorder.
;
; inputs:   BC:AX           = unsigned 32 bit value
;           [sp+4]..[sp+7]  = unsigned 32 bit value
; output:   BC:AX           = the higher 32 bits of the product of the two inputs
; clobbers: DE
START_FUNC ___umulsi3_highpart

    push   ax              ; B is at [sp], C is at [sp+8] and D at [sp+6]

    mov    !MDUC, #0       ; unsigned multiply operation

    movw   MDAL, ax        ; B
    movw   ax, [sp+6]      ; D
    movw   MDAH, ax        ; <--- this starts the multiplication operation

    nop                    ; MDBH:MDBL = MDAL * MDAH (must wait 1 cycle)

    movw   ax, MDBH        ; copy the higher 16-bits of the BxD value
    movw   de, ax          ; cannot store to MDCL now (not in mult-acc mode)

    movw   ax, bc          ; A
    movw   MDAL, ax
    movw   ax, [sp+8]      ; C
    movw   MDAH, ax        ; <--- this starts the multiplication operation

    nop                    ; MDBH:MDBL = MDAL * MDAH (must wait 1 cycle)

    mov    !MDUC, #0x40    ; switch MDUC to unsigned multiply-accumulation operation

    movw   ax, de          ; get the higher 16-bits of the BxD value and store
    movw   !MDCL, ax       ; them to the lower part of the MDC accumulator
    
    movw   ax, MDBL        ; store the AxC value by parts:
    movw   !MDCH, ax       ; its lower part into the high part of the accumulator
    
    movw   ax, MDBH        ; and its higher part in the high 16 bits of the result
    xchw   ax, bc          ; BC = higher part of AxC, AX = A
    movw   MDAL, ax
    movw   ax, [sp+6]      ; D
    movw   MDAH, ax        ; <--- this starts the 1st multiply-accumulation operation

    pop    ax              ; B (SP moves up by 2: D is now at [sp+4], C at [sp+6])
    movw   MDAL, ax        ; <--- 1st multiply-acc result ready

    mov    a, !MDUC        ; A is overwritten here; AX is reloaded right below
    mov1   CY, a.2         ; get the overflow/carry of the mult-acc op

    movw   ax, [sp+6]      ; C
    movw   MDAH, ax        ; <--- this starts the 2nd multiply-accumulation operation

    sknc                   ; CY still holds the overflow of the 1st mult-acc op
    incw   bc
    nop                    ; *I think* one additional cycle is needed here if CY == 0

    mov    a, !MDUC
    mov1   CY, a.2         ; get the overflow/carry of the mult-acc op

    sknc
    incw   bc              ; update the highest word of the result, if necessary

    movw   ax, !MDCH       ; AX = bits 32-47 of the product (high part of the accumulator)

    ret

END_FUNC ___umulsi3_highpart

#endif /* __RL78_MUL_G13__ */

#ifdef __RL78_MUL_G14__

; ___umulsi3_highpart, variant built when __RL78_MUL_G14__ is defined.
;
; Returns the upper 32 bits of the 64 bit product of two unsigned 32 bit
; values, using the mulhu (multiply) and machu (multiply-accumulate into
; MACRH:MACRL) instructions.  The register operand is split into the
; words A (upper, in BC) and B (lower, in AX); the stack operand into
; C (upper) and D (lower):
;
;   product = AxC * 2^32 + (AxD + BxC) * 2^16 + BxD
;
; Steps:
;   1. mulhu BxD, upper word of the result -> MACRL
;   2. mulhu AxC, lower word -> MACRH, upper word -> DE
;   3. machu CxB, then machu DxA; after each one a set CY bumps the
;      provisional upper word (DE first, then its copy in BC)
;   4. the result is BC (bits 48-63) and MACRH (bits 32-47)
;
; inputs:   BC:AX           = unsigned 32 bit value
;           [sp+4]..[sp+7]  = unsigned 32 bit value
; output:   BC:AX           = the higher 32 bits of the product of the two inputs
; clobbers: DE
START_FUNC ___umulsi3_highpart

    movw   de, ax          ; keep B in DE for the BxC step
    push   bc              ; A is now at [sp], C at [sp+8] and D at [sp+6]

    movw   bc, ax          ; B
    movw   ax, [sp+6]      ; D
    mulhu                  ; BC:AX = AX * BC

    movw   ax, bc
    movw   MACRL, ax       ; store the high word to the lower part of the accumulator

    movw   ax, [sp]        ; A
    movw   bc, ax
    movw   ax, [sp+8]      ; C
    mulhu                  ; BC:AX = AX * BC

    movw   MACRH, ax       ; store the lower part of the result into the high part

    xchw   ax, bc          ; swap the values in BC and DE
    xchw   ax, de          ; so that DE will contain the (provisional) high word of the result
    xchw   ax, bc          ; and BC the B value previously stored in DE

    movw   ax, [sp+8]      ; C
    machu                  ; MACRH:MACRL += AX * BC

    sknc                   ; must directly follow machu: it tests the CY machu produced
    incw   de              ; take into account a possible carry

    pop    bc              ; A (SP moves up by 2: D is now at [sp+4])
    movw   ax, [sp+4]      ; D
    machu                  ; MACRH:MACRL += AX * BC

    movw   bc, r_4         ; r_4 is not defined in this file; presumably it is the
                           ; vregs.h name for the location of DE, making this
                           ; BC = DE - TODO confirm against vregs.h
    sknc                   ; still tests the CY of the 2nd machu
    incw   bc              ; take into account a possible carry

    movw   ax, MACRH       ; AX = bits 32-47 of the product
    ret

END_FUNC ___umulsi3_highpart

#endif /* __RL78_MUL_G14__ */
