/*	$NetBSD: start.S,v 1.4 2021/12/03 10:49:25 andvar Exp $	*/

/*-
 * Copyright (c) 2010 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code was written by Alessandro Forin and Neil Pittman
 * at Microsoft Research and contributed to The NetBSD Foundation
 * by Microsoft Corporation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

	/* Trivial support for printing stuff on the serial line from C programs.
     */
#include <mips/asm.h>
#include <mips/cpuregs.h>
#define __ASSEMBLER__ 1
#include <machine/emipsreg.h>

/* Offsets in the CXTINFO structure
 *
 * CXTINFO is the register save area that Dispatch (below) builds on the
 * stack when an exception is handed to a user handler.  Every slot is one
 * 32-bit word, so each offset is <slot number> * 4.
 *
 * Slots 1..31 use the general register number as the slot number
 * (AT is register 1, ..., RA is register 31).  Slot 0 has no name here.
 */
#define TS_AT (1 * 4)
#define TS_V0 (2 * 4)
#define TS_V1 (3 * 4)
#define TS_A0 (4 * 4)
#define TS_A1 (5 * 4)
#define TS_A2 (6 * 4)
#define TS_A3 (7 * 4)
#define TS_T0 (8 * 4)
#define TS_T1 (9 * 4)
#define TS_T2 (10 * 4)
#define TS_T3 (11 * 4)
#define TS_T4 (12 * 4)
#define TS_T5 (13 * 4)
#define TS_T6 (14 * 4)
#define TS_T7 (15 * 4)
#define TS_S0 (16 * 4)
#define TS_S1 (17 * 4)
#define TS_S2 (18 * 4)
#define TS_S3 (19 * 4)
#define TS_S4 (20 * 4)
#define TS_S5 (21 * 4)
#define TS_S6 (22 * 4)
#define TS_S7 (23 * 4)
#define TS_T8 (24 * 4)
#define TS_T9 (25 * 4)
#define TS_K0 (26 * 4)
#define TS_K1 (27 * 4)
#define TS_GP (28 * 4)
#define TS_SP (29 * 4)
#define TS_FP (30 * 4)
/* Let the code below write "fp"; it is spelled s8 for the assembler. */
#define fp s8
#define TS_RA (31 * 4)

/* Slots past the general registers: exception PC, status register and the
 * HI/LO multiply/divide registers.  TS_EC is defined, but the assembly in
 * this file neither stores to it nor loads from it.
 */
#define TS_PC (32 * 4)
#define TS_SR (33 * 4)
#define TS_HI (34 * 4)
#define TS_LO (35 * 4)
#define TS_EC (36 * 4)
/* Total size of the save area: 37 words. */
#define SIZEOF_CXTINFO (37*4)

/* PROM_MODE means the user plans to keep this code around while running an OS.
 * So we act kind of like PROM code (BIOS?), but we live in RAM.
 * So we need to safeguard ourselves against corruption, some of it unavoidable,
 * such as the overwriting of the exception vectors, right where our "start" code is.
 */

        /* External symbols used below: the C entry point and the end of
         * the image (used as the base for the stacks set up in this file).
         */
        IMPORT(main,4)
        IMPORT(_end,4)

        /* From here on the assembler must not reorder instructions or fill
         * branch delay slots for us: every branch/jump below is followed by
         * an explicitly written delay-slot instruction.
         */
        .set noreorder

/* Image entry point, at offset 0.
 *
 * bgezal with the zero register as its operand always branches.  It is
 * used rather than a plain jump because it also sets ra to the address
 * following the delay slot (start + 8); real_start uses that value under
 * SECONDARY_BOOTBLOCK to work out where the image is actually running.
 */
EXPORT(start)
    bgezal zero,_C_LABEL(real_start)
    nop


/* Does not handle the exception, really.
 * But to test interrupts should be enough
 *
 * Placed at offset 0x80 from the start of the section by the .org below
 * (NOTE(review): presumably so that it sits on the processor's general
 * exception vector when the image is loaded at its link address - confirm).
 *
 * Behavior:
 *  - If the word at UserInterruptHandler is zero, jump straight back to
 *    the exception PC without executing rfe.
 *  - Otherwise fall into Dispatch, which saves a CXTINFO on the current
 *    stack, calls the handler with a0 = pointer to that CXTINFO, and then
 *    restores the machine state from the CXTINFO pointer the handler
 *    returns in v0.
 *
 * Only k0 and k1 are used before the registers have been saved.
 */
     .org 0x00000080
NESTED_NOPROFILE(ExceptionHandler,SIZEOF_CXTINFO,$31)
    /* k1 = contents of the UserInterruptHandler word */
    la     k1, UserInterruptHandler
    lw     k1,0(k1)
    bne    k1,zero,Dispatch
    /* delay slot, executed on both paths: k0 = exception PC */
    mfc0   k0, MIPS_COP_0_EXC_PC
    /* No handler registered: resume at the exception PC. */
    j      k0
    nop /* do not! pop status */

/* One word, initially zero, holding the address of the routine Dispatch
 * calls.  Exported so that other code can store a handler address here.
 */
EXPORT(UserInterruptHandler)
    .word 0

/* Entered from ExceptionHandler with k0 = exception PC and
 * k1 = handler address (non-zero).
 */
EXPORT(Dispatch)
	/* Save state on stack */
	addiu   sp, sp, -SIZEOF_CXTINFO
	/* save registers */
    /* AT is named explicitly, so tell the assembler not to complain */
    .set noat
	sw      AT, TS_AT(sp)
    .set at
	sw      v0, TS_V0(sp)
	sw      v1, TS_V1(sp)
	sw      a0, TS_A0(sp)
	sw      a1, TS_A1(sp)
	sw      a2, TS_A2(sp)
	sw      a3, TS_A3(sp)
	sw      t0, TS_T0(sp)
	sw      t1, TS_T1(sp)
	sw      t2, TS_T2(sp)
	sw      t3, TS_T3(sp)
	sw      t4, TS_T4(sp)
	sw      t5, TS_T5(sp)
	sw      t6, TS_T6(sp)
	sw      t7, TS_T7(sp)
	sw      s0, TS_S0(sp)
	sw      s1, TS_S1(sp)
	sw      s2, TS_S2(sp)
	sw      s3, TS_S3(sp)
	sw      s4, TS_S4(sp)
	sw      s5, TS_S5(sp)
	sw      s6, TS_S6(sp)
	sw      s7, TS_S7(sp)
	sw      t8, TS_T8(sp)
	sw      t9, TS_T9(sp)
	/* k0/k1 were already overwritten on entry: what gets stored here is
	 * the exception PC and the handler address, not the interrupted
	 * code's values.
	 */
	sw      k0, TS_K0(sp)
	sw      k1, TS_K1(sp)
	sw      gp, TS_GP(sp)
	/* sp: later */
	sw      fp, TS_FP(sp)
	sw      ra, TS_RA(sp)

    /* a0, t0 and t1 are free to use as scratch now that they are saved */
    mfc0    a0, MIPS_COP_0_STATUS
    mflo    t0
    mfhi    t1
	sw      a0, TS_SR(sp)
	sw      t0, TS_LO(sp)
	sw      t1, TS_HI(sp)
	/* the resume address is the exception PC still held in k0 */
	sw      k0, TS_PC(sp)

	/* Save original stack */
    /* a0 = pointer to the CXTINFO, the handler's first argument */
    move    a0,sp
	/* t0 = value sp had before the CXTINFO was pushed */
	addiu   t0, sp, SIZEOF_CXTINFO
    jalr    k1
	/* delay slot: the store completes before the handler starts */
	sw      t0, TS_SP(sp)
    
    /* Returned value is new PCXINFO */
    move    a0,v0

	/* First load most registers */
    .set noat
	lw       AT, TS_AT(a0)
	lw       v0, TS_V0(a0)
	lw       v1, TS_V1(a0)
	/* a0 later */
	lw       a1, TS_A1(a0)
	lw       a2, TS_A2(a0)
	lw       a3, TS_A3(a0)
	lw       t0, TS_T0(a0)
	lw       t1, TS_T1(a0)
	lw       t2, TS_T2(a0)
	lw       t3, TS_T3(a0)
	lw       t4, TS_T4(a0)
	lw       t5, TS_T5(a0)
	lw       t6, TS_T6(a0)
	lw       t7, TS_T7(a0)
	lw       s0, TS_S0(a0)
	lw       s1, TS_S1(a0)
	lw       s2, TS_S2(a0)
	lw       s3, TS_S3(a0)
	lw       s4, TS_S4(a0)
	lw       s5, TS_S5(a0)
	lw       s6, TS_S6(a0)
	lw       s7, TS_S7(a0)
	lw       t8, TS_T8(a0)
	lw       t9, TS_T9(a0)
    /* k0,k1 not restored */
	lw       gp, TS_GP(a0)
	/* sp later */
	lw       fp, TS_FP(a0)
	lw       ra, TS_RA(a0)

    /* k0/k1 serve as scratch for HI, LO and the status register */
    lw       k1, TS_HI(a0)
    lw       k0, TS_LO(a0)
    mthi     k1
    mtlo     k0
    lw       k1, TS_SR(a0)
    mtc0     k1, MIPS_COP_0_STATUS
     /* NB: After this instruction we cannot take any interrupts or traps
      */
	lw	sp, TS_SP(a0)

	/* Put pc into k0 */
	lw	k0, TS_PC(a0)
	/* a0 is the base register for all of the loads above, so it is
	 * reloaded last
	 */
	lw	a0, TS_A0(a0)
	j	k0
    /* delay slot: return from exception as the jump is taken */
    rfe
    .set at

END(ExceptionHandler)

     /* real_start: reached from the branch-and-link at "start", so on entry
      * ra holds the run-time address of start + 8.
      *
      * With SECONDARY_BOOTBLOCK: if the image is not running at its link
      * address, copy start.._edata to the link address; in either case
      * clear the bss and continue at the link-address copy (InRAM).
      * Then, always: set up gp (optional) and a stack, call main(sp), and
      * if main returns, reset the processor via _rtt.
      */
     .org 0x00000200
EXPORT(real_start)
	.ent _C_LABEL(real_start)

#ifdef SECONDARY_BOOTBLOCK
    /*
     * If this is the program that goes into FLASH we must copy ourselves down to RAM.
     * FLASH default on the MLx is at 0xf0000000, DRAM at 0.
     */
    /* a0 = run-time address of "start" (ra points two instructions past it) */
    addi    a0,ra,-8         /* Compensate for the first two instructions */

    /* Get the address(relative) of TextStart
     *
     * The branch-and-link is always taken and leaves ra pointing at the
     * word after its delay slot, i.e. at the TextStart table below.  The
     * table is then read through ra, so this works wherever the image is
     * currently running.
     */
    bgezal  zero, _C_LABEL(MipsStart2) /* Always jumps */
    nop

    /* All of the static data, since we are at it.
     * These words hold link-time addresses and are never executed.
     */
TextStart:                                /* + 0 */
    /* Text start at final link address */
    .int    start

DataEnd:                                  /* + 4 */
    /* Data end == bss start */
    .int    _edata

BssEnd:                                   /* + 8 */
    /* Bss end */
    .int    _end

RelocToRAM:                               /* +12 */
    /* Link address of InRAM, the continuation point after the copy */
    .int    InRAM

MipsStart2:

    /* Source = a0, Dst = t2 */
    lw      t2, 0(ra)     /* _C_LABEL(TextStart) */

    /* EndPtr = t3 */
     /* in bdelay slot */

    /* If a0 != t2 then we are running in Flash but should run in RAM
     * In that case copy .text. Otherwise skip to .bss.
     *
     * "ZroLoop-4" is the single lw instruction just before the ZroLoop
     * label; that instruction must stay exactly one word long and
     * directly in front of the label.
     */
    beq     a0,t2,ZroLoop-4
    lw      t3, 4(ra)    /* _C_LABEL(DataEnd)   */

CpyLoop:
    /* loop copying 2 words at a time
     * The test is at the bottom, so at least one pair is copied, and the
     * copy advances in 8-byte steps until t2 >= t3.
     */
    lw      t4,0(a0)
    lw      t5,4(a0)
    addiu   a0,a0,8
    sw      t4,0(t2)
    addiu   t2,t2,8
    sltu    t1,t2,t3
    bne     t1,zero,CpyLoop
    /* delay slot: second word of the pair (t2 has already been advanced) */
    sw      t5,-4(t2)

    /* zero the bss
     * t3 = current pointer (starts at DataEnd), t4 = BssEnd.
     * The comparison is made before the store but acted on after it, so
     * the word at address t4 itself is cleared as well.
     */
    lw      t4, 8(ra)   /* _C_LABEL(BssEnd)  */
ZroLoop:
    sltu    t1,t3,t4
    sw      zero,0(t3)
    bne     t1,zero,ZroLoop
    addiu   t3,t3,4

    /* Jump to RAM copy (below)
     * The target is the link-time address of InRAM read from the table.
     */
    lw      t1, 12(ra)   /* _C_LABEL(RelocToRAM) */
    jr      t1
    nop

    /*
     * Execute from here after copying out of FLASH into RAM
     */
InRAM:

#endif /*  SECONDARY_BOOTBLOCK */
	
    /* Get a stack
     * sp starts 8 KB above _end.
     */
#ifdef __GP_SUPPORT__
    la      gp, _C_LABEL (_gp)
#endif
    la    sp,_end
	addiu sp,sp,(8*1024)          /* BUGBUG arbitrary */

    /* Jump to main
     * The delay slot passes the initial stack pointer as main's first
     * argument.
     */
    jal   main
    add   a0,sp,zero

    /* Load failed, reset the processor and jump back to the origins.
     * Reached when main returns; also exported so it can be called directly.
     */
EXPORT(_rtt)    /* ahem */
    li     t0,0x1260ff80  /* NB: On new builds this is a SYS-RESET as well */
    mtc0   t0,MIPS_COP_0_STATUS

    /* Only the upper 16 bits of BRAM_DEFAULT_ADDRESS are loaded, so the
     * jump target is that address with its low half cleared.
     */
    lui    t0,(BRAM_DEFAULT_ADDRESS>>16) /* nb: knows about 16bit chop */
	jr     t0
    nop

/* Exported spin-forever loop.  The jr above jumps away, so this is not
 * entered by falling through from _rtt.
 */
EXPORT(Stop)
	b     Stop
    nop

END(real_start)

        /*
         * Assembler mode for the leaf routines that follow: delay slots
         * are written out by hand, AT is never used implicitly, and no
         * multi-instruction macro expansion is wanted.
         */
        .set    noreorder
        .set    noat
        .set    nomacro

/*
 * void Delay(UINT32 count)
 *
 * Busy-wait proportional to count.  The branch tests a0 as it was before
 * the decrement in its delay slot, so the loop branches back count times
 * and drops out on the pass that finds a0 equal to zero.
 */
LEAF(Delay)
        bnez    a0, _C_LABEL(Delay)
        addiu   a0, a0, -1              /* delay slot: runs on every pass */
        jr      ra
        nop
END(Delay)

/*
 * UINT32 GetPsr(void)
 *
 * Read the PSR (coprocessor 0 status register) and return it in v0.
 */
LEAF(GetPsr)
        mfc0    v0, MIPS_COP_0_STATUS
        jr      ra
        nop                             /* branch delay slot */
END(GetPsr)

/*
 * void SetPsr(UINT32 Psr)
 *
 * Write Psr (in a0) to the PSR (coprocessor 0 status register).
 */
LEAF(SetPsr)
        mtc0    a0, MIPS_COP_0_STATUS
        jr      ra
        nop                             /* branch delay slot */
END(SetPsr)

/*
 * UINT32 GetCause(void)
 *
 * Read the coprocessor 0 Cause register and return it in v0.
 */
LEAF(GetCause)
        mfc0    v0, MIPS_COP_0_CAUSE
        jr      ra
        nop                             /* branch delay slot */
END(GetCause)

/*
 * UINT32 GetEpc(void)
 *
 * Read the coprocessor 0 Epc (exception PC) register and return it in v0.
 */
LEAF(GetEpc)
        mfc0    v0, MIPS_COP_0_EXC_PC
        jr      ra
        nop                             /* branch delay slot */
END(GetEpc)


/*
 * int PutWord(UINT32 Word);
 *
 * Print Word as eight lowercase hexadecimal digits, most significant
 * nibble first, with one PutChar call per digit.
 * Returns: 0 if ok, -1 otherwise (printing stops at the first PutChar
 * that reports failure).
 *
 * Register use across the PutChar calls:
 *   s0 = digits still to print
 *   s1 = Word, shifted left so the next nibble sits in the top four bits
 */
NESTED(PutWord,12,$31)
        addiu   sp, sp, -12
        sw      ra, 0(sp)
        sw      s1, 4(sp)
        sw      s0, 8(sp)

        move    s1, a0
        li      s0, 8

1:      /* one digit per pass */
        srl     a0, s1, 28              /* a0 = top nibble, 0..15 */
        sltiu   t1, a0, 10
        bnez    t1, 2f                  /* 0..9: add '0' */
        li      a1, '0'                 /* delay slot: assume a decimal digit */
        addiu   a0, a0, -10             /* 10..15: add 'a' to (nibble - 10) */
        li      a1, 'a'
2:
        sll     s1, s1, 4               /* line up the next nibble */
        jal     PutChar
        addu    a0, a0, a1              /* delay slot: a0 = ASCII digit */

        addiu   s0, s0, -1
        bnez    v0, 3f                  /* PutChar failed: give up */
        li      v0, -1                  /* delay slot: error return value */

        bnez    s0, 1b                  /* more digits to go? */
        nop

        li      v0, 0                   /* all eight digits went out */
3:
        lw      ra, 0(sp)
        lw      s1, 4(sp)
        lw      s0, 8(sp)
        jr      ra
        addiu   sp, sp, 12              /* delay slot: release the frame */

END(PutWord)

/*
 * int Puts(char *String);
 *
 * Send the bytes of String through PutChar, one at a time, up to but not
 * including the terminating zero byte.
 * Returns: 0 if ok, -1 otherwise (as soon as PutChar reports failure).
 *
 * s0 is the read cursor; it is kept in a saved register because PutChar
 * is called inside the loop.
 */
NESTED(Puts,8,$31)
        addiu   sp, sp, -8
        sw      ra, 0(sp)
        sw      s0, 4(sp)

        move    s0, a0

1:      /* fetch the next byte and advance the cursor */
        lbu     a0, 0(s0)
        addiu   s0, s0, 1
        beqz    a0, 2f                  /* terminator: finished */
        nop
        jal     PutChar
        nop
        beqz    v0, 1b                  /* sent ok: keep going */
        nop

        /* PutChar gave up on this character */
        b       3f
        li      v0, -1                  /* delay slot: error return value */

2:      /* whole string sent */
        li      v0, 0
3:
        lw      ra, 0(sp)
        lw      s0, 4(sp)
        jr      ra
        addiu   sp, sp, 8               /* delay slot: release the frame */

END(Puts)


/*
 * int GetChar(void);
 *
 * Poll the USART status register until USI_RXRDY is set, then read the
 * receive register.
 * Returns: the received byte (0..255) if ok, -1 if the spin budget runs
 * out first.
 *
 * t0 = USART base (only the upper 16 bits of USART_DEFAULT_ADDRESS are
 *      loaded), t1 = remaining spins, t4 = scratch.
 */
LEAF(GetChar)
        lui     t0, (USART_DEFAULT_ADDRESS>>16)
        lui     t1, 1000                /* budget = 1000 << 16 spins */

1:      /* receiver not ready yet */
        lw      t4, USARTST(t0)
        andi    t4, t4, USI_RXRDY
        bgtz    t4, 2f
        addiu   t1, t1, -1              /* delay slot: one spin used */
        bgtz    t1, 1b                  /* budget left: poll again */
        nop

        /* budget exhausted */
        jr      ra
        li      v0, -1

2:      /* a byte is waiting */
        lw      v0, USARTRX(t0)
        jr      ra
        andi    v0, v0, 0xff            /* delay slot: keep the low byte only */
END(GetChar)

/*
 * int PutChar(UINT8 v);
 *
 * Poll the USART status register until USI_TXRDY is set, then store a0
 * to the transmit register.
 * Returns: 0 if ok, -1 if the spin budget runs out first (nothing is
 * written to the transmit register in that case).
 *
 * t0 = USART base (only the upper 16 bits of USART_DEFAULT_ADDRESS are
 *      loaded), t1 = remaining spins, t4 = scratch.
 */
LEAF(PutChar)
        li      v0, 0                   /* success unless we time out */
        lui     t0, (USART_DEFAULT_ADDRESS>>16)
        lui     t1, 1000                /* budget = 1000 << 16 spins */

1:      /* transmitter not ready yet */
        lw      t4, USARTST(t0)
        andi    t4, t4, USI_TXRDY
        bgtz    t4, 2f
        addiu   t1, t1, -1              /* delay slot: one spin used */
        bgtz    t1, 1b                  /* budget left: poll again */
        nop

        /* budget exhausted */
        jr      ra
        li      v0, -1

2:      /* transmitter ready: hand over the character on the way out */
        jr      ra
        sw      a0, USARTTX(t0)

END(PutChar)

/*
 * void switch_stack_and_call(void *arg, void (*function)(void *));
 *
 * Point sp at a fresh stack 2 KB above _end and jump to function.
 * a0 is not modified, so function receives arg as its first argument.
 * This is a jump, not a call: ra is left as the caller set it, so a
 * return from function would resume in the original caller with the
 * replaced stack pointer.  Returning is therefore a very bad idea.
 */
LEAF(switch_stack_and_call)
        lui     sp, %hi(_end)
        addiu   sp, sp, %lo(_end)       /* sp = address of _end */
        jr      a1
        addiu   sp, sp, (2*1024)        /* delay slot; BUGBUG arbitrary */

END(switch_stack_and_call)