/*
 * Copyright 2020-2025 Yuntu Microelectronics co.,ltd
 * All rights reserved.
 *
 * YUNTU Confidential. This software is owned or controlled by YUNTU and may only be
 * used strictly in accordance with the applicable license terms. By expressly
 * accepting such terms or by downloading, installing, activating and/or otherwise
 * using the software, you are agreeing that you have read, and that you agree to
 * comply with and are bound by, such license terms. If you do not agree to be
 * bound by the applicable license terms, then you may not retain, install,
 * activate or otherwise use the software. The production use license in
 * Section 2.3 is expressly granted for this software.
 */

/******************************************************************************
* Test summary:
* -------------
* Tests the internal logic of the MAC module:
* - 32 x 32   Multiplier 
* - 16 x 16   Multiplier 
* - 32 x 16   Multiplier
* - 2x(16x16) Multiplier
*
* Tests signed/unsigned multiplications and absolute differences as well as 
* their decoding.
*
* Note that MAC is never dual-issued and only executes from slot0.
*              
* Decoder coverage:
* ----------------           
* Thumb (16-bit) DECODER:
*   - MULS
*
* Thumb (32-bit) DECODER:
* Long multiply, long multiply accumulate
*   - SMULL, UMULL, SMLAL, SMLALBB, SMLALBT, SMLALTB, SMLALTT, SMLALD, SMLALDX,
*     SMLSLD, SMLSLDX, UMLAL, UMAAL 
* Multiply, multiply accumulate, and absolute difference
*   - MUL, MLA, MLS, SMLABB, SMLABT, SMLATB, SMLATT, SMULBB, SMULBT, SMULTB,
*     SMULTT, SMLAD, SMLADX, SMUAD, SMUADX, SMLAWT, SMLAWB, SMULWB, SMULWT,
*     SMLSD, SMLSDX, SMUSD, SMUSDX, SMMLA, SMMLAR, SMMUL, SMMULR, SMMLS, SMMLSR,
*     USADA8, USAD8
******************************************************************************/

#include "CorTst_Compiler.h"
#include "CorTst_M33_Cfg.h"

#if (CORTST_M33_MAC_ENABLE==1)
    /* Compatible with ABI. */
    /* NOTE(review): CST_PRES8 comes from CorTst_Compiler.h; presumably it
       expands to the toolchain directive that declares 8-byte stack alignment
       is preserved - confirm in that header. */
    CST_PRES8
    /* Symbols defined in the current module but to be visible to outside */
    CST_EXPORT M33_Cst_MacTest

    /* Symbols defined outside but used within current module */
    /* Common exit path: the test routine branches here with its result in R0. */
    CST_EXTERN m33_cst_test_tail_end


    /* Seed of the signature: loaded into R10 and copied to R11 and R12 at the
       start of the test; its bitwise inverse is used as the first test
       vector in R0 and R1. */
    CST_SET(PRESIGNATURE, 0xE5692F9C)
    
    /*------------------------------------------------------------------------*/
    CST_SECTION_EXEC(mcal_text)
    /*------------------------------------------------------------------------*/
    /* The ".type" directive instructs the assembler/linker that the label 
       "M33_Cst_MacTest" designates a function.
       This causes the least significant bit to be set to '1' within any 
       pointer to this function, so that the core stays in Thumb state
       whenever this function is called. */
    CST_THUMB2
    CST_TYPE(M33_Cst_MacTest, function)
/*------------------------------------------------------------------------------
 * M33_Cst_MacTest
 *
 * Purpose:
 *   Executes the multiply, multiply-accumulate and sum-of-absolute-difference
 *   instructions on a sequence of changing test vectors and folds every
 *   result into a 32-bit signature.
 *
 * Inputs:
 *   None. R0 and R1 are overwritten before they are read.
 *
 * Output:
 *   R0 = final signature = R10 + R11 + R12.
 *   NOTE(review): the comparison against the expected signature is not done
 *   in this routine - presumably by the caller or by m33_cst_test_tail_end.
 *
 * Register roles:
 *   R0, R1   test vectors (operands of every tested instruction)
 *   R10      accumulator for the UMULL results (inner loop)
 *   R11      accumulator for the SMULBB/SMULWB/SMUAD results (inner loop)
 *   R12      accumulator for the outer-loop results and the Q flag
 *   R7, R6   inner-loop counter and its multiplier (2)
 *   R3, R5   outer-loop counter and its multiplier (8); kept on the stack
 *            because both registers are reused as result registers
 *   R2-R9    scratch/result registers of the tested instructions
 *
 * Stack:
 *   Pushes R4-R12,R14 on entry (10 words) plus the 2-word outer-loop counter
 *   pair. The counter pair is discarded before leaving; the 10 saved
 *   registers are still on the stack when branching to
 *   m33_cst_test_tail_end.
 *   NOTE(review): m33_cst_test_tail_end is defined elsewhere; presumably it
 *   restores R4-R12 and returns to the caller - confirm in that routine.
 *----------------------------------------------------------------------------*/
M33_Cst_MacTest:

    PUSH    {R4-R12,R14}
    
    /*------------------------------------------------------------------------*/
    /* Test - preparation                                                     */
    /*------------------------------------------------------------------------*/
    LDR     R10,=PRESIGNATURE   /* Initialize results registers */
    MOV     R11,R10     
    MOV     R12,R10
    /* Prepare initial test vectors */
    MVN     R0,R10  /* Invert PRESIGNATURE */
    MOV     R1,R0   /* Both vectors start with the same value */
    
    /*------------------------------------------------------------------------*/
    /* Test - start                                                           */
    /*------------------------------------------------------------------------*/
    /**************************************************************************/
    /* 32x32 Multiplier internal logic will be tested by using the UMULL      */
    /* instruction. It is executed 10 x 32 = 320 times in total               */
    /* (10 outer passes, 32 inner passes each).                               */
    /*                                                                        */
    /* m33_cst_mac_test_inner_loop:                                           */
    /* -----------------------------                                          */
    /*  The counter R7 is doubled by MULS at the end of every pass:           */
    /*  R7=0x00000001   during the 1st pass                                   */
    /*  R7=0x00000002   during the 2nd pass                                   */
    /*  ...                                                                   */
    /*  R7=0x80000000   result after the 31st pass, sets N flag               */
    /*  R7=0x00000000   result after the 32nd pass, sets Z flag -> loop exit  */
    /*                                                                        */
    /* m33_cst_mac_test_outer_loop:                                           */
    /* -----------------------------                                          */
    /*  The counter R3 is multiplied by 8 by MULS at the end of every pass:   */
    /*  R3=0x00000008   during the 1st pass                                   */
    /*  R3=0x00000040   during the 2nd pass                                   */
    /*  ...                                                                   */
    /*  R3=0x40000000   during the 10th pass                                  */
    /*  R3=0x00000000   result after the 10th pass (2^33 truncated to         */
    /*                  32 bits), sets Z flag -> loop exit                    */
    /**************************************************************************/
    
    /************************ Start of Outer Loop *****************************/
    MOV     R3,#0x00000008  /* Initialize outer loop counter */
    MOV     R5,#8           /* Outer loop multiplier */
    PUSH    {R3,R5}         /* Keep both on the stack: R3 and R5 are reused
                               as result registers inside the loop */
m33_cst_mac_test_outer_loop:   /* Outer loop, body executes 10x */
    
    /************************ Start of Inner Loop *****************************/
    MOV     R7,#0x00000001  /* Initialize inner loop counter */
    MOV     R6,#2           /* Inner loop multiplier */
m33_cst_mac_test_inner_loop:   /* Inner loop, body executes 32x */

    /**************************************************************************/
    /* Test 32 x 32 Multiplier internal logic                                 */
    /**************************************************************************/
    UMULL   R2,R3,R0,R1     /* 64 = 32 x 32  */
    /* Accumulate result (low and high word) to R10 */
    ADD     R10,R2
    ADD     R10,R3          
    
    /**************************************************************************/
    /* Test remaining Multipliers internal logics                             */
    /**************************************************************************/
    SMULBB  R4,R1,R0        /* 32 = 16 x 16       Multiplier */
    SMULWB  R5,R0,R1        /* 32 = 32 x 16       Multiplier */
    SMUAD   R8,R1,R0        /* 32 = 2 x (16 x 16) Multiplier */
    /* Accumulate results to R11 */          
    ADD     R11,R4
    ADD     R11,R5
    ADD     R11,R8
    
    /* Prepare new test vectors: rotate R0 and R1 by different amounts so
       that the two operands diverge from pass to pass */
    ROR     R0,R0,#3
    ROR     R1,R1,#1
    
    MULS    R7,R6       /* Update counter (R7 = R7 x 2) - 32 = 32 x 32 - Thumb1 */
    /* MULS updates only N and Z. BGT (Z==0 and N==V) therefore also depends
       on V, which is whatever the caller left on the first outer pass.
       The BNE that follows branches whenever Z==0, so the number of
       iterations does not depend on V: the loop ends only when R7 == 0. */
    BGT     m33_cst_mac_test_inner_loop    /* Taken when Z==0 and N==V */
    BNE     m33_cst_mac_test_inner_loop    /* Taken when Z==0 */
    /************************ End of Inner Loop ******************************/
    MOV     R2,#0
    MSR     APSR_nzcvq,R2       /* Clear N, Z, C, V and Q flags so that Q
                                   read back below reflects only the
                                   instructions that follow */
    
    /**************************************************************************/
    /*  Test remaining Signed MAC instructions                                */ 
    /*  Executed once per outer pass (10 times) with different test vectors,  */
    /*  intended to ensure that the sign bits are exercised as "1" and "0".   */
    /**************************************************************************/
    SMULL   R2,R3,R1,R0     /* 64 = 32 x 32           */
    SMLAL   R2,R3,R0,R1     /* 64 = 64 + 32 x 32      */
    UMLAL   R2,R3,R0,R1     /* 64 = 64 + 32 x 32      */
    UMAAL   R2,R3,R1,R0     /* 64 = 32 + 32 + 32 x 32 */
    MLA     R3,R0,R1,R3     /* 32 = 32 + 32 x 32      */    
    MLS     R2,R1,R0,R2     /* 32 = 32 - 32 x 32      */
    MUL     R9,R0,R1        /* 32 = 32 x 32 - Thumb2  */
    /* Accumulate results to R12 */
    ADD     R12,R2
    ADD     R12,R3
    ADD     R12,R9
    
    /* R4/R5 form one 64-bit accumulator that is chained through all of the
       following long multiply-accumulate instructions */
    SMULTT  R4,R0,R1        /* 32 = 16 x 16           */
    SMULBT  R5,R1,R0        /* 32 = 16 x 16           */
    SMLALBB R4,R5,R0,R1     /* 64 = 64 + 16 x 16      */
    SMLALBT R4,R5,R1,R0     /* 64 = 64 + 16 x 16      */
    SMLALTB R4,R5,R0,R1     /* 64 = 64 + 16 x 16      */
    SMLALTT R4,R5,R1,R0     /* 64 = 64 + 16 x 16      */
    SMLALD  R4,R5,R0,R1     /* 64 + 16 x 16 + 16 x 16 */ 
    SMLALDX R4,R5,R1,R0     /* 64 + 16 x 16 + 16 x 16 */                       
    /* Note: the low/high accumulator registers are swapped (R5 low, R4 high)
       for the two subtracting variants below */
    SMLSLD  R5,R4,R0,R1     /* 64 + 16 x 16 - 16 x 16 */     
    SMLSLDX R5,R4,R0,R1     /* 64 + 16 x 16 - 16 x 16 */
    /* Accumulate results to R12 */
    ADD     R12,R4
    ADD     R12,R5
    
    /* Each result is the accumulate operand of the next instruction, so
       only the last result (R9) has to be added to the signature */
    SMULTB  R5,R0,R1        /* 32 = 16 x 16      */
    SMLABB  R6,R0,R1,R5     /* 32 = 32 + 16 x 16 */
    SMLABT  R7,R1,R0,R6     /* 32 = 32 + 16 x 16 */
    SMLATB  R8,R0,R1,R7     /* 32 = 32 + 16 x 16 */    
    SMLATT  R9,R1,R0,R8     /* 32 = 32 + 16 x 16 */    
    /* Accumulate results to R12 */
    ADD     R12,R9 
    
    /* Three independent chains: R2->R3->R4, R5->R6 and R7->R8; the last
       register of every chain is added to the signature */
    SMUADX  R2,R0,R1        /* 32 = 16 x 16 + 16 x 16      */
    SMLAD   R3,R0,R1,R2     /* 32 = 32 + 16 x 16 + 16 x 16 */
    SMLADX  R4,R0,R1,R3     /* 32 = 32 + 16 x 16 + 16 x 16 */
    SMUSD   R5,R0,R1        /* 32 = 16 x 16 - 16 x 16      */              
    SMLSDX  R6,R0,R1,R5     /* 32 = 32 + 16 x 16 - 16 x 16 */
    SMUSDX  R7,R0,R1        /* 32 = 16 x 16 - 16 x 16      */
    SMLSD   R8,R0,R1,R7     /* 32 = 32 + 16 x 16 - 16 x 16 */
    /* Accumulate results to R12 */
    ADD     R12,R4
    ADD     R12,R6
    ADD     R12,R8
    
    SMULWT  R2,R0,R1        /* 32 = 32 x 16      */      
    SMLAWB  R3,R1,R0,R2     /* 32 = 32 + 32 x 16 */   
    SMLAWT  R4,R0,R1,R3     /* 32 = 32 + 32 x 16 */
    USAD8   R5,R1,R0        /* 32 = sum of 4 absolute byte differences      */
    USADA8  R5,R0,R1,R5     /* 32 = 32 + sum of 4 absolute byte differences */
    /* Accumulate results to R12 */
    ADD     R12,R4
    ADD     R12,R5
    
    /* Most-significant-word multiplies; the R suffix selects the rounding
       variant. R5..R9 are chained through the accumulate operand. */
    SMMUL   R4,R1,R0        /* 32 = 32 x 32      */
    SMMULR  R5,R0,R1        /* 32 = 32 x 32      */
    SMMLA   R6,R0,R1,R5     /* 32 = 32 + 32 x 32 */
    SMMLAR  R7,R1,R0,R6     /* 32 = 32 + 32 x 32 */
    SMMLS   R8,R0,R1,R7     /* 32 = 32 - 32 x 32 */
    SMMLSR  R9,R0,R1,R8     /* 32 = 32 - 32 x 32 */
    /* Accumulate results to R12 */
    ADD     R12,R4
    ADD     R12,R9
    
    /* Update signature in case of Q flag is set (APSR bit 27) */
    MRS     R2,APSR
    AND     R2,R2,#(1<<27)   /* Filter out other flags and GE bits !! */
    ADD     R12,R2
    
    /* Prepare new pseudo-random test vectors for next loops:
       R0 = current R12 signature, R1 = 0 - R0 */
    MOV     R0,R12
    RSB     R1,R0,#0
    
    POP     {R3,R5}     /* Restore outer-loop counter and multiplier */         
    MULS    R3,R5       /* Update counter (R3 = R3 x 8) - 32 = 32 x 32 - Thumb1 */
    PUSH    {R3,R5}     /* Save them again; PUSH does not change the flags */
    BNE     m33_cst_mac_test_outer_loop    /* Check Z flag set by MULS */
    /************************* End of Outer Loop ******************************/
    
    /* Prepare final signature, results are in R10, R11, R12 */
    ADD     R12,R10
    ADD     R12,R11
    MOV     R0,R12
    /*------------------------------------------------------------------------*/
    /* Test routine - End                                                     */
    /*------------------------------------------------------------------------*/
m33_cst_mac_test_end:
    /* Discard the outer-loop counter pair (pushed from R3,R5); R6 and R7
       only serve as scratch destinations here */
    POP     {R6,R7}
    /* Test result is returned in R0, according to the conventions */
    B       m33_cst_test_tail_end

    
    CST_ALIGN_BYTES_4
    /* Marks the current location for dumping the literal pool, which holds
       the numeric values of the symbolic names used in LDR pseudo-instructions. */
    CST_LTORG
    
#endif /* CORTST_M33_MAC_ENABLE */
  
    CST_FILE_END

