/*
 * Copyright 2020-2025 Yuntu Microelectronics co.,ltd
 * All rights reserved.
 *
 * YUNTU Confidential. This software is owned or controlled by YUNTU and may only be
 * used strictly in accordance with the applicable license terms. By expressly
 * accepting such terms or by downloading, installing, activating and/or otherwise
 * using the software, you are agreeing that you have read, and that you agree to
 * comply with and are bound by, such license terms. If you do not agree to be
 * bound by the applicable license terms, then you may not retain, install,
 * activate or otherwise use the software. The production use license in
 * Section 2.3 is expressly granted for this software.
 */

/******************************************************************************
* Test summary:
* -------------
* Tests the functionality of the SPFPU divider (cm33fpu_sp_div.v).
* The test covers the VDIV and VSQRT instruction variants.
*
* Test procedure:
* --------------
* Floating-point division has its specifics in comparison to the integer
* division despite the fact that the integer divider is also included in
* the FPU divider. The difference comes from the range of input operands,
* which is very limited in case of division of two floating-point mantissas
* because the division algorithm requires both mantissas to be normalized,
* thus have the most significant bit set to 1. Due to this limitation,
* the divider operates with numbers in the following format:
*
*   Dividend: 1fffff ... f  (representing mantissa 1,ffffffff...f)
*    Divisor: 1zzzzz ... z  (representing mantissa 1,zzzzzzzz...z)
*
* Result of division (output mantissa) is always in range (0.5,2.0).
*
* Two sets of test vectors were proposed for testing the divider with
* VDIV/VSQRT instructions:
*
*   - 1st test set:
*        shift_right(r1, i)/(r2 & 0x0000000F)
*       ~shift_right(r1, i)/(r2 & 0x0000000F)
*
*       In the limit, input mantissas generated from random variable
*       r1 are approaching values 1.0 and 2.0 respectively. The divisor
*       derived from random variable r2 is kept close to value 1.0,
*       such that the division result does not deviate too much
*       from the original value. Any deviation in high order bits is
*       considered to be a fault.
*
*   - 2nd test set:
*       x/y
*       x = 1, c7 c6 c5 c4 R R R ... R
*       y = 1, c3 c2 c1 c0 R R R ... R
*
*       Variables x (dividend) and y (divisor), together constituting
*       a single test vector, are combined from two numbers - c (with
*       individual bits marked as c7..c0) and R. Number c is an 8-bit
*       integer and number R is a pseudo-random value. To generate test
*       vectors, all possible values of number c are used with different
*       values of R. This is a pseudo-exhaustive approach targeting SRT
*       quotient digit selection logic.
*
* Additional test vectors are applied to test the surrounding logic, mainly:
*   - Handling of special cases (NaN, Inf, Zero inputs)
*   - Floating-point exponent calculation (adders, subtracters)
*   - Handling of inexact result
*   - Result shifting
*
* FPU Decoder coverage:
* ------------------------
* - VDIV.F32, VSQRT.F32
******************************************************************************/

#include "CorTst_Compiler.h"
#include "CorTst_M33_Cfg.h"

#if (CORTST_M33_FPU_ENABLE==1)

    /* Compatible with ABI.
       NOTE(review): CST_PRES8 is defined in CorTst_Compiler.h; presumably it
       expands to the toolchain's "preserve 8-byte stack alignment" build
       attribute - confirm against that header. */
    CST_PRES8
    /* Symbols defined in the current module but to be visible to outside */
    CST_EXPORT M33_Cst_SpfpuDivTest
    
    /* Symbols defined outside but used within current module:
       - m33_cst_test_tail_fpu_end ............ common exit, reached by a
                                                plain branch at the end of
                                                the test
       - m33_cst_write_fpu_regs_from_r0 ....... called with R0 = 0 before
                                                every test section
       - m33_cst_sum_fpu_sXX_s31_to_r1_destr_r2 called after each group of
                                                FPU operations to update the
                                                signature in R1 (as the names
                                                say, R2 is destroyed) */
    CST_EXTERN m33_cst_test_tail_fpu_end
    CST_EXTERN m33_cst_write_fpu_regs_from_r0
    CST_EXTERN m33_cst_sum_fpu_s31_s31_to_r1_destr_r2
    CST_EXTERN m33_cst_sum_fpu_s28_s31_to_r1_destr_r2
    CST_EXTERN m33_cst_sum_fpu_s26_s31_to_r1_destr_r2
    CST_EXTERN m33_cst_sum_fpu_s21_s31_to_r1_destr_r2
    
    /* Initial signature value: SEED_0 is loaded into the low half-word of R1
       (MOV) and SEED_1 into the high half-word (MOVT), i.e. R1 = 0x33B1424B
       at the start of the test. */
    CST_SET(PRESIGNATURE_SEED_0, 0x424B)
    CST_SET(PRESIGNATURE_SEED_1, 0x33B1)

    /* Values stored in registers for the whole duration of the test:
       - R1 ... accumulated signature
       - R12 .. caller's FPSCR (restored just before leaving the test)
    */

    /*------------------------------------------------------------------------*/
    CST_SECTION_EXEC(mcal_text)
    /*------------------------------------------------------------------------*/
    /* The ".type" directive instructs the assembler/linker that the label 
       "M33_Cst_SpfpuDivTest" designates a function.
       This causes the least significant bit to be set to '1' in any pointer
       to this function, so that the core stays in Thumb state whenever the
       function is called through such a pointer. */
    CST_TYPE(M33_Cst_SpfpuDivTest, function)
    CST_THUMB2
M33_Cst_SpfpuDivTest:
    /* Signature-based self-test of the single-precision FPU divider.

       The routine executes VDIV.F32 / VSQRT.F32 on several families of
       operands and folds the results (and, where relevant, the FPSCR
       contents) into a running signature kept in R1.

       Inputs : none are read from R0-R3.
       Output : R0 = bitwise NOT of the final signature; control is then
                handed to m33_cst_test_tail_fpu_end by a plain branch.
                NOTE(review): the tail is presumably responsible for
                unstacking everything pushed below and for returning to the
                caller - confirm against its implementation.

       Register usage across helper calls: the code relies on the external
       helpers preserving every core register except R1 (updated signature)
       and R2 (scratch), e.g. R0 is used as a loop counter / zero constant
       and R6-R8 as loop constants across BL instructions.

       NOTE(review): the prologue stacks 10 + 1 core words and 16 FPU words
       (27 words in total), so if SP is 8-byte aligned on entry it is only
       4-byte aligned while the helpers are called. Confirm that neither the
       helpers nor the tail depend on 8-byte stack alignment. */
    
    PUSH    {R4-R12,R14}
    MRS     R1,CONTROL  /* Capture CONTROL before the first FPU instruction
                           (NOTE(review): presumably so the tail can restore
                           it, FPCA included - confirm) */
    PUSH    {R1}
    VPUSH   {S16-S31}
    
    /*------------------------------------------------------------------------*/
    /* Test - preparation                                                     */
    /*------------------------------------------------------------------------*/
    /* R1 = (PRESIGNATURE_SEED_1 << 16) | PRESIGNATURE_SEED_0 */
    MOV     R1,#PRESIGNATURE_SEED_0
    MOVT    R1,#PRESIGNATURE_SEED_1
    
    /*------------------------------------------------------------------------*/
    /* Test - start                                                           */
    /*------------------------------------------------------------------------*/
    MOV     R0,#0
    VMRS    R12,FPSCR   /* Save the caller's FPSCR in R12 */
    VMSR    FPSCR,R0    /* Clear the FPSCR register (flags and control bits) */
   
   
    /**************************************************************************/
    /* Check the divider (1st test set)                                       */
    /**************************************************************************/
    /* R0 = right-shift amount applied to the signature, counted down from 22
            to 0 (23 iterations)
       R6 = 0x3F800000: sign 0, biased exponent 0x7F, mantissa 0. OR-ing any
            23-bit mantissa into it yields a value in range <1.0,2.0). */
    MOV     R0,#0
    BL      m33_cst_write_fpu_regs_from_r0
    MOV     R0,#22
    MOV     R6,#0x3F800000  /* Template for test values in range <1,2) */
    
    
m33_cst_spfpu_div_test_set1_loop:
    
    /* Prepare Test Vectors */
    LSR     R4,R1,R0            /* R4 = signature >> R0                       */
    MVN     R5,R4               /* R5 = ~R4                                   */
    ORR     R3,R6,R1,LSR #28    /* R3: mantissa = top 4 signature bits, i.e.
                                   a value close to 1.0                       */
    ORR     R4,R6,R4,LSR #9     /* R4: mantissa = upper 23 bits of R4         */
    ORR     R5,R6,R5,LSR #9     /* R5: mantissa = upper 23 bits of ~R4        */
    /* Load Test Vectors to FPU registers (D2 = S5:S4, so S4 = R4, S5 = R5) */
    VMOV    S3,R3
    VMOV    D2,R4,R5
    
    /* Test */
    VDIV.F32    S26,S5,S3
    VDIV.F32    S27,S4,S3
    VDIV.F32    S28,S5,S4
    VSQRT.F32   S29,S5
    VSQRT.F32   S30,S3
    VSQRT.F32   S31,S4

    /* Update signature (R1 changes, so the next iteration gets new vectors) */
    BL          m33_cst_sum_fpu_s26_s31_to_r1_destr_r2
              
    /* Adjust loop counter and check for end loop */
    SUBS    R0,R0,#1    /* End of loop condition !! */
    BGE     m33_cst_spfpu_div_test_set1_loop    /* Loop while R0 >= 0 */
    
    
    /**************************************************************************/
    /* Check the divider (2nd test set)                                       */
    /**************************************************************************/
    /* R7 = 0x7F0: after insertion at bit 19 this becomes sign 0 and biased
            exponent 0x7F, leaving its low nibble free for 4 mantissa bits
       R8 = 8-bit counter c, stepping through all values 0..255 */
    MOV     R0,#0
    BL      m33_cst_write_fpu_regs_from_r0
    
    MOV     R7,#0x7F0
    MOV     R8,#0

m33_cst_spfpu_div_test_set2_loop:
    /* Generate new pseudo-random values (low mantissa bits) from the
       current signature */
    MOV     R4,R1
    MOV     R3,R1,ROR #1

    /* Prepare Test Vectors */
    ORR     R5,R7,R8,LSR#4      /* R5 = 0x7F0 | c[7:4]                        */
    BFI     R3,R5,#19,#13       /* S3 image: 1, c7 c6 c5 c4 R R ... R         */
    ORR     R5,R7,R8            /* R5 = 0x7F0 | c; bits c[7:4] disappear in
                                   the ones of 0x7F0, only c[3:0] remains     */
    BFI     R4,R5,#19,#13       /* S4 image: 1, c3 c2 c1 c0 R R ... R         */
    /* Load Test Vectors to FPU registers */
    VMOV    S3,R3
    VMOV    S4,R4

    /* Test - divide in both directions and take both square roots */
    VDIV.F32    S28,S4,S3
    VDIV.F32    S29,S3,S4
    VSQRT.F32   S30,S3
    VSQRT.F32   S31,S4

    /* !! Update signature !! */
    BL      m33_cst_sum_fpu_s28_s31_to_r1_destr_r2

    /* Next iteration */
    ADD     R8,R8,#1
    TST     R8,#0x100           /* Bit 8 set => all 256 values of c are done  */
    BEQ     m33_cst_spfpu_div_test_set2_loop    /* End of loop */
    
    
    /**************************************************************************/
    /* Floating-point exponent calculation (adders, subtracters)              */
    /**************************************************************************/
    MOV     R0,#0
    BL      m33_cst_write_fpu_regs_from_r0
    
    /* Prepare test vectors - all mantissas are zero (powers of two), only
       the biased exponent (bits 30:23) differs */
    MOV     R2,#0x00800000  /* -> S2,  exp = 0x01 */
    MOV     R3,#0x3F800000  /* -> S3,  exp = 0x7F */
    MOV     R4,#0x2A000000  /* -> S4,  exp = 0x54 */
    MOV     R5,#0x3F000000  /* -> S5,  exp = 0x7E */
    MOV     R6,#0x16000000  /* -> S6,  exp = 0x2C */
    MOV     R7,#0x15800000  /* -> S7,  exp = 0x2B */
    MOV     R8,#0x2B000000  /* -> S8,  exp = 0x56 */
    MOV     R9,#0x55000000  /* -> S9,  exp = 0xAA */
    MOV     R10,#0x6A800000 /* -> S10, exp = 0xD5 */
    VMOV    D1,R2,R3
    VMOV    D2,R4,R5
    VMOV    D3,R6,R7
    VMOV    D4,R8,R9
    VMOV    S10,R10
    
    /* res_exp = a_exp - b_exp + 0x7F (bias)                 */
    /* Test                     a_exp   b_exp   res_exp  */
    VDIV.F32    S26,S2,S2   /*  0x01    0x01    0x7F */
    VDIV.F32    S27,S3,S2   /*  0x7F    0x01    0xFD */
    VDIV.F32    S28,S4,S5   /*  0x54    0x7E    0x55 */
    VDIV.F32    S29,S6,S2   /*  0x2C    0x01    0xAA */
    VDIV.F32    S30,S7,S9   /*  0x2B    0xAA    0x00 */
    VDIV.F32    S31,S8,S10  /*  0x56    0xD5    0x00 */
    
    /* !! Update signature !! */
    BL          m33_cst_sum_fpu_s26_s31_to_r1_destr_r2
    
    
    /**************************************************************************/
    /* Handling of special cases (NaN, Inf, Zero inputs)                      */
    /**************************************************************************/
    /* Check setting of FPSCR.DZC bit when division by zero is injected.
       FPSCR is sampled into the signature after every single operation. It
       is not cleared between the samples, so each sample also contains the
       cumulative flags raised by the preceding operations.
    */
    MOV     R0,#0
    BL      m33_cst_write_fpu_regs_from_r0   
        
    MOV     R2,#0x00000000  /* +0.0  -> S2 */
    MOV     R3,#0x7F800000  /* +Inf  -> S3 */
    LDR     R4,=0xFFDAAAAA  /* -qNaN -> S4 */
    MOV     R5,#0x3F800000  /* +1.0  -> S5 */
    LDR     R6,=0xBF800000  /* -1.0  -> S6 */
    VMOV    D1,R2,R3
    VMOV    D2,R4,R5
    VMOV    S6,R6
    
    /* Test */
    VDIV.F32    S21,S3,S2   /*  +Inf / +0.0 => +Inf, FPSCR.DZC = 0 */
    VMRS    R2,FPSCR        /* Load FPSCR */
    ADD     R1,R1,R2        /* !! Update Signature !! */
    VDIV.F32    S22,S2,S5   /*  +0.0 / +1.0 => +0.0 */
    VMRS    R2,FPSCR        /* Load FPSCR */
    ADD     R1,R1,R2        /* !! Update Signature !! */
    VDIV.F32    S23,S5,S3   /*  +1.0 / +Inf => +0.0 */
    VMRS    R2,FPSCR        /* Load FPSCR */
    ADD     R1,R1,R2        /* !! Update Signature !! */
    VDIV.F32    S24,S3,S5   /*  +Inf / +1.0 => +Inf */
    VMRS    R2,FPSCR        /* Load FPSCR */
    ADD     R1,R1,R2        /* !! Update Signature !! */
    VDIV.F32    S25,S2,S3   /*  +0.0 / +Inf => +0.0 */
    VMRS    R2,FPSCR        /* Load FPSCR */
    ADD     R1,R1,R2        /* !! Update Signature !! */
    VDIV.F32    S26,S5,S2   /*  +1.0 / +0.0 => +Inf, FPSCR.DZC = 1 */
    VMRS    R2,FPSCR        /* Load FPSCR */
    ADD     R1,R1,R2        /* !! Update Signature !! */
    VDIV.F32    S27,S5,S4   /*  +1.0 / -qNaN => -qNaN */
    VMRS    R2,FPSCR        /* Load FPSCR */
    ADD     R1,R1,R2        /* !! Update Signature !! */
    VDIV.F32    S28,S4,S5   /* -qNaN / +1.0  => -qNaN */
    VMRS    R2,FPSCR        /* Load FPSCR */
    ADD     R1,R1,R2        /* !! Update Signature !! */
    VSQRT.F32   S29,S2      /* SQRT(+0.0) => +0.0 */
    VMRS    R2,FPSCR        /* Load FPSCR */
    ADD     R1,R1,R2        /* !! Update Signature !! */
    VSQRT.F32   S30,S4      /* SQRT(-qNaN) => -qNaN, no new FPSCR.IOC */
    VMRS    R2,FPSCR        /* Load FPSCR */
    ADD     R1,R1,R2        /* !! Update Signature !! */
    VSQRT.F32   S31,S6      /* SQRT(-1.0) => +dNaN, FPSCR.IOC = 1 */
    VMRS    R2,FPSCR        /* Load FPSCR */
    ADD     R1,R1,R2        /* !! Update Signature !! */
    
    /* !! Update signature !! - fold the eleven results S21..S31 */
    BL      m33_cst_sum_fpu_s21_s31_to_r1_destr_r2
    
    
    /**************************************************************************/
    /* Check the result shift amount and inexact result detection             */
    /**************************************************************************/
    /*  The shift amount is given by difference of the input exponents and
        number of leading zeros in the inputs operands. The aim is to vary
        these parameters to achieve various shift amounts of the result.
        The shifter itself is covered by 'm33_cst_sfpu_mac_test'.

        Check also setting of the FPSCR.IXC and FPSCR.UFC bits when number 
        of trailing zeros of the result is less than sub-norm result shift 
        amount. 

        Loop set-up:
          R0 = 0, used to clear FPSCR at the start of every iteration
          R3 = dividend image, a single set bit starting at bit 23 (smallest
               normal number) and shifted right by one per iteration, i.e.
               sub-normal numbers with a growing count of leading zeros
          R5 = divisor image 0x4B800000 (biased exponent 0x97), whose
               exponent field is decremented by one per iteration
        Both operands are halved in every step, so the exact quotient stays
        the same while the operand encodings change. The loop runs 24 times,
        until the set bit is shifted out of R3.
     */
    MOV     R0,#0
    BL      m33_cst_write_fpu_regs_from_r0
    
    MOV     R3,#(1<<23)
    MOV     R5,#0x4B800000
    
m33_cst_spfpu_div_inexact_loop:
    /* Load input operands to FPU registers */
    VMOV    S3,R3
    VMOV    S5,R5
    
    VMSR    FPSCR,R0        /* Clear FPSCR register (R0 is still 0) */
    VDIV.F32    S31,S3,S5   /* Test FPSCR IXC, UFC flags  */
    
    /* !! Update signature !! - first the result, then the FPSCR contents */
    BL      m33_cst_sum_fpu_s31_s31_to_r1_destr_r2
    VMRS    R2,FPSCR
    ADD     R1,R1,R2

    /* Next iteration - Prepare operands */
    SUB     R5,R5,#(1<<23)  /* Divisor exponent - 1 */
    LSRS    R3,R3,#1        /* Dividend / 2; Z flag set once R3 is zero */
    BNE     m33_cst_spfpu_div_inexact_loop  /* End of loop */
    
    /*------------------------------------------------------------------------*/
    /* Test - end                                                             */
    /*------------------------------------------------------------------------*/
m33_cst_spfpu_div_test_end:
    VMSR    FPSCR,R12   /* Restore the caller's FPSCR saved at test start */
    /* Test result is returned in R0, according to the conventions:
       R0 = bitwise NOT of the accumulated signature */
    MVN     R0,R1 
    B       m33_cst_test_tail_fpu_end 

    
    CST_ALIGN_BYTES_4
    /* Marks the current location for dumping the literal pool, i.e. the
       numeric values of the constants loaded with the "LDR Rx,=value"
       pseudo-instruction within this module. */
    CST_LTORG

#endif  /* CORTST_M33_FPU_ENABLE */

    CST_FILE_END

