/*
 * Copyright 2020-2025 Yuntu Microelectronics co.,ltd
 * All rights reserved.
 *
 * YUNTU Confidential. This software is owned or controlled by YUNTU and may only be
 * used strictly in accordance with the applicable license terms. By expressly
 * accepting such terms or by downloading, installing, activating and/or otherwise
 * using the software, you are agreeing that you have read, and that you agree to
 * comply with and are bound by, such license terms. If you do not agree to be
 * bound by the applicable license terms, then you may not retain, install,
 * activate or otherwise use the software. The production use license in
 * Section 2.3 is expressly granted for this software.
 */

/******************************************************************************
* Test summary:
* -------------
* Checks the floating point addition/subtraction provided by
* cm33fpu_sp_add.v and cm33fpu_shifter.v modules.
*
* The following functionality is tested:
* - input/output ports
* - internal adder logic
* - internal shifters providing denormalization/renormalization
* - swap functionality
* - handling the signed values
* - producing +0 or -0 driven by the rounding mode
* - handling the special values (sNaN, qNaN, +-Inf, +-0)
* - producing the FPSCR[IOC, IDC, IXC] flags
* - flush to zero functionality
*
* SP FPU Decoder coverage:
* ------------------------
* - VADD.F32, VSUB.F32,
*
******************************************************************************/

#include "CorTst_Compiler.h"
#include "CorTst_M33_Cfg.h"

#if (CORTST_M33_FPU_ENABLE==1)

    /* Module attribute required for ABI compatibility.
       NOTE(review): CST_PRES8 presumably expands to the toolchain's 8-byte
       stack-alignment attribute - confirm in CorTst_Compiler.h. */
    CST_PRES8
    /* Symbols defined in the current module but to be visible to outside */
    CST_EXPORT M33_Cst_SpfpuAdderTest

    /* External Symbols */
    /* Symbols defined outside but used within current module:
       - m33_cst_test_tail_fpu_end ........ common exit; the test branches
                                            (does not call) to it at the end
       - m33_cst_write_fpu_regs_from_r0 ... called with the fill value in R0
       - m33_cst_sum_fpu_sNN_s31_to_r1_destr_r2
                                            called after each group of results;
                                            going by the names they fold
                                            SNN..S31 into R1 and destroy R2
                                            (TODO confirm in their definition) */
    CST_EXTERN m33_cst_test_tail_fpu_end
    CST_EXTERN m33_cst_write_fpu_regs_from_r0
    CST_EXTERN m33_cst_sum_fpu_s8_s31_to_r1_destr_r2
    CST_EXTERN m33_cst_sum_fpu_s17_s31_to_r1_destr_r2
    CST_EXTERN m33_cst_sum_fpu_s22_s31_to_r1_destr_r2
    CST_EXTERN m33_cst_sum_fpu_s28_s31_to_r1_destr_r2
    CST_EXTERN m33_cst_sum_fpu_s29_s31_to_r1_destr_r2
    CST_EXTERN m33_cst_sum_fpu_s30_s31_to_r1_destr_r2

    /* Pre-signature: initial value of the signature accumulator R1.
       SEED_0 is loaded into the low halfword (MOV) and SEED_1 into the
       high halfword (MOVT) at the start of the test. */
    CST_SET(PRESIGNATURE_SEED_0, 0x78C1)
    CST_SET(PRESIGNATURE_SEED_1, 0xA5D6)

    /* Values stored in registers for the whole test:
       - R1 .... accumulated signature
       - R12 ... FPSCR value read at test start, written back before exit
    */

    /*------------------------------------------------------------------------*/
    CST_SECTION_EXEC(mcal_text)
    /*------------------------------------------------------------------------*/
    /* The ".type" directive instructs the assembler/linker that the label
       "M33_Cst_SpfpuAdderTest" designates a function.
       This would cause setting the least significant bit to '1' within any
       pointer to this function, causing change to Thumb mode whenever this
       function is called. */
    CST_TYPE(M33_Cst_SpfpuAdderTest, function)
    CST_THUMB2
/*------------------------------------------------------------------------------
 * M33_Cst_SpfpuAdderTest
 *
 * Single-precision FPU adder self-test (VADD.F32 / VSUB.F32).
 *
 * In:     none (every register is written before it is read)
 * Out:    R0 = accumulated signature (copied from R1 before the final branch)
 * Stack:  pushes R4-R12,R14, then CONTROL, then S16-S31. Nothing is popped
 *         here: the routine ends with a branch to m33_cst_test_tail_fpu_end.
 *         NOTE(review): presumably that tail unwinds this frame and returns
 *         to the caller - confirm against its definition.
 * FPSCR:  read into R12 at test start and written back before exit.
 *
 * The signature is built in two ways:
 *   - result registers are folded in by the m33_cst_sum_fpu_sNN_s31_* helpers
 *   - FPSCR flag snapshots are folded in with  R1 = FPSCR ^ ROR(R1,1)
 *----------------------------------------------------------------------------*/
M33_Cst_SpfpuAdderTest:

    PUSH    {R4-R12,R14}
    MRS     R1,CONTROL  /* Save CONTROL before the first FPU instruction */
    PUSH    {R1}
    VPUSH   {S16-S31}

    /*------------------------------------------------------------------------*/
    /* Test - preparation                                                     */
    /*------------------------------------------------------------------------*/
    /* R1 = (PRESIGNATURE_SEED_1 << 16) | PRESIGNATURE_SEED_0 */
    MOV     R1,#PRESIGNATURE_SEED_0
    MOVT    R1,#PRESIGNATURE_SEED_1

    /*------------------------------------------------------------------------*/
    /* Test - start                                                           */
    /*------------------------------------------------------------------------*/
    VMRS    R12,FPSCR   /* Store FPSCR register */


    /* Set Round-to-Zero mode to truncate the result */
    MOV     R0,#(3<<22)         /* Prepare RMode bits */
    VMSR    FPSCR,R0            /* Clear FPSCR & Set Round to Zero */

    /**************************************************************************/
    /*  Check input/output ports                                              */
    /**************************************************************************/
    /* Initialize vector registers */
    MOV     R0,#0x00000000      /* 0x00000000 -> S0 */
    MOV     R5,#0x55555555      /* 0x55555555 -> S1 */
    MOV     R10,#0xAAAAAAAA     /* 0xAAAAAAAA -> S2 */

    /* Clear FPU registers (R0 = 0) */
    BL      m33_cst_write_fpu_regs_from_r0

    /* Write FPU registers */
    VMOV    S1,S2,R5,R10

    /* Run test code - stimulate all bits in all ports with 1 and 0 */
    VADD.F32    S31,S0,S1
    VADD.F32    S30,S0,S2
    VADD.F32    S29,S1,S0
    VADD.F32    S28,S2,S0

    /**************************************************************************/
    /*  Check internal adder functionality                                    */
    /**************************************************************************/
    /* Stimulate each bit position of the internal adder with all possible
       inputs:

         Inputs
         A | B | Ci      Ideal Input Vector 1)                       Input Vector to Be Applied            Case ID
         ---------------------------------------------------------------------------------------------------------------------------
         0 | 0 | 0       0x0000 + 0x0000                             0x0000 + 0x0000                      Case 1
         0 | 1 | 0       0x0000 + 0x5555; 0x0000 + 0xAAAA;           0x0000 + 0x5555; 0x0000 + 0xAAAA;    Case 2 - Already covered above
         1 | 0 | 0       0x5555 + 0x0000; 0xAAAA + 0x0000;           0x5555 + 0x0000; 0xAAAA + 0x0000;    Case 3 - Already covered above
         1 | 1 | 0       0x5555 + 0x5555; 0xAAAA + 0xAAAA + carry    0x5555 + 0x5555; 0xAAAA + 0xAAAA     Case 4a; Case 4b
         0 | 0 | 1       the same as above                           the same as above                    Case 5 - the same as Cases 4a/4b
         0 | 1 | 1       0x0000 + 0xFFFF + carry                     0x0001 + 0xFFFF                      Case 6
         1 | 0 | 1       0xFFFF + 0x0000 + carry                     0xFFFF + 0x0001                      Case 7
         1 | 1 | 1       0xFFFF + 0xFFFF + carry                     0xFFFF + 0xFFFF                      Case 8

         1) Note: the carry bit cannot be supplied to the least significant
                  bit, so the input vectors have to be accommodated to that
                  limitation.

     **************************************************************************/


    /* Initialize vector with the corresponding Single precision (SP) values */
    MOV     R6,#0x3F800000
    ORR     R5,R6,R5,LSR #8     /* 0x3FD55555 -> S3 */
    ORR     R10,R6,R10,LSR #8   /* 0x3FAAAAAA -> S4 */
    MOV     R7,#0x3FFFFFFF      /* 0x3FFFFFFF -> S5 */
    ADD     R8,R6,#1            /* 0x3F800001 -> S6 */

    /* Write FPU registers */
    VMOV    S3,S4,R5,R10
    VMOV    S5,S6,R7,R8

    /* Run test code */
    VADD.F32    S27,S0,S0       /* Case 1, SP  */
    VADD.F32    S26,S3,S3       /* Case 4a, SP */
    VADD.F32    S25,S4,S4       /* Case 4b, SP */
    VADD.F32    S24,S6,S5       /* Case 6, SP  */
    VADD.F32    S23,S5,S6       /* Case 7, SP  */
    VADD.F32    S22,S5,S5       /* Case 8, SP  */

    /* !! Update Signature !! (port check S28-S31 + adder check S22-S27) */
    BL          m33_cst_sum_fpu_s22_s31_to_r1_destr_r2

    /**************************************************************************/
    /*  Check the functionality of the internal denormalize/renormalize       */
    /*  shifters and swap functionality                                       */
    /**************************************************************************/
    /* Denormalize shifter check:
       In each step increment the difference between exponents by 1 so that
        - The smaller operand will be denormalized and shifted through the
          whole range (from 0 to 24(SP)).
        - Check the swap functionality by exchanging the operands A<=>B
          (only smaller operand is denormalized and shifted)
        - Repeat the following steps for mantissas m_A = 0xAAAA... and 0x5555...
        - Smaller operand will be denormalized.
     **************************************************************************/
    /* R3, R4 and R0 are the inputs of the denorm subroutine; R3 and R4 are
       also reused below to build the renorm operands. */
    MOV     R4,#0x55555555
    LSR     R3,R4,#9        /* 0x002AAAAA Operand A sign + mantissa (Case1) */
    LSR     R4,R4,#8        /* 0x00555555 Operand A sign bit + mantissa (Case2) */
    MOV     R0,#0x00000000  /* Operand B sign bit + mantissa */

    /* Clear FPU registers (R0 = 0) */
    BL      m33_cst_write_fpu_regs_from_r0

    /* Run test subroutine */
    BL      m33_cst_spfpu_adder_test_denorm_sft_sp


    /**************************************************************************/
    /* Renormalize shifter check:                                             */
    /**************************************************************************/
    /* - Let's have 2 positive numbers A, B in the form
         (exp_A, m_A) and (exp_B, m_B).

       - Let mantissa m_A = 0x002AAAAA
       - Perform the subtraction A-B for the following mantissas m_B:
         0x002AAAA8, 0x002AAAA0, ..., 0x00200000, 0x00000000,
         The subtraction will provide the following results which will be further
         renormalized:
         0x00000002, 0x0000000A, ..., 0x000AAAAA, 0x002AAAAA

       - Let mantissa m_A = 0x00555555
       - Perform the subtraction A-B for the following mantissas m_B:
         0x00555554, 0x00555550, ..., 0x00400000, 0x00000000,
         The subtraction will provide the following results which will be further
         renormalized: 0x00000001, 0x00000005, ..., 0x00155555, 0x00555555,

       - Because of the renormalization, the output mantissa is always produced
         in the form: 0x00200000, 0x00280000, 0x002A0000, 0x002A8000, ...
         for both input mantissas m_A = 0x002AAAAA and 0x00555555.

       - To force producing the output mantissas in the form:
         0x00400000, 0x00500000, 0x00540000, 0x00550000, ...
         additional subtractions mod_A-B are performed with the modified operand
         mod_A = A | high_bit, where high_bit =
         0x00000002, 0x00000004, 0x00000008, ... with the aim to produce the
         differences where two MSB bits are set (e.g. 0b11000, 0b11010).
         Such results will lead to renormalized mantissas
         0x00400000, 0x00500000, 0x00540000, 0x00550000, ...
     **************************************************************************/
    MOV     R2,#0x3F800000  /* Prepare exponent exp=127 */
    ORR     R7,R2,R3        /* 0x3FAAAAAA Operand A sign + mantissa (Case1) */
    ORR     R9,R2,R4        /* 0x3FD55555 Operand A sign + mantissa (Case2) */

    /* Clear FPU registers (R0 is still 0) */
    BL      m33_cst_write_fpu_regs_from_r0

    /* Run test subroutine */
    BL      m33_cst_spfpu_adder_test_renorm_sft_sp


    /**************************************************************************/
    /*  Check handling the signed values                                      */
    /**************************************************************************/
    /* ADD:
        (+) + (+) -> (+)      Case 1  (already covered)
        (+) + (-) -> (+)      Case 2
        (+) + (-) -> (-)      Case 3
        (+) + (-) -> (0)      Case 4
        (-) + (+) -> (+)      Case 5
        (-) + (+) -> (-)      Case 6a
        (-) + (+) -> (0)      Case 6b
        (-) + (-) -> (-)      Case 7
      SUB:
        (+) - (+) -> (+)      Case 8
        (+) - (+) -> (-)      Case 9
        (+) - (+) -> (0)      Case 10
        (+) - (-) -> (+)      Case 11
        (-) - (+) -> (-)      Case 12
        (-) - (-) -> (+)      Case 13
        (-) - (-) -> (-)      Case 14
        (-) - (-) -> (0)      Case 15
    **************************************************************************/
    LDR     R3,=0x3F8CCCCD      /* ~1.1 -> S0 (R3 is reused by the next section) */
    LDR     R2,=0x40200000      /* ~2.5 -> S1 */
    LDR     R4,=0x40B66666      /* ~5.7 -> S2, S7 */
    ORR     R5,R4,#0x80000000   /* -S2  -> S3 */
    LDR     R6,=0xBFA66666      /* ~-1.3 -> S4 */
    LDR     R7,=0xC0266666      /* ~-2.6 -> S5 */
    LDR     R8,=0xC0ECCCCD      /* ~-7.4 -> S6, S8 */

    /* Clear FPU registers (R0 is still 0) */
    BL      m33_cst_write_fpu_regs_from_r0

    /* Write FPU registers */
    VMOV    S0,S1,R3,R2
    VMOV    S2,S3,R4,R5
    VMOV    S4,S5,R6,R7
    VMOV    S6,S7,R8,R4
    VMOV    S8,R8

    /* Run test code (FPSCR: Round towards Zero) */
    VADD.F32    S31,S2,S4       /* (+) + (-) -> (+)      Case 2 */
    VADD.F32    S30,S0,S5       /* (+) + (-) -> (-)      Case 3 */
    VADD.F32    S29,S2,S3       /* (+) + (-) -> (0)      Case 4 - the result is depending also on the rounding mode */
    VADD.F32    S28,S4,S1       /* (-) + (+) -> (+)      Case 5 */
    VADD.F32    S27,S6,S1       /* (-) + (+) -> (-)      Case 6a */
    VADD.F32    S26,S3,S2       /* (-) + (+) -> (0)      Case 6b - the result is depending also on the rounding mode */
    VADD.F32    S25,S5,S6       /* (-) + (-) -> (-)      Case 7 */

    VSUB.F32    S24,S1,S0       /* (+) - (+) -> (+)      Case 8  */
    VSUB.F32    S23,S0,S2       /* (+) - (+) -> (-)      Case 9  */
    VSUB.F32    S22,S2,S7       /* (+) - (+) -> (0)      Case 10 */
    VSUB.F32    S21,S0,S4       /* (+) - (-) -> (+)      Case 11 */
    VSUB.F32    S20,S4,S0       /* (-) - (+) -> (-)      Case 12 */
    VSUB.F32    S19,S5,S6       /* (-) - (-) -> (+)      Case 13 */
    VSUB.F32    S18,S5,S4       /* (-) - (-) -> (-)      Case 14 */
    VSUB.F32    S17,S6,S8       /* (-) - (-) -> (0)      Case 15 */

    /* Set Round towards Minus Infinity */
    MOV     R0,#(2<<22)         /* Prepare RMode bits */
    VMSR    FPSCR,R0            /* Clear FPSCR & Set Round towards Minus Infinity */

    /* Continue with test code.
       NOTE(review): S16 and S15 are below the S17..S31 range named by the
       signature routine called below, so these two results do not appear to
       reach the signature. Left unchanged because altering the summed range
       would alter the reference signature - confirm intent. */
    VADD.F32    S16,S2,S3       /* (+) + (-) -> (0)      Case 4  - rounding to minus infinity produces negative 0 */
    VADD.F32    S15,S3,S2       /* (-) + (+) -> (0)      Case 6b - rounding to minus infinity produces negative 0 */

    /* Set Round towards Zero */
    MOV     R0,#(3<<22)         /* Prepare RMode bits */
    VMSR    FPSCR,R0            /* Clear FPSCR & Set Round towards Zero */

    /* !! Update Signature !! */
    BL          m33_cst_sum_fpu_s17_s31_to_r1_destr_r2


    /**************************************************************************/
    /*  Check handling special cases (sNaN, qNaN, +-Infinity, +-0)            */
    /*  Single Precision                                                      */
    /**************************************************************************/
    MOV     R4,#0xAAAAAAAA
    MOV     R3,R3               /* ~1.1 (see above)                     -> S0 */
    MOV     R5,#0x7F800000      /* +Inf                                 -> S3 */
    ORR     R2,R5,R4,LSR #8     /* (0x7FAAAAAA) Signal NaN (BIT[22]==0) -> S1 */
    ORR     R4,R5,R4,LSR #9     /* (0x7FD55555) Quiet  NaN (BIT[22]==1) -> S2 */
    MOV     R0,#0x00000000      /* +0                                   -> S5 */
    MOV     R8,#0x80000000      /* -0                                   -> S6 */
    ORR     R6,R5,R8            /* (0xFF800000) -Inf                    -> S4 */

    /* Clear FPU registers (R0 = 0) */
    BL      m33_cst_write_fpu_regs_from_r0

    /* Write FPU registers */
    VMOV    S0,S1,R3,R2
    VMOV    S2,S3,R4,R5
    VMOV    S4,S5,R6,R0
    VMOV    S6,R8

    /* Each step below: clear FPSCR, run one VADD, read FPSCR back and fold
       the flags into the signature (R1 = FPSCR ^ ROR(R1,1)). */
    MOV     R0,#0
    VMSR    FPSCR,R0                /* Clear FPSCR register */
    VADD.F32    S31,S0,S1           /* valid num. + sNaN => FPSCR[IOC] */
    VMRS    R2,FPSCR                /* Load FPSCR register  */
    EOR     R1,R2,R1,ROR #1         /* !! Update Signature !! */
    VMSR    FPSCR,R0                /* Clear FPSCR register */
    VADD.F32    S30,S1,S0           /* sNaN + valid num. => FPSCR[IOC] */
    VMRS    R2,FPSCR                /* Load FPSCR register  */
    EOR     R1,R2,R1,ROR #1         /* !! Update Signature !! */
    VMSR    FPSCR,R0                /* Clear FPSCR register */
    VADD.F32    S29,S0,S2           /* valid num. + qNaN  */
    VMRS    R2,FPSCR                /* Load FPSCR register  */
    EOR     R1,R2,R1,ROR #1         /* !! Update Signature !! */
    VMSR    FPSCR,R0                /* Clear FPSCR register */
    VADD.F32    S28,S2,S0           /* qNaN + valid num.  */
    VMRS    R2,FPSCR                /* Load FPSCR register  */
    EOR     R1,R2,R1,ROR #1         /* !! Update Signature !! */
    VMSR    FPSCR,R0                /* Clear FPSCR register */
    VADD.F32    S27,S0,S3           /* valid num. + (+Inf) */
    VMRS    R2,FPSCR                /* Load FPSCR register  */
    EOR     R1,R2,R1,ROR #1         /* !! Update Signature !! */
    VMSR    FPSCR,R0                /* Clear FPSCR register */
    VADD.F32    S26,S3,S0           /* (+Inf) + valid num. */
    VMRS    R2,FPSCR                /* Load FPSCR register  */
    EOR     R1,R2,R1,ROR #1         /* !! Update Signature !! */
    VMSR    FPSCR,R0                /* Clear FPSCR register */
    VADD.F32    S25,S0,S4           /* valid num. + (-Inf) */
    VMRS    R2,FPSCR                /* Load FPSCR register  */
    EOR     R1,R2,R1,ROR #1         /* !! Update Signature !! */
    VMSR    FPSCR,R0                /* Clear FPSCR register */
    VADD.F32    S24,S4,S0           /* (-Inf) + valid num. */
    VMRS    R2,FPSCR                /* Load FPSCR register  */
    EOR     R1,R2,R1,ROR #1         /* !! Update Signature !! */
    VMSR    FPSCR,R0                /* Clear FPSCR register */
    VADD.F32    S23,S4,S3           /* (-Inf) + (+Inf) => FPSCR[IOC] */
    VMRS    R2,FPSCR                /* Load FPSCR register  */
    EOR     R1,R2,R1,ROR #1         /* !! Update Signature !! */
    VMSR    FPSCR,R0                /* Clear FPSCR register */
    VADD.F32    S22,S3,S4           /* (+Inf) + (-Inf) => FPSCR[IOC] */
    VMRS    R2,FPSCR                /* Load FPSCR register  */
    EOR     R1,R2,R1,ROR #1         /* !! Update Signature !! */
    VMSR    FPSCR,R0                /* Clear FPSCR register */
    VADD.F32    S21,S3,S3           /* (+Inf) + (+Inf)     */
    VMRS    R2,FPSCR                /* Load FPSCR register  */
    EOR     R1,R2,R1,ROR #1         /* !! Update Signature !! */
    VMSR    FPSCR,R0                /* Clear FPSCR register */
    VADD.F32    S20,S4,S4           /* (-Inf) + (-Inf)     */
    VMRS    R2,FPSCR                /* Load FPSCR register  */
    EOR     R1,R2,R1,ROR #1         /* !! Update Signature !! */
    VMSR    FPSCR,R0                /* Clear FPSCR register */

    /* FPSCR = 0 here, i.e. RMode = 0b00 (Round to Nearest) */
    VADD.F32    S19,S6,S5           /* (-0) + (+0); Depends on Rounding mode */
    VADD.F32    S18,S5,S6           /* (+0) + (-0); Depends on Rounding mode */
    VADD.F32    S17,S5,S5           /* (+0) + (+0)         */
    VADD.F32    S16,S6,S6           /* (-0) + (-0)         */

    /* Set Round towards Minus Infinity */
    MOV     R0,#(2<<22)
    VMSR    FPSCR,R0

    VADD.F32    S15,S6,S5           /* (-0) + (+0); Depends on Rounding mode */
    VADD.F32    S14,S5,S6           /* (+0) + (-0); Depends on Rounding mode */
    VADD.F32    S13,S5,S5           /* (+0) + (+0)         */
    VADD.F32    S12,S6,S6           /* (-0) + (-0)         */


    /* Check producing the default NaNs */
    MOV     R0,#(1<<25)             /* Set FPSCR.DN bit */
    VMSR    FPSCR,R0                /* Clear FPSCR & Enable Default NaN */

    VADD.F32    S11,S0,S1           /* valid num. + sNaN */
    VADD.F32    S10,S1,S0           /* sNaN + valid num. */
    VADD.F32    S9,S0,S2            /* valid num. + qNaN */
    VADD.F32    S8,S2,S0            /* qNaN + valid num. */

    /* !! Update Signature !! */
    BL          m33_cst_sum_fpu_s8_s31_to_r1_destr_r2

    /**************************************************************************/
    /*  Check handling denormalized numbers and setting FPSCR[IDC] bit        */
    /*  Check setting FPSCR[IXC] (Inexact Cumulative Exc.) bit                */
    /**************************************************************************/
    /* Single Precision initial values */
    MOV     R4,#0x3F000000
    MOV     R5,#0x00800000          /*             -> S2 */
    MOV     R6,#0x00000003          /*             -> S3 */
    MOV     R7,#0x3F800000          /*             -> S0 */
    ORR     R8,R4,R6                /*(0x3F000003) -> S1 */

    /* Clear FPU registers.
       R0 still holds (1<<25) from the default-NaN step above, so it must be
       zeroed first; otherwise the registers would be filled with 0x02000000
       instead of being cleared. The registers read below (S0-S3) and summed
       below (S29-S31) are all rewritten afterwards, so the signature should
       not depend on this fill value (assumes the helper only writes the FPU
       registers). */
    MOV     R0,#0
    BL      m33_cst_write_fpu_regs_from_r0

    /* Write FPU registers */
    VMOV    S2,S3,R5,R6
    VMOV    S0,S1,R7,R8

    /* Run test code */
    MOV     R0,#0
    VMSR    FPSCR,R0                /* Clear FPSCR register */
    VSUB.F32    S31,S2,S3           /* 0x00800000 - 0x00000003: result is a denormalized SP number */
    VMRS    R2,FPSCR                /* Load FPSCR register  */
    EOR     R1,R2,R1,ROR #1         /* !! Update Signature !! */

    MOV     R0,#(1<<24)             /* Set FPSCR.FZ bit */
    VMSR    FPSCR,R0                /* Clear FPSCR & Enable Flush-to-zero */
    VSUB.F32    S30,S2,S3           /* Flush to zero - generate FPSCR[IDC] */
    VMRS    R2,FPSCR                /* Load FPSCR register  */
    EOR     R1,R2,R1,ROR #1         /* !! Update Signature !! */

    MOV     R0,#0
    VMSR    FPSCR,R0                /* Clear FPSCR register */
    VADD.F32    S29,S0,S1           /* Generate FPSCR[IXC] */
    VMRS    R2,FPSCR                /* Load FPSCR register  */
    EOR     R1,R2,R1,ROR #1         /* !! Update Signature !! */

    /* !! Update Signature !! */
    BL          m33_cst_sum_fpu_s29_s31_to_r1_destr_r2

    /*------------------------------------------------------------------------*/
    /* Test - end                                                             */
    /*------------------------------------------------------------------------*/
m33_cst_spfpu_adder_test_end:
    VMSR    FPSCR,R12   /* Restore FPSCR register */
    /* Test result is returned in R0, according to the conventions */
    MOV     R0,R1
    B       m33_cst_test_tail_fpu_end




/******************************************************************************/
/*  Subroutines checking the internal shifters and swap functionality         */
/******************************************************************************/

/******************************************************************************/
/*  Denorm shifter - Single Precision                                         */
/******************************************************************************/
/*  Test all cases where exponents have the following values:
    
   Case1)   expA     |  0|  9| 18|...|189|198|207|216| 
            expB     |  0| 10| 20|...|210|220|230|240|
            ------------------------------------------
            (expDiff)|  0|  1|  2|...| 21| 22| 23| 24|
       
       
   Case2)   expA     |  0|  8| 16|...|168|176|184|192|
            expB     |  0|  9| 18|...|189|198|207|216|
            ------------------------------------------
            (expDiff)|  0|  1|  2|...| 21| 22| 23| 24|

   ****************************************************************************/
/*  Register usage of this subroutine:
      In:       R0  = Operand B sign bit + mantissa (both cases)
                R3  = Case1) Operand A sign bit + mantissa
                R4  = Case2) Operand A sign bit + mantissa
                R1  = signature accumulator (updated on every pass)
      Scratch:  R5-R8 (operand bit patterns), R9/R10 (expA of Case1/Case2),
                R11 (pass counter, runs 0..24 => 25 passes)
      The return address is kept on the stack because the loop body itself
      executes BL. */
m33_cst_spfpu_adder_test_denorm_sft_sp:
    PUSH    {R14}

    MOV     R11,#0                   /* iteration init */
    MOV     R9,#0                    /* Case1) expA init */
    MOV     R10,#0                   /* Case2) expA init */

m33_cst_spfpu_adder_test_denorm_sft_sp_loop_back:
    /* expB - expA equals the pass counter, so the exponent difference grows
       by one on every pass. */
    /* Case1) Prepare operands */
    ORR     R5,R3,R9,LSL #23    /* Operand A = (sign | mantissa) | exp << 23 */
    ADD     R6,R9,R11           /*      expB = current expA + iteration */
    ORR     R6,R0,R6,LSL #23    /* Operand B = (sign | mantissa) | exp << 23 */
    /* Case2) Prepare operands */
    ORR     R7,R4,R10,LSL #23   /* Operand A  = (sign | mantissa) | exp << 23 */
    ADD     R8,R10,R11          /*      expB  = current expA + iteration */
    ORR     R8,R0,R8,LSL #23    /* Operand B  = (sign | mantissa) | exp << 23 */
    
    /* Write operands to FPU registers */
    VMOV    S0,S1,R5,R6     /* S0 = Operand A  S1 = Operand B */
    VMOV    S2,S3,R7,R8     /* S2 = Operand A  S3 = Operand B */
    
    /* Run test code */
    VADD.F32    S31,S0,S1   /* Case1) */    
    VADD.F32    S30,S1,S0   /* Exchange operands to check swap functionality */
    VADD.F32    S29,S2,S3   /* Case2) */
    VADD.F32    S28,S3,S2   /* Exchange operands to check swap functionality */

    /* !! Update Signature !! (all four results S28..S31 of this pass) */
    BL      m33_cst_sum_fpu_s28_s31_to_r1_destr_r2
    
    /* Prepare exponents for next iteration */
    ADD     R9,R9,#9        /* Case1) expA = current expA + exp increment */
    ADD     R10,R10,#8      /* Case2) expA = current expA + exp increment */  
    ADD     R11,R11,#1      /* ++ iteration */
    
    /* Repeat while the pass counter is below 25 (exponent difference 0..24) */
    CMP     R11,#25
    BLT     m33_cst_spfpu_adder_test_denorm_sft_sp_loop_back   /* LT - Less Than */

    POP     {R15}   /* Exit subroutine: pop the saved return address into PC */


/******************************************************************************/
/*  Renorm shifter - Single Precision                                         */
/******************************************************************************/
/*  Register usage of this subroutine:
      In:       R7  = Case1) Operand A bit pattern
                R9  = Case2) Operand A bit pattern
                R1  = signature accumulator (updated on every pass)
      Scratch:  R2, R3 (Operand B), R4 (high_bit), R5, R6 (0x7FFFFFFF),
                R8 (mask), R10, R11, R14 (modified Operand A values)
      R14 is used as a data register inside the loop, so the return address
      is kept on the stack.
      The mask starts at 0xFFFFFFFC and is shifted left by 2 per pass; the
      loop ends when it reaches 0xFC000000, i.e. after 12 passes. */
m33_cst_spfpu_adder_test_renorm_sft_sp:
    PUSH    {R14}

    MOV     R8,#0xFFFFFFFC          /* initial mask */
    MOV     R4,#0x00000002          /* initial high_bit*/
    MOV     R6,#0x7FFFFFFF          /* R6 LSR #8 = 0x007FFFFF limits high_bit to the mantissa field */

m33_cst_spfpu_adder_test_renorm_sft_sp_loop_back:

    AND     R2,R7,R8            /* Case1) Operand B = (Operand A)&(mask) */
    AND     R3,R9,R8            /* Case2) Operand B = (Operand A)&(mask) */
    
    AND     R5,R4,R6,LSR #8     /* R5 = high_bit restricted to bits [22:0] */
    ORR     R10,R5,R7           /* Case1) Operand A = (Operand A)|(high_bit) */
    ORR     R11,R5,R9           /* Case2) alt Operand A = (Operand A)|(high_bit) */
    
    ROR     R4,R4,#31           /* high_bit = high_bit<<1 (rotate right by 31 = rotate left by 1) */
    AND     R5,R4,R6,LSR #8     /* R5 = shifted high_bit restricted to bits [22:0] */
    ORR     R14,R5,R7           /* Case1) alt Operand A = (Operand A)|(high_bit) */
    ORR     R5,R5,R9            /* Case2) Operand A = (Operand A)|(high_bit) */
    
    /* Write operands to FPU registers */
    VMOV    S21,S22,R2,R3       /* S21 = Case1) Operand B, S22 = Case2) Operand B */
    VMOV    S10,S11,R10,R11     /* S10 = Case1) Operand A, S11 = Case2) alt Operand A */
    VMOV    S12,S13,R5,R14      /* S12 = Case2) Operand A, S13 = Case1) alt Operand A */
    
    /* Run test code */
    VSUB.F32    S31,S10,S21     /* Case1)     Operand A - Operand B */
    VSUB.F32    S30,S12,S22     /* Case2      Operand A - Operand B */
    VSUB.F32    S29,S13,S21     /* Case1) alt Operand A - Operand B */
    VSUB.F32    S28,S11,S22     /* Case2) alt Operand A - Operand B */
    
    /* !! Update Signature !! */
    /* NOTE(review): four results (S28..S31) are produced above, but the
       routine called here is the S30..S31 variant, so going by its name the
       two "alt" results in S29 and S28 are not folded into the signature.
       Switching to the S28..S31 variant would change the reference
       signature, so this is only flagged - confirm intent. */
    BL      m33_cst_sum_fpu_s30_s31_to_r1_destr_r2
    
    /* high_bit = high_bit<<1 */
    ROR     R4,R4,#31   /* high_bit is shifted twice by one and the mask 
                           once by two per one loop iteration */
    
    /* produce next mask => mask = (mask)<<2 */
    LSL     R8,R8,#2    /* (mask is ANDed with values 0x55.. and 0xAA.. and one
                            bit shift would produce the same values  
                            (e.g. (0xAA & 0xFF) = (0xAA & 0xFE) = 0xAA )*/                                   
    CMP     R8,#0xFC000000      /* stop once the mask has reached 0xFC000000 */
    BNE     m33_cst_spfpu_adder_test_renorm_sft_sp_loop_back
    
    POP     {R15}   /* Exit subroutine: pop the saved return address into PC */
    
    
    CST_ALIGN_BYTES_4
    /* Marks the current location for dumping the pseudo-instruction literal
       pools containing the numeric values used by the LDR instructions. */
    CST_LTORG

#endif  /* CORTST_M33_FPU_ENABLE */

    CST_FILE_END

