2 * Copyright (c) 2020 Raspberry Pi (Trading) Ltd.
4 * SPDX-License-Identifier: BSD-3-Clause
9#include "divider_helper.S"
// NOTE(review): __pre_init is defined outside this view; presumably this registers
// __aeabi_float_init to run during pre-initialisation, with 00020 as its ordering
// key - confirm against the macro definition.
11__pre_init __aeabi_float_init, 00020
17.macro float_section name
19.section RAM_SECTION_NAME(\name), "ax"
21.section SECTION_NAME(\name), "ax"
25.macro float_wrapper_section func
26float_section WRAPPER_FUNC_NAME(\func)
29.macro _float_wrapper_func x
33.macro wrapper_func_f1 x
34 _float_wrapper_func \x
35#if PICO_FLOAT_PROPAGATE_NANS
42.macro wrapper_func_f2 x
43 _float_wrapper_func \x
44#if PICO_FLOAT_PROPAGATE_NANS
53#if PICO_FLOAT_PROPAGATE_NANS
82.macro table_tail_call SF_TABLE_OFFSET
83#if PICO_FLOAT_SUPPORT_ROM_V1 && PICO_RP2040_B0_SUPPORTED
90 ldr r3, [r3, #\SF_TABLE_OFFSET]
94.macro shimmable_table_tail_call SF_TABLE_OFFSET shim
96 ldr r3, [r3, #\SF_TABLE_OFFSET]
97#if PICO_FLOAT_SUPPORT_ROM_V1 && PICO_RP2040_B0_SUPPORTED
101#if PICO_FLOAT_SUPPORT_ROM_V1 && PICO_RP2040_B0_SUPPORTED
102.byte \SF_TABLE_OFFSET, 0xdf
108// note generally each function is in a separate section unless there is fall-through or branching between them
109// note fadd, fsub, fmul and fdiv are tiny and just defer to the ROM, so they are lumped together in one section to share a constant pool
111// note functions are word aligned except where they are an odd number of linear instructions
113// float FUNC_NAME(__aeabi_fadd)(float, float) single-precision addition
114float_wrapper_section __aeabi_farithmetic
115// float FUNC_NAME(__aeabi_frsub)(float x, float y) single-precision reverse subtraction, y - x
117// frsub first because it is the only one that needs alignment
119wrapper_func __aeabi_frsub
125// float FUNC_NAME(__aeabi_fsub)(float x, float y) single-precision subtraction, x - y
126wrapper_func_f2 __aeabi_fsub
127#if PICO_FLOAT_PROPAGATE_NANS
128 // we want to return nan for inf-inf or -inf - -inf, but without too much upfront cost
131 bmi 1f // different signs
134 b fdiv_fsub_nan_helper
137 table_tail_call SF_TABLE_FSUB
// float FUNC_NAME(__aeabi_fadd)(float, float) single-precision addition
// Lives in the shared __aeabi_farithmetic section opened above; the whole body is a
// tail call through table offset SF_TABLE_FADD.
// wrapper_func_f2 declares the entry via _float_wrapper_func and has a
// PICO_FLOAT_PROPAGATE_NANS-conditional part (NOTE(review): presumably NaN operand
// handling for two-argument functions - the macro body is not fully visible here).
139wrapper_func_f2 __aeabi_fadd
140 table_tail_call SF_TABLE_FADD
142// float FUNC_NAME(__aeabi_fdiv)(float n, float d) single-precision division, n / d
143wrapper_func_f2 __aeabi_fdiv
144#if PICO_FLOAT_PROPAGATE_NANS
147 b fdiv_fsub_nan_helper
150#if !PICO_DIVIDER_DISABLE_INTERRUPTS
151 // to support IRQ usage (or context switch) we must save/restore divider state around call if state is dirty
153 ldr r3, [r2, #SIO_DIV_CSR_OFFSET]
154 lsrs r3, #SIO_DIV_CSR_DIRTY_SHIFT_FOR_CARRY
157 // to avoid worrying about IRQs (or context switches), simply disable interrupts around call
166 table_tail_call SF_TABLE_FDIV
167#if !PICO_DIVIDER_DISABLE_INTERRUPTS
169 save_div_state_and_lr
172 restore_div_state_and_return
176#if PICO_FLOAT_PROPAGATE_NANS
179 // check for infinite op infinite (or rather check for infinite result with both
180 // operands being infinite)
202// float FUNC_NAME(__aeabi_fmul)(float, float) single-precision multiplication
203wrapper_func_f2 __aeabi_fmul
204#if PICO_FLOAT_PROPAGATE_NANS
209 // check for multiplication of infinite by zero (or rather check for infinite result with either
227 table_tail_call SF_TABLE_FMUL
229// void FUNC_NAME(__aeabi_cfrcmple)(float, float) reversed 3-way (<, =, ?>) compare [1], result in PSR ZC flags
230float_wrapper_section __aeabi_cfcmple
232wrapper_func __aeabi_cfrcmple
237 b __aeabi_cfcmple_guts
239// NOTE these share an implementation as we have no excepting NaNs.
240// void FUNC_NAME(__aeabi_cfcmple)(float, float) 3-way (<, =, ?>) compare [1], result in PSR ZC flags
241// void FUNC_NAME(__aeabi_cfcmpeq)(float, float) non-excepting equality comparison [1], result in PSR ZC flags
243wrapper_func __aeabi_cfcmple
244wrapper_func __aeabi_cfcmpeq
256 lsrs r0,#23 @ clear mantissa if denormal or infinite
267 lsrs r1,#23 @ clear mantissa if denormal or infinite
270 movs r2,#1 @ initialise result
272 bmi 2f @ opposite signs? then can proceed on basis of sign of x
273 eors r1,r0 @ restore y
281 orrs r1, r0 @ handle 0/-0
282 adds r1, r1 @ note this always sets C
284 mvns r0, r0 @ carry inverse of r0 sign
290// int FUNC_NAME(__aeabi_fcmpeq)(float, float) result (1, 0) denotes (=, ?<>) [2], use for C == and !=
291float_wrapper_section __aeabi_fcmpeq
293wrapper_func __aeabi_fcmpeq
303// int FUNC_NAME(__aeabi_fcmplt)(float, float) result (1, 0) denotes (<, ?>=) [2], use for C <
304float_wrapper_section __aeabi_fcmplt
306wrapper_func __aeabi_fcmplt
312// int FUNC_NAME(__aeabi_fcmple)(float, float) result (1, 0) denotes (<=, ?>) [2], use for C <=
313float_wrapper_section __aeabi_fcmple
315wrapper_func __aeabi_fcmple
325// int FUNC_NAME(__aeabi_fcmpge)(float, float) result (1, 0) denotes (>=, ?<) [2], use for C >=
326float_wrapper_section __aeabi_fcmpge
328wrapper_func __aeabi_fcmpge
330 // because of NaNs it is better to reverse the args than the result
339// int FUNC_NAME(__aeabi_fcmpgt)(float, float) result (1, 0) denotes (>, ?<=) [2], use for C >
340float_wrapper_section __aeabi_fcmpgt
341wrapper_func __aeabi_fcmpgt
343 // because of NaNs it is better to reverse the args than the result
348// int FUNC_NAME(__aeabi_fcmpun)(float, float) result (1, 0) denotes (?, <=>) [2], use for C99 isunordered()
349float_wrapper_section __aeabi_fcmpun
350wrapper_func __aeabi_fcmpun
366// float FUNC_NAME(__aeabi_ui2f)(unsigned) unsigned to float (single precision) conversion
367float_wrapper_section __aeabi_ui2f
368wrapper_func __aeabi_ui2f
375float_wrapper_section __aeabi_i2f
376// float FUNC_NAME(__aeabi_i2f)(int) integer to float (single precision) conversion
377wrapper_func __aeabi_i2f
397 adds r1,#0x80 @ rounding
398 bcs 5f @ tripped carry? then have leading 1 in C as required (and result is even so can ignore sticky bits)
400 lsls r3,r1,#24 @ check bottom 8 bits of r1
401 beq 6f @ in rounding-tie case?
402 lsls r1,#1 @ remove leading 1
404 lsrs r1,#9 @ align mantissa
405 lsls r0,#23 @ align exponent
406 orrs r0,r2 @ assemble exponent and mantissa
408 orrs r0,r1 @ apply sign
412 adds r0,#1 @ correct exponent offset
415 lsrs r1,#9 @ ensure even result
422// int FUNC_NAME(__aeabi_f2iz)(float) float (single precision) to integer C-style conversion [3]
423float_wrapper_section __aeabi_f2iz
424wrapper_func __aeabi_f2iz
425regular_func float2int_z
// float2int: placed in its own section by float_section and implemented purely as a
// tail call through table offset SF_TABLE_FLOAT2INT.
// float2int_shim is passed to shimmable_table_tail_call as the shim argument.
// NOTE(review): presumably the shim is the fallback used on
// PICO_FLOAT_SUPPORT_ROM_V1 && PICO_RP2040_B0_SUPPORTED builds when the table
// lacks this entry - confirm against the macro body.
469float_section float2int
470regular_func float2int
471 shimmable_table_tail_call SF_TABLE_FLOAT2INT float2int_shim
// float2fix: own section, no local logic; tail-calls through table offset
// SF_TABLE_FLOAT2FIX, passing float2fix_shim as the shim argument of
// shimmable_table_tail_call.
// NOTE(review): by name this is a float -> signed fixed-point conversion; the
// argument convention is not visible in this file section - confirm.
473float_section float2fix
474regular_func float2fix
475 shimmable_table_tail_call SF_TABLE_FLOAT2FIX float2fix_shim
// float2ufix: own section; the body is only a tail call through table offset
// SF_TABLE_FLOAT2UFIX. Uses the plain (non-shimmable) table_tail_call, so no shim
// routine is named here.
// NOTE(review): by name this is a float -> unsigned fixed-point conversion - confirm.
477float_section float2ufix
478regular_func float2ufix
479 table_tail_call SF_TABLE_FLOAT2UFIX
481// unsigned FUNC_NAME(__aeabi_f2uiz)(float) float (single precision) to unsigned C-style conversion [3]
// The wrapper gets its own wrapper section and contains no local logic: it tail-calls
// straight through table offset SF_TABLE_FLOAT2UINT.
482float_wrapper_section __aeabi_f2uiz
483wrapper_func __aeabi_f2uiz
484 table_tail_call SF_TABLE_FLOAT2UINT
// fix2float: own section; implemented entirely as a tail call through table offset
// SF_TABLE_FIX2FLOAT (plain table_tail_call, no shim).
// NOTE(review): by name this is a signed fixed-point -> float conversion - confirm.
486float_section fix2float
487regular_func fix2float
488 table_tail_call SF_TABLE_FIX2FLOAT
// ufix2float: own section; implemented entirely as a tail call through table offset
// SF_TABLE_UFIX2FLOAT (plain table_tail_call, no shim).
// NOTE(review): by name this is an unsigned fixed-point -> float conversion - confirm.
490float_section ufix2float
491regular_func ufix2float
492 table_tail_call SF_TABLE_UFIX2FLOAT
// fix642float: own section; tail-calls through table offset SF_TABLE_FIX642FLOAT
// with fix642float_shim passed as the shim argument of shimmable_table_tail_call.
// NOTE(review): by name this is a 64-bit signed fixed-point -> float conversion;
// presumably the shim covers ROM versions without this table entry - confirm.
494float_section fix642float
495regular_func fix642float
496 shimmable_table_tail_call SF_TABLE_FIX642FLOAT fix642float_shim
// ufix642float: own section; tail-calls through table offset SF_TABLE_UFIX642FLOAT
// with ufix642float_shim passed as the shim argument of shimmable_table_tail_call.
// NOTE(review): by name this is a 64-bit unsigned fixed-point -> float conversion;
// presumably the shim covers ROM versions without this table entry - confirm.
498float_section ufix642float
499regular_func ufix642float
500 shimmable_table_tail_call SF_TABLE_UFIX642FLOAT ufix642float_shim
502// float FUNC_NAME(__aeabi_l2f)(long long) long long to float (single precision) conversion
503float_wrapper_section __aeabi_l2f
507wrapper_func __aeabi_l2f
511 shimmable_table_tail_call SF_TABLE_INT642FLOAT int642float_shim
513// float FUNC_NAME(__aeabi_ul2f)(unsigned long long) unsigned long long to float (single precision) conversion
514float_wrapper_section __aeabi_ul2f
516 ldr r2, =__aeabi_ui2f
518wrapper_func __aeabi_ul2f
521 shimmable_table_tail_call SF_TABLE_UINT642FLOAT uint642float_shim
523// long long FUNC_NAME(__aeabi_f2lz)(float) float (single precision) to long long C-style conversion [3]
524float_wrapper_section __aeabi_f2lz
525wrapper_func __aeabi_f2lz
526regular_func float2int64_z
// float2int64: own section; tail-calls through table offset SF_TABLE_FLOAT2INT64
// with float2int64_shim passed as the shim argument of shimmable_table_tail_call.
// NOTE(review): by name this is a float -> 64-bit signed integer conversion; the
// rounding behaviour is defined by the table routine, not visible here - confirm.
547float_section float2int64
548regular_func float2int64
549 shimmable_table_tail_call SF_TABLE_FLOAT2INT64 float2int64_shim
// float2fix64: own section; tail-calls through table offset SF_TABLE_FLOAT2FIX64
// with float2fix64_shim passed as the shim argument of shimmable_table_tail_call.
// NOTE(review): by name this is a float -> 64-bit signed fixed-point conversion - confirm.
551float_section float2fix64
552regular_func float2fix64
553 shimmable_table_tail_call SF_TABLE_FLOAT2FIX64 float2fix64_shim
555// unsigned long long FUNC_NAME(__aeabi_f2ulz)(float) float to unsigned long long C-style conversion [3]
// The wrapper gets its own wrapper section and has no local logic: it tail-calls through
// table offset SF_TABLE_FLOAT2UINT64, passing float2uint64_shim as the shim argument.
// NOTE(review): presumably the shim is the fallback for ROM versions without this
// table entry - confirm against shimmable_table_tail_call.
556float_wrapper_section __aeabi_f2ulz
557wrapper_func __aeabi_f2ulz
558 shimmable_table_tail_call SF_TABLE_FLOAT2UINT64 float2uint64_shim
// float2ufix64: own section; tail-calls through table offset SF_TABLE_FLOAT2UFIX64
// with float2ufix64_shim passed as the shim argument of shimmable_table_tail_call.
// NOTE(review): by name this is a float -> 64-bit unsigned fixed-point conversion - confirm.
560float_section float2ufix64
561regular_func float2ufix64
562 shimmable_table_tail_call SF_TABLE_FLOAT2UFIX64 float2ufix64_shim
564float_wrapper_section __aeabi_f2d
566#if PICO_FLOAT_PROPAGATE_NANS
567 // copy sign bit and 25 NAN id bits into sign bit and significant ID bits, also setting the high id bit
575wrapper_func __aeabi_f2d
576#if PICO_FLOAT_PROPAGATE_NANS
583 shimmable_table_tail_call SF_TABLE_FLOAT2DOUBLE float2double_shim
// Section for the sqrtf wrapper (the code that follows tail-calls SF_TABLE_FSQRT).
// The argument only feeds the section name (float_wrapper_section ->
// WRAPPER_FUNC_NAME), so the earlier spelling "srqtf" just produced a misnamed
// section; symbol names are unaffected by this correction.
// NOTE(review): check that no linker script matches the old misspelled section name.
585float_wrapper_section sqrtf
587#if PICO_FLOAT_SUPPORT_ROM_V1 && PICO_RP2040_B0_SUPPORTED
588 // check for negative
592 table_tail_call SF_TABLE_FSQRT
593#if PICO_FLOAT_SUPPORT_ROM_V1 && PICO_RP2040_B0_SUPPORTED
598 // -0 or -Denormal return -0 (0x80000000)
602 // return -Inf (0xff800000)
608float_wrapper_section cosf
609// note we don't use _f1 since we do an infinity/nan check for outside of range
611 // rom version only works for -128 < angle < 128
617 table_tail_call SF_TABLE_FCOS
619#if PICO_FLOAT_PROPAGATE_NANS
620 // also check for infinities
630 ldr r1, =0x40c90fdb // 2 * M_PI
637float_wrapper_section sinf
638// note we don't use _f1 since we do an infinity/nan check for outside of range
640 // rom version only works for -128 < angle < 128
646 table_tail_call SF_TABLE_FSIN
648#if PICO_FLOAT_PROPAGATE_NANS
649 // also check for infinities
659 ldr r1, =0x40c90fdb // 2 * M_PI
666float_wrapper_section sincosf
667// note we don't use _f1 since we do an infinity/nan check for outside of range
670 // rom version only works for -128 < angle < 128
677 ldr r3, [r3, #SF_TABLE_FSIN]
683#if PICO_FLOAT_PROPAGATE_NANS
688#if PICO_FLOAT_PROPAGATE_NANS
689 // also check for infinities
702 ldr r1, =0x40c90fdb // 2 * M_PI
709float_wrapper_section tanf
710// note we don't use _f1 since we do an infinity/nan check for outside of range
712 // rom version only works for -128 < angle < 128
716 bge ftan_out_of_range
718#if !PICO_DIVIDER_DISABLE_INTERRUPTS
719 // to support IRQ usage (or context switch) we must save/restore divider state around call if state is dirty
721 ldr r3, [r2, #SIO_DIV_CSR_OFFSET]
722 lsrs r3, #SIO_DIV_CSR_DIRTY_SHIFT_FOR_CARRY
725 // to avoid worrying about IRQs (or context switches), simply disable interrupts around call
734 table_tail_call SF_TABLE_FTAN
735#if !PICO_DIVIDER_DISABLE_INTERRUPTS
737 save_div_state_and_lr
740 restore_div_state_and_return
743#if PICO_FLOAT_PROPAGATE_NANS
744 // also check for infinities
754 ldr r1, =0x40c90fdb // 2 * M_PI
// atan2f wrapper: gets its own wrapper section and tail-calls through table offset
// SF_TABLE_FATAN2, passing fatan2_shim as the shim argument of shimmable_table_tail_call.
// Declared with wrapper_func_f2, which has a PICO_FLOAT_PROPAGATE_NANS-conditional
// part (NOTE(review): presumably NaN operand handling for two-argument functions -
// the macro body is not fully visible here).
761float_wrapper_section atan2f
762wrapper_func_f2 atan2f
763 shimmable_table_tail_call SF_TABLE_FATAN2 fatan2_shim
765float_wrapper_section expf
767 table_tail_call SF_TABLE_FEXP
769float_wrapper_section logf
771 table_tail_call SF_TABLE_FLN