2 * Copyright (c) 2020 Raspberry Pi (Trading) Ltd.
4 * SPDX-License-Identifier: BSD-3-Clause
9#include "divider_helper.S"
11__pre_init __aeabi_double_init, 00020
17.macro double_section name
19.section RAM_SECTION_NAME(\name), "ax"
21.section SECTION_NAME(\name), "ax"
25.macro _double_wrapper_func x
29.macro wrapper_func_d1 x
30 _double_wrapper_func \x
31#if PICO_DOUBLE_PROPAGATE_NANS
38.macro wrapper_func_d2 x
39 _double_wrapper_func \x
40#if PICO_DOUBLE_PROPAGATE_NANS
49#if PICO_DOUBLE_PROPAGATE_NANS
84.macro table_tail_call SF_TABLE_OFFSET
86#if PICO_DOUBLE_SUPPORT_ROM_V1 && PICO_RP2040_B0_SUPPORTED
93 ldr r3, [r3, #\SF_TABLE_OFFSET]
98.macro shimmable_table_tail_call SF_TABLE_OFFSET shim
101 ldr r3, [r3, #\SF_TABLE_OFFSET]
102#if PICO_DOUBLE_SUPPORT_ROM_V1 && PICO_RP2040_B0_SUPPORTED
107#if PICO_DOUBLE_SUPPORT_ROM_V1 && PICO_RP2040_B0_SUPPORTED
108.byte \SF_TABLE_OFFSET, 0xdf
113.macro double_wrapper_section func
114double_section WRAPPER_FUNC_NAME(\func)
117double_section push_r8_r11
118regular_func push_r8_r11
126double_section pop_r8_r11
127regular_func pop_r8_r11
135// note generally each function is in a separate section unless there is fall thru or branching between them
136// note dadd, dsub, dmul, ddiv are so tiny (they just defer to the ROM) that they are lumped together so they can share a constant pool
138// note functions are word aligned except where they are an odd number of linear instructions
140// double FUNC_NAME(__aeabi_dadd)(double, double) double-precision addition
141double_wrapper_section __aeabi_darithmetic
142// double FUNC_NAME(__aeabi_drsub)(double x, double y) double-precision reverse subtraction, y - x
144// drsub first because it is the only one that needs alignment
146wrapper_func __aeabi_drsub
152// double FUNC_NAME(__aeabi_dsub)(double x, double y) double-precision subtraction, x - y
153wrapper_func_d2 __aeabi_dsub
154#if PICO_DOUBLE_PROPAGATE_NANS
155 // we want to return nan for inf-inf or -inf - -inf, but without too much upfront cost
159 bmi 1f // different signs
163 b ddiv_dsub_nan_helper
168 shimmable_table_tail_call SF_TABLE_FSUB dsub_shim
170wrapper_func_d2 __aeabi_dadd
171 shimmable_table_tail_call SF_TABLE_FADD dadd_shim
173// double FUNC_NAME(__aeabi_ddiv)(double n, double d) double-precision division, n / d
174wrapper_func_d2 __aeabi_ddiv
175#if PICO_DOUBLE_PROPAGATE_NANS
178 b ddiv_dsub_nan_helper
181#if !PICO_DIVIDER_DISABLE_INTERRUPTS
182 // to support IRQ usage (or context switch) we must save/restore divider state around call if state is dirty
185 ldr r2, [r2, #SIO_DIV_CSR_OFFSET]
186 lsrs r2, #SIO_DIV_CSR_DIRTY_SHIFT_FOR_CARRY
190 // to avoid worrying about IRQs (or context switches), simply disable interrupts around call
199 shimmable_table_tail_call SF_TABLE_FDIV ddiv_shim
201#if !PICO_DIVIDER_DISABLE_INTERRUPTS
204 save_div_state_and_lr
208 restore_div_state_and_return
212#if PICO_DOUBLE_PROPAGATE_NANS
213 // check for infinite op infinite (or rather check for infinite result with both
214 // operands being infinite)
240// double FUNC_NAME(__aeabi_dmul)(double, double) double-precision multiplication
241wrapper_func_d2 __aeabi_dmul
242#if PICO_DOUBLE_PROPAGATE_NANS
246 // check for multiplication of infinite by zero (or rather check for infinite result with either
268 shimmable_table_tail_call SF_TABLE_FMUL dmul_shim
270// void FUNC_NAME(__aeabi_cdrcmple)(double, double) reversed 3-way (<, =, ?>) compare [1], result in PSR ZC flags
271double_wrapper_section __aeabi_cdcmple
273wrapper_func __aeabi_cdrcmple
281 b __aeabi_dfcmple_guts
283// NOTE these share an implementation as we have no excepting NaNs.
284// void FUNC_NAME(__aeabi_cdcmple)(double, double) 3-way (<, =, ?>) compare [1], result in PSR ZC flags
285// void FUNC_NAME(__aeabi_cdcmpeq)(double, double) non-excepting equality comparison [1], result in PSR ZC flags
286@ compare r0:r1 against r2:r3, returning -1/0/1 for <, =, >
287@ also set flags accordingly
289wrapper_func __aeabi_cdcmple
290wrapper_func __aeabi_cdcmpeq
293 ldr r7,=0x7ff @ flush NaNs and denormals
320 bmi 4f @ opposite signs? then can proceed on basis of sign of x
321 eors r3,r1 @ restore r3
336 orrs r3,r1 @ make -0==+0
341 mvns r1, r1 @ carry inverse of r1 sign
346// int FUNC_NAME(__aeabi_dcmpeq)(double, double) result (1, 0) denotes (=, ?<>) [2], use for C == and !=
347double_wrapper_section __aeabi_dcmpeq
349wrapper_func __aeabi_dcmpeq
359// int FUNC_NAME(__aeabi_dcmplt)(double, double) result (1, 0) denotes (<, ?>=) [2], use for C <
360double_wrapper_section __aeabi_dcmplt
362wrapper_func __aeabi_dcmplt
368// int FUNC_NAME(__aeabi_dcmple)(double, double) result (1, 0) denotes (<=, ?>) [2], use for C <=
369double_wrapper_section __aeabi_dcmple
371wrapper_func __aeabi_dcmple
381// int FUNC_NAME(__aeabi_dcmpge)(double, double) result (1, 0) denotes (>=, ?<) [2], use for C >=
382double_wrapper_section __aeabi_dcmpge
384wrapper_func __aeabi_dcmpge
386 // because of NaNs it is better to reverse the args than the result
395// int FUNC_NAME(__aeabi_dcmpgt)(double, double) result (1, 0) denotes (>, ?<=) [2], use for C >
396double_wrapper_section __aeabi_dcmpgt
397wrapper_func __aeabi_dcmpgt
399 // because of NaNs it is better to reverse the args than the result
404// int FUNC_NAME(__aeabi_dcmpun)(double, double) result (1, 0) denotes (?, <=>) [2], use for C99 isunordered()
405double_wrapper_section __aeabi_dcmpun
406wrapper_func __aeabi_dcmpun
424// double FUNC_NAME(__aeabi_ui2d)(unsigned) unsigned to double (double precision) conversion
// NOTE(review): within the __aeabi_ui2d section as shown here, the tail call below is not
// preceded by any entry label, so nothing visible can reach it — confirm whether it is
// intentional or leftover code.
425double_wrapper_section __aeabi_ui2d
426 shimmable_table_tail_call SF_TABLE_UINT2FLOAT uint2double_shim
// NOTE(review): the __aeabi_ui2d entry point is emitted after the __aeabi_i2d section
// directive, so it is placed in that section rather than in the one opened above;
// presumably so ui2d and i2d can share code by branching/fall-through — confirm.
428double_wrapper_section __aeabi_i2d
430wrapper_func __aeabi_ui2d
436// double FUNC_NAME(__aeabi_i2d)(int) integer to double (double precision) conversion
437wrapper_func __aeabi_i2d
444 push {r0, r1, r4, lr}
460// int FUNC_NAME(__aeabi_d2iz)(double) double (double precision) to integer C-style conversion [3]
461double_wrapper_section __aeabi_d2iz
462wrapper_func __aeabi_d2iz
463regular_func double2int_z
// double2int: placed in its own section; the whole body is a single dispatch through
// shimmable_table_tail_call with table offset SF_TABLE_FLOAT2INT and double2int_shim as
// the shim argument.
// Presumably int32_t double2int(double f) as declared in pico/double.h — TODO confirm
// the signature and rounding behaviour, neither is visible here.
505double_section double2int
506regular_func double2int
507 shimmable_table_tail_call SF_TABLE_FLOAT2INT double2int_shim
509// unsigned FUNC_NAME(__aeabi_d2uiz)(double) double (double precision) to unsigned C-style conversion [3]
// Two names are declared back to back ahead of one tail call: the __aeabi_d2uiz wrapper
// and the plain double2uint function. Assuming wrapper_func/regular_func only emit
// labels (their definitions are not in this file), both names resolve to the same code.
// Dispatch uses table offset SF_TABLE_FLOAT2UINT with double2uint_shim as the shim.
510double_wrapper_section __aeabi_d2uiz
511wrapper_func __aeabi_d2uiz
512regular_func double2uint
513 shimmable_table_tail_call SF_TABLE_FLOAT2UINT double2uint_shim
// fix2double: placed in its own section; the body is a single dispatch through
// shimmable_table_tail_call with table offset SF_TABLE_FIX2FLOAT and fix2double_shim as
// the shim argument.
// Presumably double fix2double(int32_t m, int e) (signed fixed point with e fractional
// bits) as declared in pico/double.h — TODO confirm.
515double_section fix2double
516regular_func fix2double
517 shimmable_table_tail_call SF_TABLE_FIX2FLOAT fix2double_shim
// ufix2double: placed in its own section; the body is a single dispatch through
// shimmable_table_tail_call with table offset SF_TABLE_UFIX2FLOAT and ufix2double_shim
// as the shim argument.
// Presumably double ufix2double(uint32_t m, int e) as declared in pico/double.h —
// TODO confirm.
519double_section ufix2double
520regular_func ufix2double
521 shimmable_table_tail_call SF_TABLE_UFIX2FLOAT ufix2double_shim
// fix642double: placed in its own section; the body is a single dispatch through
// shimmable_table_tail_call with table offset SF_TABLE_FIX642FLOAT and fix642double_shim
// as the shim argument.
// Presumably double fix642double(int64_t m, int e) as declared in pico/double.h —
// TODO confirm.
523double_section fix642double
524regular_func fix642double
525 shimmable_table_tail_call SF_TABLE_FIX642FLOAT fix642double_shim
// ufix642double: the body is a single dispatch through shimmable_table_tail_call with
// table offset SF_TABLE_UFIX642FLOAT and ufix642double_shim as the shim argument.
// Presumably double ufix642double(uint64_t m, int e) as declared in pico/double.h —
// TODO confirm.
// The section is named after this function (not ufix2double), so that it follows the
// one-section-per-function layout used by the other conversion routines and can be
// placed or garbage-collected independently by the linker.
527double_section ufix642double
528regular_func ufix642double
529 shimmable_table_tail_call SF_TABLE_UFIX642FLOAT ufix642double_shim
531// double FUNC_NAME(__aeabi_l2d)(long long) long long to double (double precision) conversion
// Own wrapper section; the body is a single dispatch through shimmable_table_tail_call
// with table offset SF_TABLE_INT642FLOAT and int642double_shim as the shim argument.
// NOTE(review): the offset constant carries a FLOAT name; presumably the same index is
// valid in the double function table — confirm.
532double_wrapper_section __aeabi_l2d
533wrapper_func __aeabi_l2d
534 shimmable_table_tail_call SF_TABLE_INT642FLOAT int642double_shim
536// double FUNC_NAME(__aeabi_ul2d)(unsigned long long) unsigned long long to double (double precision) conversion
// __aeabi_ul2d wrapper in its own section; the body is a single dispatch through
// shimmable_table_tail_call with table offset SF_TABLE_UINT642FLOAT and
// uint642double_shim as the shim argument.
// NOTE(review): the offset constant carries a FLOAT name; presumably the same index is
// valid in the double function table — confirm.
537double_wrapper_section __aeabi_ul2d
538wrapper_func __aeabi_ul2d
539 shimmable_table_tail_call SF_TABLE_UINT642FLOAT uint642double_shim
541// long long FUNC_NAME(__aeabi_d2lz)(double) double (double precision) to long long C-style conversion [3]
542double_wrapper_section __aeabi_d2lz
543wrapper_func __aeabi_d2lz
544regular_func double2int64_z
565double_section double2int64
566regular_func double2int64
567 shimmable_table_tail_call SF_TABLE_FLOAT2INT64 double2int64_shim
569// unsigned long long FUNC_NAME(__aeabi_d2ulz)(double) double to unsigned long long C-style conversion [3]
// Own wrapper section; the body is a single dispatch through shimmable_table_tail_call
// with table offset SF_TABLE_FLOAT2UINT64 and double2uint64_shim as the shim argument.
// Unlike __aeabi_d2uiz, no additional regular_func alias is declared here.
570double_wrapper_section __aeabi_d2ulz
571wrapper_func __aeabi_d2ulz
572 shimmable_table_tail_call SF_TABLE_FLOAT2UINT64 double2uint64_shim
// double2fix64: placed in its own section; the body is a single dispatch through
// shimmable_table_tail_call with table offset SF_TABLE_FLOAT2FIX64 and double2fix64_shim
// as the shim argument.
// Presumably int64_t double2fix64(double f, int e) as declared in pico/double.h —
// TODO confirm.
574double_section double2fix64
575regular_func double2fix64
576 shimmable_table_tail_call SF_TABLE_FLOAT2FIX64 double2fix64_shim
// double2ufix64: placed in its own section; the body is a single dispatch through
// shimmable_table_tail_call with table offset SF_TABLE_FLOAT2UFIX64 and
// double2ufix64_shim as the shim argument.
// Presumably uint64_t double2ufix64(double f, int e) as declared in pico/double.h —
// TODO confirm.
578double_section double2ufix64
579regular_func double2ufix64
580 shimmable_table_tail_call SF_TABLE_FLOAT2UFIX64 double2ufix64_shim
// double2fix: placed in its own section; the body is a single dispatch through
// shimmable_table_tail_call with table offset SF_TABLE_FLOAT2FIX and double2fix_shim as
// the shim argument.
// Presumably int32_t double2fix(double f, int e) as declared in pico/double.h —
// TODO confirm.
582double_section double2fix
583regular_func double2fix
584 shimmable_table_tail_call SF_TABLE_FLOAT2FIX double2fix_shim
// double2ufix: placed in its own section; the body is a single dispatch through
// shimmable_table_tail_call with table offset SF_TABLE_FLOAT2UFIX and double2ufix_shim
// as the shim argument.
// Presumably uint32_t double2ufix(double f, int e) as declared in pico/double.h —
// TODO confirm.
586double_section double2ufix
587regular_func double2ufix
588 shimmable_table_tail_call SF_TABLE_FLOAT2UFIX double2ufix_shim
590double_wrapper_section __aeabi_d2f
592#if PICO_DOUBLE_PROPAGATE_NANS
593 // copy sign bit and 23 NAN id bits into sign bit and significant id bits, also set high id bit
604wrapper_func __aeabi_d2f
605#if PICO_DOUBLE_PROPAGATE_NANS
612 // note double->float in double table at same index as float->double in float table
613 shimmable_table_tail_call SF_TABLE_FLOAT2DOUBLE double2float_shim
// sqrt wrapper: the section is named after the wrapped function (sqrt), consistent with
// the cos/sin/tan/exp/log wrapper sections in this file. The body dispatches through
// shimmable_table_tail_call with table offset SF_TABLE_FSQRT and dsqrt_shim as the shim
// argument.
615double_wrapper_section sqrt
617 shimmable_table_tail_call SF_TABLE_FSQRT dsqrt_shim
619double_wrapper_section sincostan_remainder
620regular_func sincostan_remainder
621 ldr r2, =0x54442D18 // 2 * M_PI
624 // note remainder only uses the divider thru integer divider functions
625 // which save and restore themselves
629double_wrapper_section cos
630// don't use _d1 as we're doing a range check anyway and infinities/NaNs are bigger than 1024
632 // rom version only works for -1024 < angle < 1024
639 shimmable_table_tail_call SF_TABLE_FCOS dcos_shim
641#if PICO_DOUBLE_PROPAGATE_NANS
654 bl sincostan_remainder
659double_wrapper_section sin
660// don't use _d1 as we're doing a range check anyway and infinities/NaNs are bigger than 1024
662 // rom version only works for -1024 < angle < 1024
669 shimmable_table_tail_call SF_TABLE_FSIN dsin_shim
671#if PICO_DOUBLE_PROPAGATE_NANS
684 bl sincostan_remainder
689double_wrapper_section sincos
690 // out of line remainder code for abs(angle)>=1024
692#if PICO_DOUBLE_PROPAGATE_NANS
708 bl sincostan_remainder
711 b 1f // continue with sincos
715 // rom version only works for -1024 < angle < 1024
722 bl 2f // call the shim
729 shimmable_table_tail_call SF_TABLE_V3_FSINCOS sincos_shim_bootstrap
732sincos_shim_bootstrap:
736#if PICO_DOUBLE_SUPPORT_ROM_V1 && PICO_RP2040_B0_SUPPORTED
739 ldr r3, =dsincos_shim
743 ldr r3, =dsincos_shim_v2
746 str r3, [r2, #SF_TABLE_V3_FSINCOS]
753 bl v2_rom_dsincos_internal
755 bl v2_rom_dcos_finish
758 bl v2_rom_dsin_finish
763v2_rom_dsincos_internal:
781double_wrapper_section tan
782// don't use _d1 as we're doing a range check anyway and infinities/NaNs are bigger than 1024
784 // rom version only works for -1024 < angle < 1024
789 bge dtan_angle_out_of_range
791#if !PICO_DIVIDER_DISABLE_INTERRUPTS
792 // to support IRQ usage (or context switch) we must save/restore divider state around call if state is dirty
795 ldr r2, [r2, #SIO_DIV_CSR_OFFSET]
796 lsrs r2, #SIO_DIV_CSR_DIRTY_SHIFT_FOR_CARRY
800 // to avoid worrying about IRQs (or context switches), simply disable interrupts around call
809 shimmable_table_tail_call SF_TABLE_FTAN dtan_shim
810#if !PICO_DIVIDER_DISABLE_INTERRUPTS
813 save_div_state_and_lr
817 restore_div_state_and_return
819dtan_angle_out_of_range:
820#if PICO_DOUBLE_PROPAGATE_NANS
833 bl sincostan_remainder
838double_wrapper_section atan2
840 shimmable_table_tail_call SF_TABLE_FATAN2 datan2_shim
842double_wrapper_section exp
844 shimmable_table_tail_call SF_TABLE_FEXP dexp_shim
846double_wrapper_section log
848 shimmable_table_tail_call SF_TABLE_FLN dln_shim