2 * Copyright (c) 2020 Raspberry Pi (Trading) Ltd.
4 * SPDX-License-Identifier: BSD-3-Clause
// The code below is assembled only when both PICO_FLOAT_SUPPORT_ROM_V1 and
// PICO_RP2040_B0_SUPPORTED evaluate to non-zero.
9#if PICO_FLOAT_SUPPORT_ROM_V1 && PICO_RP2040_B0_SUPPORTED
// Default PICO_FLOAT_IN_RAM to 0 when the build has not already defined it.
14#ifndef PICO_FLOAT_IN_RAM
15#define PICO_FLOAT_IN_RAM 0
// float_section <name>: switch the assembler to the section used for <name>.
// Two alternative .section directives are visible below (RAM_SECTION_NAME vs
// SECTION_NAME); both use the "ax" flags (allocatable, executable).
// NOTE(review): the conditional that selects between the two directives is not
// visible in this excerpt - presumably keyed on PICO_FLOAT_IN_RAM; confirm.
18.macro float_section name
19// todo separate flag for shims?
21.section RAM_SECTION_NAME(\name), "ax"
23.section SECTION_NAME(\name), "ax"
// float_table_shim_on_use_helper: helper placed in its own section via float_section.
// NOTE(review): most of the body is not visible in this excerpt; the calling
// convention (which registers are live on entry) should be confirmed in the full source.
27float_section float_table_shim_on_use_helper
28regular_func float_table_shim_on_use_helper
32 // sanity check to make sure we weren't called by non (shimmable_) table_tail_call macro
// uxtb zero-extends the low byte of r1, discarding bits 8..31.
43 uxtb r1, r1 // r1 holds table offset
60float_section 642float_shims
// uint642float_shim: integer entry point for the unsigned 64-bit conversion.
// It clears r2 (the count of bits after the point, per the ufix642float_shim
// comment) and then falls through rather than branching.
62@ convert uint64 to float, rounding
63regular_func uint642float_shim
64 movs r2,#0 @ fall through
// ufix642float_shim: unsigned 64-bit fixed-point to float.
// Inputs (from the comment below): value in r0:r1, fractional bit count in r2.
// NOTE(review): the instruction that sets the flags tested by bpl is not visible
// in this excerpt.
66@ convert unsigned 64-bit fix to float, rounding; number of r0:r1 bits after point in r2
67regular_func ufix642float_shim
// N flag clear: branch forward to local label 3 and reuse the signed path.
70 bpl 3f @ positive? we can use signed code
// r5 = r1 << 31, i.e. bit 0 of r1 moved to bit 31.
71 lsls r5,r1,#31 @ contribution to sticky bits
// int642float_shim: integer entry point for the signed 64-bit conversion.
// It clears r2 (the count of bits after the point, per the fix642float_shim
// comment) and then falls through rather than branching.
77@ convert int64 to float, rounding
78regular_func int642float_shim
79 movs r2,#0 @ fall through
// fix642float_shim: signed 64-bit fixed-point to float.
// Inputs (from the comment below): value in r0:r1, fractional bit count in r2.
// NOTE(review): only part of the normalisation loop is visible in this excerpt;
// the flag-setting instructions preceding beq/bne are not shown.
81@ convert signed 64-bit fix to float, rounding; number of r0:r1 bits after point in r2
82regular_func fix642float_shim
// ret_pop45 is defined outside the visible lines.
87 beq ret_pop45 @ zero? return +0
// r5 = r1 arithmetically shifted right by 31: all zeros or all ones.
88 asrs r5,r1,#31 @ sign bits
90 asrs r4,r1,#24 @ try shifting 7 bits at a time
92 bne 1f @ next shift will overflow?
// Literal-pool load of the constant 0x29ef, which the original comment names packx.
// NOTE(review): presumably a ROM routine address (odd value = Thumb bit) - confirm.
107 ldr r1, =0x29ef // packx
// fatan2_shim: atan2 shim (see the "atan2(0,1)==0" note near the end).
// Uses three literal-pool constants named in the comments below: 0x29c1 (unpackx),
// 0x2b97 (cordic_vec, called with blx) and 0x2cfc (&pi_q29, circular coefficients).
// Angles in the visible code are described as Q29 fixed point.
// NOTE(review): only part of the body is visible in this excerpt; argument registers,
// the local labels targeted by the branches and the final packing call should be
// confirmed against the full source. The constants are presumably ROM addresses.
112float_section fatan2_shim
113regular_func fatan2_shim
116 ldr r4, =0x29c1 // unpackx
118@ unpack arguments and shift one down to have common exponent
129 adds r4,r2,r3 @ this is -760 if both arguments are 0 and at least -380-126=-506 otherwise
132 bmi 2f @ force y to 0 proper, so result will be zero
133 subs r4,r2,r3 @ calculate shift
135 negs r4,r4 @ make shift positive
146@ here |x|>>|y| or both x and y are ±0
148 bge 4f @ x positive, return signed 0
149 ldr r3, =0x2cfc @ &pi_q29, circular coefficients
// Load the first word at &pi_q29 into r0.
150 ldr r0,[r3] @ x negative, return +/- pi
158 movs r2,#0 @ initial angle
159 ldr r3, =0x2cfc @ &pi_q29, circular coefficients
160 cmp r0,#0 @ x negative
162 negs r0,r0 @ rotate to 1st/4th quadrants
// Indirect call: r5 is loaded with the cordic_vec constant and then called via blx.
167 ldr r5, =0x2b97 @ cordic_vec
168 blx r5 @ also produces magnitude (with scaling factor 1.646760119), which is discarded
// The angle comes back in r2 and is copied to r0 for range fix-up.
169 mov r0,r2 @ result here is -pi/2..3pi/2 Q29
172 ldr r3, =0x2cfc @ &pi_q29, circular coefficients
// NOTE(review): the load that places the constant in r2 for the adds/subs below is
// not visible in this excerpt.
174 adds r4,r0,r2 @ attempt to fix -3pi/2..-pi case
175 bcs 6f @ -pi/2..0? leave result as is
176 subs r4,r0,r2 @ <pi? leave as is
178 subs r0,r4,r2 @ >pi: take off 2pi
180 subs r0,#1 @ fiddle factor so atan2(0,1)==0
182 movs r2,#0 @ exponent for pack
186float_section float232_shims
// float2int_shim: clears r1 and falls through to float2fix_shim.
// NOTE(review): r1 is presumably the number of fractional bits (0 for a plain
// integer result) - confirm against float2fix_shim's full body.
188regular_func float2int_shim
189 movs r1,#0 @ fall through
// float2fix_shim: also entered by fall-through from float2int_shim with r1 = 0.
// NOTE(review): the body beyond the first comment is not visible in this excerpt.
190regular_func float2fix_shim
191 // check for -0 or -denormal upfront
203float_section float264_shims
// float2int64_shim: clears r1 and falls through to float2fix64_shim.
// NOTE(review): r1 is presumably the number of places after the point (0 for a
// plain integer result) - confirm.
205regular_func float2int64_shim
206 movs r1,#0 @ and fall through
// float2fix64_shim: also entered by fall-through from float2int64_shim with r1 = 0.
// NOTE(review): the body of this entry point is not visible in this excerpt.
207regular_func float2fix64_shim
// float2uint64_shim: clears r1 and falls through to float2ufix64_shim.
// NOTE(review): r1 is presumably the number of places after the point (0 for a
// plain integer result) - confirm.
212regular_func float2uint64_shim
213 movs r1,#0 @ and fall through
// float2ufix64_shim: also entered by fall-through from float2uint64_shim with r1 = 0.
// NOTE(review): the branch that consumes the flags set below is not visible here.
214regular_func float2ufix64_shim
// Arithmetic shift right by 23 leaves sign and exponent in r3 and sets N from the
// sign bit of the float in r0.
215 asrs r3,r0,#23 @ negative? return 0
// Float-to-fixed helper; its contract is given by the two comment lines below.
// NOTE(review): the routine's label and most of its body are not visible in this
// excerpt, so the instructions below are isolated steps, not a complete sequence.
219@ convert float in r0 to signed fixed point in r0:r1:r3, r1 places after point, rounding towards -Inf
220@ result clamped so that r3 can only be 0 or -1
// 0x7f is the single-precision exponent bias.
233 subs r2,#0x7f @ remove exponent bias
235 subs r0,r1 @ insert implied 1
237 subs r0,r3 @ top two's complement
238 asrs r1,r0,#4 @ convert to double format
// r0 = bitwise NOT of r3.
248 mvns r0,r3 @ return max/min value
// Place d2fix_a in its own section and declare the symbol weak, so that another
// definition of d2fix_a can take precedence at link time.
257float_section d2fix_a_float
259.weak d2fix_a // weak because it exists in float shims too
263@ r0:r1 two's complement mantissa
264@ r2 unbiased exponent
265@ r3 mantissa sign extension bits
// Body of d2fix_a: shifts the mantissa into its fixed-point position, with separate
// paths for a shift up, a shift down and a long shift down, and clamps to the
// extreme fixed-point values on overflow. Register roles on entry are listed in the
// comment lines immediately above; r12 holds the binary point offset added to r2.
// NOTE(review): the d2fix_a label, the local labels and several instructions on each
// path are not visible in this excerpt.
266 add r2,r12 @ exponent plus offset for required binary point position
// 52 is subtracted to obtain the required shift.
// NOTE(review): presumably the double mantissa width - confirm.
267 subs r2,#52 @ required shift
269@ here a shift up by r2 places
270 cmp r2,#12 @ will clamp?
276 adds r2,#32 @ complementary shift
// r1 = bitwise NOT of the sign extension bits in r3.
282 mvns r1,r3 @ overflow: clamp to extreme fixed-point values
285@ here a shift down by -r2 places
291 adds r2,#32 @ complementary shift
297@ here a long shift down
299 asrs r1,#31 @ shift down 32 places
301 bmi 1f @ very long shift?
307 movs r0,r3 @ result very near zero: use sign extension bits
313 bne 1f @ sign extension bits fail to match sign of result?
319 eors r1,r1,r0 @ generate extreme fixed-point values
// float2double_shim: widen the single-precision float in r0 to double precision.
// Visible steps: extract the sign into r3, rebias the exponent by (0x3ff-0x7f),
// and build the result with the exponent and top mantissa bits in r1 and the
// bottom 3 mantissa bits in r0. Zero and infinity return through separate paths.
// NOTE(review): the branches selecting the zero/infinity paths and the instruction
// that fills r1 before the first use are not visible in this excerpt.
322float_section float2double_shim
323regular_func float2double_shim
324 lsrs r3,r0,#31 @ sign bit
327 lsrs r2,r1,#24 @ exponent
331 lsrs r1,#4 @ exponent and top 20 bits of mantissa
// 0x3ff is the double exponent bias, 0x7f the single one; shifted to bit 20 where
// the exponent field of the high word starts.
332 ldr r2,=(0x3ff-0x7f)<<20 @ difference in exponent offsets
335 lsls r0,#29 @ bottom 3 bits of mantissa
338 movs r1,r3 @ return signed zero
// 0x7ff00000 is the high word of a double with an all-ones exponent and zero mantissa.
343 ldr r1,=0x7ff00000 @ return signed infinity