On 2006 March 03, on the IBM developerWorks Cell Broadband Engine Architecture forum [ibm.com], an interesting question was asked:
It turned out this function was not available in the SDK.
The following is a branch-free implementation of atan2 for vector floats on the SPU. A scalar version, which simply casts to vector and back, is also provided. This implementation is fairly quick-and-dirty and no particular level of accuracy is guaranteed, but it should be usable for many purposes.
Or download the source files:
cp_fatan-cbe-spu.h
cp_fatan-cbe-spu.c
"I am trying to port an application from an older version of SDK to SDK
1.0. It uses atan2(.....) function, which is causing trouble... This
code worked fine on SDK28, but now it looks like the new functions dont
have this particular function defined..
I did change the makefile to include $(SDKLIB)/libmath.a
I searched in ./sysroot/usr/spu/include/* and src/include/spu/* but couldn't find a headerfile that has it defined.
Can anyone please suggest if I should just change the code to not use that function or is there a way to invoke it still?
Thanks!"
I did change the makefile to include $(SDKLIB)/libmath.a
I searched in ./sysroot/usr/spu/include/* and src/include/spu/* but couldn't find a headerfile that has it defined.
Can anyone please suggest if I should just change the code to not use that function or is there a way to invoke it still?
Thanks!"
It turned out this function was not available in the SDK.
The following is a branch-free implementation of atan2 for vector floats on the SPU. A scalar version, which simply casts to vector and back, is also provided. This implementation is fairly quick-and-dirty and no particular level of accuracy is guaranteed, but it should be usable for many purposes.
Or download the source files:
cp_fatan-cbe-spu.h
cp_fatan-cbe-spu.c
This code is C99 source. For gcc, use the following flags: -std=c99 -pedantic
0// ## cp_fatan-cbe-spu.h (C99)
1// ## Version 1.0
2// ##
3// ## Copyright (c) 2006 Mike Acton
4// ##
5// ## SIGNIFICANT REFERENCES:
6// ##
7// ## [1] Cephes Math Library Release 2.8: June, 2000
8// ## Copyright 1984, 1995, 2000, Stephen L. Moshier
9// ## [2] Numerical Computation Guide (PDF)
10// ## Copyright 2000, Sun Microsystems, Inc.
11// ## [3] IEEE 754 Support in C99 (PDF)
12// ## Copyright 2001, Jim Thomas
13// ## [4] Solaris 10 Reference Manual : atan2(3M)
14// ## Copyright 1994-2005, Sun Microsystems, Inc.
15// ##
16// ## Permission is hereby granted, free of charge, to any person obtaining
17// ## a copy of this software and associated documentation files
18// ## (the "Software"), to deal in the Software without restriction, including
19// ## without limitation the rights to use, copy, modify, merge, publish,
20// ## distribute, sublicense, and/or sell copies of the Software, and to permit
21// ## persons to whom the Software is furnished to do so, subject to the
22// ## following conditions:
23// ##
24// ## The above copyright notice and this permission notice shall be included
25// ## in all copies or substantial portions of the Software.
26// ##
27// ## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
28// ## OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
29// ## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
30// ## AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
31// ## LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
32// ## OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
33// ## THE SOFTWARE.
34// ##
35
36#ifndef CP_FATAN_CBE_SPU_H
37#define CP_FATAN_CBE_SPU_H
38
39#include <stdint.h>
40#include <spu_intrinsics.h>
41
42// ##
43// ## Global Floating-point constants (32 bit)
44// ##
45// ## Constant is loaded in each element of 32 bit floating-point vector
46// ## from local store.
47// ##
48// ## cp_flpio4() +PI/+4
49// ## cp_flt3p8() tan( +3.0 * PI / +8.0 )
50// ## cp_flnpio2() -PI/+2
51// ## cp_flpio2() +PI/+2
52// ## cp_flpt66() +0.66
53// ## cp_flpi() +PI
54// ## cp_flnpi() -PI
55
56extern const vector unsigned int _cp_f_pio4;
57extern const vector unsigned int _cp_f_t3p8;
58extern const vector unsigned int _cp_f_npio2;
59extern const vector unsigned int _cp_f_pio2;
60extern const vector unsigned int _cp_f_pt66;
61extern const vector unsigned int _cp_f_pi;
62extern const vector unsigned int _cp_f_npi;
63
64static inline qword
65cp_flpio4( void )
66{
67 return si_lqa( (intptr_t)&_cp_f_pio4 );
68}
69
70static inline qword
71cp_flt3p8( void )
72{
73 return si_lqa( (intptr_t)&_cp_f_t3p8 );
74}
75
76static inline qword
77cp_flnpio2( void )
78{
79 return si_lqa( (intptr_t)&_cp_f_npio2 );
80}
81
82static inline qword
83cp_flpio2( void )
84{
85 return si_lqa( (intptr_t)&_cp_f_pio2 );
86}
87
88static inline qword
89cp_flpt66( void )
90{
91 return si_lqa( (intptr_t)&_cp_f_pt66 );
92}
93
94static inline qword
95cp_flpi( void )
96{
97 return si_lqa( (intptr_t)&_cp_f_pi );
98}
99
100static inline qword
101cp_flnpi( void )
102{
103 return si_lqa( (intptr_t)&_cp_f_npi );
104}
105
106// ##
107// ## Load-Immediate Floating-point constants (32 bit)
108// ##
109// ## Constant is loaded in each element of 32 bit floating-point vector
110// ## using immediate values. i.e. No loads
111// ##
112// ## cp_filzero() +0.0 +0x00000000
113// ## cp_filnzero() -0.0 +0x80000000
114// ## cp_filone() +1.0 +0x3f800000
115// ## cp_filtwo() +2.0 +0x40000000
116// ## cp_filinf() +INF +0x7f800000
117// ## cp_filninf() -INF +0xff800000
118// ## cp_filnan() NaN +0x7fc00000
119// ##
120
121static inline qword
122cp_filzero( void )
123{
124 return si_ilhu( (int16_t)0x0000 );
125}
126
127static inline qword
128cp_filnzero( void )
129{
130 return si_ilhu( (int16_t)0x8000 );
131}
132
133static inline qword
134cp_filone( void )
135{
136 return si_ilhu( (int16_t)0x3f80 );
137}
138
139static inline qword
140cp_filtwo( void )
141{
142 return si_ilhu( (int16_t)0x4000 );
143}
144
145static inline qword
146cp_filinf( void )
147{
148 return si_ilhu( (int16_t)0x7f80 );
149}
150
151static inline qword
152cp_filninf( void )
153{
154 return si_ilhu( (int16_t)0xff80 );
155}
156
157static inline qword
158cp_filnan( void )
159{
160 return si_ilhu( (int16_t)0x7fc0 );
161}
162
163// ##
164// ## cp_fatan() Coefficients and other constants
165// ##
166
167extern const vector unsigned int _cp_f_atan_q4;
168extern const vector unsigned int _cp_f_atan_q3;
169extern const vector unsigned int _cp_f_atan_q2;
170extern const vector unsigned int _cp_f_atan_q1;
171extern const vector unsigned int _cp_f_atan_q0;
172extern const vector unsigned int _cp_f_atan_p4;
173extern const vector unsigned int _cp_f_atan_p3;
174extern const vector unsigned int _cp_f_atan_p2;
175extern const vector unsigned int _cp_f_atan_p1;
176extern const vector unsigned int _cp_f_atan_p0;
177extern const vector unsigned int _cp_f_hmorebits;
178extern const vector unsigned int _cp_f_morebits;
179
180// ## cp_fatan(x)
181// ##
182// ## 0 <= x <= 0.66
183// ## -PI/2 <= cp_fatan(x) <= +PI/2
184// ##
185// ## Each floating-point component of the result is a function of
186// ## the corresponding components of x:
187// ##
188// ## 0.0 { x == 0.0
189// ##
190// ## +PI {
191// ## --- { x == INF
192// ## 2.0 {
193// ##
194// ## -PI {
195// ## --- { x == -INF
196// ## 2.0 {
197// ##
198// ##
199// ## 2 4 6 8 {
200// ## P + P x + P x + P x + P x {
201// ## 2 0 1 2 3 4 {
202// ## x x ----------------------------------- + x { otherwise
203// ## 2 4 6 8 10 {
204// ## Q + Q x + Q x + Q x + Q x + x {
205// ## 0 1 2 3 4 {
206
207static inline qword
208_cp_fatan( const qword x )
209{
210 // ##
211 // ## Load constants
212 // ##
213
214 const qword f_one = cp_filone();
215 const qword f_inf = cp_filinf();
216 const qword f_ninf = cp_filninf();
217 const qword f_msb = cp_filnzero();
218 const qword f_zero = cp_filzero();
219
220 const qword f_pt66 = si_lqa( (intptr_t)&_cp_f_pt66 );
221 const qword f_pio2 = si_lqa( (intptr_t)&_cp_f_pio2 );
222 const qword f_npio2 = si_lqa( (intptr_t)&_cp_f_npio2 );
223 const qword f_pio4 = si_lqa( (intptr_t)&_cp_f_pio4 );
224 const qword f_t3p8 = si_lqa( (intptr_t)&_cp_f_t3p8 );
225
226 const qword f_atan_p0 = si_lqa( (intptr_t)&_cp_f_atan_p0 );
227 const qword f_atan_p1 = si_lqa( (intptr_t)&_cp_f_atan_p1 );
228 const qword f_atan_p2 = si_lqa( (intptr_t)&_cp_f_atan_p2 );
229 const qword f_atan_p3 = si_lqa( (intptr_t)&_cp_f_atan_p3 );
230 const qword f_atan_p4 = si_lqa( (intptr_t)&_cp_f_atan_p4 );
231 const qword f_atan_q0 = si_lqa( (intptr_t)&_cp_f_atan_q0 );
232 const qword f_atan_q1 = si_lqa( (intptr_t)&_cp_f_atan_q1 );
233 const qword f_atan_q2 = si_lqa( (intptr_t)&_cp_f_atan_q2 );
234 const qword f_atan_q3 = si_lqa( (intptr_t)&_cp_f_atan_q3 );
235 const qword f_atan_q4 = si_lqa( (intptr_t)&_cp_f_atan_q4 );
236 const qword f_morebits = si_lqa( (intptr_t)&_cp_f_morebits );
237 const qword f_hmorebits = si_lqa( (intptr_t)&_cp_f_hmorebits );
238
239 // ##
240 // ## pos_x = -x { x < 0
241 // ## x { otherwise
242 // ##
243
244 const qword neg_x = si_xor( x, f_msb );
245 const qword sign_mask = si_fcgt( f_zero, x );
246 const qword pos_x = si_selb( x, neg_x, sign_mask );
247
248 // ##
249 // ## Range reduction
250 // ##
251
252 // ##
253 // ## range0_mask = ( pos_x > tan( 3.0 * PI / 8.0 ) )
254 // ## range1_mask = ( pos_x <= 0.66 )
255 // ## range2_mask = !( range0_mask || range1_mask )
256 // ##
257
258 const qword range0_mask = si_fcgt( pos_x, f_t3p8 );
259 const qword range1_gt_mask = si_fcgt( f_pt66, pos_x );
260 const qword range1_eq_mask = si_fceq( f_pt66, pos_x );
261 const qword range1_mask = si_or( range1_gt_mask, range1_eq_mask );
262 const qword range2_mask = si_nor( range0_mask, range1_mask );
263
264 // ##
265 // ## range0_x = -1.0
266 // ## -----
267 // ## pos_x
268 // ##
269 // ## range0_y = PI
270 // ## ---
271 // ## 2.0
272 // ##
273
274 const qword range0_x0 = si_frest( pos_x );
275 const qword range0_x1 = si_fi( pos_x, range0_x0 );
276 const qword range0_x2 = si_fnms( range0_x1, pos_x, f_one );
277 const qword range0_x3 = si_fma( range0_x2, range0_x1, range0_x1 );
278 const qword range0_x = si_xor( range0_x3, f_msb );
279 const qword range0_y = f_pio2;
280
281 // ##
282 // ## range1_x = pos_x
283 // ## range1_y = 0.0
284 // ##
285
286 const qword range1_x = pos_x;
287 const qword range1_y = f_zero;
288
289
290 // ##
291 // ## range2_x = (pos_x-1.0)
292 // ## -----------
293 // ## (pos_x+1.0)
294 // ##
295 // ## range2_y = PI
296 // ## ---
297 // ## 4.0
298 // ##
299
300 const qword range2_y = f_pio4;
301 const qword range2_x0num = si_fs( pos_x, f_one );
302 const qword range2_x0den = si_fa( pos_x, f_one );
303 const qword range2_x0 = si_frest( range2_x0den );
304 const qword range2_x1 = si_fnms( range2_x0, range2_x0den, f_one );
305 const qword range2_x2 = si_fma( range2_x1, range2_x0, range2_x0 );
306 const qword range2_x = si_fm( range2_x0num, range2_x2 );
307
308 // ##
309 // ## range_x = range0_x { range0_mask
310 // ## range1_x { range1_mask
311 // ## range2_x { range2_mask
312 // ##
313 // ## range_y = range0_y { range0_mask
314 // ## range1_y { range1_mask
315 // ## range2_y { range2_mask
316 // ##
317
318 const qword range_x0 = si_selb( range2_x, range0_x, range0_mask );
319 const qword range_x = si_selb( range_x0, range1_x, range1_mask );
320 const qword range_y0 = si_selb( range2_y, range0_y, range0_mask );
321 const qword range_y = si_selb( range_y0, range1_y, range1_mask );
322
323 // ##
324 // ## 2
325 // ## xp2 = range_x
326 // ## 2 3 4
327 // ## P + P xp2 + P xp2 + P xp2 + P xp2
328 // ## 0 1 2 3 4
329 // ## zdiv = ------------------------------------------
330 // ## 2 3 4 5
331 // ## Q + Q xp2 + Q xp2 + Q xp2 + Q xp2 + xp2
332 // ## 0 1 2 3 4
333 // ##
334 // ## z1 = range_x * ( xp2 * zdiv ) + range_x
335 // ##
336
337 const qword xp2 = si_fm( range_x, range_x );
338 const qword znum0 = f_atan_p0;
339 const qword znum1 = si_fma( znum0, xp2, f_atan_p1 );
340 const qword znum2 = si_fma( znum1, xp2, f_atan_p2 );
341 const qword znum3 = si_fma( znum2, xp2, f_atan_p3 );
342 const qword znum = si_fma( znum3, xp2, f_atan_p4 );
343 const qword zden0 = si_fa( xp2, f_atan_q0 );
344 const qword zden1 = si_fma( zden0, xp2, f_atan_q1 );
345 const qword zden2 = si_fma( zden1, xp2, f_atan_q2 );
346 const qword zden3 = si_fma( zden2, xp2, f_atan_q3 );
347 const qword zden = si_fma( zden3, xp2, f_atan_q4 );
348 const qword zden_r0 = si_frest( zden );
349 const qword zden_r1 = si_fnms( zden_r0, zden, f_one );
350 const qword zden_r = si_fma( zden_r1, zden_r0, zden_r0 );
351 const qword zdiv = si_fm( znum, zden_r );
352 const qword z0 = si_fm( xp2, zdiv );
353 const qword z1 = si_fma( range_x, z0, range_x );
354
355 // ##
356 // ## zadd = z1 + 0.5 * MOREBITS { range2_mask
357 // ## z1 + MOREBITS { range1_mask
358 // ## z1 { otherwise
359 // ##
360 // ## yaddz = range_y + zadd
361 // ##
362 // ## pos_yaddz = yaddz { yaddz >= 0
363 // ## -yaddz { yaddz < 0
364 // ##
365
366 const qword zadd0 = si_selb( f_zero, f_hmorebits, range2_mask );
367 const qword zadd1 = si_selb( zadd0, f_morebits, range1_mask );
368 const qword zadd = si_fa( z1, zadd1 );
369 const qword yaddz = si_fa( range_y, zadd );
370 const qword neg_yaddz = si_xor( yaddz, f_msb );
371 const qword pos_yaddz = si_selb( yaddz, neg_yaddz, sign_mask );
372
373 // ##
374 // ## result_y0 = 0.0 { x == 0.0
375 // ## pos_yaddz { otherwise
376 // ##
377
378 const qword x_eqz_mask = si_fceq( f_zero, x );
379 const qword result_y0 = si_selb( pos_yaddz, x, x_eqz_mask );
380
381 // ##
382 // ## result_y2 = +PI {
383 // ## --- { x == INF
384 // ## 2.0 {
385 // ##
386 // ## -PI {
387 // ## --- { x == -INF
388 // ## 2.0 {
389 // ##
390 // ## result_y0 { otherwise
391 // ##
392
393 const qword x_eqinf_mask = si_fceq( f_inf, x );
394 const qword x_eqninf_mask = si_fceq( f_ninf, x );
395 const qword result_y1 = si_selb( result_y0, f_pio2, x_eqinf_mask );
396 const qword result = si_selb( result_y1, f_npio2, x_eqninf_mask );
397
398 return (result);
399}
400
401static inline vector float
402cp_fatan( const vector float x )
403{
404 return (vector float)( _cp_fatan( (qword)x ) );
405}
406
407static inline float
408cp_fatan_scalar( const float x )
409{
410 const qword vx = si_from_float( x );
411 const qword vresult = _cp_fatan( vx );
412 const float result = si_to_float( vresult );
413
414 return (result);
415}
416
417// ## cp_fatan2(y,x)
418// ##
419// ## -INF <= x <= INF
420// ## -INF <= y <= INF
421// ## -PI <= cp_fatan2(y,x) <= +PI
422// ##
423// ## Each floating-point component of the result is a function of
424// ## the corresponding components of y and x:
425// ##
426// ## +PI { (y == +0.0) && (x < 0.0)
427// ##
428// ## -PI { (y == -0.0) && (x < 0.0)
429// ##
430// ## +0.0 { (y == +0.0) && (x > 0.0)
431// ##
432// ## -0.0 { (y == -0.0) && (x > 0.0)
433// ##
434// ## -PI {
435// ## ---- { (y < 0.0) && (x == 0.0)
436// ## +2.0 {
437// ##
438// ## +PI {
439// ## ---- { (y > 0.0) && (x == 0.0)
440// ## +2.0 {
441// ##
442// ## NaN { (y == NaN) || (x == NaN)
443// ##
444// ## +PI { (y == +0.0) && (x == -0.0)
445// ##
446// ## -PI { (y == -0.0) && (x == -0.0)
447// ##
448// ## +0.0 { (y == +0.0) && (x == +0.0)
449// ##
450// ## -0.0 { (y == -0.0) && (x == +0.0)
451// ##
452// ## +PI {
453// ## --- { (y == +INF) && (x == +INF)
454// ## 4.0 {
455// ##
456// ## -PI {
457// ## --- { (y == -INF) && (x == +INF)
458// ## 4.0 {
459// ##
460// ## +3.0 PI {
461// ## ------- { (y == +INF) && (x == -INF)
462// ## +4.0 {
463// ##
464// ## -3.0 PI {
465// ## ------- { (y == -INF) && (x == -INF)
466// ## +4.0 {
467// ##
468// ## +PI { isfinite(y) && (+y > 0) && (x == -INF)
469// ##
470// ## -PI { isfinite(y) && (-y > 0) && (x == -INF)
471// ##
472// ## +0.0 { isfinite(y) && (+y > 0) && (x == +INF)
473// ##
474// ## -0.0 { isfinite(y) && (-y > 0) && (x == +INF)
475// ##
476// ## +PI {
477// ## ---- { (isfinite(x) && (y == +INF)
478// ## +2.0 {
479// ##
480// ## -PI {
481// ## --- { (isfinite(x) && (y == -INF)
482// ## +2.0 {
483// ##
484// ## ( y ) {
485// ## +PI + cp_atan( - ) { ( x < 0.0 ) && ( y >= 0.0 )
486// ## ( x ) {
487// ##
488// ## ( y ) {
489// ## -PI + cp_atan( - ) { ( x < 0.0 ) && ( y < 0.0 )
490// ## ( x ) {
491// ##
492// ## ( y ) {
493// ## +0.0 + cp_atan( - ) { otherwise
494// ## ( x ) {
495// ##
496
497qword _cp_fatan2( qword y, qword x )
498{
499 const qword f_one = cp_filone();
500 const qword f_zero = cp_filzero();
501 const qword f_pi = si_lqa( (intptr_t)&_cp_f_pi );
502 const qword f_npi = si_lqa( (intptr_t)&_cp_f_npi );
503
504 // ##
505 // ## yox = y
506 // ## -
507 // ## x
508 // ##
509 // ## z = +PI + cp_atan( yox ) { ( x < 0.0 ) && ( y >= 0.0 )
510 // ## -PI + cp_atan( yox ) { ( x < 0.0 ) && ( y < 0.0 )
511 // ## 0.0 + cp_atan( yox ) { otherwise
512
513 const qword x_ltz_mask = si_fcgt( f_zero, x );
514 const qword y_ltz_mask = si_fcgt( f_zero, y );
515 const qword xy_ltz_mask = si_and( x_ltz_mask, y_ltz_mask );
516 const qword zadd0 = si_selb( f_zero, f_pi, x_ltz_mask );
517 const qword zadd = si_selb( zadd0, f_npi, xy_ltz_mask );
518 const qword x_r0 = si_frest( x );
519 const qword x_r1 = si_fnms( x_r0, x, f_one );
520 const qword x_r = si_fma( x_r1, x_r0, x_r0 );
521 const qword yox = si_fm( y, x_r );
522 const qword atan_yox = _cp_fatan( yox );
523 const qword result = si_fa( zadd, atan_yox );
524
525 return (result);
526}
527
528vector float cp_fatan2( vector float arg0 /* y */, vector float arg1 /* x */ )
529{
530 const qword y = (qword)arg0;
531 const qword x = (qword)arg1;
532 const qword result = _cp_fatan2( y, x );
533
534 return (vector float)(result);
535}
536
537float cp_fatan2_scalar( float arg0 /* y */, float arg1 /* x */ )
538{
539 const qword y = si_from_float( arg0 );
540 const qword x = si_from_float( arg1 );
541 const qword z = _cp_fatan2( y, x );
542 const float result = si_to_float( z );
543
544 return( result );
545}
546
547#endif /* CP_FATAN_CBE_SPU_H */
0// ## cp_fatan-cbe-spu.c (C99)
1// ## Version 1.0
2// ##
3// ## Copyright (c) 2006 Mike Acton
4// ##
5// ## SIGNIFICANT REFERENCES:
6// ##
7// ## [1] Cephes Math Library Release 2.8: June, 2000
8// ## Copyright 1984, 1995, 2000, Stephen L. Moshier
9// ## [2] Numerical Computation Guide (PDF)
10// ## Copyright 2000, Sun Microsystems, Inc.
11// ## [3] IEEE 754 Support in C99 (PDF)
12// ## Copyright 2001, Jim Thomas
13// ## [4] Solaris 10 Reference Manual : atan2(3M)
14// ## Copyright 1994-2005, Sun Microsystems, Inc.
15// ##
16// ## Permission is hereby granted, free of charge, to any person obtaining
17// ## a copy of this software and associated documentation files
18// ## (the "Software"), to deal in the Software without restriction, including
19// ## without limitation the rights to use, copy, modify, merge, publish,
20// ## distribute, sublicense, and/or sell copies of the Software, and to permit
21// ## persons to whom the Software is furnished to do so, subject to the
22// ## following conditions:
23// ##
24// ## The above copyright notice and this permission notice shall be included
25// ## in all copies or substantial portions of the Software.
26// ##
27// ## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
28// ## OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
29// ## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
30// ## AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
31// ## LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
32// ## OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
33// ## THE SOFTWARE.
34// ##
35
36// Loading these constants from (global) SPU local memory is going to be a win over building them
37// or storing them locally near the function.
38
39const vector unsigned int _cp_f_pio4 = {+0x3F490FDA,+0x3F490FDA,+0x3F490FDA,+0x3F490FDA};
40const vector unsigned int _cp_f_t3p8 = {+0x401A8279,+0x401A8279,+0x401A8279,+0x401A8279};
41const vector unsigned int _cp_f_npio2 = {-0x4036F026,-0x4036F026,-0x4036F026,-0x4036F026};
42const vector unsigned int _cp_f_pio2 = {+0x3FC90FDA,+0x3FC90FDA,+0x3FC90FDA,+0x3FC90FDA};
43const vector unsigned int _cp_f_pt66 = {+0x3F28F5C2,+0x3F28F5C2,+0x3F28F5C2,+0x3F28F5C2};
44const vector unsigned int _cp_f_pi = {+0x40490fda,+0x40490fda,+0x40490fda,+0x40490fda};
45const vector unsigned int _cp_f_npi = {-0x3fb6f026,-0x3fb6f026,-0x3fb6f026,-0x3fb6f026};
46
47const vector unsigned int _cp_f_atan_q4 = {+0x43428CF7,+0x43428CF7,+0x43428CF7,+0x43428CF7};
48const vector unsigned int _cp_f_atan_q3 = {+0x43F2B1F8,+0x43F2B1F8,+0x43F2B1F8,+0x43F2B1F8};
49const vector unsigned int _cp_f_atan_q2 = {+0x43D870C6,+0x43D870C6,+0x43D870C6,+0x43D870C6};
50const vector unsigned int _cp_f_atan_q1 = {+0x432506EA,+0x432506EA,+0x432506EA,+0x432506EA};
51const vector unsigned int _cp_f_atan_q0 = {+0x41C6DE22,+0x41C6DE22,+0x41C6DE22,+0x41C6DE22};
52const vector unsigned int _cp_f_atan_p4 = {-0x3D7E4CB1,-0x3D7E4CB1,-0x3D7E4CB1,-0x3D7E4CB1};
53const vector unsigned int _cp_f_atan_p3 = {-0x3D0A3A07,-0x3D0A3A07,-0x3D0A3A07,-0x3D0A3A07};
54const vector unsigned int _cp_f_atan_p2 = {-0x3D69FB9F,-0x3D69FB9F,-0x3D69FB9F,-0x3D69FB9F};
55const vector unsigned int _cp_f_atan_p1 = {-0x3E7EBD5E,-0x3E7EBD5E,-0x3E7EBD5E,-0x3E7EBD5E};
56const vector unsigned int _cp_f_atan_p0 = {-0x409FFC03,-0x409FFC03,-0x409FFC03,-0x409FFC03};
57const vector unsigned int _cp_f_hmorebits = {+0x240D3131,+0x240D3131,+0x240D3131,+0x240D3131};
58const vector unsigned int _cp_f_morebits = {+0x248D3131,+0x248D3131,+0x248D3131,+0x248D3131};
59