GDAL
gdalsse_priv.h
1 /******************************************************************************
2  * $Id: gdalsse_priv.h 34921 2016-08-04 22:26:31Z rouault $
3  *
4  * Project: GDAL
5  * Purpose: SSE2 helper
6  * Author: Even Rouault <even dot rouault at spatialys dot com>
7  *
8  ******************************************************************************
9  * Copyright (c) 2014, Even Rouault <even dot rouault at spatialys dot com>
10  *
11  * Permission is hereby granted, free of charge, to any person obtaining a
12  * copy of this software and associated documentation files (the "Software"),
13  * to deal in the Software without restriction, including without limitation
14  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
15  * and/or sell copies of the Software, and to permit persons to whom the
16  * Software is furnished to do so, subject to the following conditions:
17  *
18  * The above copyright notice and this permission notice shall be included
19  * in all copies or substantial portions of the Software.
20  *
21  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
22  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
24  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
25  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
26  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
27  * DEALINGS IN THE SOFTWARE.
28  ****************************************************************************/
29 
30 #ifndef GDALSSE_PRIV_H_INCLUDED
31 #define GDALSSE_PRIV_H_INCLUDED
32 
33 #ifndef DOXYGEN_SKIP
34 
35 #include "cpl_port.h"
36 
37 /* We restrict to 64bit processors because they are guaranteed to have SSE2 */
38 /* Could possibly be used too on 32bit, but we would need to check at runtime */
39 #if (defined(__x86_64) || defined(_M_X64)) && !defined(USE_SSE2_EMULATION)
40 
41 /* Requires SSE2 */
42 #include <emmintrin.h>
43 #include <string.h>
44 
45 #ifdef __SSE4_1__
46 #include <smmintrin.h>
47 #endif
48 
49 class XMMReg2Double
50 {
51  public:
52  __m128d xmm;
53 
54  /* coverity[uninit_member] */
55  XMMReg2Double() {}
56 
57  XMMReg2Double(double val) { xmm = _mm_load_sd (&val); }
58  XMMReg2Double(const XMMReg2Double& other) : xmm(other.xmm) {}
59 
60  static inline XMMReg2Double Zero()
61  {
62  XMMReg2Double reg;
63  reg.Zeroize();
64  return reg;
65  }
66 
67  static inline XMMReg2Double Load1ValHighAndLow(const double* ptr)
68  {
69  XMMReg2Double reg;
70  reg.nsLoad1ValHighAndLow(ptr);
71  return reg;
72  }
73 
74  static inline XMMReg2Double Load2Val(const double* ptr)
75  {
76  XMMReg2Double reg;
77  reg.nsLoad2Val(ptr);
78  return reg;
79  }
80 
81  static inline XMMReg2Double Load2Val(const float* ptr)
82  {
83  XMMReg2Double reg;
84  reg.nsLoad2Val(ptr);
85  return reg;
86  }
87 
88  static inline XMMReg2Double Load2ValAligned(const double* ptr)
89  {
90  XMMReg2Double reg;
91  reg.nsLoad2ValAligned(ptr);
92  return reg;
93  }
94 
95  static inline XMMReg2Double Load2Val(const unsigned char* ptr)
96  {
97  XMMReg2Double reg;
98  reg.nsLoad2Val(ptr);
99  return reg;
100  }
101 
102  static inline XMMReg2Double Load2Val(const short* ptr)
103  {
104  XMMReg2Double reg;
105  reg.nsLoad2Val(ptr);
106  return reg;
107  }
108 
109  static inline XMMReg2Double Load2Val(const unsigned short* ptr)
110  {
111  XMMReg2Double reg;
112  reg.nsLoad2Val(ptr);
113  return reg;
114  }
115 
116  static inline XMMReg2Double Equals(const XMMReg2Double& expr1, const XMMReg2Double& expr2)
117  {
118  XMMReg2Double reg;
119  reg.xmm = _mm_cmpeq_pd(expr1.xmm, expr2.xmm);
120  return reg;
121  }
122 
123  static inline XMMReg2Double NotEquals(const XMMReg2Double& expr1, const XMMReg2Double& expr2)
124  {
125  XMMReg2Double reg;
126  reg.xmm = _mm_cmpneq_pd(expr1.xmm, expr2.xmm);
127  return reg;
128  }
129 
130  static inline XMMReg2Double Greater(const XMMReg2Double& expr1, const XMMReg2Double& expr2)
131  {
132  XMMReg2Double reg;
133  reg.xmm = _mm_cmpgt_pd(expr1.xmm, expr2.xmm);
134  return reg;
135  }
136 
137  static inline XMMReg2Double And(const XMMReg2Double& expr1, const XMMReg2Double& expr2)
138  {
139  XMMReg2Double reg;
140  reg.xmm = _mm_and_pd(expr1.xmm, expr2.xmm);
141  return reg;
142  }
143 
144  static inline XMMReg2Double Ternary(const XMMReg2Double& cond, const XMMReg2Double& true_expr, const XMMReg2Double& false_expr)
145  {
146  XMMReg2Double reg;
147  reg.xmm = _mm_or_pd(_mm_and_pd (cond.xmm, true_expr.xmm), _mm_andnot_pd(cond.xmm, false_expr.xmm));
148  return reg;
149  }
150 
151  static inline XMMReg2Double Min(const XMMReg2Double& expr1, const XMMReg2Double& expr2)
152  {
153  XMMReg2Double reg;
154  reg.xmm = _mm_min_pd(expr1.xmm, expr2.xmm);
155  return reg;
156  }
157 
158  inline void nsLoad1ValHighAndLow(const double* ptr)
159  {
160  xmm = _mm_load1_pd(ptr);
161  }
162 
163  inline void nsLoad2Val(const double* ptr)
164  {
165  xmm = _mm_loadu_pd(ptr);
166  }
167 
168  inline void nsLoad2ValAligned(const double* pval)
169  {
170  xmm = _mm_load_pd(pval);
171  }
172 
173  inline void nsLoad2Val(const float* pval)
174  {
175  __m128 temp1 = _mm_load_ss(pval);
176  __m128 temp2 = _mm_load_ss(pval + 1);
177  temp1 = _mm_shuffle_ps(temp1, temp2, _MM_SHUFFLE(1,0,1,0));
178  temp1 = _mm_shuffle_ps(temp1, temp1, _MM_SHUFFLE(3,3,2,0));
179  xmm = _mm_cvtps_pd(temp1);
180  }
181 
182  inline void nsLoad2Val(const unsigned char* ptr)
183  {
184 #ifdef CPL_CPU_REQUIRES_ALIGNED_ACCESS
185  unsigned short s;
186  memcpy(&s, ptr, 2);
187  __m128i xmm_i = _mm_cvtsi32_si128(s);
188 #else
189  __m128i xmm_i = _mm_cvtsi32_si128(*(unsigned short*)(ptr));
190 #endif
191 #ifdef __SSE4_1__
192  xmm_i = _mm_cvtepu8_epi32(xmm_i);
193 #else
194  xmm_i = _mm_unpacklo_epi8(xmm_i, _mm_setzero_si128());
195  xmm_i = _mm_unpacklo_epi16(xmm_i, _mm_setzero_si128());
196 #endif
197  xmm = _mm_cvtepi32_pd(xmm_i);
198  }
199 
200  inline void nsLoad2Val(const short* ptr)
201  {
202  int i;
203  memcpy(&i, ptr, 4);
204  __m128i xmm_i = _mm_cvtsi32_si128(i);
205 #ifdef __SSE4_1__
206  xmm_i = _mm_cvtepi16_epi32(xmm_i);
207 #else
208  xmm_i = _mm_unpacklo_epi16(xmm_i,xmm_i); /* 0|0|0|0|0|0|b|a --> 0|0|0|0|b|b|a|a */
209  xmm_i = _mm_srai_epi32(xmm_i, 16); /* 0|0|0|0|b|b|a|a --> 0|0|0|0|sign(b)|b|sign(a)|a */
210 #endif
211  xmm = _mm_cvtepi32_pd(xmm_i);
212  }
213 
214  inline void nsLoad2Val(const unsigned short* ptr)
215  {
216  int i;
217  memcpy(&i, ptr, 4);
218  __m128i xmm_i = _mm_cvtsi32_si128(i);
219 #ifdef __SSE4_1__
220  xmm_i = _mm_cvtepu16_epi32(xmm_i);
221 #else
222  xmm_i = _mm_unpacklo_epi16(xmm_i,_mm_setzero_si128()); /* 0|0|0|0|0|0|b|a --> 0|0|0|0|0|b|0|a */
223 #endif
224  xmm = _mm_cvtepi32_pd(xmm_i);
225  }
226 
227  static inline void Load4Val(const unsigned char* ptr, XMMReg2Double& low, XMMReg2Double& high)
228  {
229 #ifdef CPL_CPU_REQUIRES_ALIGNED_ACCESS
230  int i;
231  memcpy(&i, ptr, 4);
232  __m128i xmm_i = _mm_cvtsi32_si128(i);
233 #else
234  __m128i xmm_i = _mm_cvtsi32_si128(*(int*)(ptr));
235 #endif
236 #ifdef __SSE4_1__
237  xmm_i = _mm_cvtepu8_epi32(xmm_i);
238 #else
239  xmm_i = _mm_unpacklo_epi8(xmm_i, _mm_setzero_si128());
240  xmm_i = _mm_unpacklo_epi16(xmm_i, _mm_setzero_si128());
241 #endif
242  low.xmm = _mm_cvtepi32_pd(xmm_i);
243  high.xmm = _mm_cvtepi32_pd(_mm_shuffle_epi32(xmm_i,_MM_SHUFFLE(3,2,3,2)));
244  }
245 
246  static inline void Load4Val(const short* ptr, XMMReg2Double& low, XMMReg2Double& high)
247  {
248  low.nsLoad2Val(ptr);
249  high.nsLoad2Val(ptr+2);
250  }
251 
252  static inline void Load4Val(const unsigned short* ptr, XMMReg2Double& low, XMMReg2Double& high)
253  {
254  low.nsLoad2Val(ptr);
255  high.nsLoad2Val(ptr+2);
256  }
257 
258  static inline void Load4Val(const double* ptr, XMMReg2Double& low, XMMReg2Double& high)
259  {
260  low.nsLoad2Val(ptr);
261  high.nsLoad2Val(ptr+2);
262  }
263 
264  static inline void Load4Val(const float* ptr, XMMReg2Double& low, XMMReg2Double& high)
265  {
266  __m128 temp1 = _mm_loadu_ps(ptr);
267  __m128 temp2 = _mm_shuffle_ps(temp1, temp1, _MM_SHUFFLE(3,2,3,2));
268  low.xmm = _mm_cvtps_pd(temp1);
269  high.xmm = _mm_cvtps_pd(temp2);
270  }
271 
272  inline void Zeroize()
273  {
274  xmm = _mm_setzero_pd();
275  }
276 
277  inline XMMReg2Double& operator= (const XMMReg2Double& other)
278  {
279  xmm = other.xmm;
280  return *this;
281  }
282 
283  inline XMMReg2Double& operator+= (const XMMReg2Double& other)
284  {
285  xmm = _mm_add_pd(xmm, other.xmm);
286  return *this;
287  }
288 
289  inline XMMReg2Double& operator*= (const XMMReg2Double& other)
290  {
291  xmm = _mm_mul_pd(xmm, other.xmm);
292  return *this;
293  }
294 
295  inline XMMReg2Double operator+ (const XMMReg2Double& other) const
296  {
297  XMMReg2Double ret;
298  ret.xmm = _mm_add_pd(xmm, other.xmm);
299  return ret;
300  }
301 
302  inline XMMReg2Double operator- (const XMMReg2Double& other) const
303  {
304  XMMReg2Double ret;
305  ret.xmm = _mm_sub_pd(xmm, other.xmm);
306  return ret;
307  }
308 
309  inline XMMReg2Double operator* (const XMMReg2Double& other) const
310  {
311  XMMReg2Double ret;
312  ret.xmm = _mm_mul_pd(xmm, other.xmm);
313  return ret;
314  }
315 
316  inline XMMReg2Double operator/ (const XMMReg2Double& other) const
317  {
318  XMMReg2Double ret;
319  ret.xmm = _mm_div_pd(xmm, other.xmm);
320  return ret;
321  }
322 
323  inline void AddLowAndHigh()
324  {
325  __m128d xmm2;
326  xmm2 = _mm_shuffle_pd(xmm,xmm,_MM_SHUFFLE2(0,1)); /* transfer high word into low word of xmm2 */
327  xmm = _mm_add_pd(xmm, xmm2);
328  }
329 
330  inline void Store2Double(double* pval) const
331  {
332  _mm_storeu_pd(pval, xmm);
333  }
334 
335  inline void Store2DoubleAligned(double* pval) const
336  {
337  _mm_store_pd(pval, xmm);
338  }
339 
340  void Store2Val(unsigned short* ptr) const
341  {
342  __m128i tmp = _mm_cvtpd_epi32(xmm); /* Convert the 2 double values to 2 integers */
343  ptr[0] = (GUInt16)_mm_extract_epi16(tmp, 0);
344  ptr[1] = (GUInt16)_mm_extract_epi16(tmp, 2);
345  }
346 
347  inline operator double () const
348  {
349  double val;
350  _mm_store_sd(&val, xmm);
351  return val;
352  }
353 };
354 
355 #else
356 
357 #warning "Software emulation of SSE2 !"
358 
359 class XMMReg2Double
360 {
361  public:
362  double low;
363  double high;
364 
365  XMMReg2Double() {}
366  XMMReg2Double(double val) { low = val; high = 0.0; }
367  XMMReg2Double(const XMMReg2Double& other) : low(other.low), high(other.high) {}
368 
369  static inline XMMReg2Double Zero()
370  {
371  XMMReg2Double reg;
372  reg.Zeroize();
373  return reg;
374  }
375 
376  static inline XMMReg2Double Load1ValHighAndLow(const double* ptr)
377  {
378  XMMReg2Double reg;
379  reg.nsLoad1ValHighAndLow(ptr);
380  return reg;
381  }
382 
383  static inline XMMReg2Double Equals(const XMMReg2Double& expr1, const XMMReg2Double& expr2)
384  {
385  XMMReg2Double reg;
386 
387  if (expr1.low == expr2.low)
388  memset(&(reg.low), 0xFF, sizeof(double));
389  else
390  reg.low = 0;
391 
392  if (expr1.high == expr2.high)
393  memset(&(reg.high), 0xFF, sizeof(double));
394  else
395  reg.high = 0;
396 
397  return reg;
398  }
399 
400  static inline XMMReg2Double NotEquals(const XMMReg2Double& expr1, const XMMReg2Double& expr2)
401  {
402  XMMReg2Double reg;
403 
404  if (expr1.low != expr2.low)
405  memset(&(reg.low), 0xFF, sizeof(double));
406  else
407  reg.low = 0;
408 
409  if (expr1.high != expr2.high)
410  memset(&(reg.high), 0xFF, sizeof(double));
411  else
412  reg.high = 0;
413 
414  return reg;
415  }
416 
417  static inline XMMReg2Double Greater(const XMMReg2Double& expr1, const XMMReg2Double& expr2)
418  {
419  XMMReg2Double reg;
420 
421  if (expr1.low > expr2.low)
422  memset(&(reg.low), 0xFF, sizeof(double));
423  else
424  reg.low = 0;
425 
426  if (expr1.high > expr2.high)
427  memset(&(reg.high), 0xFF, sizeof(double));
428  else
429  reg.high = 0;
430 
431  return reg;
432  }
433 
434  static inline XMMReg2Double And(const XMMReg2Double& expr1, const XMMReg2Double& expr2)
435  {
436  XMMReg2Double reg;
437  int low1[2], high1[2];
438  int low2[2], high2[2];
439  memcpy(low1, &expr1.low, sizeof(double));
440  memcpy(high1, &expr1.high, sizeof(double));
441  memcpy(low2, &expr2.low, sizeof(double));
442  memcpy(high2, &expr2.high, sizeof(double));
443  low1[0] &= low2[0];
444  low1[1] &= low2[1];
445  high1[0] &= high2[0];
446  high1[1] &= high2[1];
447  memcpy(&reg.low, low1, sizeof(double));
448  memcpy(&reg.high, high1, sizeof(double));
449  return reg;
450  }
451 
452  static inline XMMReg2Double Ternary(const XMMReg2Double& cond, const XMMReg2Double& true_expr, const XMMReg2Double& false_expr)
453  {
454  XMMReg2Double reg;
455  if( cond.low )
456  reg.low = true_expr.low;
457  else
458  reg.low = false_expr.low;
459  if( cond.high )
460  reg.high = true_expr.high;
461  else
462  reg.high = false_expr.high;
463  return reg;
464  }
465 
466  static inline XMMReg2Double Min(const XMMReg2Double& expr1, const XMMReg2Double& expr2)
467  {
468  XMMReg2Double reg;
469  reg.low = (expr1.low < expr2.low) ? expr1.low : expr2.high;
470  reg.high = (expr1.high < expr2.high) ? expr1.high : expr2.low;
471  return reg;
472  }
473 
474  static inline XMMReg2Double Load2Val(const double* ptr)
475  {
476  XMMReg2Double reg;
477  reg.nsLoad2Val(ptr);
478  return reg;
479  }
480 
481  static inline XMMReg2Double Load2ValAligned(const double* ptr)
482  {
483  XMMReg2Double reg;
484  reg.nsLoad2ValAligned(ptr);
485  return reg;
486  }
487 
488  static inline XMMReg2Double Load2Val(const float* ptr)
489  {
490  XMMReg2Double reg;
491  reg.nsLoad2Val(ptr);
492  return reg;
493  }
494 
495  static inline XMMReg2Double Load2Val(const unsigned char* ptr)
496  {
497  XMMReg2Double reg;
498  reg.nsLoad2Val(ptr);
499  return reg;
500  }
501 
502  static inline XMMReg2Double Load2Val(const short* ptr)
503  {
504  XMMReg2Double reg;
505  reg.nsLoad2Val(ptr);
506  return reg;
507  }
508 
509  static inline XMMReg2Double Load2Val(const unsigned short* ptr)
510  {
511  XMMReg2Double reg;
512  reg.nsLoad2Val(ptr);
513  return reg;
514  }
515 
516  inline void nsLoad1ValHighAndLow(const double* pval)
517  {
518  low = pval[0];
519  high = pval[0];
520  }
521 
522  inline void nsLoad2Val(const double* pval)
523  {
524  low = pval[0];
525  high = pval[1];
526  }
527 
528  inline void nsLoad2ValAligned(const double* pval)
529  {
530  low = pval[0];
531  high = pval[1];
532  }
533 
534  inline void nsLoad2Val(const float* pval)
535  {
536  low = pval[0];
537  high = pval[1];
538  }
539 
540  inline void nsLoad2Val(const unsigned char* ptr)
541  {
542  low = ptr[0];
543  high = ptr[1];
544  }
545 
546  inline void nsLoad2Val(const short* ptr)
547  {
548  low = ptr[0];
549  high = ptr[1];
550  }
551 
552  inline void nsLoad2Val(const unsigned short* ptr)
553  {
554  low = ptr[0];
555  high = ptr[1];
556  }
557 
558  static inline void Load4Val(const unsigned char* ptr, XMMReg2Double& low, XMMReg2Double& high)
559  {
560  low.low = ptr[0];
561  low.high = ptr[1];
562  high.low = ptr[2];
563  high.high = ptr[3];
564  }
565 
566  static inline void Load4Val(const short* ptr, XMMReg2Double& low, XMMReg2Double& high)
567  {
568  low.nsLoad2Val(ptr);
569  high.nsLoad2Val(ptr+2);
570  }
571 
572  static inline void Load4Val(const unsigned short* ptr, XMMReg2Double& low, XMMReg2Double& high)
573  {
574  low.nsLoad2Val(ptr);
575  high.nsLoad2Val(ptr+2);
576  }
577 
578  static inline void Load4Val(const double* ptr, XMMReg2Double& low, XMMReg2Double& high)
579  {
580  low.nsLoad2Val(ptr);
581  high.nsLoad2Val(ptr+2);
582  }
583 
584  static inline void Load4Val(const float* ptr, XMMReg2Double& low, XMMReg2Double& high)
585  {
586  low.nsLoad2Val(ptr);
587  high.nsLoad2Val(ptr+2);
588  }
589 
590  inline void Zeroize()
591  {
592  low = 0.0;
593  high = 0.0;
594  }
595 
596  inline XMMReg2Double& operator= (const XMMReg2Double& other)
597  {
598  low = other.low;
599  high = other.high;
600  return *this;
601  }
602 
603  inline XMMReg2Double& operator+= (const XMMReg2Double& other)
604  {
605  low += other.low;
606  high += other.high;
607  return *this;
608  }
609 
610  inline XMMReg2Double& operator*= (const XMMReg2Double& other)
611  {
612  low *= other.low;
613  high *= other.high;
614  return *this;
615  }
616 
617  inline XMMReg2Double operator+ (const XMMReg2Double& other) const
618  {
619  XMMReg2Double ret;
620  ret.low = low + other.low;
621  ret.high = high + other.high;
622  return ret;
623  }
624 
625  inline XMMReg2Double operator- (const XMMReg2Double& other) const
626  {
627  XMMReg2Double ret;
628  ret.low = low - other.low;
629  ret.high = high - other.high;
630  return ret;
631  }
632 
633  inline XMMReg2Double operator* (const XMMReg2Double& other) const
634  {
635  XMMReg2Double ret;
636  ret.low = low * other.low;
637  ret.high = high * other.high;
638  return ret;
639  }
640 
641  inline XMMReg2Double operator/ (const XMMReg2Double& other) const
642  {
643  XMMReg2Double ret;
644  ret.low = low / other.low;
645  ret.high = high / other.high;
646  return ret;
647  }
648 
649  inline void AddLowAndHigh()
650  {
651  double add = low + high;
652  low = add;
653  high = add;
654  }
655 
656  inline void Store2Double(double* pval) const
657  {
658  pval[0] = low;
659  pval[1] = high;
660  }
661 
662  inline void Store2DoubleAligned(double* pval) const
663  {
664  pval[0] = low;
665  pval[1] = high;
666  }
667 
668  void Store2Val(unsigned short* ptr) const
669  {
670  ptr[0] = (GUInt16)low;
671  ptr[1] = (GUInt16)high;
672  }
673 
674  inline operator double () const
675  {
676  return low;
677  }
678 };
679 
680 #endif /* defined(__x86_64) || defined(_M_X64) */
681 
682 class XMMReg4Double
683 {
684  public:
685  XMMReg2Double low, high;
686 
687  XMMReg4Double() {}
688  XMMReg4Double(const XMMReg4Double& other) : low(other.low), high(other.high) {}
689 
690  static inline XMMReg4Double Zero()
691  {
692  XMMReg4Double reg;
693  reg.low.Zeroize();
694  reg.high.Zeroize();
695  return reg;
696  }
697 
698  static inline XMMReg4Double Load1ValHighAndLow(const double* ptr)
699  {
700  XMMReg4Double reg;
701  reg.low.nsLoad1ValHighAndLow(ptr);
702  reg.high = reg.low;
703  return reg;
704  }
705 
706  static inline XMMReg4Double Load4Val(const unsigned char* ptr)
707  {
708  XMMReg4Double reg;
709  XMMReg2Double::Load4Val(ptr, reg.low, reg.high);
710  return reg;
711  }
712 
713  static inline XMMReg4Double Load4Val(const short* ptr)
714  {
715  XMMReg4Double reg;
716  reg.low.nsLoad2Val(ptr);
717  reg.high.nsLoad2Val(ptr+2);
718  return reg;
719  }
720 
721  static inline XMMReg4Double Load4Val(const unsigned short* ptr)
722  {
723  XMMReg4Double reg;
724  reg.low.nsLoad2Val(ptr);
725  reg.high.nsLoad2Val(ptr+2);
726  return reg;
727  }
728 
729  static inline XMMReg4Double Load4Val(const double* ptr)
730  {
731  XMMReg4Double reg;
732  reg.low.nsLoad2Val(ptr);
733  reg.high.nsLoad2Val(ptr+2);
734  return reg;
735  }
736 
737  static inline XMMReg4Double Load4ValAligned(const double* ptr)
738  {
739  XMMReg4Double reg;
740  reg.low.nsLoad2ValAligned(ptr);
741  reg.high.nsLoad2ValAligned(ptr+2);
742  return reg;
743  }
744 
745  static inline XMMReg4Double Load4Val(const float* ptr)
746  {
747  XMMReg4Double reg;
748  XMMReg2Double::Load4Val(ptr, reg.low, reg.high);
749  return reg;
750  }
751 
752  static inline XMMReg4Double Equals(const XMMReg4Double& expr1, const XMMReg4Double& expr2)
753  {
754  XMMReg4Double reg;
755  reg.low = XMMReg2Double::Equals(expr1.low, expr2.low);
756  reg.high = XMMReg2Double::Equals(expr1.high, expr2.high);
757  return reg;
758  }
759 
760  static inline XMMReg4Double NotEquals(const XMMReg4Double& expr1, const XMMReg4Double& expr2)
761  {
762  XMMReg4Double reg;
763  reg.low = XMMReg2Double::NotEquals(expr1.low, expr2.low);
764  reg.high = XMMReg2Double::NotEquals(expr1.high, expr2.high);
765  return reg;
766  }
767 
768  static inline XMMReg4Double Greater(const XMMReg4Double& expr1, const XMMReg4Double& expr2)
769  {
770  XMMReg4Double reg;
771  reg.low = XMMReg2Double::Greater(expr1.low, expr2.low);
772  reg.high = XMMReg2Double::Greater(expr1.high, expr2.high);
773  return reg;
774  }
775 
776  static inline XMMReg4Double And(const XMMReg4Double& expr1, const XMMReg4Double& expr2)
777  {
778  XMMReg4Double reg;
779  reg.low = XMMReg2Double::And(expr1.low, expr2.low);
780  reg.high = XMMReg2Double::And(expr1.high, expr2.high);
781  return reg;
782  }
783 
784  static inline XMMReg4Double Ternary(const XMMReg4Double& cond, const XMMReg4Double& true_expr, const XMMReg4Double& false_expr)
785  {
786  XMMReg4Double reg;
787  reg.low = XMMReg2Double::Ternary(cond.low, true_expr.low, false_expr.low);
788  reg.high = XMMReg2Double::Ternary(cond.high, true_expr.high, false_expr.high);
789  return reg;
790  }
791 
792  static inline XMMReg4Double Min(const XMMReg4Double& expr1, const XMMReg4Double& expr2)
793  {
794  XMMReg4Double reg;
795  reg.low = XMMReg2Double::Min(expr1.low, expr2.low);
796  reg.high = XMMReg2Double::Min(expr1.high, expr2.high);
797  return reg;
798  }
799 
800  inline XMMReg4Double& operator= (const XMMReg4Double& other)
801  {
802  low = other.low;
803  high = other.high;
804  return *this;
805  }
806 
807  inline XMMReg4Double& operator+= (const XMMReg4Double& other)
808  {
809  low += other.low;
810  high += other.high;
811  return *this;
812  }
813 
814  inline XMMReg4Double& operator*= (const XMMReg4Double& other)
815  {
816  low *= other.low;
817  high *= other.high;
818  return *this;
819  }
820 
821  inline XMMReg4Double operator+ (const XMMReg4Double& other) const
822  {
823  XMMReg4Double ret;
824  ret.low = low + other.low;
825  ret.high = high + other.high;
826  return ret;
827  }
828 
829  inline XMMReg4Double operator- (const XMMReg4Double& other) const
830  {
831  XMMReg4Double ret;
832  ret.low = low - other.low;
833  ret.high = high - other.high;
834  return ret;
835  }
836 
837  inline XMMReg4Double operator* (const XMMReg4Double& other) const
838  {
839  XMMReg4Double ret;
840  ret.low = low * other.low;
841  ret.high = high * other.high;
842  return ret;
843  }
844 
845  inline XMMReg4Double operator/ (const XMMReg4Double& other) const
846  {
847  XMMReg4Double ret;
848  ret.low = low / other.low;
849  ret.high = high / other.high;
850  return ret;
851  }
852 
853  inline void AddLowAndHigh()
854  {
855  low = low + high;
856  low.AddLowAndHigh();
857  }
858 
859  inline XMMReg2Double& GetLow()
860  {
861  return low;
862  }
863 
864  inline XMMReg2Double& GetHigh()
865  {
866  return high;
867  }
868 
869  void Store4Val(unsigned short* ptr) const
870  {
871  low.Store2Val(ptr);
872  high.Store2Val(ptr+2);
873  }
874 };
875 
876 #endif /* #ifndef DOXYGEN_SKIP */
877 
878 #endif /* GDALSSE_PRIV_H_INCLUDED */
GUInt16: typedef for unsigned short — the unsigned 16-bit integer type used above.
Defined in cpl_port.h (line 205), the core portability definitions header for CPL.

Generated for GDAL by doxygen 1.8.8.