OpenJPH
Open-source implementation of JPEG2000 Part-15
Loading...
Searching...
No Matches
ojph_colour_sse.cpp
Go to the documentation of this file.
1//***************************************************************************/
2// This software is released under the 2-Clause BSD license, included
3// below.
4//
5// Copyright (c) 2019, Aous Naman
6// Copyright (c) 2019, Kakadu Software Pty Ltd, Australia
7// Copyright (c) 2019, The University of New South Wales, Australia
8//
9// Redistribution and use in source and binary forms, with or without
10// modification, are permitted provided that the following conditions are
11// met:
12//
13// 1. Redistributions of source code must retain the above copyright
14// notice, this list of conditions and the following disclaimer.
15//
16// 2. Redistributions in binary form must reproduce the above copyright
17// notice, this list of conditions and the following disclaimer in the
18// documentation and/or other materials provided with the distribution.
19//
20// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
21// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
23// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
26// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
27// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
28// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
29// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
30// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31//***************************************************************************/
32// This file is part of the OpenJPH software implementation.
33// File: ojph_colour_sse.cpp
34// Author: Aous Naman
35// Date: 11 October 2019
36//***************************************************************************/
37
38#include <cmath>
39
40#include "ojph_defs.h"
41#include "ojph_arch.h"
42#include "ojph_colour.h"
43#include "ojph_colour_local.h"
44
45#include <immintrin.h>
46
47namespace ojph {
48 namespace local {
49
// Visible behaviour: each pass multiplies the four-lane vector s by m and
// then subtracts 0.5 from every lane. The loop runs (width + 3) >> 2 times,
// i.e. width rounded up to a multiple of four, and advances sp and dp by
// four elements per pass.
// NOTE(review): this listing omits original lines 55, 58, 59 and 62, so the
// declarations of `m` and `s` and any store to dp are not visible here;
// presumably m broadcasts `mul`, s is loaded from sp and the result is
// written to dp -- confirm against the full source file.
51 void sse_cnvrt_si32_to_float_shftd(const si32 *sp, float *dp, float mul,
52 ui32 width)
53 {
54 __m128 shift = _mm_set1_ps(0.5f); // 0.5 in all four lanes
56 for (ui32 i = (width + 3) >> 2; i > 0; --i, sp+=4, dp+=4)
57 {
60 s = _mm_mul_ps(s, m); // per-lane multiply by m
61 s = _mm_sub_ps(s, shift); // per-lane subtract 0.5
63 }
64 }
65
// Visible behaviour: each pass multiplies the four-lane vector s by m. The
// loop runs (width + 3) >> 2 times, i.e. width rounded up to a multiple of
// four, and advances sp and dp by four elements per pass.
// NOTE(review): this listing omits original lines 70, 73, 74 and 76, so the
// declarations of `m` and `s` and any store to dp are not visible here;
// presumably m broadcasts `mul`, s is loaded from sp and the result is
// written to dp -- confirm against the full source file.
67 void sse_cnvrt_si32_to_float(const si32 *sp, float *dp, float mul,
68 ui32 width)
69 {
71 for (ui32 i = (width + 3) >> 2; i > 0; --i, sp+=4, dp+=4)
72 {
75 s = _mm_mul_ps(s, m); // per-lane multiply by m
77 }
78 }
79
// Visible behaviour: each pass multiplies the four-lane vector s by m,
// converts each lane to si32 and writes the four results to consecutive dp
// entries. The loop runs (width + 3) >> 2 times, so up to three entries
// beyond `width` may be written to dp; sp advances by four per pass and dp
// advances through the four post-increments in the body.
// NOTE(review): this listing omits original lines 84, 85, 87, 90, 91 and
// 104. The declarations of `m`, `s` and `t` and the place where `shift` is
// applied are not visible here; presumably m broadcasts `mul`, s is loaded
// from sp and 0.5 is added before scaling -- confirm against the full
// source file.
81 void sse_cnvrt_float_to_si32_shftd(const float *sp, si32 *dp, float mul,
82 ui32 width)
83 {
86 __m128 shift = _mm_set1_ps(0.5f); // 0.5 in all four lanes
88 for (ui32 i = (width + 3) >> 2; i > 0; --i, sp+=4)
89 {
92 s = _mm_mul_ps(s, m); // per-lane multiply by m
93 // the following code is poorly designed, but it is the only
94 // code that I am aware of that compiles in VS 32- and 64-bit modes
// _mm_cvtss_si32 converts only the lowest lane (rounding according to the
// current MXCSR rounding mode), so lanes 1, 2 and 3 are first moved into
// lane 0 with _mm_shuffle_ps before being converted.
95 t = s;
96 *dp++ = _mm_cvtss_si32(t); // lane 0
97 t = _mm_shuffle_ps(s, s, 1);
98 *dp++ = _mm_cvtss_si32(t); // lane 1
99 t = _mm_shuffle_ps(s, s, 2);
100 *dp++ = _mm_cvtss_si32(t); // lane 2
101 t = _mm_shuffle_ps(s, s, 3);
102 *dp++ = _mm_cvtss_si32(t); // lane 3
103 }
105 }
106
// Visible behaviour: each pass multiplies the four-lane vector t by m,
// converts each lane of the product to si32 and writes the four results to
// consecutive dp entries. The loop runs (width + 3) >> 2 times, so up to
// three entries beyond `width` may be written to dp; sp advances by four
// per pass and dp advances through the four post-increments in the body.
// NOTE(review): this listing omits original lines 111-113, 116 and 129, so
// the declarations of `m` and `t` are not visible here; presumably m
// broadcasts `mul` and t is loaded from sp -- confirm against the full
// source file.
108 void sse_cnvrt_float_to_si32(const float *sp, si32 *dp, float mul,
109 ui32 width)
110 {
114 for (ui32 i = (width + 3) >> 2; i > 0; --i, sp+=4)
115 {
117 __m128 s = _mm_mul_ps(t, m); // per-lane multiply by m
118 // the following code is poorly designed, but it is the only
119 // code that I am aware of that compiles in VS 32- and 64-bit modes
// _mm_cvtss_si32 converts only the lowest lane (rounding according to the
// current MXCSR rounding mode), so lanes 1, 2 and 3 are first moved into
// lane 0 with _mm_shuffle_ps before being converted; t is reused as scratch.
120 t = s;
121 *dp++ = _mm_cvtss_si32(t); // lane 0
122 t = _mm_shuffle_ps(s, s, 1);
123 *dp++ = _mm_cvtss_si32(t); // lane 1
124 t = _mm_shuffle_ps(s, s, 2);
125 *dp++ = _mm_cvtss_si32(t); // lane 2
126 t = _mm_shuffle_ps(s, s, 3);
127 *dp++ = _mm_cvtss_si32(t); // lane 3
128 }
130 }
131
// Visible behaviour: the loop runs (repeat + 3) >> 2 times, i.e. repeat
// rounded up to a multiple of four; each pass stores the vector my to y and
// advances all six pointers by four floats.
// NOTE(review): this listing omits original lines 136-140, 143-147, 149
// and 150, so the computation of `my` and any stores to cb and cr are not
// visible here; presumably this is the forward colour transform from
// (r, g, b) to (y, cb, cr) -- confirm against the full source file.
133 void sse_ict_forward(const float *r, const float *g, const float *b,
134 float *y, float *cb, float *cr, ui32 repeat)
135 {
141 for (ui32 i = (repeat + 3) >> 2; i > 0; --i)
142 {
148 _mm_store_ps(y, my); // aligned store: y must be 16-byte aligned
151
152 r += 4; g += 4; b += 4;
153 y += 4; cb += 4; cr += 4;
154 }
155 }
156
// Visible behaviour: the loop runs (repeat + 3) >> 2 times, i.e. repeat
// rounded up to a multiple of four; each pass loads four floats from y and
// advances all six pointers by four floats.
// NOTE(review): this listing omits original lines 161-164 and 168-173, so
// the loads of cb and cr and the stores to r, g and b are not visible here;
// presumably this is the inverse colour transform from (y, cb, cr) to
// (r, g, b) -- confirm against the full source file.
158 void sse_ict_backward(const float *y, const float *cb, const float *cr,
159 float *r, float *g, float *b, ui32 repeat)
160 {
165 for (ui32 i = (repeat + 3) >> 2; i > 0; --i)
166 {
167 __m128 my = _mm_load_ps(y); // aligned load: y must be 16-byte aligned
174
175 y += 4; cb += 4; cr += 4;
176 r += 4; g += 4; b += 4;
177 }
178 }
179 }
180}
void sse_cnvrt_float_to_si32_shftd(const float *sp, si32 *dp, float mul, ui32 width)
void sse_ict_forward(const float *r, const float *g, const float *b, float *y, float *cb, float *cr, ui32 repeat)
void sse_cnvrt_si32_to_float_shftd(const si32 *sp, float *dp, float mul, ui32 width)
void sse_cnvrt_si32_to_float(const si32 *sp, float *dp, float mul, ui32 width)
void sse_ict_backward(const float *y, const float *cb, const float *cr, float *r, float *g, float *b, ui32 repeat)
void sse_cnvrt_float_to_si32(const float *sp, si32 *dp, float mul, ui32 width)
int32_t si32
Definition ojph_defs.h:55
uint32_t ui32
Definition ojph_defs.h:54
static const float GAMMA_CR2R
static const float BETA_CbF
static const float GAMMA_CB2B
static const float ALPHA_RF
static const float GAMMA_CB2G
static const float GAMMA_CR2G
static const float ALPHA_BF
static const float BETA_CrF
static const float ALPHA_GF