IPU C++ intrinsics
Functions that target single IPU instructions.
These intrinsic functions each target a single IPU instruction and may be used in C++ IPU code. Each function is named after the instruction it targets. Floating-point instructions whose names carry a type prefix (for example, f16v2) often drop that prefix in the function name. For example, use cmpeq() to target any of the cmpeq instructions, such as f16v2cmpeq.
Refer to the “Tile Worker ISA” for more detailed information on the instructions targeted by these intrinsics.
Defines
-
IPU_INTRINSICS_INCLUDED
Variables
-
namespace ipu
IPU intrinsic functions.
Functions
-
template<typename T0, typename T1, typename = std::enable_if_t<both_integral<T0, T1>>>
inline unsigned andc(T0 src0, T1 src1) Targets the
andcinstruction.- Parameters
src0 – An integer value.
src1 – An integer value, can be a 12-bit constant.
- Returns
The bitwise logical
AND of src0 and the bitwise complement (NOT) of src1, of type unsigned.
-
inline float andc(float src0, float src1)
Targets the
andcinstruction.- Parameters
src0 – A value of type
float.src1 – A value of type
float.
- Returns
The bitwise logical
AND of the bit pattern of src0 and the bitwise complement (NOT) of the bit pattern of src1, of type float.
-
inline float2 andc(float2 src0, float2 src1)
Targets the
andc64instruction.- Parameters
src0 – A value of type
float2.src1 – A value of type
float2.
- Returns
The bitwise logical
AND of the bit pattern of src0 and the bitwise complement (NOT) of the bit pattern of src1, of type float2.
-
inline unsigned bitrev8(unsigned src)
Targets the
bitrev8instruction.- Parameters
src – A value of type
unsigned.- Returns
A result of type
unsignedthat is equivalent to the value ofsrcwith the bit order of each byte reversed.
-
inline unsigned cms(unsigned src)
Targets the
cmsinstruction.- Parameters
src – A value of type
unsigned.- Returns
The number of higher order bits in
srcthat match the sign bit (bit 31), as anunsigned.
-
inline float2 roll32(float2 src0, float2 src1)
Targets the
roll32instruction.- Parameters
src0 – A value of type
float2.src1 – A value of type
float2.
- Returns
The result of a SIMD roll permutation on the 4 32-bit float values across
src0andsrc1, as afloat2. src0 src1 -> Result | 3 | 2 | | 1 | 0 | | 2 | 1 |
-
template<typename T0, typename T1, typename = std::enable_if_t<both_integral<T0, T1>>>
inline unsigned roll8l(T0 src0, T1 src1) Targets the
roll8linstruction.- Parameters
src0 – An integer value.
src1 – An integer value.
- Returns
The result of a SIMD roll-left permutation on the 8 8-bit values across
src0andsrc1, as anunsigned. src0 src1 -> Result | 7 | 6 | 5 | 4 | | 3 | 2 | 1 | 0 | | 6 | 5 | 4 | 3 |
-
template<typename T0, typename T1, typename = std::enable_if_t<both_integral<T0, T1>>>
inline unsigned roll8r(T0 src0, T1 src1) Targets the
roll8rinstruction.- Parameters
src0 – An integer value.
src1 – An integer value.
- Returns
The result of a SIMD roll-right permutation on the 8 8-bit values across
src0andsrc1, as anunsigned. src0 src1 -> Result | 7 | 6 | 5 | 4 | | 3 | 2 | 1 | 0 | | 4 | 3 | 2 | 1 |
-
template<typename T0, typename T1, typename = std::enable_if_t<both_integral<T0, T1>>>
inline unsigned shuf8x8hi(T0 src0, T1 src1) Targets the
shuf8x8hiinstruction.- Parameters
src0 – An integer value.
src1 – An integer value.
- Returns
The upper word of a SIMD shuffle permutation on the 8 8-bit values across
src0andsrc1, as anunsigned. src0 src1 -> Result | 7 | 6 | 5 | 4 | | 3 | 2 | 1 | 0 | | 7 | 3 | 6 | 2 |
-
template<typename T0, typename T1, typename = std::enable_if_t<both_integral<T0, T1>>>
inline unsigned shuf8x8lo(T0 src0, T1 src1) Targets the
shuf8x8loinstruction.- Parameters
src0 – An integer value.
src1 – An integer value.
- Returns
The lower word of a SIMD shuffle permutation on the 8 8-bit values across
src0andsrc1, as anunsigned. src0 src1 -> Result | 7 | 6 | 5 | 4 | | 3 | 2 | 1 | 0 | | 5 | 1 | 4 | 0 |
-
inline float2 sort4x32hi(float2 src0, float2 src1)
Targets the
sort4x32hiinstruction.- Parameters
src0 – A value of type
float2.src1 – A value of type
float2.
- Returns
The result of a SIMD sort permutation on the 4 32-bit float values across
src0andsrc1, as afloat2. src0 src1 -> Result | 3 | 2 | | 1 | 0 | | 3 | 1 |
-
inline float2 sort4x32lo(float2 src0, float2 src1)
Targets the
sort4x32loinstruction.- Parameters
src0 – A value of type
float2.src1 – A value of type
float2.
- Returns
The result of a SIMD sort permutation on the 4 32-bit float values across
src0andsrc1, as afloat2. src0 src1 -> Result | 3 | 2 | | 1 | 0 | | 2 | 0 |
-
inline unsigned sort8(unsigned src)
Targets the
sort8instruction.- Parameters
src – A value of type
unsigned.- Returns
The result of a SIMD sort8 permutation on the 4 8-bit values in
src, as anunsigned. src -> Result | 3 | 2 | 1 | 0 | | 3 | 1 | 2 | 0 |
-
template<typename T0, typename T1, typename = std::enable_if_t<both_integral<T0, T1>>>
inline unsigned sort8x8hi(T0 src0, T1 src1) Targets the
sort8x8hiinstruction.- Parameters
src0 – An integer value.
src1 – An integer value.
- Returns
The upper word of the result of a SIMD sort8 permutation on the 8 8-bit values across
src0andsrc1, as anunsigned. src0 src1 -> Result | 7 | 6 | 5 | 4 | | 3 | 2 | 1 | 0 | | 7 | 5 | 3 | 1 |
-
template<typename T0, typename T1, typename = std::enable_if_t<both_integral<T0, T1>>>
inline unsigned sort8x8lo(T0 src0, T1 src1) Targets the
sort8x8loinstruction.- Parameters
src0 – An integer value.
src1 – An integer value.
- Returns
The lower word of the result of a SIMD sort8 permutation on the 8 8-bit values across
src0andsrc1, as anunsigned. src0 src1 -> Result | 7 | 6 | 5 | 4 | | 3 | 2 | 1 | 0 | | 6 | 4 | 2 | 0 |
-
inline unsigned swap8(unsigned src)
Targets the
swap8 instruction.
- Parameters
src – A value of type
unsigned.- Returns
The result of a SIMD swap permutation on the 4 8-bit values in
src, as anunsigned. src -> Result | 3 | 2 | 1 | 0 | | 2 | 3 | 0 | 1 |
-
inline half2 absadd(half2 src0, half2 src1)
Targets the
f16v2absaddinstruction.- Parameters
src0 – A value of type
half2.src1 – A value of type
half2.
- Returns
The result of an element-wise addition of absolute values in
src0andsrc1.
-
inline half4 absadd(half4 src0, half4 src1)
Targets the
f16v4absaddinstruction.- Parameters
src0 – A value of type
half4.src1 – A value of type
half4.
- Returns
The result of an element-wise addition of absolute values in
src0andsrc1.
-
inline float2 absadd(float2 src0, float2 src1)
Targets the
f32v2absaddinstruction.- Parameters
src0 – A value of type
float2.src1 – A value of type
float2.
- Returns
The result of an element-wise addition of absolute values in
src0andsrc1.
-
inline float absadd(float src0, float src1)
Targets the
f32absaddinstruction.- Parameters
src0 – A value of type
float.src1 – A value of type
float.
- Returns
The result of a scalar addition of absolute values
src0andsrc1.
-
inline half2 absmax(half2 src0, half2 src1)
Targets the
f16v2absmaxinstruction.- Parameters
src0 – A value of type
half2.src1 – A value of type
half2.
- Returns
The element-wise maximum of absolute values in
src0andsrc1.
-
inline half4 absmax(half4 src0, half4 src1)
Targets the
f16v4absmaxinstruction.- Parameters
src0 – A value of type
half4.src1 – A value of type
half4.
- Returns
The element-wise maximum of absolute values in
src0andsrc1.
-
inline float2 absmax(float2 src0, float2 src1)
Targets the
f32v2absmaxinstruction.- Parameters
src0 – A value of type
float2.src1 – A value of type
float2.
- Returns
The element-wise maximum of absolute values in
src0andsrc1.
-
inline float absmax(float src0, float src1)
Targets the
f32absmaxinstruction.- Parameters
src0 – A value of type
float.src1 – A value of type
float.
- Returns
The maximum of absolute values
src0andsrc1.
-
inline half2 max(half2 src0, half2 src1)
Targets the
f16v2maxinstruction.- Parameters
src0 – A value of type
half2.src1 – A value of type
half2.
- Returns
The element-wise maximum of
src0andsrc1.
-
inline half4 max(half4 src0, half4 src1)
Targets the
f16v4maxinstruction.- Parameters
src0 – A value of type
half4.src1 – A value of type
half4.
- Returns
The element-wise maximum of
src0andsrc1.
-
inline float2 max(float2 src0, float2 src1)
Targets the
f32v2maxinstruction.- Parameters
src0 – A value of type
float2.src1 – A value of type
float2.
- Returns
The element-wise maximum of
src0andsrc1.
-
inline float max(float src0, float src1)
Targets the
f32maxinstruction.- Parameters
src0 – A value of type
float.src1 – A value of type
float.
- Returns
The maximum of
src0andsrc1.
-
inline half2 maxc(half4 src)
Targets the
f16v4maxcinstruction.- Parameters
src – A value of type
half4.- Returns
The 2x2 lateral maximum of
src. The 0th element in the result vector is the maximum of src[0] and src[1], and the 1st element is the maximum of src[2] and src[3].
-
inline half2 min(half2 src0, half2 src1)
Targets the
f16v2mininstruction.- Parameters
src0 – A value of type
half2.src1 – A value of type
half2.
- Returns
The element-wise minimum of
src0andsrc1.
-
inline half4 min(half4 src0, half4 src1)
Targets the
f16v4mininstruction.- Parameters
src0 – A value of type
half4.src1 – A value of type
half4.
- Returns
The element-wise minimum of
src0andsrc1.
-
inline float2 min(float2 src0, float2 src1)
Targets the
f32v2mininstruction.- Parameters
src0 – A value of type
float2.src1 – A value of type
float2.
- Returns
The element-wise minimum of
src0andsrc1.
-
inline float min(float src0, float src1)
Targets the
f32mininstruction.- Parameters
src0 – A value of type
float.src1 – A value of type
float.
- Returns
The minimum of
src0andsrc1.
-
inline half2 clamp(half2 src0, half2 src1)
Targets the
f16v2clampinstruction.- Parameters
src0 – A value of type
half2.src1 – A value of type
half2.
- Returns
The min-of-maximum result of
src0andsrc1, of typehalf2. The first element is the median value of the first element ofsrc0and the two elements insrc1. The second element is the median of the second element ofsrc0and the two elements insrc1.
-
inline half4 clamp(half4 src0, half2 src1)
Targets the
f16v4clampinstruction.- Parameters
src0 – A value of type
half4.src1 – A value of type
half2.
- Returns
The min-of-maximum result of
src0andsrc1, of typehalf4. Each element is the median of the element insrc0at the same index, and the two values insrc1.
-
inline float2 clamp(float2 src0, float2 src1)
Targets the
f32v2clampinstruction.- Parameters
src0 – A value of type
float2.src1 – A value of type
float2.
- Returns
The min-of-maximum result of
src0andsrc1, of typefloat2. The first element is the median of the first element ofsrc0and the two elements insrc1. The second element is the median of the second element ofsrc0and the two elements insrc1.
-
inline float clamp(float src0, float2 src1)
Targets the
f32clampinstruction.- Parameters
src0 – A value of type
float.src1 – A value of type
float2.
- Returns
The median of
src0and the two elements insrc1.
-
inline void cmac(half2 src0, half2 src1)
Targets the
f16v2cmacinstruction.- Parameters
src0 – A value of type
half2.src1 – A value of type
half2.
-
inline void cmac(half4 src0, half4 src1)
Targets the
f16v4cmacinstruction.- Parameters
src0 – A value of type
half4.src1 – A value of type
half4.
-
inline half2 exp(half2 src)
Targets the
f16v2expinstruction.- Parameters
src – A value of type
half2.- Returns
A vector of the results of
e^x for each of the two elements x in src.
-
inline float exp(float src)
Targets the
f32expinstruction.- Parameters
src – A value of type
float.- Returns
The result of
e^{src}.
-
inline half2 exp2(half2 src)
Targets the
f16v2exp2 instruction.
- Parameters
src – A value of type
half2.- Returns
A vector of the results of 2^X of the two elements in
src.
-
inline float exp2(float src)
Targets the
f32exp2 instruction.
- Parameters
src – A value of type
float.- Returns
The result of
2^{src}.
-
inline half2 log2(half2 src)
Targets the
f16v2log2instruction.- Parameters
src – A value of type
half2.- Returns
A vector of the results of the log (base 2) of the two elements in
src.
-
inline float log2(float src)
Targets the
f32log2 instruction.
- Parameters
src – A value of type
float.
- Returns
The result of the log (base 2) of
src.
-
inline half2 tanh(half2 src)
Targets the
f16v2tanhinstruction.- Parameters
src – A value of type
half2.- Returns
The result of an element-wise application of tanh to the two elements in src.
-
inline float tanh(float src)
Targets the
f32tanhinstruction.- Parameters
src – A value of type
float.- Returns
The result of tanh(src).
-
inline half2 ln(half2 src)
Targets the
f16v2lninstruction.- Parameters
src – A value of type
half2.- Returns
A vector of the results of the natural log of the two elements in
src.
-
inline float ln(float src)
Targets the
f32lninstruction.- Parameters
src – A value of type
float.
- Returns
The result of the natural log of
src.
-
inline float2 axpy(float2 src0, float2 src1)
Targets the
f32v2axpyinstruction.- Parameters
src0 – A value of type
float2.src1 – A value of type
float2.
- Returns
The single-precision two-element vector
res = a*src0 + src1. The scalar multiplicand a is provided by the internal state element $TAS.
-
inline half2 f16v2grand()
Targets the
f16v2grandinstruction.- Returns
Gaussian distribution, two-element half-precision random vector.
-
inline float2 f32v2grand()
Targets the
f32v2grandinstruction.- Returns
Gaussian distribution, two-element single-precision random vector.
-
inline half4 rmask(half4 src0, float src1)
Targets the
f16v4rmaskinstruction.- Parameters
src0 – A value of type
half4.src1 – A value of type
float.
- Returns
The result is a masked version of
src0, with each element of the input being individually masked with the probability specified by the bottom 17 bits of src1:
- if src1[16] == 1, no masking is applied;
- if src1[16:0] == 0, the result is a zero vector;
- otherwise each element is individually unmasked with probability src1[15:0] / 65536.
The PRNG is used by this instruction to generate 4 x 16-bit random values from the discrete uniform distribution.
-
inline float2 rmask(float2 src0, float src1)
Targets the
f32v2rmaskinstruction.- Parameters
src0 – A value of type
float2.src1 – A value of type
float.
- Returns
The result is a masked version of
src0, with each element of the input being individually masked with the probability specified by the bottom 17 bits of src1:
- if src1[16] == 1, no masking is applied;
- if src1[16:0] == 0, the result is a zero vector;
- otherwise each element is individually unmasked with probability src1[15:0] / 65536.
The PRNG is used by this instruction to generate 2 x 16-bit random values from the discrete uniform distribution.
-
inline half2 sigm(half2 src)
Targets the
f16v2sigminstruction.- Parameters
src – A value of type
half2.- Returns
The result of an element-wise application of the sigmoid function on
src.
-
inline float sigm(float src)
Targets the
f32sigminstruction.- Parameters
src – A value of type
float.- Returns
The result of applying the sigmoid function to
src.
-
inline float sum(half2 src)
Targets the
f16v2suminstruction.- Parameters
src – A value of type
half2.- Returns
The sum of the two elements in
srcas afloat.
-
inline float2 sum(half4 src)
Targets the
f16v4suminstruction.- Parameters
src – A value of type
half4.
- Returns
The 2x2 lateral summation of the elements in
srcas afloat2. The first element is the sum ofsrc[0] andsrc[1], the second element is the sum ofsrc[2] andsrc[3].
-
inline half2 cmpeq(half2 src0, half2 src1)
Targets the
f16v2cmpeqinstruction.- Parameters
src0 – A value of type
half2.src1 – A value of type
half2.
- Returns
Element-wise equality test of
src0andsrc1. If src0[i] == src1[i], the result vector element at indexiwill be0xffff, and0x0000otherwise.
-
inline half4 cmpeq(half4 src0, half4 src1)
Targets the
f16v4cmpeqinstruction.- Parameters
src0 – A value of type
half4.src1 – A value of type
half4.
- Returns
Element-wise equality test of
src0andsrc1. If src0[i] == src1[i], the result vector element at indexiwill be0xffff, and0x0000otherwise.
-
inline float2 cmpeq(float2 src0, float2 src1)
Targets the
f32v2cmpeqinstruction.- Parameters
src0 – A value of type
float2.src1 – A value of type
float2.
- Returns
Element-wise equality test of
src0andsrc1. If src0[i] == src1[i], the result vector element at indexiwill be0xffff, and0x0000otherwise.
-
inline float cmpeq(float src0, float src1)
Targets the
f32cmpeqinstruction.- Parameters
src0 – A value of type
float.src1 – A value of type
float.
- Returns
Equality test of
src0andsrc1. Ifsrc0==src1the result will be0xffff, and0x0000otherwise.
-
inline half2 cmpge(half2 src0, half2 src1)
Targets the
f16v2cmpgeinstruction.- Parameters
src0 – A value of type
half2.src1 – A value of type
half2.
- Returns
Element-wise greater-than-or-equal-to test of
src0andsrc1. Ifsrc0[i] >=src1[i] the result vector element at indexiwill be0xffff, and0x0000otherwise.
-
inline half4 cmpge(half4 src0, half4 src1)
Targets the
f16v4cmpgeinstruction.- Parameters
src0 – A value of type
half4.src1 – A value of type
half4.
- Returns
Element-wise greater-than-or-equal-to test of
src0andsrc1. Ifsrc0[i] >=src1[i] the result vector element at indexiwill be0xffff, and0x0000otherwise.
-
inline float2 cmpge(float2 src0, float2 src1)
Targets the
f32v2cmpgeinstruction.- Parameters
src0 – A value of type
float2.src1 – A value of type
float2.
- Returns
Element-wise greater-than-or-equal-to test of
src0andsrc1. Ifsrc0[i] >=src1[i] the result vector element at indexiwill be0xffff, and0x0000otherwise.
-
inline float cmpge(float src0, float src1)
Targets the
f32cmpgeinstruction.- Parameters
src0 – A value of type
float.src1 – A value of type
float.
- Returns
Greater-than-or-equal-to test of
src0andsrc1. Ifsrc0>=src1the result will be0xffff, and0x0000otherwise.
-
inline half2 cmpgt(half2 src0, half2 src1)
Targets the
f16v2cmpgtinstruction.- Parameters
src0 – A value of type
half2.src1 – A value of type
half2.
- Returns
Element-wise greater-than test of
src0 and src1. If src0[i] > src1[i], the result vector element at index i will be 0xffff, and 0x0000 otherwise.
-
inline half4 cmpgt(half4 src0, half4 src1)
Targets the
f16v4cmpgtinstruction.- Parameters
src0 – A value of type
half4.src1 – A value of type
half4.
- Returns
Element-wise greater-than test of
src0 and src1. If src0[i] > src1[i], the result vector element at index i will be 0xffff, and 0x0000 otherwise.
-
inline float2 cmpgt(float2 src0, float2 src1)
Targets the
f32v2cmpgtinstruction.- Parameters
src0 – A value of type
float2.src1 – A value of type
float2.
- Returns
Element-wise greater-than test of
src0 and src1. If src0[i] > src1[i], the result vector element at index i will be 0xffff, and 0x0000 otherwise.
-
inline float cmpgt(float src0, float src1)
Targets the
f32cmpgtinstruction.- Parameters
src0 – A value of type
float.src1 – A value of type
float.
- Returns
Greater-than test of
src0andsrc1. Ifsrc0>src1the result will be0xffff, and0x0000otherwise.
-
inline half2 cmple(half2 src0, half2 src1)
Targets the
f16v2cmpleinstruction.- Parameters
src0 – A value of type
half2.src1 – A value of type
half2.
- Returns
Element-wise less-than-or-equal-to test of
src0 and src1. If src0[i] <= src1[i], the result vector element at index i will be 0xffff, and 0x0000 otherwise.
-
inline half4 cmple(half4 src0, half4 src1)
Targets the
f16v4cmpleinstruction.- Parameters
src0 – A value of type
half4.src1 – A value of type
half4.
- Returns
Element-wise less-than-or-equal-to test of
src0 and src1. If src0[i] <= src1[i], the result vector element at index i will be 0xffff, and 0x0000 otherwise.
-
inline float2 cmple(float2 src0, float2 src1)
Targets the
f32v2cmpleinstruction.- Parameters
src0 – A value of type
float2.src1 – A value of type
float2.
- Returns
Element-wise less-than-or-equal-to test of
src0 and src1. If src0[i] <= src1[i], the result vector element at index i will be 0xffff, and 0x0000 otherwise.
-
inline float cmple(float src0, float src1)
Targets the
f32cmpleinstruction.- Parameters
src0 – A value of type
float.src1 – A value of type
float.
- Returns
Less-than-or-equal-to test of
src0andsrc1. Ifsrc0<=src1the result will be0xffff, and0x0000otherwise.
-
inline half2 cmplt(half2 src0, half2 src1)
Targets the
f16v2cmpltinstruction.- Parameters
src0 – A value of type
half2.src1 – A value of type
half2.
- Returns
Element-wise less-than test of
src0 and src1. If src0[i] < src1[i], the result vector element at index i will be 0xffff, and 0x0000 otherwise.
-
inline half4 cmplt(half4 src0, half4 src1)
Targets the
f16v4cmpltinstruction.- Parameters
src0 – A value of type
half4.src1 – A value of type
half4.
- Returns
Element-wise less-than test of
src0 and src1. If src0[i] < src1[i], the result vector element at index i will be 0xffff, and 0x0000 otherwise.
-
inline float2 cmplt(float2 src0, float2 src1)
Targets the
f32v2cmpltinstruction.- Parameters
src0 – A value of type
float2.src1 – A value of type
float2.
- Returns
Element-wise less-than test of
src0 and src1. If src0[i] < src1[i], the result vector element at index i will be 0xffff, and 0x0000 otherwise.
-
inline float cmplt(float src0, float src1)
Targets the
f32cmpltinstruction.- Parameters
src0 – A value of type
float.src1 – A value of type
float.
- Returns
Less-than test of
src0andsrc1. Ifsrc0<src1the result will be0xffff, and0x0000otherwise.
-
inline half2 cmpne(half2 src0, half2 src1)
Targets the
f16v2cmpneinstruction.- Parameters
src0 – A value of type
half2.src1 – A value of type
half2.
- Returns
Element-wise inequality test of
src0andsrc1. If src0[i] != src1[i], the result vector element at indexiwill be0xffff, and0x0000otherwise.
-
inline half4 cmpne(half4 src0, half4 src1)
Targets the
f16v4cmpneinstruction.- Parameters
src0 – A value of type
half4.src1 – A value of type
half4.
- Returns
Element-wise inequality test of
src0andsrc1. If src0[i] != src1[i], the result vector element at indexiwill be0xffff, and0x0000otherwise.
-
inline float2 cmpne(float2 src0, float2 src1)
Targets the
f32v2cmpneinstruction.- Parameters
src0 – A value of type
float2.src1 – A value of type
float2.
- Returns
Element-wise inequality test of
src0andsrc1. If src0[i] != src1[i], the result vector element at indexiwill be0xffff, and0x0000otherwise.
-
inline float cmpne(float src0, float src1)
Targets the
f32cmpneinstruction.- Parameters
src0 – A value of type
float.src1 – A value of type
float.
- Returns
Inequality test of
src0andsrc1. Ifsrc0!=src1the result will be0xffff, and0x0000otherwise.
-
inline unsigned clz(int src)
Targets the
clzinstruction.- Parameters
src – A value of type
int.- Returns
The number of leading zero bits in
src, that is, the count of consecutive zero bits starting from the most significant bit.
-
inline unsigned popc(int src)
Targets the
popcinstruction.- Parameters
src – A value of type
int.- Returns
The number of set bits in
src.
-
inline short2 roll16(short2 src0, short2 src1)
Targets the
roll16instruction.- Parameters
src0 – A value of type
short2.src1 – A value of type
short2.
- Returns
The result of a SIMD roll permutation on the 4 16-bit values across
src0andsrc1, as ashort2. src0 src1 -> Result | 3 | 2 | | 1 | 0 | | 2 | 1 |
-
inline ushort2 roll16(ushort2 src0, ushort2 src1)
Targets the
roll16instruction.- Parameters
src0 – A value of type
ushort2.src1 – A value of type
ushort2.
- Returns
The result of a SIMD roll permutation on the 4 16-bit values across
src0andsrc1, as aushort2. src0 src1 -> Result | 3 | 2 | | 1 | 0 | | 2 | 1 |
-
inline half2 roll16(half2 src0, half2 src1)
Targets the
roll16instruction.- Parameters
src0 – A value of type
half2.src1 – A value of type
half2.
- Returns
The result of a SIMD roll permutation on the 4 16-bit values across
src0andsrc1, as ahalf2. src0 src1 -> Result | 3 | 2 | | 1 | 0 | | 2 | 1 |
-
inline short2 sort4x16hi(short2 src0, short2 src1)
Targets the
sort4x16hiinstruction.- Parameters
src0 – A value of type
short2.src1 – A value of type
short2.
- Returns
The result of a SIMD sort permutation on the 4 16-bit values across
src0andsrc1, as ashort2. src0 src1 -> Result | 3 | 2 | | 1 | 0 | | 3 | 1 |
-
inline ushort2 sort4x16hi(ushort2 src0, ushort2 src1)
Targets the
sort4x16hiinstruction.- Parameters
src0 – A value of type
ushort2.src1 – A value of type
ushort2.
- Returns
The result of a SIMD sort permutation on the 4 16-bit values across
src0andsrc1, as aushort2. src0 src1 -> Result | 3 | 2 | | 1 | 0 | | 3 | 1 |
-
inline half2 sort4x16hi(half2 src0, half2 src1)
Targets the
sort4x16hiinstruction.- Parameters
src0 – A value of type
half2.src1 – A value of type
half2.
- Returns
The result of a SIMD sort permutation on the 4 16-bit values across
src0andsrc1, as ahalf2. src0 src1 -> Result | 3 | 2 | | 1 | 0 | | 3 | 1 |
-
inline short2 sort4x16lo(short2 src0, short2 src1)
Targets the
sort4x16loinstruction.- Parameters
src0 – A value of type
short2.src1 – A value of type
short2.
- Returns
The result of a SIMD sort permutation on the 4 16-bit values across
src0andsrc1, as ashort2. src0 src1 -> Result | 3 | 2 | | 1 | 0 | | 2 | 0 |
-
inline ushort2 sort4x16lo(ushort2 src0, ushort2 src1)
Targets the
sort4x16loinstruction.- Parameters
src0 – A value of type
ushort2.src1 – A value of type
ushort2.
- Returns
The result of a SIMD sort permutation on the 4 16-bit values across
src0andsrc1, as aushort2. src0 src1 -> Result | 3 | 2 | | 1 | 0 | | 2 | 0 |
-
inline half2 sort4x16lo(half2 src0, half2 src1)
Targets the
sort4x16loinstruction.- Parameters
src0 – A value of type
half2.src1 – A value of type
half2.
- Returns
The result of a SIMD sort permutation on the 4 16-bit values across
src0andsrc1, as ahalf2. src0 src1 -> Result | 3 | 2 | | 1 | 0 | | 2 | 0 |
-
template<typename T0, typename T1, typename = std::enable_if_t<both_integral<T0, T1>>>