IPU C++ intrinsics
Functions that target single IPU instructions.
These intrinsic functions target single IPU instructions and may be used in C++ IPU code. Each function is named after the instruction it targets. Floating-point instructions that have a type prefix (for example, f16v2) often omit this prefix in their function name. For example, use cmpeq() to target any of the cmpeq instructions, such as f16v2cmpeq.
Refer to the “Tile Worker ISA” for more detailed information on the instructions targeted by these intrinsics.
Defines
-
IPU_INTRINSICS_INCLUDED
Variables
-
namespace ipu
IPU intrinsic functions.
Functions
-
template<typename T0, typename T1, typename = std::enable_if_t<both_integral<T0, T1>>>
inline unsigned andc(T0 src0, T1 src1) Targets the
andc
instruction.- Parameters
src0 – An integer value.
src1 – An integer value, can be a 12-bit constant.
- Returns
The bitwise logical
and
ofsrc0
and the negated value ofsrc1
of typeunsigned
.
-
inline float andc(float src0, float src1)
Targets the
andc
instruction.- Parameters
src0 – A value of type
float
.src1 – A value of type
float
.
- Returns
The bitwise logical
and
ofsrc0
and the negated value ofsrc1
of typefloat
.
-
inline float2 andc(float2 src0, float2 src1)
Targets the
andc64
instruction.- Parameters
src0 – A value of type
float2
.src1 – A value of type
float2
.
- Returns
The bitwise logical
and
ofsrc0
and the negated value ofsrc1
of typefloat2
.
-
inline unsigned bitrev8(unsigned src)
Targets the
bitrev8
instruction.- Parameters
src – A value of type
unsigned
.- Returns
A result of type
unsigned
that is equivalent to the value ofsrc
with the bit order of each byte reversed.
-
inline unsigned cms(unsigned src)
Targets the
cms
instruction.- Parameters
src – A value of type
unsigned
.- Returns
The number of higher order bits in
src
that match the sign bit (bit 31), as anunsigned
.
-
inline float2 roll32(float2 src0, float2 src1)
Targets the
roll32
instruction.- Parameters
src0 – A value of type
float2
.src1 – A value of type
float2
.
- Returns
The result of a SIMD roll permutation on the 4 32-bit float values across
src0
andsrc1
, as afloat2
. src0 src1 -> Result | 3 | 2 | | 1 | 0 | | 2 | 1 |
-
template<typename T0, typename T1, typename = std::enable_if_t<both_integral<T0, T1>>>
inline unsigned roll8l(T0 src0, T1 src1) Targets the
roll8l
instruction.- Parameters
src0 – An integer value.
src1 – An integer value.
- Returns
The result of a SIMD roll-left permutation on the 8 8-bit values across
src0
andsrc1
, as anunsigned
. src0 src1 -> Result | 7 | 6 | 5 | 4 | | 3 | 2 | 1 | 0 | | 6 | 5 | 4 | 3 |
-
template<typename T0, typename T1, typename = std::enable_if_t<both_integral<T0, T1>>>
inline unsigned roll8r(T0 src0, T1 src1) Targets the
roll8r
instruction.- Parameters
src0 – An integer value.
src1 – An integer value.
- Returns
The result of a SIMD roll-right permutation on the 8 8-bit values across
src0
andsrc1
, as anunsigned
. src0 src1 -> Result | 7 | 6 | 5 | 4 | | 3 | 2 | 1 | 0 | | 4 | 3 | 2 | 1 |
-
template<typename T0, typename T1, typename = std::enable_if_t<both_integral<T0, T1>>>
inline unsigned shuf8x8hi(T0 src0, T1 src1) Targets the
shuf8x8hi
instruction.- Parameters
src0 – An integer value.
src1 – An integer value.
- Returns
The upper word of a SIMD shuffle permutation on the 8 8-bit values across
src0
andsrc1
, as anunsigned
. src0 src1 -> Result | 7 | 6 | 5 | 4 | | 3 | 2 | 1 | 0 | | 7 | 3 | 6 | 2 |
-
template<typename T0, typename T1, typename = std::enable_if_t<both_integral<T0, T1>>>
inline unsigned shuf8x8lo(T0 src0, T1 src1) Targets the
shuf8x8lo
instruction.- Parameters
src0 – An integer value.
src1 – An integer value.
- Returns
The lower word of a SIMD shuffle permutation on the 8 8-bit values across
src0
andsrc1
, as anunsigned
. src0 src1 -> Result | 7 | 6 | 5 | 4 | | 3 | 2 | 1 | 0 | | 5 | 1 | 4 | 0 |
-
inline float2 sort4x32hi(float2 src0, float2 src1)
Targets the
sort4x32hi
instruction.- Parameters
src0 – A value of type
float2
.src1 – A value of type
float2
.
- Returns
The result of a SIMD sort permutation on the 4 32-bit float values across
src0
andsrc1
, as afloat2
. src0 src1 -> Result | 3 | 2 | | 1 | 0 | | 3 | 1 |
-
inline float2 sort4x32lo(float2 src0, float2 src1)
Targets the
sort4x32lo
instruction.- Parameters
src0 – A value of type
float2
.src1 – A value of type
float2
.
- Returns
The result of a SIMD sort permutation on the 4 32-bit float values across
src0
andsrc1
, as afloat2
. src0 src1 -> Result | 3 | 2 | | 1 | 0 | | 2 | 0 |
-
inline unsigned sort8(unsigned src)
Targets the
sort8
instruction.- Parameters
src – A value of type
unsigned
.- Returns
The result of a SIMD sort8 permutation on the 4 8-bit values in
src
, as anunsigned
. src -> Result | 3 | 2 | 1 | 0 | | 3 | 1 | 2 | 0 |
-
template<typename T0, typename T1, typename = std::enable_if_t<both_integral<T0, T1>>>
inline unsigned sort8x8hi(T0 src0, T1 src1) Targets the
sort8x8hi
instruction.- Parameters
src0 – An integer value.
src1 – An integer value.
- Returns
The upper word of the result of a SIMD sort8 permutation on the 8 8-bit values across
src0
andsrc1
, as anunsigned
. src0 src1 -> Result | 7 | 6 | 5 | 4 | | 3 | 2 | 1 | 0 | | 7 | 5 | 3 | 1 |
-
template<typename T0, typename T1, typename = std::enable_if_t<both_integral<T0, T1>>>
inline unsigned sort8x8lo(T0 src0, T1 src1) Targets the
sort8x8lo
instruction.- Parameters
src0 – An integer value.
src1 – An integer value.
- Returns
The lower word of the result of a SIMD sort8 permutation on the 8 8-bit values across
src0
andsrc1
, as anunsigned
. src0 src1 -> Result | 7 | 6 | 5 | 4 | | 3 | 2 | 1 | 0 | | 6 | 4 | 2 | 0 |
-
inline unsigned swap8(unsigned src)
Targets the
swap8
instruction.- Parameters
src – A value of type
unsigned
.- Returns
The result of a SIMD swap permutation on the 4 8-bit values in
src
, as anunsigned
. src -> Result | 3 | 2 | 1 | 0 | | 2 | 3 | 0 | 1 |
-
inline half2 absadd(half2 src0, half2 src1)
Targets the
f16v2absadd
instruction.- Parameters
src0 – A value of type
half2
.src1 – A value of type
half2
.
- Returns
The result of an element-wise addition of absolute values in
src0
andsrc1
.
-
inline half4 absadd(half4 src0, half4 src1)
Targets the
f16v4absadd
instruction.- Parameters
src0 – A value of type
half4
.src1 – A value of type
half4
.
- Returns
The result of an element-wise addition of absolute values in
src0
andsrc1
.
-
inline float2 absadd(float2 src0, float2 src1)
Targets the
f32v2absadd
instruction.- Parameters
src0 – A value of type
float2
.src1 – A value of type
float2
.
- Returns
The result of an element-wise addition of absolute values in
src0
andsrc1
.
-
inline float absadd(float src0, float src1)
Targets the
f32absadd
instruction.- Parameters
src0 – A value of type
float
.src1 – A value of type
float
.
- Returns
The result of a scalar addition of absolute values
src0
andsrc1
.
-
inline half2 absmax(half2 src0, half2 src1)
Targets the
f16v2absmax
instruction.- Parameters
src0 – A value of type
half2
.src1 – A value of type
half2
.
- Returns
The element-wise maximum of absolute values in
src0
andsrc1
.
-
inline half4 absmax(half4 src0, half4 src1)
Targets the
f16v4absmax
instruction.- Parameters
src0 – A value of type
half4
.src1 – A value of type
half4
.
- Returns
The element-wise maximum of absolute values in
src0
andsrc1
.
-
inline float2 absmax(float2 src0, float2 src1)
Targets the
f32v2absmax
instruction.- Parameters
src0 – A value of type
float2
.src1 – A value of type
float2
.
- Returns
The element-wise maximum of absolute values in
src0
andsrc1
.
-
inline float absmax(float src0, float src1)
Targets the
f32absmax
instruction.- Parameters
src0 – A value of type
float
.src1 – A value of type
float
.
- Returns
The maximum of absolute values
src0
andsrc1
.
-
inline half2 max(half2 src0, half2 src1)
Targets the
f16v2max
instruction.- Parameters
src0 – A value of type
half2
.src1 – A value of type
half2
.
- Returns
The element-wise maximum of
src0
andsrc1
.
-
inline half4 max(half4 src0, half4 src1)
Targets the
f16v4max
instruction.- Parameters
src0 – A value of type
half4
.src1 – A value of type
half4
.
- Returns
The element-wise maximum of
src0
andsrc1
.
-
inline float2 max(float2 src0, float2 src1)
Targets the
f32v2max
instruction.- Parameters
src0 – A value of type
float2
.src1 – A value of type
float2
.
- Returns
The element-wise maximum of
src0
andsrc1
.
-
inline float max(float src0, float src1)
Targets the
f32max
instruction.- Parameters
src0 – A value of type
float
.src1 – A value of type
float
.
- Returns
The maximum of
src0
andsrc1
.
-
inline half2 maxc(half4 src)
Targets the
f16v4maxc
instruction.- Parameters
src – A value of type
half4
.- Returns
The 2x2 lateral maximum of
src
. The 0th element in the result vector is the maximum of src[0] and src[1], and the 1st element is the maximum of src[2] and src[3].
-
inline half2 min(half2 src0, half2 src1)
Targets the
f16v2min
instruction.- Parameters
src0 – A value of type
half2
.src1 – A value of type
half2
.
- Returns
The element-wise minimum of
src0
andsrc1
.
-
inline half4 min(half4 src0, half4 src1)
Targets the
f16v4min
instruction.- Parameters
src0 – A value of type
half4
.src1 – A value of type
half4
.
- Returns
The element-wise minimum of
src0
andsrc1
.
-
inline float2 min(float2 src0, float2 src1)
Targets the
f32v2min
instruction.- Parameters
src0 – A value of type
float2
.src1 – A value of type
float2
.
- Returns
The element-wise minimum of
src0
andsrc1
.
-
inline float min(float src0, float src1)
Targets the
f32min
instruction.- Parameters
src0 – A value of type
float
.src1 – A value of type
float
.
- Returns
The minimum of
src0
andsrc1
.
-
inline half2 clamp(half2 src0, half2 src1)
Targets the
f16v2clamp
instruction.- Parameters
src0 – A value of type
half2
.src1 – A value of type
half2
.
- Returns
The min-of-maximum result of
src0
andsrc1
, of typehalf2
. The first element is the median value of the first element ofsrc0
and the two elements insrc1
. The second element is the median of the second element ofsrc0
and the two elements insrc1
.
-
inline half4 clamp(half4 src0, half2 src1)
Targets the
f16v4clamp
instruction.- Parameters
src0 – A value of type
half4
.src1 – A value of type
half2
.
- Returns
The min-of-maximum result of
src0
andsrc1
, of typehalf4
. Each element is the median of the element insrc0
at the same index, and the two values insrc1
.
-
inline float2 clamp(float2 src0, float2 src1)
Targets the
f32v2clamp
instruction.- Parameters
src0 – A value of type
float2
.src1 – A value of type
float2
.
- Returns
The min-of-maximum result of
src0
andsrc1
, of typefloat2
. The first element is the median of the first element ofsrc0
and the two elements insrc1
. The second element is the median of the second element ofsrc0
and the two elements insrc1
.
-
inline float clamp(float src0, float2 src1)
Targets the
f32clamp
instruction.- Parameters
src0 – A value of type
float
.src1 – A value of type
float2
.
- Returns
The median of
src0
and the two elements insrc1
.
-
inline void cmac(half2 src0, half2 src1)
Targets the
f16v2cmac
instruction.- Parameters
src0 – A value of type
half2
.src1 – A value of type
half2
.
-
inline void cmac(half4 src0, half4 src1)
Targets the
f16v4cmac
instruction.- Parameters
src0 – A value of type
half4
.src1 – A value of type
half4
.
-
inline half2 exp(half2 src)
Targets the
f16v2exp
instruction.The base-e exponential function.
- Parameters
src – A value of type
half2
.x – A value of type
half2
.
- Returns
A vector of the results of
e^X
of the two elements insrc
.- Returns
The element-wise result of
exp
ofx
.
-
inline float exp(float src)
Targets the
f32exp
instruction.The base-e exponential function.
- Parameters
src – A value of type
float
.x – A value of type
float
.
- Returns
The result of
e^{src}
.- Returns
The result of
exp
ofx
.
-
inline half2 exp2(half2 src)
Targets the
f16v2exp2
instruction.The base-2 exponential function.
- Parameters
src – A value of type
half2
.x – A value of type
half2
.
- Returns
A vector of the results of 2^X of the two elements in
src
.- Returns
The element-wise result of
exp2
ofx
.
-
inline float exp2(float src)
Targets the
f32exp2
instruction.The base-2 exponential function.
- Parameters
src – A value of type
float
.x – A value of type
float
.
- Returns
The result of
2^{src}
.- Returns
The result of
exp2
ofx
.
-
inline half2 log2(half2 src)
Targets the
f16v2log2
instruction.The base-2 logarithm.
- Parameters
src – A value of type
half2
.x – A value of type
half2
.
- Returns
A vector of the results of the log (base 2) of the two elements in
src
.- Returns
The element-wise result of
log2
ofx
.
-
inline float log2(float src)
Targets the
f32log2
instruction.The base-2 logarithm.
- Parameters
src – A value of type
float
.x – A value of type
float
.
- Returns
The result of the log (base 2) of
src
.- Returns
The result of
log2
ofx
.
-
inline half2 tanh(half2 src)
Targets the
f16v2tanh
instruction.The hyperbolic tangent function.
- Parameters
src – A value of type
half2
.x – A value of type
half2
.
- Returns
The result of tanh(src).
- Returns
The element-wise result of
tanh
ofx
.
-
inline float tanh(float src)
Targets the
f32tanh
instruction.The hyperbolic tangent function.
- Parameters
src – A value of type
float
.x – A value of type
float
.
- Returns
The result of tanh(src).
- Returns
The result of
tanh
ofx
.
-
inline half2 ln(half2 src)
Targets the
f16v2ln
instruction.- Parameters
src – A value of type
half2
.- Returns
A vector of the results of the natural log of the two elements in
src
.
-
inline float ln(float src)
Targets the
f32ln
instruction.- Parameters
src – A value of type
float
.- Returns
The result of the natural log of
src
.
-
inline float2 axpy(float2 src0, float2 src1)
Targets the
f32v2axpy
instruction.- Parameters
src0 – A value of type
float2
.src1 – A value of type
float2
.
- Returns
The single precision two-element vector
res = a*src0 + src1
. The scalar multiplicand a
is provided by the internal state element $TAS
.
-
inline half2 f16v2grand()
Targets the
f16v2grand
instruction.- Returns
Gaussian distribution, two-element half-precision random vector.
-
inline float2 f32v2grand()
Targets the
f32v2grand
instruction.- Returns
Gaussian distribution, two-element single-precision random vector.
-
inline half4 rmask(half4 src0, float src1)
Targets the
f16v4rmask
instruction.- Parameters
src0 – A value of type
half4
.src1 – A value of type
float
.
- Returns
The result is a masked version of
src0
, with each element of the input being individually masked with the probability specified by the bottom 17-bits ofsrc1:
if
src1
[16] == 1, no masking is applied;if
src1
[16:0] == 0, the result is a zero vector;otherwise each element is individually unmasked with probability
src1
[15:0] / 65536. PRNG is used by this instruction to generate 4 x 16-bit random values from the discrete uniform distribution.
-
inline float2 rmask(float2 src0, float src1)
Targets the
f32v2rmask
instruction.- Parameters
src0 – A value of type
float2
.src1 – A value of type
float
.
- Returns
The result is a masked version of
src0
, with each element of the input being individually masked with the probability specified by the bottom 17-bits ofsrc1:
if
src1
[16] == 1, no masking is applied;if
src1
[16:0] == 0, the result is a zero vector;otherwise each element is individually unmasked with probability
src1
[15:0] / 65536. PRNG is used by this instruction to generate 2 x 16-bit random values from the discrete uniform distribution.
-
inline half2 sigm(half2 src)
Targets the
f16v2sigm
instruction.- Parameters
src – A value of type
half2
.- Returns
The result of an element-wise application of the sigmoid function on
src
.
-
inline float sigm(float src)
Targets the
f32sigm
instruction.- Parameters
src – A value of type
float
.- Returns
The result of applying the sigmoid function to
src
.
-
inline float sum(half2 src)
Targets the
f16v2sum
instruction.- Parameters
src – A value of type
half2
.- Returns
The sum of the two elements in
src
as afloat
.
-
inline float2 sum(half4 src)
Targets the
f16v4sum
instruction.- Parameters
src – A value of type
half4
.- Returns
The 2x2 lateral summation of the elements in
src
as afloat2
. The first element is the sum ofsrc
[0] andsrc
[1], the second element is the sum ofsrc
[2] andsrc
[3].
-
inline half2 cmpeq(half2 src0, half2 src1)
Targets the
f16v2cmpeq
instruction.- Parameters
src0 – A value of type
half2
.src1 – A value of type
half2
.
- Returns
Element-wise equality test of
src0
andsrc1
. If src0[i] == src1[i], the result vector element at indexi
will be0xffff
, and0x0000
otherwise.
-
inline half4 cmpeq(half4 src0, half4 src1)
Targets the
f16v4cmpeq
instruction.- Parameters
src0 – A value of type
half4
.src1 – A value of type
half4
.
- Returns
Element-wise equality test of
src0
andsrc1
. If src0[i] == src1[i], the result vector element at indexi
will be0xffff
, and0x0000
otherwise.
-
inline float2 cmpeq(float2 src0, float2 src1)
Targets the
f32v2cmpeq
instruction.- Parameters
src0 – A value of type
float2
.src1 – A value of type
float2
.
- Returns
Element-wise equality test of
src0
andsrc1
. If src0[i] == src1[i], the result vector element at indexi
will be0xffff
, and0x0000
otherwise.
-
inline float cmpeq(float src0, float src1)
Targets the
f32cmpeq
instruction.- Parameters
src0 – A value of type
float
.src1 – A value of type
float
.
- Returns
Equality test of
src0
andsrc1
. Ifsrc0
==src1
the result will be0xffff
, and0x0000
otherwise.
-
inline half2 cmpge(half2 src0, half2 src1)
Targets the
f16v2cmpge
instruction.- Parameters
src0 – A value of type
half2
.src1 – A value of type
half2
.
- Returns
Element-wise greater-than-or-equal-to test of
src0
andsrc1
. Ifsrc0
[i] >=src1
[i] the result vector element at indexi
will be0xffff
, and0x0000
otherwise.
-
inline half4 cmpge(half4 src0, half4 src1)
Targets the
f16v4cmpge
instruction.- Parameters
src0 – A value of type
half4
.src1 – A value of type
half4
.
- Returns
Element-wise greater-than-or-equal-to test of
src0
andsrc1
. Ifsrc0
[i] >=src1
[i] the result vector element at indexi
will be0xffff
, and0x0000
otherwise.
-
inline float2 cmpge(float2 src0, float2 src1)
Targets the
f32v2cmpge
instruction.- Parameters
src0 – A value of type
float2
.src1 – A value of type
float2
.
- Returns
Element-wise greater-than-or-equal-to test of
src0
andsrc1
. Ifsrc0
[i] >=src1
[i] the result vector element at indexi
will be0xffff
, and0x0000
otherwise.
-
inline float cmpge(float src0, float src1)
Targets the
f32cmpge
instruction.- Parameters
src0 – A value of type
float
.src1 – A value of type
float
.
- Returns
Greater-than-or-equal-to test of
src0
andsrc1
. Ifsrc0
>=src1
the result will be0xffff
, and0x0000
otherwise.
-
inline half2 cmpgt(half2 src0, half2 src1)
Targets the
f16v2cmpgt
instruction.- Parameters
src0 – A value of type
half2
.src1 – A value of type
half2
.
- Returns
Element-wise greater-than test of
src0
andsrc1
. If src0[i] > src1[i], the result vector element at index i will be 0xffff, and 0x0000 otherwise.
-
inline half4 cmpgt(half4 src0, half4 src1)
Targets the
f16v4cmpgt
instruction.- Parameters
src0 – A value of type
half4
.src1 – A value of type
half4
.
- Returns
Element-wise greater-than test of
src0
andsrc1
. If src0[i] > src1[i], the result vector element at index i will be 0xffff, and 0x0000 otherwise.
-
inline float2 cmpgt(float2 src0, float2 src1)
Targets the
f32v2cmpgt
instruction.- Parameters
src0 – A value of type
float2
.src1 – A value of type
float2
.
- Returns
Element-wise greater-than test of
src0
andsrc1
. If src0[i] > src1[i], the result vector element at index i will be 0xffff, and 0x0000 otherwise.
-
inline float cmpgt(float src0, float src1)
Targets the
f32cmpgt
instruction.- Parameters
src0 – A value of type
float
.src1 – A value of type
float
.
- Returns
Greater-than test of
src0
andsrc1
. Ifsrc0
>src1
the result will be0xffff
, and0x0000
otherwise.
-
inline half2 cmple(half2 src0, half2 src1)
Targets the
f16v2cmple
instruction.- Parameters
src0 – A value of type
half2
.src1 – A value of type
half2
.
- Returns
Element-wise less-than-or-equal-to test of
src0
andsrc1
. If src0[i] <= src1[i], the result vector element at index i will be 0xffff, and 0x0000 otherwise.
-
inline half4 cmple(half4 src0, half4 src1)
Targets the
f16v4cmple
instruction.- Parameters
src0 – A value of type
half4
.src1 – A value of type
half4
.
- Returns
Element-wise less-than-or-equal-to test of
src0
andsrc1
. If src0[i] <= src1[i], the result vector element at index i will be 0xffff, and 0x0000 otherwise.
-
inline float2 cmple(float2 src0, float2 src1)
Targets the
f32v2cmple
instruction.- Parameters
src0 – A value of type
float2
.src1 – A value of type
float2
.
- Returns
Element-wise less-than-or-equal-to test of
src0
andsrc1
. If src0[i] <= src1[i], the result vector element at index i will be 0xffff, and 0x0000 otherwise.
-
inline float cmple(float src0, float src1)
Targets the
f32cmple
instruction.- Parameters
src0 – A value of type
float
.src1 – A value of type
float
.
- Returns
Less-than-or-equal-to test of
src0
andsrc1
. Ifsrc0
<=src1
the result will be0xffff
, and0x0000
otherwise.
-
inline half2 cmplt(half2 src0, half2 src1)
Targets the
f16v2cmplt
instruction.- Parameters
src0 – A value of type
half2
.src1 – A value of type
half2
.
- Returns
Element-wise less-than test of
src0
andsrc1
. If src0[i] < src1[i], the result vector element at index i will be 0xffff, and 0x0000 otherwise.
-
inline half4 cmplt(half4 src0, half4 src1)
Targets the
f16v4cmplt
instruction.- Parameters
src0 – A value of type
half4
.src1 – A value of type
half4
.
- Returns
Element-wise less-than test of
src0
andsrc1
. If src0[i] < src1[i], the result vector element at index i will be 0xffff, and 0x0000 otherwise.
-
inline float2 cmplt(float2 src0, float2 src1)
Targets the
f32v2cmplt
instruction.- Parameters
src0 – A value of type
float2
.src1 – A value of type
float2
.
- Returns
Element-wise less-than test of
src0
andsrc1
. If src0[i] < src1[i], the result vector element at index i will be 0xffff, and 0x0000 otherwise.
-
inline float cmplt(float src0, float src1)
Targets the
f32cmplt
instruction.- Parameters
src0 – A value of type
float
.src1 – A value of type
float
.
- Returns
Less-than test of
src0
andsrc1
. Ifsrc0
<src1
the result will be0xffff
, and0x0000
otherwise.
-
inline half2 cmpne(half2 src0, half2 src1)
Targets the
f16v2cmpne
instruction.- Parameters
src0 – A value of type
half2
.src1 – A value of type
half2
.
- Returns
Element-wise inequality test of
src0
andsrc1
. If src0[i] != src1[i], the result vector element at indexi
will be0xffff
, and0x0000
otherwise.
-
inline half4 cmpne(half4 src0, half4 src1)
Targets the
f16v4cmpne
instruction.- Parameters
src0 – A value of type
half4
.src1 – A value of type
half4
.
- Returns
Element-wise inequality test of
src0
andsrc1
. If src0[i] != src1[i], the result vector element at indexi
will be0xffff
, and0x0000
otherwise.
-
inline float2 cmpne(float2 src0, float2 src1)
Targets the
f32v2cmpne
instruction.- Parameters
src0 – A value of type
float2
.src1 – A value of type
float2
.
- Returns
Element-wise inequality test of
src0
andsrc1
. If src0[i] != src1[i], the result vector element at indexi
will be0xffff
, and0x0000
otherwise.
-
inline float cmpne(float src0, float src1)
Targets the
f32cmpne
instruction.- Parameters
src0 – A value of type
float
.src1 – A value of type
float
.
- Returns
Inequality test of
src0
andsrc1
. Ifsrc0
!=src1
the result will be0xffff
, and0x0000
otherwise.
-
inline unsigned clz(int src)
Targets the
clz
instruction.- Parameters
src – A value of type
int
.- Returns
The number of higher bits in
src
that are zero.
-
inline unsigned popc(int src)
Targets the
popc
instruction.- Parameters
src – A value of type
int
.- Returns
The number of set bits in
src
.
-
inline short2 roll16(short2 src0, short2 src1)
Targets the
roll16
instruction.- Parameters
src0 – A value of type
short2
.src1 – A value of type
short2
.
- Returns
The result of a SIMD roll permutation on the 4 16-bit values across
src0
andsrc1
, as ashort2
. src0 src1 -> Result | 3 | 2 | | 1 | 0 | | 2 | 1 |
-
inline ushort2 roll16(ushort2 src0, ushort2 src1)
Targets the
roll16
instruction.- Parameters
src0 – A value of type
ushort2
.src1 – A value of type
ushort2
.
- Returns
The result of a SIMD roll permutation on the 4 16-bit values across
src0
andsrc1
, as aushort2
. src0 src1 -> Result | 3 | 2 | | 1 | 0 | | 2 | 1 |
-
inline half2 roll16(half2 src0, half2 src1)
Targets the
roll16
instruction.- Parameters
src0 – A value of type
half2
.src1 – A value of type
half2
.
- Returns
The result of a SIMD roll permutation on the 4 16-bit values across
src0
andsrc1
, as ahalf2
. src0 src1 -> Result | 3 | 2 | | 1 | 0 | | 2 | 1 |
-
inline short2 sort4x16hi(short2 src0, short2 src1)
Targets the
sort4x16hi
instruction.- Parameters
src0 – A value of type
short2
.src1 – A value of type
short2
.
- Returns
The result of a SIMD sort permutation on the 4 16-bit values across
src0
andsrc1
, as ashort2
. src0 src1 -> Result | 3 | 2 | | 1 | 0 | | 3 | 1 |
-
inline ushort2 sort4x16hi(ushort2 src0, ushort2 src1)
Targets the
sort4x16hi
instruction.- Parameters
src0 – A value of type
ushort2
.src1 – A value of type
ushort2
.
- Returns
The result of a SIMD sort permutation on the 4 16-bit values across
src0
andsrc1
, as aushort2
. src0 src1 -> Result | 3 | 2 | | 1 | 0 | | 3 | 1 |
-
inline half2 sort4x16hi(half2 src0, half2 src1)
Targets the
sort4x16hi
instruction.- Parameters
src0 – A value of type
half2
.src1 – A value of type
half2
.
- Returns
The result of a SIMD sort permutation on the 4 16-bit values across
src0
andsrc1
, as ahalf2
. src0 src1 -> Result | 3 | 2 | | 1 | 0 | | 3 | 1 |
-
inline short2 sort4x16lo(short2 src0, short2 src1)
Targets the
sort4x16lo
instruction.- Parameters
src0 – A value of type
short2
.src1 – A value of type
short2
.
- Returns
The result of a SIMD sort permutation on the 4 16-bit values across
src0
andsrc1
, as ashort2
. src0 src1 -> Result | 3 | 2 | | 1 | 0 | | 2 | 0 |
-
inline ushort2 sort4x16lo(ushort2 src0, ushort2 src1)
Targets the
sort4x16lo
instruction.- Parameters
src0 – A value of type
ushort2
.src1 – A value of type
ushort2
.
- Returns
The result of a SIMD sort permutation on the 4 16-bit values across
src0
andsrc1
, as aushort2
. src0 src1 -> Result | 3 | 2 | | 1 | 0 | | 2 | 0 |
-
inline half2 sort4x16lo(half2 src0, half2 src1)
Targets the
sort4x16lo
instruction.- Parameters
src0 – A value of type
half2
.src1 – A value of type
half2
.
- Returns
The result of a SIMD sort permutation on the 4 16-bit values across
src0
andsrc1
, as ahalf2
. src0 src1 -> Result | 3 | 2 | | 1 | 0 | | 2 | 0 |
-
template<typename T0, typename T1, typename = std::enable_if_t<both_integral<T0, T1>>>