IPU C++ intrinsics
Functions that target single IPU instructions.
#include <ipu_memory_intrinsics>
These intrinsic functions target single IPU instructions and may be used in C++ IPU code. Each function is named after the instruction it targets. Floating-point instructions whose names begin with a type prefix (for example, f16v2) often omit that prefix in the function name; the operand types select the instruction. For example, use cmpeq() to target any of the cmpeq instructions, such as f16v2cmpeq.
Refer to the Tile Vertex Instruction Set Architecture for Mk2 IPUs for more detailed information on the instructions targeted by these intrinsics.
Defines
-
IPU_INTRINSICS_INCLUDED
Variables
-
namespace ipu
IPU intrinsic functions.
Functions
-
template<typename T0, typename T1, typename = std::enable_if_t<both_integral<T0, T1>>>
inline unsigned andc(T0 src0, T1 src1)
Targets the andc instruction.
- Parameters
src0 – An integer value.
src1 – An integer value, can be a 12-bit constant.
- Returns
The bitwise logical and of src0 and the negated value of src1, of type unsigned.
-
inline float andc(float src0, float src1)
Targets the
andcinstruction.- Parameters
src0 – A value of type
float.src1 – A value of type
float.
- Returns
The bitwise logical
andofsrc0and the negated value ofsrc1of typefloat.
-
inline float2 andc(float2 src0, float2 src1)
Targets the
andc64instruction.- Parameters
src0 – A value of type
float2.src1 – A value of type
float2.
- Returns
The bitwise logical
andofsrc0and the negated value ofsrc1of typefloat2.
-
inline unsigned bitrev8(unsigned src)
Targets the
bitrev8instruction.- Parameters
src – A value of type
unsigned.- Returns
A result of type
unsignedthat is equivalent to the value ofsrcwith the bit order of each byte reversed.
-
inline unsigned cms(unsigned src)
Targets the
cmsinstruction.- Parameters
src – A value of type
unsigned.- Returns
The number of higher order bits in
srcthat match the sign bit (bit 31), as anunsigned.
-
inline float2 roll32(float2 src0, float2 src1)
Targets the roll32 instruction.
- Parameters
src0 – A value of type float2.
src1 – A value of type float2.
- Returns
The result of a SIMD roll permutation on the 4 32-bit float values across src0 and src1, as a float2.
  src0        src1      ->   Result
| 3 | 2 |   | 1 | 0 |      | 2 | 1 |
-
template<typename T0, typename T1, typename = std::enable_if_t<both_integral<T0, T1>>>
inline unsigned roll8l(T0 src0, T1 src1) Targets the
roll8linstruction.- Parameters
src0 – An integer value.
src1 – An integer value.
- Returns
The result of a SIMD roll-left permutation on the 8 8-bit values across
src0andsrc1, as anunsigned. src0 src1 -> Result | 7 | 6 | 5 | 4 | | 3 | 2 | 1 | 0 | | 6 | 5 | 4 | 3 |
-
template<typename T0, typename T1, typename = std::enable_if_t<both_integral<T0, T1>>>
inline unsigned roll8r(T0 src0, T1 src1) Targets the
roll8rinstruction.- Parameters
src0 – An integer value.
src1 – An integer value.
- Returns
The result of a SIMD roll-right permutation on the 8 8-bit values across
src0andsrc1, as anunsigned. src0 src1 -> Result | 7 | 6 | 5 | 4 | | 3 | 2 | 1 | 0 | | 4 | 3 | 2 | 1 |
-
template<typename T0, typename T1, typename = std::enable_if_t<both_integral<T0, T1>>>
inline unsigned shuf8x8hi(T0 src0, T1 src1) Targets the
shuf8x8hiinstruction.- Parameters
src0 – An integer value.
src1 – An integer value.
- Returns
The upper word of a SIMD shuffle permutation on the 8 8-bit values across
src0andsrc1, as anunsigned. src0 src1 -> Result | 7 | 6 | 5 | 4 | | 3 | 2 | 1 | 0 | | 7 | 3 | 6 | 2 |
-
template<typename T0, typename T1, typename = std::enable_if_t<both_integral<T0, T1>>>
inline unsigned shuf8x8lo(T0 src0, T1 src1) Targets the
shuf8x8loinstruction.- Parameters
src0 – An integer value.
src1 – An integer value.
- Returns
The lower word of a SIMD shuffle permutation on the 8 8-bit values across
src0andsrc1, as anunsigned. src0 src1 -> Result | 7 | 6 | 5 | 4 | | 3 | 2 | 1 | 0 | | 5 | 1 | 4 | 0 |
-
inline float2 sort4x32hi(float2 src0, float2 src1)
Targets the
sort4x32hiinstruction.- Parameters
src0 – A value of type
float2.src1 – A value of type
float2.
- Returns
The result of a SIMD sort permutation on the 4 32-bit float values across
src0andsrc1, as afloat2. src0 src1 -> Result | 3 | 2 | | 1 | 0 | | 3 | 1 |
-
inline float2 sort4x32lo(float2 src0, float2 src1)
Targets the
sort4x32loinstruction.- Parameters
src0 – A value of type
float2.src1 – A value of type
float2.
- Returns
The result of a SIMD sort permutation on the 4 32-bit float values across
src0andsrc1, as afloat2. src0 src1 -> Result | 3 | 2 | | 1 | 0 | | 2 | 0 |
-
inline unsigned sort8(unsigned src)
Targets the
sort8instruction.- Parameters
src – A value of type
unsigned.- Returns
The result of a SIMD sort8 permutation on the 4 8-bit values in
src, as anunsigned. src -> Result | 3 | 2 | 1 | 0 | | 3 | 1 | 2 | 0 |
-
template<typename T0, typename T1, typename = std::enable_if_t<both_integral<T0, T1>>>
inline unsigned sort8x8hi(T0 src0, T1 src1) Targets the
sort8x8hiinstruction.- Parameters
src0 – An integer value.
src1 – An integer value.
- Returns
The upper word of the result of a SIMD sort8 permutation on the 8 8-bit values across
src0andsrc1, as anunsigned. src0 src1 -> Result | 7 | 6 | 5 | 4 | | 3 | 2 | 1 | 0 | | 7 | 5 | 3 | 1 |
-
template<typename T0, typename T1, typename = std::enable_if_t<both_integral<T0, T1>>>
inline unsigned sort8x8lo(T0 src0, T1 src1) Targets the
sort8x8loinstruction.- Parameters
src0 – An integer value.
src1 – An integer value.
- Returns
The lower word of the result of a SIMD sort8 permutation on the 8 8-bit values across
src0andsrc1, as anunsigned. src0 src1 -> Result | 7 | 6 | 5 | 4 | | 3 | 2 | 1 | 0 | | 6 | 4 | 2 | 0 |
-
inline unsigned swap8(unsigned src)
Targets the swap8 instruction.
- Parameters
src – A value of type unsigned.
- Returns
The result of a SIMD swap permutation on the 4 8-bit values in src, as an unsigned.
  src                 ->   Result
| 3 | 2 | 1 | 0 |         | 2 | 3 | 0 | 1 |
-
inline half2 absadd(half2 src0, half2 src1)
Targets the
f16v2absaddinstruction.- Parameters
src0 – A value of type
half2.src1 – A value of type
half2.
- Returns
The result of an element-wise addition of absolute values in
src0andsrc1.
-
inline half4 absadd(half4 src0, half4 src1)
Targets the
f16v4absaddinstruction.- Parameters
src0 – A value of type
half4.src1 – A value of type
half4.
- Returns
The result of an element-wise addition of absolute values in
src0andsrc1.
-
inline float2 absadd(float2 src0, float2 src1)
Targets the
f32v2absaddinstruction.- Parameters
src0 – A value of type
float2.src1 – A value of type
float2.
- Returns
The result of an element-wise addition of absolute values in
src0andsrc1.
-
inline float absadd(float src0, float src1)
Targets the
f32absaddinstruction.- Parameters
src0 – A value of type
float.src1 – A value of type
float.
- Returns
The result of a scalar addition of absolute values
src0andsrc1.
-
inline half2 absmax(half2 src0, half2 src1)
Targets the
f16v2absmaxinstruction.- Parameters
src0 – A value of type
half2.src1 – A value of type
half2.
- Returns
The element-wise maximum of absolute values in
src0andsrc1.
-
inline half4 absmax(half4 src0, half4 src1)
Targets the
f16v4absmaxinstruction.- Parameters
src0 – A value of type
half4.src1 – A value of type
half4.
- Returns
The element-wise maximum of absolute values in
src0andsrc1.
-
inline float2 absmax(float2 src0, float2 src1)
Targets the
f32v2absmaxinstruction.- Parameters
src0 – A value of type
float2.src1 – A value of type
float2.
- Returns
The element-wise maximum of absolute values in
src0andsrc1.
-
inline float absmax(float src0, float src1)
Targets the
f32absmaxinstruction.- Parameters
src0 – A value of type
float.src1 – A value of type
float.
- Returns
The maximum of absolute values
src0andsrc1.
-
inline half2 max(half2 src0, half2 src1)
Targets the
f16v2maxinstruction.- Parameters
src0 – A value of type
half2.src1 – A value of type
half2.
- Returns
The element-wise maximum of
src0andsrc1.
-
inline half4 max(half4 src0, half4 src1)
Targets the
f16v4maxinstruction.- Parameters
src0 – A value of type
half4.src1 – A value of type
half4.
- Returns
The element-wise maximum of
src0andsrc1.
-
inline float2 max(float2 src0, float2 src1)
Targets the
f32v2maxinstruction.- Parameters
src0 – A value of type
float2.src1 – A value of type
float2.
- Returns
The element-wise maximum of
src0andsrc1.
-
inline float max(float src0, float src1)
Targets the
f32maxinstruction.- Parameters
src0 – A value of type
float.src1 – A value of type
float.
- Returns
The maximum of
src0andsrc1.
-
inline half2 maxc(half4 src)
Targets the
f16v4maxcinstruction.- Parameters
src – A value of type
half4.- Returns
The 2x2 lateral maximum of
src. The 0th element in the result vector is the maximum of src[0] and src[1], and the 1st element is the maximum of src[2] and src[3].
-
inline half2 min(half2 src0, half2 src1)
Targets the
f16v2mininstruction.- Parameters
src0 – A value of type
half2.src1 – A value of type
half2.
- Returns
The element-wise minimum of
src0andsrc1.
-
inline half4 min(half4 src0, half4 src1)
Targets the
f16v4mininstruction.- Parameters
src0 – A value of type
half4.src1 – A value of type
half4.
- Returns
The element-wise minimum of
src0andsrc1.
-
inline float2 min(float2 src0, float2 src1)
Targets the
f32v2mininstruction.- Parameters
src0 – A value of type
float2.src1 – A value of type
float2.
- Returns
The element-wise minimum of
src0andsrc1.
-
inline float min(float src0, float src1)
Targets the
f32mininstruction.- Parameters
src0 – A value of type
float.src1 – A value of type
float.
- Returns
The minimum of
src0andsrc1.
-
inline half2 clamp(half2 src0, half2 src1)
Targets the
f16v2clampinstruction.- Parameters
src0 – A value of type
half2.src1 – A value of type
half2.
- Returns
The min-of-maximum result of
src0andsrc1, of typehalf2. The first element is the median value of the first element ofsrc0and the two elements insrc1. The second element is the median of the second element ofsrc0and the two elements insrc1.
-
inline half4 clamp(half4 src0, half2 src1)
Targets the
f16v4clampinstruction.- Parameters
src0 – A value of type
half4.src1 – A value of type
half2.
- Returns
The min-of-maximum result of
src0andsrc1, of typehalf4. Each element is the median of the element insrc0at the same index, and the two values insrc1.
-
inline float2 clamp(float2 src0, float2 src1)
Targets the
f32v2clampinstruction.- Parameters
src0 – A value of type
float2.src1 – A value of type
float2.
- Returns
The min-of-maximum result of
src0andsrc1, of typefloat2. The first element is the median of the first element ofsrc0and the two elements insrc1. The second element is the median of the second element ofsrc0and the two elements insrc1.
-
inline float clamp(float src0, float2 src1)
Targets the
f32clampinstruction.- Parameters
src0 – A value of type
float.src1 – A value of type
float2.
- Returns
The median of
src0and the two elements insrc1.
-
inline void cmac(half2 src0, half2 src1)
Targets the
f16v2cmacinstruction.- Parameters
src0 – A value of type
half2.src1 – A value of type
half2.
-
inline void cmac(half4 src0, half4 src1)
Targets the
f16v4cmacinstruction.- Parameters
src0 – A value of type
half4.src1 – A value of type
half4.
-
inline half2 exp(half2 src)
Targets the
f16v2expinstruction.- Parameters
src – A value of type
half2.- Returns
A vector of the results of
e^Xof the two elements insrc.
-
inline float exp(float src)
Targets the
f32expinstruction.- Parameters
src – A value of type
float.- Returns
The result of
e^{src}.
-
inline half2 exp2(half2 src)
Targets the
f16v2expinstruction.- Parameters
src – A value of type
half2.- Returns
A vector of the results of 2^X of the two elements in
src.
-
inline float exp2(float src)
Targets the f32exp2 instruction.
- Parameters
src – A value of type float.
- Returns
The result of 2^{src}.
-
inline half2 log2(half2 src)
Targets the
f16v2log2instruction.- Parameters
src – A value of type
half2.- Returns
A vector of the results of the log (base 2) of the two elements in
src.
-
inline float log2(float src)
Targets the f32log2 instruction.
- Parameters
src – A value of type float.
- Returns
The result of the log (base 2) of src.
-
inline half2 tanh(half2 src)
Targets the f16v2tanh instruction.
- Parameters
src – A value of type half2.
- Returns
The result of tanh(src).
-
inline float tanh(float src)
Targets the f32tanh instruction.
- Parameters
src – A value of type float.
- Returns
The result of tanh(src).
-
inline half2 ln(half2 src)
Targets the
f16v2lninstruction.- Parameters
src – A value of type
half2.- Returns
A vector of the results of the natural log of the two elements in
src.
-
inline float ln(float src)
Targets the f32ln instruction.
- Parameters
src – A value of type float.
- Returns
The result of the natural log of src.
-
inline float2 axpy(float2 src0, float2 src1)
Targets the f32v2axpy instruction.
- Parameters
src0 – A value of type float2.
src1 – A value of type float2.
- Returns
The single precision two-element vector res = a*src0 + src1. The scalar multiplicand a is provided by the internal state element $TAS.
-
inline half2 f16v2grand()
Targets the
f16v2grandinstruction.- Returns
Gaussian distribution, two-element half-precision random vector.
-
inline float2 f32v2grand()
Targets the
f32v2grandinstruction.- Returns
Gaussian distribution, two-element single-precision random vector.
-
inline half4 rmask(half4 src0, float src1)
Targets the f16v4rmask instruction.
- Parameters
src0 – A value of type half4.
src1 – A value of type float.
- Returns
The result is a masked version of src0, with each element of the input being individually masked with the probability specified by the bottom 17-bits of src1:
  - if src1[16] == 1, no masking is applied;
  - if src1[16:0] == 0, the result is a zero vector;
  - otherwise each element is individually unmasked with probability src1[15:0] / 65536.
PRNG is used by this instruction to generate 4 x 16-bit random values from the discrete uniform distribution.
-
inline float2 rmask(float2 src0, float src1)
Targets the f32v2rmask instruction.
- Parameters
src0 – A value of type float2.
src1 – A value of type float.
- Returns
The result is a masked version of src0, with each element of the input being individually masked with the probability specified by the bottom 17-bits of src1:
  - if src1[16] == 1, no masking is applied;
  - if src1[16:0] == 0, the result is a zero vector;
  - otherwise each element is individually unmasked with probability src1[15:0] / 65536.
PRNG is used by this instruction to generate 2 x 16-bit random values from the discrete uniform distribution.
-
inline half2 sigm(half2 src)
Targets the
f16v2sigminstruction.- Parameters
src – A value of type
half2.- Returns
The result of an element-wise application of the sigmoid function on
src.
-
inline float sigm(float src)
Targets the f32sigm instruction.
- Parameters
src – A value of type float.
- Returns
The result of applying the sigmoid function to src.
-
inline float sum(half2 src)
Targets the
f16v2suminstruction.- Parameters
src – A value of type
half2.- Returns
The sum of the two elements in
srcas afloat.
-
inline float2 sum(half4 src)
Targets the f16v4sum instruction.
- Parameters
src – A value of type half4.
- Returns
The 2x2 lateral summation of the elements in src as a float2. The first element is the sum of src[0] and src[1], the second element is the sum of src[2] and src[3].
-
inline half2 cmpeq(half2 src0, half2 src1)
Targets the
f16v2cmpeqinstruction.- Parameters
src0 – A value of type
half2.src1 – A value of type
half2.
- Returns
Element-wise equality test of
src0andsrc1. If src0[i] == src1[i], the result vector element at indexiwill be0xffff, and0x0000otherwise.
-
inline half4 cmpeq(half4 src0, half4 src1)
Targets the
f16v4cmpeqinstruction.- Parameters
src0 – A value of type
half4.src1 – A value of type
half4.
- Returns
Element-wise equality test of
src0andsrc1. If src0[i] == src1[i], the result vector element at indexiwill be0xffff, and0x0000otherwise.
-
inline float2 cmpeq(float2 src0, float2 src1)
Targets the
f32v2cmpeqinstruction.- Parameters
src0 – A value of type
float2.src1 – A value of type
float2.
- Returns
Element-wise equality test of
src0andsrc1. If src0[i] == src1[i], the result vector element at indexiwill be0xffff, and0x0000otherwise.
-
inline float cmpeq(float src0, float src1)
Targets the
f32cmpeqinstruction.- Parameters
src0 – A value of type
float.src1 – A value of type
float.
- Returns
Equality test of
src0andsrc1. Ifsrc0==src1the result will be0xffff, and0x0000otherwise.
-
inline half2 cmpge(half2 src0, half2 src1)
Targets the
f16v2cmpgeinstruction.- Parameters
src0 – A value of type
half2.src1 – A value of type
half2.
- Returns
Element-wise greater-than-or-equal-to test of
src0andsrc1. Ifsrc0[i] >=src1[i] the result vector element at indexiwill be0xffff, and0x0000otherwise.
-
inline half4 cmpge(half4 src0, half4 src1)
Targets the
f16v4cmpgeinstruction.- Parameters
src0 – A value of type
half4.src1 – A value of type
half4.
- Returns
Element-wise greater-than-or-equal-to test of
src0andsrc1. Ifsrc0[i] >=src1[i] the result vector element at indexiwill be0xffff, and0x0000otherwise.
-
inline float2 cmpge(float2 src0, float2 src1)
Targets the
f32v2cmpgeinstruction.- Parameters
src0 – A value of type
float2.src1 – A value of type
float2.
- Returns
Element-wise greater-than-or-equal-to test of
src0andsrc1. Ifsrc0[i] >=src1[i] the result vector element at indexiwill be0xffff, and0x0000otherwise.
-
inline float cmpge(float src0, float src1)
Targets the
f32cmpgeinstruction.- Parameters
src0 – A value of type
float.src1 – A value of type
float.
- Returns
Greater-than-or-equal-to test of
src0andsrc1. Ifsrc0>=src1the result will be0xffff, and0x0000otherwise.
-
inline half2 cmpgt(half2 src0, half2 src1)
Targets the f16v2cmpgt instruction.
- Parameters
src0 – A value of type half2.
src1 – A value of type half2.
- Returns
Element-wise greater-than test of src0 and src1. If src0[i] > src1[i], the result vector element at index i will be 0xffff, and 0x0000 otherwise.
-
inline half4 cmpgt(half4 src0, half4 src1)
Targets the f16v4cmpgt instruction.
- Parameters
src0 – A value of type half4.
src1 – A value of type half4.
- Returns
Element-wise greater-than test of src0 and src1. If src0[i] > src1[i], the result vector element at index i will be 0xffff, and 0x0000 otherwise.
-
inline float2 cmpgt(float2 src0, float2 src1)
Targets the f32v2cmpgt instruction.
- Parameters
src0 – A value of type float2.
src1 – A value of type float2.
- Returns
Element-wise greater-than test of src0 and src1. If src0[i] > src1[i], the result vector element at index i will be 0xffff, and 0x0000 otherwise.
-
inline float cmpgt(float src0, float src1)
Targets the
f32cmpgtinstruction.- Parameters
src0 – A value of type
float.src1 – A value of type
float.
- Returns
Greater-than test of
src0andsrc1. Ifsrc0>src1the result will be0xffff, and0x0000otherwise.
-
inline half2 cmple(half2 src0, half2 src1)
Targets the f16v2cmple instruction.
- Parameters
src0 – A value of type half2.
src1 – A value of type half2.
- Returns
Element-wise less-than-or-equal-to test of src0 and src1. If src0[i] <= src1[i], the result vector element at index i will be 0xffff, and 0x0000 otherwise.
-
inline half4 cmple(half4 src0, half4 src1)
Targets the f16v4cmple instruction.
- Parameters
src0 – A value of type half4.
src1 – A value of type half4.
- Returns
Element-wise less-than-or-equal-to test of src0 and src1. If src0[i] <= src1[i], the result vector element at index i will be 0xffff, and 0x0000 otherwise.
-
inline float2 cmple(float2 src0, float2 src1)
Targets the f32v2cmple instruction.
- Parameters
src0 – A value of type float2.
src1 – A value of type float2.
- Returns
Element-wise less-than-or-equal-to test of src0 and src1. If src0[i] <= src1[i], the result vector element at index i will be 0xffff, and 0x0000 otherwise.
-
inline float cmple(float src0, float src1)
Targets the
f32cmpleinstruction.- Parameters
src0 – A value of type
float.src1 – A value of type
float.
- Returns
Less-than-or-equal-to test of
src0andsrc1. Ifsrc0<=src1the result will be0xffff, and0x0000otherwise.
-
inline half2 cmplt(half2 src0, half2 src1)
Targets the f16v2cmplt instruction.
- Parameters
src0 – A value of type half2.
src1 – A value of type half2.
- Returns
Element-wise less-than test of src0 and src1. If src0[i] < src1[i], the result vector element at index i will be 0xffff, and 0x0000 otherwise.
-
inline half4 cmplt(half4 src0, half4 src1)
Targets the f16v4cmplt instruction.
- Parameters
src0 – A value of type half4.
src1 – A value of type half4.
- Returns
Element-wise less-than test of src0 and src1. If src0[i] < src1[i], the result vector element at index i will be 0xffff, and 0x0000 otherwise.
-
inline float2 cmplt(float2 src0, float2 src1)
Targets the f32v2cmplt instruction.
- Parameters
src0 – A value of type float2.
src1 – A value of type float2.
- Returns
Element-wise less-than test of src0 and src1. If src0[i] < src1[i], the result vector element at index i will be 0xffff, and 0x0000 otherwise.
-
inline float cmplt(float src0, float src1)
Targets the
f32cmpltinstruction.- Parameters
src0 – A value of type
float.src1 – A value of type
float.
- Returns
Less-than test of
src0andsrc1. Ifsrc0<src1the result will be0xffff, and0x0000otherwise.
-
inline half2 cmpne(half2 src0, half2 src1)
Targets the
f16v2cmpneinstruction.- Parameters
src0 – A value of type
half2.src1 – A value of type
half2.
- Returns
Element-wise inequality test of
src0andsrc1. If src0[i] != src1[i], the result vector element at indexiwill be0xffff, and0x0000otherwise.
-
inline half4 cmpne(half4 src0, half4 src1)
Targets the
f16v4cmpneinstruction.- Parameters
src0 – A value of type
half4.src1 – A value of type
half4.
- Returns
Element-wise inequality test of
src0andsrc1. If src0[i] != src1[i], the result vector element at indexiwill be0xffff, and0x0000otherwise.
-
inline float2 cmpne(float2 src0, float2 src1)
Targets the
f32v2cmpneinstruction.- Parameters
src0 – A value of type
float2.src1 – A value of type
float2.
- Returns
Element-wise inequality test of
src0andsrc1. If src0[i] != src1[i], the result vector element at indexiwill be0xffff, and0x0000otherwise.
-
inline float cmpne(float src0, float src1)
Targets the
f32cmpneinstruction.- Parameters
src0 – A value of type
float.src1 – A value of type
float.
- Returns
Inequality test of
src0andsrc1. Ifsrc0!=src1the result will be0xffff, and0x0000otherwise.
-
inline float to_float(half src)
Conversion from a half-precision float to single-precision.
Targets the
f16tof32instruction.- Parameters
src – A value of type
half.- Returns
The half-precision float value
srcconverted to single-precision.
-
inline float2 to_float(half2 src)
Two-element vector conversion from half-precision to single-precision.
Targets the
f16v2tof32instruction.- Parameters
src – A vector of type
half2.- Returns
The vector of two half-precision float values
srcconverted to single-precision.
-
inline float to_float(unsigned src)
Conversion from an unsigned integer to a single-precision float.
Targets the
f32fromui32instruction.- Parameters
src – An unsigned integer.
- Returns
The unsigned integer
srcconverted to a single-precision float value.
-
inline float to_float(int src)
Conversion from a signed integer to a single-precision float.
Targets the
f32fromi32instruction.- Parameters
src – A signed integer.
- Returns
The signed integer
srcconverted to a single-precision float value.
-
inline float to_float_su(unsigned src)
Symmetric, unbiased conversion from an unsigned integer to a single-precision float.
Targets the
f32sufromuiinstruction.- Parameters
src – An unsigned integer.
- Returns
The unsigned integer
srcconverted to a single-precision float value via symmetric, unbiased conversion.
-
inline float2 to_float_su(uint2 src)
Symmetric, unbiased conversion from a vector of two unsigned integers to single-precision floats.
Targets the
f32v2sufromuiinstruction.- Parameters
src – A vector of two unsigned integers.
- Returns
The vector of two unsigned integers
srcconverted to single-precision floats via symmetric, unbiased conversion.
-
inline half to_half(float src)
Conversion from a single-precision float to half-precision.
Targets the
f32tof16instruction.- Parameters
src – A value of type
float.- Returns
The single-precision float value
srcconverted to half-precision.
-
inline half2 to_half(float2 src)
Conversion from a vector of two single-precision floats to half-precision.
Targets the
f32v2tof16instruction.- Parameters
src – A vector of two single-precision float values.
- Returns
The vector of two single-precision float values
srcconverted to half-precision.
-
inline half2 to_half_su(ushort2 src)
Symmetric, unbiased conversion from a vector of two unsigned 16-bit integers to half-precision floats.
Targets the
f16v2sufromuiinstruction.- Parameters
src – A two-element vector of unsigned 16-bit integers.
- Returns
The vector of two unsigned 16-bit integers
srcconverted to half precision floats via symmetric, unbiased conversion.
-
inline half4 to_half_su(ushort4 src)
Symmetric, unbiased conversion from a vector of four unsigned 16-bit integers to half-precision floats.
Targets the
f16v4sufromuiinstruction.- Parameters
src – A four-element vector of unsigned 16-bit integers.
- Returns
The vector of four unsigned 16-bit integers
srcconverted to half precision floats via symmetric, unbiased conversion.
-
inline unsigned to_uint(float src)
Conversion from a single-precision float to an unsigned integer.
Targets the
f32toui32instruction.- Parameters
src – A float value.
- Returns
The single-precision float
srcconverted to an unsigned integer.
-
inline int to_int(float src)
Conversion from a single-precision float to a signed integer.
Targets the
f32toi32instruction.- Parameters
src – A float value.
- Returns
The single-precision float
srcconverted to a signed integer.
-
inline float f16tof32(half src)
Targets the
f16tof32instruction.- Parameters
src – A value of type
half.- Returns
The half-precision float value
srcconverted to single-precision.
-
inline float2 f16v2tof32(half2 src)
Targets the
f16v2tof32instruction.- Parameters
src – A vector of type
half2.- Returns
The vector of two half-precision float values
srcconverted to single-precision.
-
inline half2 f16v2sufromui(half2 src)
Targets the f16v2sufromui instruction.
- Parameters
src – A two-element vector of unsigned 16-bit integers, as a variable of type half2. Note: this builtin directly targets the f16v2sufromui instruction, whose operand and result are both in the floating point register file. To achieve this conversion from a ushort2 type, see the to_half_su function in this header.
- Returns
The vector of two unsigned 16-bit integers src converted to half precision floats via symmetric, unbiased conversion.
-
inline half4 f16v4sufromui(half4 src)
Targets the f16v4sufromui instruction.
- Parameters
src – A four-element vector of unsigned 16-bit integers, as a variable of type half4. Note: this builtin directly targets the f16v4sufromui instruction, whose operand and result are both in the floating point register file. To achieve this conversion from a ushort4 type, see the to_half_su function in this header.
- Returns
The vector of four unsigned 16-bit integers src converted to half precision floats via symmetric, unbiased conversion.
-
inline float f32fromi32(float src)
Targets the
f32fromi32instruction.- Parameters
src – A signed integer as a float variable. Note: this builtin directly targets the
f32fromi32instruction, whose operand and result are both in the floating point register file. To achieve this conversion from aninttype, see theto_floatfunction in this header.- Returns
The signed integer
srcconverted to a single-precision float value.
-
inline float f32fromui32(float src)
Targets the
f32fromui32instruction.- Parameters
src – An unsigned integer as a float variable. Note: this builtin directly targets the
f32fromui32instruction, whose operand and result are both in the floating point register file. To achieve this conversion from anunsignedtype, see theto_floatfunction in this header.- Returns
The unsigned integer
srcconverted to a single-precision float value.
-
inline float f32sufromui(float src)
Targets the
f32sufromuiinstruction.- Parameters
src – An unsigned integer as a float variable. Note: this builtin directly targets the
f32sufromuiinstruction, whose operand and result are both in the floating point register file. To achieve this conversion from anunsignedtype, see theto_float_sufunction in this header.- Returns
The unsigned integer
srcconverted to a single-precision float value via symmetric, unbiased conversion.
-
inline half f32tof16(float src)
Targets the
f32tof16instruction.- Parameters
src – A value of type
float.- Returns
The single-precision float value
srcconverted to half-precision.
-
inline float f32toi32(float src)
Targets the
f32toi32instruction.- Parameters
src – A float value.
- Returns
The single-precision float
srcconverted to an integer, as a float-type variable. Note: this builtin directly targets thef32toi32instruction, whose operand and result are both in the floating point register file. To achieve this conversion to aninttype, see theto_intfunction in this header.
-
inline float f32toui32(float src)
Targets the
f32toui32instruction.- Parameters
src – A float value.
- Returns
The single-precision float
srcconverted to an unsigned integer, as a float-type variable. Note: this builtin directly targets thef32toui32instruction, whose operand and result are both in the floating point register file. To achieve this conversion to anunsignedtype, see theto_uintfunction in this header.
-
inline float2 f32v2sufromui(float2 src)
Targets the f32v2sufromui instruction.
- Parameters
src – A two-element vector of unsigned integers, as a variable of type float2. Note: this builtin directly targets the f32v2sufromui instruction, whose operand and result are both in the floating point register file. To achieve this conversion from a uint2 type, see the to_float_su function in this header.
- Returns
The vector of two unsigned integers src converted to single precision floats via symmetric, unbiased conversion.
-
inline half2 f32v2tof16(float2 src)
Targets the
f32v2tof16instruction.- Parameters
src – A vector of two single-precision float values.
- Returns
The vector of two single-precision float values
srcconverted to half-precision.
-
inline unsigned clz(int src)
Targets the clz instruction.
- Parameters
src – A value of type int.
- Returns
The number of leading (highest-order) bits in src that are zero.
-
inline unsigned popc(int src)
Targets the
popcinstruction.- Parameters
src – A value of type
int.- Returns
The number of set bits in
src.
-
inline short2 roll16(short2 src0, short2 src1)
Targets the
roll16instruction.- Parameters
src0 – A value of type
short2.src1 – A value of type
short2.
- Returns
The result of a SIMD roll permutation on the 4 16-bit values across
src0andsrc1, as ashort2. src0 src1 -> Result | 3 | 2 | | 1 | 0 | | 2 | 1 |
-
inline ushort2 roll16(ushort2 src0, ushort2 src1)
Targets the
roll16instruction.- Parameters
src0 – A value of type
ushort2.src1 – A value of type
ushort2.
- Returns
The result of a SIMD roll permutation on the 4 16-bit values across
src0andsrc1, as aushort2. src0 src1 -> Result | 3 | 2 | | 1 | 0 | | 2 | 1 |
-
inline half2 roll16(half2 src0, half2 src1)
Targets the
roll16instruction.- Parameters
src0 – A value of type
half2.src1 – A value of type
half2.
- Returns
The result of a SIMD roll permutation on the 4 16-bit values across
src0andsrc1, as ahalf2. src0 src1 -> Result | 3 | 2 | | 1 | 0 | | 2 | 1 |
-
inline short2 sort4x16hi(short2 src0, short2 src1)
Targets the
sort4x16hiinstruction.- Parameters
src0 – A value of type
short2.src1 – A value of type
short2.
- Returns
The result of a SIMD sort permutation on the 4 16-bit values across
src0andsrc1, as ashort2. src0 src1 -> Result | 3 | 2 | | 1 | 0 | | 3 | 1 |
-
inline ushort2 sort4x16hi(ushort2 src0, ushort2 src1)
Targets the
sort4x16hiinstruction.- Parameters
src0 – A value of type
ushort2.src1 – A value of type
ushort2.
- Returns
The result of a SIMD sort permutation on the 4 16-bit values across
src0andsrc1, as aushort2. src0 src1 -> Result | 3 | 2 | | 1 | 0 | | 3 | 1 |
-
inline half2 sort4x16hi(half2 src0, half2 src1)
Targets the
sort4x16hiinstruction.- Parameters
src0 – A value of type
half2.src1 – A value of type
half2.
- Returns
The result of a SIMD sort permutation on the 4 16-bit values across
src0andsrc1, as ahalf2. src0 src1 -> Result | 3 | 2 | | 1 | 0 | | 3 | 1 |
-
inline short2 sort4x16lo(short2 src0, short2 src1)
Targets the
sort4x16loinstruction.- Parameters
src0 – A value of type
short2.src1 – A value of type
short2.
- Returns
The result of a SIMD sort permutation on the 4 16-bit values across
src0andsrc1, as ashort2. src0 src1 -> Result | 3 | 2 | | 1 | 0 | | 2 | 0 |
-
inline ushort2 sort4x16lo(ushort2 src0, ushort2 src1)
Targets the
sort4x16loinstruction.- Parameters
src0 – A value of type
ushort2.src1 – A value of type
ushort2.
- Returns
The result of a SIMD sort permutation on the 4 16-bit values across
src0andsrc1, as aushort2. src0 src1 -> Result | 3 | 2 | | 1 | 0 | | 2 | 0 |
-
inline half2 sort4x16lo(half2 src0, half2 src1)
Targets the
sort4x16loinstruction.- Parameters
src0 – A value of type
half2.src1 – A value of type
half2.
- Returns
The result of a SIMD sort permutation on the 4 16-bit values across
src0andsrc1, as ahalf2. src0 src1 -> Result | 3 | 2 | | 1 | 0 | | 2 | 0 |
-
template<typename T0, typename T1, typename = std::enable_if_t<both_integral<T0, T1>>>