Linear algebra functions. More...

Classes
struct	MatMulParams
	Parameters to define a Matrix multiplication. More...

struct	PlanCosts
	Structure for estimated costs returned by reportPlanEstimatedCosts() More...

Typedefs
using	MatMulPlanParams = std::tuple< const poplar::Target *, const MatMulParams, const poplar::OptionFlags * >
	A tuple containing the required parameters to preplan a matmul: More...

using	MatMulToConvOptions = std::unordered_map< const poplar::OptionFlags *, poplar::OptionFlags >
	Mapping of pointers to matrix multiplication option flags to the corresponding convolution option flags.

using	DistributedNormReduceCallback = std::function< std::vector< poplar::Tensor >(poplar::Graph &replicatedGraph, const std::vector< poplar::Tensor > &inputsToReduce, poplar::program::Sequence &prog, unsigned groupSize, const poplar::DebugContext &debugContext, const poplar::OptionFlags &options)>
	Callback to reduce statistics and gradients. More...

Functions
std::vector< std::pair< MatMulParams, poplar::OptionFlags > >	getCholeskyMatMulPrePlanParameters (const poplar::Type &type, const std::vector< std::size_t > &shape, bool lower, poplar::OptionFlags options)
	Plan matrix multiplication for the Cholesky factoriser. More...

poplar::Tensor	createCholeskyInput (poplar::Graph &graph, const poplar::Type &type, const std::vector< std::size_t > &shape, bool lower, const poplar::DebugContext &debugContext, const poplar::OptionFlags &options={}, PlanningCache *cache=nullptr)
	Create a tensor that is used as the input for the Cholesky factoriser. More...

poplar::Tensor	cholesky (poplar::Graph &graph, const poplar::Tensor &a, bool lower, poplar::program::Sequence &prog, const poplar::DebugContext &debugContext={}, poplar::OptionFlags options={}, PlanningCache *cache=nullptr)
	Computes Cholesky factor for a symmetric positive definite matrix. More...

void	choleskyInPlace (poplar::Graph &graph, const poplar::Tensor &a, bool lower, poplar::program::Sequence &prog, const poplar::DebugContext &debugContext={}, poplar::OptionFlags options={}, PlanningCache *cache=nullptr)
	Computes Cholesky factor in place for a symmetric positive definite matrix. More...

uint64_t	getFwdFlops (const ConvParams &params)
	Calculate the minimum number of floating point operations required to perform the forward pass convolution given a set of `params`.

uint64_t	getBwdFlops (const ConvParams &params)
	Calculate the minimum number of floating point operations required to perform the backward pass convolution given a set of `params`.

uint64_t	getWuFlops (const ConvParams &params)
	Calculate the minimum number of floating point operations required to perform the weight update pass convolution given a set of `params`.

double	getFwdPerfectCycleCount (const poplar::Graph &graph, const ConvParams &params)
	Calculate the number of cycles to perform the forward pass assuming maximal utilisation of the target hardware, performing the minimum number of floating point operations. More...

double	getBwdPerfectCycleCount (const poplar::Graph &graph, const ConvParams &params)
	Calculate the number of cycles to perform the backward pass assuming maximal utilisation of the target hardware, performing the minimum number of floating point operations. More...

double	getWuPerfectCycleCount (const poplar::Graph &graph, const ConvParams &params)
	Calculate the number of cycles to perform the weight update pass assuming maximal utilisation of the target hardware, performing the minimum number of floating point operations. More...

poplar::Tensor	createWeights (poplar::Graph &graph, const ConvParams &params, const poplar::DebugContext &debugContext={}, const poplar::OptionFlags &options={}, PlanningCache *cache=nullptr)
	Create a weight tensor suitable for use with convolution() More...

poplar::Tensor	createBiases (poplar::Graph &graph, const poplar::Tensor &activations, const poplar::DebugContext &debugContext={"biases"})
	Create a bias tensor suitable for input to the addBias() function. More...

poplar::Tensor	createBiases (poplar::Graph &graph, const poplar::Tensor &activations, const ConvParams &params, const poplar::DebugContext &debugContext={"biases"}, const poplar::OptionFlags &options={}, PlanningCache *cache=nullptr)
	Create a bias tensor suitable for input to the addBias() function with allocation consistent with plan parameters. More...

poplar::Tensor	createInput (poplar::Graph &graph, const ConvParams &params, const poplar::DebugContext &debugContext={}, const poplar::OptionFlags &options={}, PlanningCache *cache=nullptr)
	Create an input tensor for a convolution. More...

poplar::Tensor	createConvOutput (poplar::Graph &graph, const ConvParams &params, const poplar::DebugContext &debugContext={}, const poplar::OptionFlags &options={}, PlanningCache *cache=nullptr)
	Create an output tensor for a convolution. More...

poplar::Tensor	convolution (poplar::Graph &graph, const poplar::Tensor &in, const poplar::Tensor &weights, const ConvParams &params, bool transposeAndFlipWeights, poplar::program::Sequence &prog, const poplar::DebugContext &debugContext={}, const poplar::OptionFlags &options={}, PlanningCache *cache=nullptr)
	Convolve an input with a set of weights. More...

void	convolutionWithOutput (poplar::Graph &graph, const poplar::Tensor &in, const poplar::Tensor &weights, const poplar::Tensor &out, const ConvParams &params, bool transposeAndFlipWeights, poplar::program::Sequence &prog, const poplar::DebugContext &debugContext={}, const poplar::OptionFlags &options={}, PlanningCache *cache=nullptr)
	Convolve an input with a set of weights into a pre-allocated output tensor. More...

void	preplanConvolutions (const std::set< ConvPlanParams > &convs, PlanningCache &cache)

void	preplanConvolutions (poplar::Graph &graph, const std::set< ConvPlanParams > &convs, PlanningCache &cache)

void	weightsTransposeChansFlipXY (poplar::Graph &graph, const poplar::Tensor &weightsIn, const poplar::Tensor &weightsOut, poplar::program::Sequence &prog, const poplar::DebugContext &debugContext={}, const poplar::OptionFlags &options={})
	Copy the weights in `weightsIn` into `weightsOut` such that each element of the kernel is transposed with respect to the input and output channels, and each spatial dimension of the kernel is flipped. More...

poplar::Tensor	calculateWeightDeltas (poplar::Graph &graph, const poplar::Tensor &zDeltas, const poplar::Tensor &activations, const ConvParams &params, poplar::program::Sequence &prog, const poplar::DebugContext &debugContext={}, const poplar::OptionFlags &options={}, PlanningCache *cache=nullptr)
	Append an operation to a poplar::Program to generate the tensor of weight deltas. More...

void	convolutionWeightUpdate (poplar::Graph &graph, const poplar::Tensor &zDeltas, const poplar::Tensor &weights, const poplar::Tensor &activations, ConvParams params, const poplar::Tensor &scale, poplar::program::Sequence &prog, const poplar::DebugContext &debugContext={}, const poplar::OptionFlags &options={}, PlanningCache *cache=nullptr)
	Append operations to a poplar::Program to generate and apply the weight update. More...

void	convolutionWeightUpdate (poplar::Graph &graph, const poplar::Tensor &zDeltas, const poplar::Tensor &weights, const poplar::Tensor &activations, ConvParams params, float scale, poplar::program::Sequence &prog, const poplar::DebugContext &debugContext={}, const poplar::OptionFlags &options={}, PlanningCache *cache=nullptr)
	Append operations to a poplar::Program to generate and apply the weight update. More...

void	convolutionBiasUpdate (poplar::Graph &graph, const poplar::Tensor &zDeltas, const poplar::Tensor &biases, const poplar::Tensor &scale, const poplar::OptionFlags &options, poplar::program::Sequence &prog, const poplar::DebugContext &debugContext={})
	Add a program to update `biases` tensor with the gradients derived from the `zDeltas` tensor. More...

void	convolutionBiasUpdate (poplar::Graph &graph, const poplar::Tensor &zDeltas, const poplar::Tensor &biases, float scale, const poplar::OptionFlags &options, poplar::program::Sequence &prog, const poplar::DebugContext &debugContext={})
	Add a program to update `biases` tensor with the gradients derived from the `zDeltas` tensor. More...

void	addBias (poplar::Graph &graph, const poplar::Tensor &in, const poplar::Tensor &biases, poplar::program::Sequence &prog, const poplar::DebugContext &debugContext={})
	Adds a program to `prog` which adds `biases` to `activations` tensor. More...

void	reportPlanInfo (std::ostream &out, const poplar::Graph &graph, const ConvParams &params, const poplar::OptionFlags &options={}, PlanningCache *cache=nullptr)
	Report the convolution plan corresponding to the `params` and `options` provided. More...

PlanCosts	reportPlanEstimatedCosts (const poplar::Graph &graph, const ConvParams &params, const poplar::OptionFlags &options={}, PlanningCache *cache=nullptr)
	Report the estimated cycles and memory costs of the convolution plan corresponding to the `params` and `options` provided. More...

void	reportWeightUpdatePlanInfo (std::ostream &out, const poplar::Graph &graph, const ConvParams &fwdParams, const poplar::OptionFlags &fwdOptions={}, PlanningCache *cache=nullptr)
	Report the convolution plan corresponding to the weight update pass given the forward pass `params` and `options`. More...

poplar::Tensor	fullyConnectedWeightTranspose (poplar::Graph &graph, poplar::Tensor weights, const ConvParams &params, poplar::program::Sequence &prog, const poplar::DebugContext &debugContext={}, const poplar::OptionFlags &options={}, PlanningCache *cache=nullptr)
	Arranges the weights (activations) such that they are suited for the backward pass in a fully connected layer. More...

void	convolutionValidateOptions (const poplar::OptionFlags &options)
	Provides an interface to validate the convolution options. More...

void	preplan (const std::set< ConvPlanParams > &convs, const std::set< MatMulPlanParams > &matmuls, PlanningCache &cache)
	Plan the specified convolutions & matmuls. More...

unsigned	getDilatedSize (unsigned size, unsigned dilation)
	Return the output size when the specified dilation is applied to an input of the specified size.

unsigned	getInputIndex (unsigned dim, unsigned outputIndex, unsigned kernelIndex, const ConvParams &params)
	Return the index of the input element that is multiplied by the specified kernel index to produce the specified output. More...

unsigned	getKernelIndex (unsigned dim, unsigned outputIndex, unsigned inputIndex, const ConvParams &params)
	Return the index of the kernel element that is multiplied by the specified input index to produce the specified output. More...

std::pair< unsigned, unsigned >	getOutputRangeForKernelIndex (unsigned dim, std::pair< unsigned, unsigned > outputRange, unsigned kernelIndex, const ConvParams &params)
	Given an output range, return the subset whose calculation involves the specified kernel index.

std::pair< unsigned, unsigned >	getOutputRangeForInputIndex (unsigned dim, std::pair< unsigned, unsigned > outputRange, unsigned inputIndex, const ConvParams &params)
	Given an output range, return the subset whose calculation involves the specified input.

std::pair< unsigned, unsigned >	getOutputRangeForKernelRange (unsigned dim, std::pair< unsigned, unsigned > outputRange, std::pair< unsigned, unsigned > kernelIndexRange, const ConvParams &params)
	Given an output range, return the subset whose calculation involves the specified range of kernel indices.

std::pair< unsigned, unsigned >	getOutputRangeForInputRange (unsigned dim, std::pair< unsigned, unsigned > outputRange, std::pair< unsigned, unsigned > inputRange, const ConvParams &params)
	Given an output range, return the subset whose calculation involves the specified range of input indices.

std::pair< unsigned, unsigned >	getInputRange (unsigned dim, std::pair< unsigned, unsigned > outputRange, unsigned kernelIndex, const ConvParams &params)
	Return the input range that is associated with the specified kernel index when calculating the specified output range.

std::pair< unsigned, unsigned >	getKernelRange (unsigned dim, std::pair< unsigned, unsigned > outputRange, unsigned inputIndex, const ConvParams &params)
	Return the kernel range that is associated with the specified input index when calculating the specified output range.

std::pair< unsigned, unsigned >	getInputRange (unsigned dim, std::pair< unsigned, unsigned > outputRange, std::pair< unsigned, unsigned > kernelIndexRange, const ConvParams &params)
	Return the input range that is associated with the specified kernel index range when calculating the specified output range.

std::pair< unsigned, unsigned >	getKernelRange (unsigned dim, std::pair< unsigned, unsigned > outputRange, std::pair< unsigned, unsigned > inputRange, const ConvParams &params)
	Return the kernel range that is associated with the specified input index range when calculating the specified output range.

ConvParams	getGradientParams (const ConvParams &params)
	Given a set of parameters, return the set of params that represent the convolution to be applied to the output gradients to get the input gradients (provided the weights have been transposed in the channel axes and flipped in the spatial axes).

ConvParams	getWeightUpdateParams (const ConvParams &fwdParams)
	Given a set of convolution parameters, return the set of params that represent the convolution to be applied to the output gradients to get the weight update gradients.

poplar::Tensor	matMul (poplar::Graph &graph, const poplar::Tensor &A, const poplar::Tensor &B, poplar::program::Sequence &prog, const poplar::Type &outputType, const poplar::DebugContext &debugContext={}, const poplar::OptionFlags &options={}, PlanningCache *cache=nullptr)
	Multiply two matrices. More...

poplar::Tensor	matMul (poplar::Graph &graph, const poplar::Tensor &A, const poplar::Tensor &B, poplar::program::Sequence &prog, const poplar::DebugContext &debugContext={}, const poplar::OptionFlags &options={}, PlanningCache *cache=nullptr)
	Matrix multiply where output type is the same as input `A`.

void	matMulWithOutput (poplar::Graph &graph, const poplar::Tensor &A_, const poplar::Tensor &B_, poplar::Tensor &out, poplar::program::Sequence &prog, const poplar::DebugContext &debugContext={}, const poplar::OptionFlags &options_={}, PlanningCache *cache=nullptr)
	Matrix multiply with explicitly defined output.

void	matMulReportPlan (std::ostream &out, const poplar::Graph &graph, const poplar::Type &inputType, const poplar::Type &outputType, const std::vector< std::size_t > &aShape, const std::vector< std::size_t > &bShape, const poplar::OptionFlags &options={}, PlanningCache *cache=nullptr)
	Report the convolution plan corresponding to the parameters and options provided. More...

poplar::Tensor	matMulGrouped (poplar::Graph &graph, const poplar::Tensor &A, const poplar::Tensor &B, poplar::program::Sequence &prog, const poplar::Type &outputType, const poplar::DebugContext &debugContext={}, const poplar::OptionFlags &options={}, PlanningCache *cache=nullptr)
	Multiply two grouped matrices. More...

void	matMulGroupedWithOutput (poplar::Graph &graph, const poplar::Tensor &A, const poplar::Tensor &B, poplar::Tensor &out, poplar::program::Sequence &prog, const poplar::DebugContext &debugContext={}, const poplar::OptionFlags &options_={}, PlanningCache *cache=nullptr)
	Grouped matmul with explicit output argument.

void	matMulGroupedReportPlan (std::ostream &out, const poplar::Graph &graph, const poplar::Type &inputType, const poplar::Type &outputType, const std::vector< std::size_t > &aShape, const std::vector< std::size_t > &bShape, const poplar::OptionFlags &options={}, PlanningCache *cache=nullptr)
	Report the convolution plan corresponding to the `params` and `options` provided. More...

void	matMulAcc (poplar::Graph &graph, const poplar::Tensor &C, float k, const poplar::Tensor &A, const poplar::Tensor &B, poplar::program::Sequence &prog, const poplar::DebugContext &debugContext={}, const poplar::OptionFlags &options={}, PlanningCache *cache=nullptr)
	Multiply two matrices and add to a third (with a scaling factor). More...

void	matMulAcc (poplar::Graph &graph, const poplar::Tensor &C, const poplar::Tensor &k, const poplar::Tensor &A, const poplar::Tensor &B, poplar::program::Sequence &prog, const poplar::DebugContext &debugContext={}, const poplar::OptionFlags &options={}, PlanningCache *cache=nullptr)
	Matrix multiply and accumulate with a single-element scaling factor.

void	matMulGroupedAcc (poplar::Graph &graph, const poplar::Tensor &C, float k, const poplar::Tensor &A, const poplar::Tensor &B, poplar::program::Sequence &prog, const poplar::DebugContext &debugContext={}, const poplar::OptionFlags &options={}, PlanningCache *cache=nullptr)
	Grouped matrix multiply and accumulate. More...

void	matMulGroupedAcc (poplar::Graph &graph, const poplar::Tensor &C, const poplar::Tensor &k, const poplar::Tensor &A, const poplar::Tensor &B, poplar::program::Sequence &prog, const poplar::DebugContext &debugContext={}, const poplar::OptionFlags &options={}, PlanningCache *cache=nullptr)
	Grouped matrix multiply and accumulate with a single-element scaling factor.

poplar::Tensor	createMatMulInputLHS (poplar::Graph &graph, const poplar::Type &inputType, const poplar::Type &outputType, const std::vector< std::size_t > &aShape, const std::vector< std::size_t > &bShape, const poplar::DebugContext &debugContext, const poplar::OptionFlags &options={}, PlanningCache *cache=nullptr)
	Create a tensor that is used as the left operand of matrix multiplication. More...

poplar::Tensor	createMatMulInputLHS (poplar::Graph &graph, const poplar::Type &dataType, const std::vector< std::size_t > &aShape, const std::vector< std::size_t > &bShape, const poplar::DebugContext &debugContext, const poplar::OptionFlags &options={}, PlanningCache *cache=nullptr)
	Create a tensor that is used as the left operand of matrix multiplication. More...

poplar::Tensor	createMatMulGroupedInputLHS (poplar::Graph &graph, const poplar::Type &inputType, const poplar::Type &outputType, const std::vector< std::size_t > &aShape, const std::vector< std::size_t > &bShape, const poplar::DebugContext &debugContext, const poplar::OptionFlags &options={}, PlanningCache *cache=nullptr)
	Create a tensor that is used as the left operand of a grouped matrix multiplication. More...

poplar::Tensor	createMatMulInputRHS (poplar::Graph &graph, const poplar::Type &inputType, const poplar::Type &outputType, const std::vector< std::size_t > &aShape, const std::vector< std::size_t > &bShape, const poplar::DebugContext &debugContext, const poplar::OptionFlags &options={}, PlanningCache *cache=nullptr)
	Create a tensor that is used as the right operand of matrix multiplication. More...

poplar::Tensor	createMatMulInputRHS (poplar::Graph &graph, const poplar::Type &dataType, const std::vector< std::size_t > &aShape, const std::vector< std::size_t > &bShape, const poplar::DebugContext &debugContext, const poplar::OptionFlags &options={}, PlanningCache *cache=nullptr)
	Overloaded function for when inputType == outputType (represented by the dataType parameter).

poplar::Tensor	createMatMulOutput (poplar::Graph &graph, const poplar::Type &inputType, const poplar::Type &outputType, const std::vector< std::size_t > &aShape, const std::vector< std::size_t > &bShape, const poplar::DebugContext &debugContext, const poplar::OptionFlags &options={}, PlanningCache *cache=nullptr)
	Create a tensor that is used as the output operand of matrix multiplication. More...

poplar::Tensor	createMatMulOutput (poplar::Graph &graph, const poplar::Type &dataType, const std::vector< std::size_t > &aShape, const std::vector< std::size_t > &bShape, const poplar::DebugContext &debugContext, const poplar::OptionFlags &options={}, PlanningCache *cache=nullptr)
	Overloaded function for when inputType == outputType (represented by the dataType parameter).

poplar::Tensor	createMatMulGroupedInputRHS (poplar::Graph &graph, const poplar::Type &inputType, const poplar::Type &outputType, const std::vector< std::size_t > &aShape, const std::vector< std::size_t > &bShape, const poplar::DebugContext &debugContext, const poplar::OptionFlags &options={}, PlanningCache *cache=nullptr)
	Create a tensor that is used as the right operand of grouped matrix multiplication. More...

poplar::Tensor	createMatMulGroupedOutput (poplar::Graph &graph, const poplar::Type &inputType, const poplar::Type &outputType, const std::vector< std::size_t > &aShape, const std::vector< std::size_t > &bShape, const poplar::DebugContext &debugContext, const poplar::OptionFlags &options={}, PlanningCache *cache=nullptr)
	Create a tensor that is used as the output operand of grouped matrix multiplication (with output). More...

poplar::Tensor	preArrangeMatMulInputRHS (poplar::Graph &graph, const std::vector< std::size_t > &aShape, const poplar::Tensor &B, poplar::program::Sequence &prog, const poplar::Type &outputType, const poplar::DebugContext &debugContext={}, const poplar::OptionFlags &options={}, PlanningCache *cache=nullptr)
	Pre-arrange right-hand side input. More...

poplar::Tensor	preArrangeMatMulInputRHS (poplar::Graph &graph, const std::vector< std::size_t > &aShape, const poplar::Tensor &B, poplar::program::Sequence &prog, const poplar::DebugContext &debugContext={}, const poplar::OptionFlags &options={}, PlanningCache *cache=nullptr)
	Pre-arrange input where the output type is the same as `B`.

poplar::Tensor	preArrangeMatMulGroupedInputRHS (poplar::Graph &graph, const std::vector< std::size_t > &aShape, const poplar::Tensor &B, poplar::program::Sequence &prog, const poplar::Type &outputType, const poplar::DebugContext &debugContext={}, const poplar::OptionFlags &options={}, PlanningCache *cache=nullptr)
	Pre-arrange grouped input with explicitly defined output type.

poplar::Tensor	transposeGroupedMatrix (const poplar::Tensor &A)
	Transposes a grouped matrix tensor. More...

std::set< ConvPlanParams >	matMulGetConvPlanParams (const std::set< MatMulPlanParams > &matmuls, MatMulToConvOptions &matmulToConvOpts)
	Obtain the set of convolution parameters corresponding to the user supplied set of parameters for matrix multiplication. More...

void	preplanMatMuls (const std::set< MatMulPlanParams > &matmuls, matmul::PlanningCache &cache)

void	matmulValidateOptions (const poplar::OptionFlags &options)
	Provides an interface to validate the matmul options. More...

poplar::Tensor	linspace (poplar::Graph &graph, const poplar::Type &type, float left, float right, size_t count, const poplar::DebugContext &debugContext={})
	Create a constant variable that contains values equally spaced in the specified closed range [`left`, `right`]. More...

std::vector< poplar::Tensor >	meshgrid2d (poplar::Graph &graph, poplar::Tensor x, poplar::Tensor y)
	Create a coordinate grid for each axis by broadcasting the input tensors. More...

poplar::Tensor	createNormGamma (poplar::Graph &graph, const poplar::Tensor &acts, const poplar::Type &type, const poplar::DebugContext &debugContext={})
	Create and map the per-channel multiplicative gamma parameter tensor used for normalisation in convolution layers. More...

poplar::Tensor	createNormGamma (poplar::Graph &graph, const poplar::Tensor &acts, const poplar::DebugContext &debugContext={})
	Create and map the per-channel multiplicative gamma parameter tensor used for normalisation in convolution layers. More...

poplar::Tensor	createNormBeta (poplar::Graph &graph, const poplar::Tensor &acts, const poplar::Type &type, const poplar::DebugContext &debugContext={})
	Create and map the per-channel additive beta parameter tensor used for normalisation in convolution layers. More...

poplar::Tensor	createNormBeta (poplar::Graph &graph, const poplar::Tensor &acts, const poplar::DebugContext &debugContext={})
	Create and map the per-channel additive beta parameter tensor used for normalisation in convolution layers. More...

std::pair< poplar::Tensor, poplar::Tensor >	createNormParams (poplar::Graph &graph, const poplar::Tensor &acts, const poplar::DebugContext &debugContext={})
	Creates a tensor pair of normalisation parameters (gamma, beta). More...

std::pair< poplar::Tensor, poplar::Tensor >	normStatistics (poplar::Graph &graph, const poplar::Tensor &actsUngrouped, float eps, poplar::program::Sequence &prog, bool unbiasedVarEstimate, bool stableAlgo=false, const poplar::Type &partialsType=poplar::FLOAT, const poplar::DebugContext &debugContext={})
	Compute the normalisation statistics from the activations tensor. More...

std::pair< poplar::Tensor, poplar::Tensor >	distributedNormStatistics (poplar::Graph &replicatedGraph, const poplar::Tensor &actsUngrouped, float eps, poplar::program::Sequence &prog, bool unbiasedVarEstimate, DistributedNormReduceCallback allReduceCallback, unsigned normSize, bool stableAlgo=false, const poplar::Type &partialsType=poplar::FLOAT, const poplar::DebugContext &debugContext={})
	Compute the normalisation statistics for a part of the activations tensor which is distributed over multiple replicas. More...

poplar::Tensor	normWhiten (poplar::Graph &graph, const poplar::Tensor &acts, const poplar::Tensor &mean, const poplar::Tensor &iStdDev, poplar::program::Sequence &prog, const poplar::DebugContext &debugContext={})
	Compute the whitened activations using the supplied mean and inverse standard deviation. More...

poplar::Tensor	normalise (poplar::Graph &graph, const poplar::Tensor &actsWhitened, const poplar::Tensor &gamma, const poplar::Tensor &beta, poplar::program::Sequence &prog, const poplar::DebugContext &debugContext={})
	Computes the normalised output from whitened activations. More...

std::pair< poplar::Tensor, poplar::Tensor >	normParamGradients (poplar::Graph &graph, const poplar::Tensor &actsWhitened, const poplar::Tensor &gradsIn, poplar::program::Sequence &prog, const poplar::Type &partialsType=poplar::FLOAT, const poplar::DebugContext &debugContext={})
	Compute gradients with respect to parameters required for parameter update. More...

poplar::Tensor	normGradients (poplar::Graph &graph, const poplar::Tensor &gradsIn, const poplar::Tensor &gamma, poplar::program::Sequence &prog, const poplar::DebugContext &debugContext={})
	Propagate the gradients through the normalisation layer. More...

poplar::Tensor	normStatisticsGradients (poplar::Graph &graph, const poplar::Tensor &actsWhitened, const poplar::Tensor &gradsIn, const poplar::Tensor &invStdDev, poplar::program::Sequence &prog, const poplar::Type &partialsType=poplar::FLOAT, const poplar::DebugContext &debugContext={})
	Propagate the gradients through the norm statistics layer. More...

poplar::Tensor	distributedNormStatisticsGradients (poplar::Graph &replicatedGraph, const poplar::Tensor &actsWhitened, const poplar::Tensor &gradsIn, const poplar::Tensor &invStdDev, poplar::program::Sequence &prog, poplin::DistributedNormReduceCallback reduceCallback, unsigned normSize, const poplar::Type &partialsType=poplar::FLOAT, const poplar::DebugContext &debugContext={})
	Propagate the gradients through the norm statistics layer where equal sized batch elements are distributed over replicas. More...

poplar::Tensor	createTriangularSolveInputLHS (poplar::Graph &graph, const poplar::Type &inputType, const poplar::Type &outputType, const std::vector< std::size_t > &aShape, const std::vector< std::size_t > &bShape, bool leftSide, const poplar::DebugContext &debugContext, const poplar::OptionFlags &options={}, PlanningCache *cache=nullptr)
	Create a tensor that is used as the left operand of triangular solve. More...

poplar::Tensor	createTriangularSolveInputRHS (poplar::Graph &graph, const poplar::Type &inputType, const poplar::Type &outputType, const std::vector< std::size_t > &aShape, const std::vector< std::size_t > &bShape, bool leftSide, const poplar::DebugContext &debugContext, const poplar::OptionFlags &options={}, PlanningCache *cache=nullptr)
	Create a tensor that is used as the right operand of triangular solve. More...

poplar::Tensor	triangularMask (poplar::Graph &graph, const poplar::Tensor &a, bool lower, bool unitDiagonal, poplar::program::Sequence &prog, const poplar::DebugContext &debugContext={})
	Masks the unused components of the input tensor with zeroes, optionally allowing for a unit diagonal. More...

poplar::Tensor	triangularSolve (poplar::Graph &graph, const poplar::Tensor &a, const poplar::Tensor &b, bool leftSide, bool lower, bool unitDiagonal, poplar::program::Sequence &prog, const poplar::DebugContext &debugContext={}, const poplar::OptionFlags &options={}, PlanningCache *cache=nullptr)
	Solves systems of linear equations with lower or upper triangular coefficients. More...

std::vector< std::pair< MatMulParams, poplar::OptionFlags > >	getTriangularSolveMatMulPrePlanParameters (const poplar::Type &inputType, const poplar::Type &outputType, const std::vector< std::size_t > &aShape, const std::vector< std::size_t > &bShape, bool leftSide, bool lower, const poplar::OptionFlags &options)
	Plan matrix multiplication for given triangular solver. More...

Detailed Description

Linear algebra functions.

Typedef Documentation

◆ DistributedNormReduceCallback

using poplin::DistributedNormReduceCallback = typedef std::function<std::vector<poplar::Tensor>( poplar::Graph &replicatedGraph, const std::vector<poplar::Tensor> &inputsToReduce, poplar::program::Sequence &prog, unsigned groupSize, const poplar::DebugContext &debugContext, const poplar::OptionFlags &options)>

Callback to reduce statistics and gradients.

The reduce operation is reduce-add.

Parameters

replicatedGraph	The replicated graph in which the computation is performed.
inputsToReduce	A vector of independent tensors to reduce
prog	A program sequence that the code to perform the normalisation will be appended to.
groupSize	The number of replicas that need to be reduced. This may be less than the total number of replicas in the top level graph. A group is formed by adjacent replicas such that the top level graph contains an integral number of `groupSize` replicas.
debugContext	Optional debug information.
options	The structure describing options on how the reduction should be implemented.

Returns: A vector of reduced tensors in the same order as supplied in inputsToReduce

◆ MatMulPlanParams

using poplin::MatMulPlanParams = typedef std::tuple<const poplar::Target *, const MatMulParams, const poplar::OptionFlags *>

A tuple containing the required parameters to preplan a matmul:

matmul-specific target for tile / IPU sizing
matmul parameters
implementation options (see matMul() above)

All entries must have matching machine parameters.

Function Documentation

◆ addBias()

void poplin::addBias	(	poplar::Graph &	graph,
		const poplar::Tensor &	in,
		const poplar::Tensor &	biases,
		poplar::program::Sequence &	prog,
		const poplar::DebugContext &	debugContext = `{}`
	)

Adds a program to prog which adds biases to activations tensor.

Parameters

graph	The graph that the operation will be added to.
in	Tensor containing the values to which the biases are added.
biases	Biases to add to the `in` tensor.
prog	Poplar program sequence to append the operation onto.
debugContext	Optional debug information.

◆ calculateWeightDeltas()

poplar::Tensor poplin::calculateWeightDeltas	(	poplar::Graph &	graph,
		const poplar::Tensor &	zDeltas,
		const poplar::Tensor &	activations,
		const ConvParams &	params,
		poplar::program::Sequence &	prog,
		const poplar::DebugContext &	debugContext = `{}`,
		const poplar::OptionFlags &	options = `{}`,
		PlanningCache *	cache = `nullptr`
	)

Append an operation to a poplar::Program to generate the tensor of weight deltas.

Parameters

graph	The tensor will be added to this graph.
zDeltas	Tensor containing the gradients with respect to the output of the convolution.
activations	Tensor containing the inputs to the convolution in the forward pass.
params	Parameters of the convolution.
prog	Poplar program sequence to append the operation onto.
debugContext	Optional debug information.
options	Options controlling the implementation. See createWeights().
cache	Optional pointer to planning cache to use.

Returns: The weight deltas are the gradients with respect to the weights of the convolution. These are populated when the operation runs.

◆ cholesky()

poplar::Tensor poplin::cholesky	(	poplar::Graph &	graph,
		const poplar::Tensor &	a,
		bool	lower,
		poplar::program::Sequence &	prog,
		const poplar::DebugContext &	debugContext = `{}`,
		poplar::OptionFlags	options = `{}`,
		PlanningCache *	cache = `nullptr`
	)

Computes Cholesky factor for a symmetric positive definite matrix.

Supported options:

blockSize: A hint for the size of the block to be used.

See also: matMul() for additional options.

Parameters

graph	The Poplar graph.
a	A tensor of floating-point type with shape [..., N,N].
lower	If true, return a lower triangular matrix, else return an upper triangular matrix.
prog	A reference to a program sequence which the code to perform the arrangement will be appended to.
debugContext	Optional debug information.
options	A structure describing options on how the decomposition should be implemented.
cache	Optional pointer to a planning cache to use.

Returns: A tensor with the same shape as `a` containing the triangular factor.

◆ choleskyInPlace()

void poplin::choleskyInPlace	(	poplar::Graph &	graph,
		const poplar::Tensor &	a,
		bool	lower,
		poplar::program::Sequence &	prog,
		const poplar::DebugContext &	debugContext = `{}`,
		poplar::OptionFlags	options = `{}`,
		PlanningCache *	cache = `nullptr`
	)

Computes Cholesky factor in place for a symmetric positive definite matrix.

Supported options:

blockSize: A hint for the size of the block to be used.

See also: matMul() for additional options.

Parameters

graph	The Poplar graph.
a	A tensor of floating-point type with shape [..., N,N].
lower	If true, return a lower triangular matrix, else return an upper triangular matrix.
prog	A reference to a program sequence which the code to perform the arrangement will be appended to.
debugContext	Optional debug information.
options	A structure describing options on how the decomposition should be implemented.
cache	Optional pointer to a planning cache to use.

Returns: None.

◆ convolution()

poplar::Tensor poplin::convolution	(	poplar::Graph &	graph,
		const poplar::Tensor &	in,
		const poplar::Tensor &	weights,
		const ConvParams &	params,
		bool	transposeAndFlipWeights,
		poplar::program::Sequence &	prog,
		const poplar::DebugContext &	debugContext = `{}`,
		const poplar::OptionFlags &	options = `{}`,
		PlanningCache *	cache = `nullptr`
	)

Convolve an input with a set of weights.

The input tensor is in the form [B x inChans x H x W], and can be allocated using createInput(). The weights tensor is in the form [convGroups x outChansPerConvGroup x inChansPerConvGroup x H x W], and can be allocated using createWeights().

The returned tensor has the shape [B x outChans x H x W]

Padding and striding are specified in the ConvParams structure.

Parameters

graph	The graph that the operation will be added to.
in	Input data tensor.
weights	Weights tensor.
params	Parameters for the form of the convolution.
transposeAndFlipWeights	For the weight update pass.
prog	Poplar program sequence to append the operation onto.
debugContext	Optional debug information.
options	Options that control the implementation. See createWeights().
cache	Optional pointer to planning cache to use.

Returns: The convolved output tensor.

◆ convolutionBiasUpdate() [1/2]

void poplin::convolutionBiasUpdate	(	poplar::Graph &	graph,
		const poplar::Tensor &	zDeltas,
		const poplar::Tensor &	biases,
		const poplar::Tensor &	scale,
		const poplar::OptionFlags &	options,
		poplar::program::Sequence &	prog,
		const poplar::DebugContext &	debugContext = `{}`
	)

Add a program to update biases tensor with the gradients derived from the zDeltas tensor.

Parameters

graph	The graph that the operation will be added to.
zDeltas	Tensor containing the gradients with respect to the output of the convolution.
biases	Biases tensor to update.
scale	Scale to apply to the `zDeltas` tensor.
options	Options controlling the implementation. See createWeights().
prog	Poplar program sequence to append the operation onto.
debugContext	Optional debug information.

◆ convolutionBiasUpdate() [2/2]

void poplin::convolutionBiasUpdate	(	poplar::Graph &	graph,
		const poplar::Tensor &	zDeltas,
		const poplar::Tensor &	biases,
		float	scale,
		const poplar::OptionFlags &	options,
		poplar::program::Sequence &	prog,
		const poplar::DebugContext &	debugContext = `{}`
	)

Add a program to update biases tensor with the gradients derived from the zDeltas tensor.

Parameters

graph	The graph that the operation will be added to.
zDeltas	Tensor containing the gradients with respect to the output of the convolution.
biases	Biases tensor to update.
scale	Scale to apply to the `zDeltas` tensor.
options	Options controlling the implementation. See createWeights().
prog	Poplar program sequence to append the operation onto.
debugContext	Optional debug information.

◆ convolutionValidateOptions()

void poplin::convolutionValidateOptions ( const poplar::OptionFlags & options )

Provides an interface to validate the convolution options.

The presence of an invalid key or an invalid value will cause an exception to be thrown.

Parameters

options Options controlling the implementation. See createWeights().

◆ convolutionWeightUpdate() [1/2]

void poplin::convolutionWeightUpdate	(	poplar::Graph &	graph,
		const poplar::Tensor &	zDeltas,
		const poplar::Tensor &	weights,
		const poplar::Tensor &	activations,
		ConvParams	params,
		const poplar::Tensor &	scale,
		poplar::program::Sequence &	prog,
		const poplar::DebugContext &	debugContext = `{}`,
		const poplar::OptionFlags &	options = `{}`,
		PlanningCache *	cache = `nullptr`
	)

Append operations to a poplar::Program to generate and apply the weight update.

See also: calculateWeightDeltas().

Parameters

graph	The graph that the operation will be added to.
zDeltas	Tensor containing the gradients with respect to the output of the convolution.
weights	Weights tensor.
activations	Tensor containing the inputs to the convolution in the forward pass.
params	Parameters of the convolution.
scale	Scale to apply to the `zDeltas`.
prog	Poplar program sequence to append the operations onto.
debugContext	Optional debug information.
options	Options controlling the implementation. See createWeights().
cache	Optional pointer to planning cache to use.

◆ convolutionWeightUpdate() [2/2]

void poplin::convolutionWeightUpdate	(	poplar::Graph &	graph,
		const poplar::Tensor &	zDeltas,
		const poplar::Tensor &	weights,
		const poplar::Tensor &	activations,
		ConvParams	params,
		float	scale,
		poplar::program::Sequence &	prog,
		const poplar::DebugContext &	debugContext = `{}`,
		const poplar::OptionFlags &	options = `{}`,
		PlanningCache *	cache = `nullptr`
	)

Append operations to a poplar::Program to generate and apply the weight update.

See also: calculateWeightDeltas().

Parameters

graph	The graph that the operation will be added to.
zDeltas	Tensor containing the gradients with respect to the output of the convolution.
weights	Weights tensor.
activations	Tensor containing the inputs to the convolution in the forward pass.
params	Parameters of the convolution.
scale	Scale to apply to the zDeltas.
prog	Poplar program sequence to append the operations onto.
debugContext	Optional debug information.
options	Options controlling the implementation. See createWeights().
cache	Optional pointer to planning cache to use.

◆ convolutionWithOutput()

void poplin::convolutionWithOutput	(	poplar::Graph &	graph,
		const poplar::Tensor &	in,
		const poplar::Tensor &	weights,
		const poplar::Tensor &	out,
		const ConvParams &	params,
		bool	transposeAndFlipWeights,
		poplar::program::Sequence &	prog,
		const poplar::DebugContext &	debugContext = `{}`,
		const poplar::OptionFlags &	options = `{}`,
		PlanningCache *	cache = `nullptr`
	)

Convolve an input with a set of weights into a pre-allocated output tensor.

The output tensor is in the form [B x OutChans x H x W], and can be allocated using createConvOutput(). The weights tensor is in the form [convGroups x outChansPerConvGroup x inChansPerConvGroup x H x W], and can be allocated using createWeights(). The input tensor is in the form [B x inChans x H x W], and can be allocated using createInput().

Padding and striding are specified in the ConvParams structure.

Parameters

graph	The graph that the operation will be added to.
in	Input data tensor.
weights	Weights tensor.
out	Pre-allocated output tensor.
params	Parameters for the form of the convolution.
transposeAndFlipWeights	For the weight update pass.
prog	Poplar program sequence to append the operation onto.
debugContext	Optional debug information.
options	Options that control the implementation. See createWeights().
cache	Optional pointer to planning cache to use.

◆ createBiases() [1/2]

poplar::Tensor poplin::createBiases	(	poplar::Graph &	graph,
		const poplar::Tensor &	activations,
		const ConvParams &	params,
		const poplar::DebugContext &	debugContext = `{"biases"}`,
		const poplar::OptionFlags &	options = `{}`,
		PlanningCache *	cache = `nullptr`
	)

Create a bias tensor suitable for input to the addBias() function with allocation consistent with plan parameters.

The tensor will have the shape [outChans]

Parameters

graph	The graph that the tensor will be added to.
activations	The activation tensor which is output from the convolution.
params	Parameters as passed to the target convolution.
debugContext	Debugging name for the tensor.
options	Options controlling the implementation. See createWeights().
cache	Optional pointer to planning cache to use.

Returns: The tensor of biases.

◆ createBiases() [2/2]

poplar::Tensor poplin::createBiases	(	poplar::Graph &	graph,
		const poplar::Tensor &	activations,
		const poplar::DebugContext &	debugContext = `{"biases"}`
	)

Create a bias tensor suitable for input to the addBias() function.

The tensor will have the shape [outChans]

Parameters

graph	The graph that the tensor will be added to.
activations	The activation tensor which is output from the convolution.
debugContext	Debugging name for the tensor.

Returns: The tensor of biases.

◆ createCholeskyInput()

poplar::Tensor poplin::createCholeskyInput	(	poplar::Graph &	graph,
		const poplar::Type &	type,
		const std::vector< std::size_t > &	shape,
		bool	lower,
		const poplar::DebugContext &	debugContext,
		const poplar::OptionFlags &	options = `{}`,
		PlanningCache *	cache = `nullptr`
	)

Create a tensor that is used as the input for the Cholesky factoriser.

Supported options:

blockSize: A hint for the size of the block to be used.

See also: matMul() for additional options.

This will create a 2D/3D tensor in the graph. The ordering and tile mapping of the tensor will be set to make a triangular factoriser with this tensor as the left argument efficient.

Parameters

graph	The Poplar graph.
type	The input data type.
shape	The shape of the tensor.
lower	If true, return a lower triangular matrix, else return an upper triangular matrix.
debugContext	Debug information.
options	A structure describing options on how the decomposition should be implemented.
cache	Optional pointer to a planning cache to use.

Returns: A matrix of type `type` and shape `shape`. The tensor will have been mapped to tiles.

◆ createConvOutput()

poplar::Tensor poplin::createConvOutput	(	poplar::Graph &	graph,
		const ConvParams &	params,
		const poplar::DebugContext &	debugContext = `{}`,
		const poplar::OptionFlags &	options = `{}`,
		PlanningCache *	cache = `nullptr`
	)

Create an output tensor for a convolution.

Use this when you need to create an output data tensor for a convolution. The same set of parameters which will be passed to the convolution() should also be passed to createConvOutput().

The returned tensor has the shape [B x outChans x H x W].

Parameters

graph	The tensor will be added to this graph.
params	Parameters as passed to the target convolution.
debugContext	Debugging name for the tensor.
options	Options controlling the implementation. See createWeights().
cache	Optional pointer to planning cache to use.

Returns: The allocated output tensor.

◆ createInput()

poplar::Tensor poplin::createInput	(	poplar::Graph &	graph,
		const ConvParams &	params,
		const poplar::DebugContext &	debugContext = `{}`,
		const poplar::OptionFlags &	options = `{}`,
		PlanningCache *	cache = `nullptr`
	)

Create an input tensor for a convolution.

Use this when you need to create an input data tensor for a convolution. The same set of parameters which will be passed to the convolution() should also be passed to createInput().

The returned tensor has the shape [B x inChans x H x W].

Parameters

graph	The tensor will be added to this graph.
params	Parameters as passed to the target convolution.
debugContext	Debugging name for the tensor.
options	Options controlling the implementation. See createWeights().
cache	Optional pointer to planning cache to use.

Returns: The allocated input tensor.

◆ createMatMulGroupedInputLHS()

poplar::Tensor poplin::createMatMulGroupedInputLHS	(	poplar::Graph &	graph,
		const poplar::Type &	inputType,
		const poplar::Type &	outputType,
		const std::vector< std::size_t > &	aShape,
		const std::vector< std::size_t > &	bShape,
		const poplar::DebugContext &	debugContext,
		const poplar::OptionFlags &	options = `{}`,
		PlanningCache *	cache = `nullptr`
	)

Create a tensor that is used as the left operand of a grouped matrix multiplication.

This will create a 3D tensor in the graph. The ordering and tile mapping of the tensor will be set to make a grouped matrix multiplication with this tensor as the left argument efficient.

The first dimension of the required matrix and the matrix it multiplies by must be the number of groups.

Parameters

graph	The Poplar graph.
type	The data type of the required matrix.
aShape	The grouped shape [g, r, c] of the required matrix.
bShape	The grouped shape [g, r, c] of the matrix that the required matrix will be multiplied by.
debugContext	Debug information.
options	The implementation options of the multiplication. See matMul().
cache	Optional pointer to a planning cache to use.

Returns: A matrix of type type and grouped shape aShape. The tensor will have been mapped to tiles.

◆ createMatMulGroupedInputRHS()

poplar::Tensor poplin::createMatMulGroupedInputRHS	(	poplar::Graph &	graph,
		const poplar::Type &	inputType,
		const poplar::Type &	outputType,
		const std::vector< std::size_t > &	aShape,
		const std::vector< std::size_t > &	bShape,
		const poplar::DebugContext &	debugContext,
		const poplar::OptionFlags &	options = `{}`,
		PlanningCache *	cache = `nullptr`
	)

Create a tensor that is used as the right operand of grouped matrix multiplication.

This will create a 3D tensor in the graph. The ordering and tile mapping of the tensor will be set to make a grouped matrix multiplication with this tensor as the right argument efficient.

The first dimension of the required matrix and the matrix it multiplies by must be the number of groups.

Parameters

graph	The Poplar graph.
type	The data type of the required matrix.
aShape	The grouped shape [g, r, c] of the matrix that the required matrix will be multiplied by.
bShape	The grouped shape [g, r, c] of the required matrix.
debugContext	Debug information.
options	The implementation options of the multiplication. See matMul().
cache	Optional pointer to planning cache to use.

Returns: A matrix of type type and grouped shape bShape. The tensor will have been mapped to tiles.

◆ createMatMulGroupedOutput()

poplar::Tensor poplin::createMatMulGroupedOutput	(	poplar::Graph &	graph,
		const poplar::Type &	inputType,
		const poplar::Type &	outputType,
		const std::vector< std::size_t > &	aShape,
		const std::vector< std::size_t > &	bShape,
		const poplar::DebugContext &	debugContext,
		const poplar::OptionFlags &	options = `{}`,
		PlanningCache *	cache = `nullptr`
	)

Create a tensor that is used as the output operand of grouped matrix multiplication (with output).

This will create a 3D tensor in the graph. The ordering and tile mapping of the tensor will be set to make a grouped matrix multiplication with this tensor as the output argument efficient.

The first dimension of both matrices being multiplied, and of the required output matrix, must be the number of groups.

Parameters

graph	The Poplar graph.
type	The data type of the required matrix.
aShape	The grouped shape [g, r, c] of the left operand of the multiplication.
bShape	The grouped shape [g, r, c] of the right operand of the multiplication.
debugContext	Debug information.
options	The implementation options of the multiplication. See matMul().
cache	Optional pointer to planning cache to use.

Returns: A matrix of type type and grouped shape [ aShape[g], aShape[r], bShape[c] ]. The tensor will have been mapped to tiles.

◆ createMatMulInputLHS() [1/2]

poplar::Tensor poplin::createMatMulInputLHS	(	poplar::Graph &	graph,
		const poplar::Type &	dataType,
		const std::vector< std::size_t > &	aShape,
		const std::vector< std::size_t > &	bShape,
		const poplar::DebugContext &	debugContext,
		const poplar::OptionFlags &	options = `{}`,
		PlanningCache *	cache = `nullptr`
	)

Create a tensor that is used as the left operand of matrix multiplication.

The type of both input and output tensors is specified by dataType. This will create a 2D tensor in the graph. The ordering and tile mapping of the tensor will be set to make a matrix multiplication with this tensor as the left argument efficient.

Parameters

graph	The Poplar graph.
dataType	The data type of both the input and output tensors.
aShape	The shape of the required matrix.
bShape	The shape of the matrix that the required matrix will be multiplied by.
debugContext	Debug information.
options	The implementation options of the multiplication. See matMul().
cache	Optional pointer to a planning cache to use.

Returns: A matrix of type `dataType` and shape `aShape`. The tensor will have been mapped to tiles.

◆ createMatMulInputLHS() [2/2]

poplar::Tensor poplin::createMatMulInputLHS	(	poplar::Graph &	graph,
		const poplar::Type &	inputType,
		const poplar::Type &	outputType,
		const std::vector< std::size_t > &	aShape,
		const std::vector< std::size_t > &	bShape,
		const poplar::DebugContext &	debugContext,
		const poplar::OptionFlags &	options = `{}`,
		PlanningCache *	cache = `nullptr`
	)

Create a tensor that is used as the left operand of matrix multiplication.

The types of the input and output tensors are specified separately. This will create a 2D tensor in the graph. The ordering and tile mapping of the tensor will be set to make a matrix multiplication with this tensor as the left argument efficient.

Parameters

graph	The Poplar graph.
inputType	The input data type.
outputType	The data type of the returned tensor.
aShape	The shape of the required matrix.
bShape	The shape of the matrix that the required matrix will be multiplied by.
debugContext	Debug information.
options	The implementation options of the multiplication. See matMul().
cache	Optional pointer to a planning cache to use.

Returns: A matrix of type type and shape aShape. The tensor will have been mapped to tiles.

◆ createMatMulInputRHS()

poplar::Tensor poplin::createMatMulInputRHS	(	poplar::Graph &	graph,
		const poplar::Type &	inputType,
		const poplar::Type &	outputType,
		const std::vector< std::size_t > &	aShape,
		const std::vector< std::size_t > &	bShape,
		const poplar::DebugContext &	debugContext,
		const poplar::OptionFlags &	options = `{}`,
		PlanningCache *	cache = `nullptr`
	)

Create a tensor that is used as the right operand of matrix multiplication.

This will create a 2D tensor in the graph. The ordering and tile mapping of the tensor will be set to make a matrix multiplication with this tensor as the right argument efficient.

Parameters

graph	The Poplar graph.
inputType	The input data type.
outputType	The data type of the returned tensor.
aShape	The shape of the matrix that the required matrix will be multiplied by.
bShape	The shape of the required matrix.
debugContext	Debug information.
options	The implementation options of the multiplication. See matMul().
cache	Optional pointer to a planning cache to use.

Returns: A matrix of type type and shape bShape. The tensor will have been mapped to tiles.

◆ createMatMulOutput()

poplar::Tensor poplin::createMatMulOutput	(	poplar::Graph &	graph,
		const poplar::Type &	inputType,
		const poplar::Type &	outputType,
		const std::vector< std::size_t > &	aShape,
		const std::vector< std::size_t > &	bShape,
		const poplar::DebugContext &	debugContext,
		const poplar::OptionFlags &	options = `{}`,
		PlanningCache *	cache = `nullptr`
	)

Create a tensor that is used as the output operand of matrix multiplication.

This will create a 2D tensor in the graph. The ordering and tile mapping of the tensor will be set to make a matrix multiplication with this tensor as the output argument efficient.

Parameters

graph	The Poplar graph.
inputType	The input data type.
outputType	The data type of the returned tensor.
aShape	The shape of the left operand of the multiplication.
bShape	The shape of the right operand of the multiplication.
debugContext	Debug information.
options	The implementation options of the multiplication. See matMul().
cache	Optional pointer to a planning cache to use.

Returns: A matrix of type type and shape [ aShape[0], bShape[1] ]. The tensor will have been mapped to tiles.

◆ createNormBeta() [1/2]

poplar::Tensor poplin::createNormBeta	(	poplar::Graph &	graph,
		const poplar::Tensor &	acts,
		const poplar::DebugContext &	debugContext = `{}`
	)

Create and map the per-channel additive beta parameter tensor used for normalisation in convolution layers.

Parameters

graph	The graph with the activations and beta tensor.
acts	The activations tensor has shape `[N][C][..F..]` where: `N` is the batch size, `C` is the number of channels, and `..F..` is the dimensions of an N-dimensional field.
debugContext	Optional debug information.

Returns: Beta vector of dimension C.

◆ createNormBeta() [2/2]

poplar::Tensor poplin::createNormBeta	(	poplar::Graph &	graph,
		const poplar::Tensor &	acts,
		const poplar::Type &	type,
		const poplar::DebugContext &	debugContext = `{}`
	)

Create and map the per-channel additive beta parameter tensor used for normalisation in convolution layers.

Parameters

graph	The graph with the activations and beta tensor.
acts	The activations tensor has shape `[N][C][..F..]` where: `N` is the batch size, `C` is the number of channels, and `..F..` is the dimensions of an N-dimensional field.
type	The type of the output tensor.
debugContext	Optional debug information.

Returns: Beta vector of dimension C.

◆ createNormGamma() [1/2]

poplar::Tensor poplin::createNormGamma	(	poplar::Graph &	graph,
		const poplar::Tensor &	acts,
		const poplar::DebugContext &	debugContext = `{}`
	)

Create and map the per-channel multiplicative gamma parameter tensor used for normalisation in convolution layers.

Parameters

graph	The graph with the activations and gamma tensor.
acts	The activations tensor has shape `[N][C][..F..]` where: `N` is the batch size, `C` is the number of channels, and `..F..` is the dimensions of an N-dimensional field.
debugContext	Optional debug information.

Returns: Gamma vector of dimension C.

◆ createNormGamma() [2/2]

poplar::Tensor poplin::createNormGamma	(	poplar::Graph &	graph,
		const poplar::Tensor &	acts,
		const poplar::Type &	type,
		const poplar::DebugContext &	debugContext = `{}`
	)

Create and map the per-channel multiplicative gamma parameter tensor used for normalisation in convolution layers.

Parameters

graph	The graph with the activations and gamma tensor.
acts	The activations tensor has shape `[N][C][..F..]` where: `N` is the batch size, `C` is the number of channels, and `..F..` is the dimensions of an N-dimensional field.
type	The type of the output tensor.
debugContext	Optional debug information.

Returns: Gamma vector of dimension C.

◆ createNormParams()

std::pair< poplar::Tensor, poplar::Tensor > poplin::createNormParams	(	poplar::Graph &	graph,
		const poplar::Tensor &	acts,
		const poplar::DebugContext &	debugContext = `{}`
	)

Creates a tensor pair of normalisation parameters (gamma, beta).

Parameters

graph	The graph with the activations and beta/gamma tensors.
acts	The activations tensor has shape `[N][C][..F..]` where: `N` is the batch size, `C` is the number of channels, and `..F..` is the dimensions of an N-dimensional field.
debugContext	Optional debug information.

Returns: A pair of vectors of dimension C.

◆ createTriangularSolveInputLHS()

poplar::Tensor poplin::createTriangularSolveInputLHS	(	poplar::Graph &	graph,
		const poplar::Type &	inputType,
		const poplar::Type &	outputType,
		const std::vector< std::size_t > &	aShape,
		const std::vector< std::size_t > &	bShape,
		bool	leftSide,
		const poplar::DebugContext &	debugContext,
		const poplar::OptionFlags &	options = `{}`,
		PlanningCache *	cache = `nullptr`
	)

Create a tensor that is used as the left operand of triangular solve.

This will create a 2D/3D tensor in the graph. The ordering and tile mapping of the tensor will be set to make a triangular solver with this tensor as the left argument efficient.

Supported options:

blockSize

A hint for the size of block to be used. See triangularSolve() for more information.

See matMul() for additional options.

Parameters

graph	The Poplar graph.
inputType	The input data type.
outputType	The data type of the returned tensor.
aShape	The shape of the left operand.
bShape	The shape of the right operand.
leftSide	Solve AX = B if true, XA = B otherwise.
debugContext	Debug information.
options	The implementation options of the triangular solver.
cache	Optional pointer to a planning cache to use.

Returns: A matrix of type type and shape aShape. The tensor will have been mapped to tiles.

◆ createTriangularSolveInputRHS()

poplar::Tensor poplin::createTriangularSolveInputRHS	(	poplar::Graph &	graph,
		const poplar::Type &	inputType,
		const poplar::Type &	outputType,
		const std::vector< std::size_t > &	aShape,
		const std::vector< std::size_t > &	bShape,
		bool	leftSide,
		const poplar::DebugContext &	debugContext,
		const poplar::OptionFlags &	options = `{}`,
		PlanningCache *	cache = `nullptr`
	)

Create a tensor that is used as the right operand of triangular solve.

This will create a 2D/3D tensor in the graph. The ordering and tile mapping of the tensor will be set to make a triangular solver with this tensor as the right argument efficient.

Supported options:

blockSize

A hint for the size of block to be used. See triangularSolve() for more information.

See matMul() for additional options.

Parameters

graph	The Poplar graph.
inputType	The input data type.
outputType	The data type of the returned tensor.
aShape	The shape of the left operand.
bShape	The shape of the right operand.
leftSide	Solve AX = B if true, XA = B otherwise.
debugContext	Debug information.
options	The implementation options of the triangular solver.
cache	Optional pointer to a planning cache to use.

Returns: A matrix of type type and shape bShape. The tensor will have been mapped to tiles.

◆ createWeights()

poplar::Tensor poplin::createWeights	(	poplar::Graph &	graph,
		const ConvParams &	params,
		const poplar::DebugContext &	debugContext = `{}`,
		const poplar::OptionFlags &	options = `{}`,
		PlanningCache *	cache = `nullptr`
	)

Create a weight tensor suitable for use with convolution()

The shape of the tensor will be [convGroups x outChansPerConvGroup x inChansPerConvGroup x H x W]

Convolution options

availableMemoryProportion Decimal between 0 and 1 (inclusive) [=0.6]

The amount of memory allocated for use for temporary data whilst the operation is executing (for example, for intermediate calculated values or temporary values passed between tiles on the IPU). The value is specified as a proportion of available memory on the IPU. So, for example, a value of 0.1 will constrain the library to use 10% of the total memory for temporary data.

The library will try and constrain the use of temporary memory to below this value. An operation that has more temporary memory available to use will run in the same or fewer cycles.

For a specific operation, the minimum amount of temporary memory the library is able to use may be more than the amount specified by this option. In this case, if POPLIBS_LOG_LEVEL=WARN or POPLIBS_POPLIN_LOG_LEVEL=WARN, a warning message will be output, and the amount specified by this option is ignored.

Note: if this value is set to less than 5% of memory (so, a value less than 0.05) then it is often the case that the library will need to create a large amount of code and data structures to keep the temporary memory low which could have a permanent memory overhead larger than the saving of temporary memory. You should take great care when setting a value this low.

See also
[Optimising Temporary Memory Usage for Convolutions and Matmuls on the IPU](https://docs.graphcore.ai/projects/available-memory/) technical note for some practical examples of using availableMemoryProportion.
partialsType (half, float) [=float]

Data type used for intermediate calculations. If the type specified is smaller than the output type then the option is ignored and the output type is used instead.
pass (NONE, INFERENCE_FWD, TRAINING_FWD, TRAINING_BWD, TRAINING_WU, FC_INFERENCE_FWD, FC_TRAINING_FWD, FC_TRAINING_BWD, FC_TRAINING_WU) [=NONE]

Optimize the plan for the specified type of pass. Note the abbreviations: FWD (forward), BWD (backward), WU (weight-update), FC (fully-connected).
use128BitConvUnitLoad (true, false) [=false]

If true, convolution weights are loaded 128-bits at a time. Otherwise, they are loaded 64-bits at a time. Not all codelets support 128-bit loads. This option affects memory usage and cycle count.
enableMultiStageReduce (true, false) [=true]

If true, perform the reduction following the convolution in multiple stages if it would significantly reduce code size. This comes at the cost of increasing the number of cycles.
enableFastReduce (true, false) [=false]

If true, use a faster reduction vertex if the data types and widths allow it. This comes at the cost of further constraints on memory allocation

enableConvDithering (true, false) [=false]

If true, then convolutions with different parameters will be laid out from different tiles in an effort to improve tile balance in models.

Parameters

graph	The graph that the tensor will be added to.
params	The same parameters as used by the convolution().
name	Debugging name for the tensor.
options	Options controlling the implementation.
cache	Optional pointer to planning cache to use.

Returns: The weights tensor suitable for use with convolution().

◆ distributedNormStatistics()

std::pair< poplar::Tensor, poplar::Tensor > poplin::distributedNormStatistics	(	poplar::Graph &	replicatedGraph,
		const poplar::Tensor &	actsUngrouped,
		float	eps,
		poplar::program::Sequence &	prog,
		bool	unbiasedVarEstimate,
		DistributedNormReduceCallback	allReduceCallback,
		unsigned	normSize,
		bool	stableAlgo = `false`,
		const poplar::Type &	partialsType = `poplar::FLOAT`,
		const poplar::DebugContext &	debugContext = `{}`
	)

Compute the normalisation statistics for a part of the activations tensor which is distributed over multiple replicas.

Each replica gets equal sized batches (N) with normalisation done over normSize batches. A callback does the required mean reduction over multiple replicas. The activations tensor is of shape [N][C][..F..]. The mean and inverse standard deviation are computed over dimensions {[N] [..F..]} and vectors of length C are returned as estimates.

The input activations tensor must be rearranged such that statistics are computed for C channels.

Parameters

replicatedGraph	The replicated graph in which the computation is performed.
actsUngrouped	The activations with shape `[N][C][..F..]` where `N` is the batch size, `C` is the number of channels, and `..F..` are the dimensions of an N-dimensional field.
eps	The epsilon added to the variance to avoid divide by zero.
prog	A program sequence that the code to perform the normalisation will be appended to.
unbiasedVarEstimate	Compute unbiased variance estimate.
stableAlgo	If true, computes the mean first and subtracts it from the activations before computing the variance. The implementation with this flag set to true is slower than when set to false.
partialsType	Poplar type used for partials.
allReduceCallback	Callback to perform all-reduce over 'normSize' batch elements.
normSize	Number of batch elements over which statistics are estimated.
debugContext	Optional debug information.

Returns: A vector pair with mean and inverse standard deviation.

◆ distributedNormStatisticsGradients()

poplar::Tensor poplin::distributedNormStatisticsGradients	(	poplar::Graph &	replicatedGraph,
		const poplar::Tensor &	actsWhitened,
		const poplar::Tensor &	gradsIn,
		const poplar::Tensor &	invStdDev,
		poplar::program::Sequence &	prog,
		poplin::DistributedNormReduceCallback	reduceCallback,
		unsigned	normSize,
		const poplar::Type &	partialsType = `poplar::FLOAT`,
		const poplar::DebugContext &	debugContext = `{}`
	)

Propagate the gradients through the norm statistics layer where equal sized batch elements are distributed over replicas.

Each replica gets the same number of batches and norm gradients are computed over normSize batch elements. Each replica is given N batch elements. A callback does the required reduction over multiple replicas.

The input to the layer is the output gradients from the normalisation layer. The whitened activations and the input gradients must have undergone a prior rearrangement such that the channel dimension has the same elements as invStdDev.

Parameters

replicatedGraph	The replicated graph to which the normalisation operation is added.
actsWhitened	Forward whitened activations.
gradsIn	The gradient with respect to the output of this layer.
invStdDev	Inverse standard deviation from norm statistics.
prog	A program sequence that the code to perform the normalisation will be appended to.
reduceCallback	A callback to perform the all-reduce of the statistics gradients across the replicas.
normSize	The batch size over which the norm is done.
debugContext	Optional debug information.

Returns: The gradient with respect to the input of this layer.

◆ fullyConnectedWeightTranspose()

poplar::Tensor poplin::fullyConnectedWeightTranspose	(	poplar::Graph &	graph,
		poplar::Tensor	weights,
		const ConvParams &	params,
		poplar::program::Sequence &	prog,
		const poplar::DebugContext &	debugContext = `{}`,
		const poplar::OptionFlags &	options = `{}`,
		PlanningCache *	cache = `nullptr`
	)

Arranges the weights (activations) such that they are suited for the backward pass in a fully connected layer.

Parameters

graph	The graph that the operation will be added to.
weights	Tensor containing the weights (activations) to be rearranged.
params	Parameters of the convolution.
prog	Poplar program sequence to append the operation onto.
debugContext	Optional debug information.
options	Options controlling the implementation. See createWeights().
cache	Optional pointer to planning cache to use.

Returns: A tensor with the weights suitably arranged.

◆ getBwdPerfectCycleCount()

double poplin::getBwdPerfectCycleCount	(	const poplar::Graph &	graph,
		const ConvParams &	params
	)

Calculate the number of cycles to perform the backward pass assuming maximal utilisation of the target hardware, performing the minimum number of floating point operations.

This takes into account the number of tiles available and vectorization support on the target.

This is an optimistic number useful for estimating efficiency: cycleCount = getBwdFlops() / maximumHardwareVectorization.

Parameters

graph	Provides target the convolution will run on.
params	Description of convolution.

Returns: Estimated number of cycles to perform the backward pass.

◆ getCholeskyMatMulPrePlanParameters()

std::vector< std::pair< MatMulParams, poplar::OptionFlags > > poplin::getCholeskyMatMulPrePlanParameters	(	const poplar::Type &	type,
		const std::vector< std::size_t > &	shape,
		bool	lower,
		poplar::OptionFlags	options
	)

Plan matrix multiplication for the Cholesky factoriser.

Supported options:

blockSize: A hint for the size of the block to be used.

See also: matMul() for additional options.

Parameters

type	The data type of the input tensor.
shape	The shape of the input tensor.
lower	If true, return a lower triangular matrix, else return an upper triangular matrix.
options	A structure describing options on how the decomposition should be implemented.

Returns: Preplan parameters for matMul().

◆ getFwdPerfectCycleCount()

double poplin::getFwdPerfectCycleCount	(	const poplar::Graph &	graph,
		const ConvParams &	params
	)

Calculate the number of cycles to perform the forward pass assuming maximal utilisation of target hardware performing the minimum number of floating point operations.

This takes into account the number of tiles available and vectorization support on the target.

This is an optimistic number useful for estimating efficiency: cycleCount = getFwdFlops() / maximumHardwareVectorization.

Parameters

graph	Provides target the convolution will run on.
params	Description of convolution.

Returns: Estimated number of cycles to perform the forward pass.

◆ getInputIndex()

unsigned poplin::getInputIndex	(	unsigned	dim,
		unsigned	outputIndex,
		unsigned	kernelIndex,
		const ConvParams &	params
	)

Return the index of the input element that is multiplied by the specified kernel index to produce the specified output.

Return ~0U if there is no such input element.

◆ getKernelIndex()

unsigned poplin::getKernelIndex	(	unsigned	dim,
		unsigned	outputIndex,
		unsigned	inputIndex,
		const ConvParams &	params
	)

Return the index of the kernel element that is multiplied by the specified input index to produce the specified output.

Return ~0U if there is no such kernel element.

◆ getTriangularSolveMatMulPrePlanParameters()

std::vector< std::pair< MatMulParams, poplar::OptionFlags > > poplin::getTriangularSolveMatMulPrePlanParameters	(	const poplar::Type &	inputType,
		const poplar::Type &	outputType,
		const std::vector< std::size_t > &	aShape,
		const std::vector< std::size_t > &	bShape,
		bool	leftSide,
		bool	lower,
		const poplar::OptionFlags &	options
	)

Plan matrix multiplication for given triangular solver.

Parameters

inputType	The data type of the lhs tensor.
outputType	The data type of the rhs tensor.
aShape	The shape of the left operand.
bShape	The shape of the right operand.
leftSide	Solve AX = B if true, XA = B otherwise.
options	The implementation options of the triangular solver.

Returns: Matmul preplan parameters.

◆ getWuPerfectCycleCount()

double poplin::getWuPerfectCycleCount	(	const poplar::Graph &	graph,
		const ConvParams &	params
	)

Calculate the number of cycles to perform the weight update pass assuming maximal utilisation of the target hardware, performing the minimum number of floating point operations.

This takes into account the number of tiles available and vectorization support on the target.

This is an optimistic number useful for estimating efficiency: cycleCount = getWuFlops() / maximumHardwareVectorization.

Parameters

graph	Provides target the convolution will run on.
params	Description of convolution.

Returns: Estimated number of cycles to perform the weight update pass.

◆ linspace()

poplar::Tensor poplin::linspace	(	poplar::Graph &	graph,
		const poplar::Type &	type,
		float	left,
		float	right,
		size_t	count,
		const poplar::DebugContext &	debugContext = `{}`
	)

Create a constant variable that contains values equally spaced in the specified closed range [left, right].

Parameters

graph	Graph to which the variable is added.
left	The first value in the range.
right	The last value in the range.
type	Data type of variable to create. Must be `FLOAT` or `HALF`.
debugContext	Optional debug information.

Returns: Constant Tensor of rank 1 (vector) containing the linspace values.

◆ matMul()

poplar::Tensor poplin::matMul	(	poplar::Graph &	graph,
		const poplar::Tensor &	A,
		const poplar::Tensor &	B,
		poplar::program::Sequence &	prog,
		const poplar::Type &	outputType,
		const poplar::DebugContext &	debugContext = `{}`,
		const poplar::OptionFlags &	options = `{}`,
		PlanningCache *	cache = `nullptr`
	)

Multiply two matrices.

Calculates C = A * B where A and B are matrices.

Matrix multiply options

availableMemoryProportion Decimal between 0 and 1 (inclusive) [=0.6]

See createWeights().
fullyConnectedPass (NONE, INFERENCE_FWD, TRAINING_FWD, TRAINING_BWD, TRAINING_WU) [=NONE]

Optimize the plan for the specified type of pass. Note the abbreviations: FWD (forward), BWD (backward), WU (weight-update).
inputRHSIsPreArranged (true, false) [=false]

Indicates to matMul functions whether the input data has already been re-arranged (using preArrangeMatMulInputRHS()). This allows data to be re-arranged once then used many times.
use128BitConvUnitLoad (true, false) [=false]

If true, weights are loaded into the convolution unit 128-bits at a time. Otherwise, they are loaded 64-bits at a time. Not all codelets support 128-bit loads. This option affects memory usage and cycle count.
enableMultiStageReduce (true, false) [=true]

If true, perform the reduction following the matrix multiplication in multiple stages if it would significantly reduce code size. This comes at the cost of increasing the number of cycles.
enableFastReduce (true, false) [=false]

If true, use a faster reduction vertex if the data types and widths allow it. This comes at the cost of further constraints on memory allocation.
remapOutputTensor (true, false) [=true]

If true, the output of the convolution is remapped if the output is detected to have a poor layout.
partialsType (half, float) [=float]

See createWeights().

Parameters

graph	The Poplar graph.
A	The left argument to the multiplication. This 2D tensor must be already mapped to tiles.
B	The right argument to the multiplication. This 2D tensor must be already mapped to tiles.
prog	A reference to a program sequence which will be appended with the code to perform the multiplication.
outputType	Optional via overloaded function. Element type of returned tensor. The default is `A.elementType()` if omitted.
debugContext	Optional debug information.
options	The structure describing options on how the multiplication should be implemented.
cache	Optional pointer to a planning cache to use.

Returns: The tensor holding the result of the multiplication. This tensor will be created, added to the graph and mapped to tiles. Matrix multiply with explicitly defined output type.

◆ matMulAcc()

void poplin::matMulAcc	(	poplar::Graph &	graph,
		const poplar::Tensor &	C,
		float	k,
		const poplar::Tensor &	A,
		const poplar::Tensor &	B,
		poplar::program::Sequence &	prog,
		const poplar::DebugContext &	debugContext = `{}`,
		const poplar::OptionFlags &	options = `{}`,
		PlanningCache *	cache = `nullptr`
	)

Multiply two matrices and add to a third (with a scaling factor).

Calculates C += k * A * B where A, B are matrices and k is a constant scalar.

Parameters

graph	The Poplar graph.
C	The matrix to add to. This 2D tensor must be already mapped to tiles.
k	The constant or a single element tensor to multiply the result of the multiplication. If `k` is a tensor, it must be of the same type as `A`.
A	The left argument to the multiplication. This 2D tensor must be already mapped to tiles.
B	The right argument to the multiplication. This 2D tensor must be already mapped to tiles.
prog	A reference to a program sequence which will be appended with the code to perform the multiplication and add.
debugContext	Optional debug information.
options	The structure describing options on how the multiplication should be implemented. See matMul().
cache	Optional pointer to a planning cache to use. Matrix multiply and accumulate with a scalar scaling factor.

◆ matMulGetConvPlanParams()

std::set< ConvPlanParams > poplin::matMulGetConvPlanParams	(	const std::set< MatMulPlanParams > &	matmuls,
		MatMulToConvOptions &	matmulToConvOpts
	)

Obtain the set of convolution parameters corresponding to the user supplied set of parameters for matrix multiplication.

Parameters

matmuls	Set of Matrix multiplication parameter tuples
matmulToConvOpts	Convolution options corresponding to every matrix multiplication options.

Returns: Set of Convolution parameters

◆ matMulGrouped()

poplar::Tensor poplin::matMulGrouped	(	poplar::Graph &	graph,
		const poplar::Tensor &	A,
		const poplar::Tensor &	B,
		poplar::program::Sequence &	prog,
		const poplar::Type &	outputType,
		const poplar::DebugContext &	debugContext = `{}`,
		const poplar::OptionFlags &	options = `{}`,
		PlanningCache *	cache = `nullptr`
	)

Multiply two grouped matrices.

Calculates C[g] = A[g] * B[g] where A[g] and B[g] are matrices for each element in the group, and g is an element of the set {0, 1, ..., G-1}.

The multiplication is done for every element in the group. The first dimension of the matrices is the group dimension with value equal to G.

Parameters

graph	The Poplar graph.
A	The left argument to the grouped multiplication. This 3D tensor must be already mapped to tiles.
B	The right argument to the grouped multiplication. This 3D tensor must be already mapped to tiles.
prog	A reference to a program sequence which will be appended with the code to perform the multiplication.
outputType	Data type to be used for the returned tensor.
debugContext	Optional debug information.
options	The structure describing options on how the grouped multiplication should be implemented. See matMul().
cache	Optional pointer to a planning cache to use.

Returns: The tensor holding the result of the grouped multiplication. This tensor will be created, added to the graph and mapped to tiles.

◆ matMulGroupedAcc()

void poplin::matMulGroupedAcc	(	poplar::Graph &	graph,
		const poplar::Tensor &	C,
		float	k,
		const poplar::Tensor &	A,
		const poplar::Tensor &	B,
		poplar::program::Sequence &	prog,
		const poplar::DebugContext &	debugContext = `{}`,
		const poplar::OptionFlags &	options = `{}`,
		PlanningCache *	cache = `nullptr`
	)

Grouped matrix multiply and accumulate.

Multiply two grouped matrices and add to a third (with a scaling factor).

Calculates C[g] += k * A[g] * B[g] where A[g] and B[g] are matrices, k is a constant scalar, and g is an element of the set {0, 1, ..., G-1}.

The multiplication is done for every element in the group. The first dimension of the matrices is the group dimension with value equal to G.

Parameters

graph	The Poplar graph.
C	The matrix to add to. This 3D tensor must be already mapped to tiles.
k	The constant or a single element tensor to multiply the result of the multiplication. If `k` is a tensor, it must be of the same type as `A`.
A	The left argument to the grouped multiplication. This 3D tensor must be already mapped to tiles.
B	The right argument to the multiplication. This 3D tensor must be already mapped to tiles.
prog	A reference to a program sequence which will be appended with the code to perform the grouped multiplication and add.
debugContext	Optional debug information.
options	The structure describing options on how the multiplication should be implemented. See matMul().
cache	Optional pointer to planning cache to use. Grouped matrix multiply and accumulate with a scalar scaling factor.

◆ matMulGroupedReportPlan()

void poplin::matMulGroupedReportPlan	(	std::ostream &	out,
		const poplar::Graph &	graph,
		const poplar::Type &	inputType,
		const poplar::Type &	outputType,
		const std::vector< std::size_t > &	aShape,
		const std::vector< std::size_t > &	bShape,
		const poplar::OptionFlags &	options = `{}`,
		PlanningCache *	cache = `nullptr`
	)

Report the convolution plan corresponding to the params and options provided.

Parameters

out	Stream to write report to.
graph	The Poplar graph.
inputType	Element type of input tensors.
outputType	Element type of output tensor.
aShape	Shape of input tensor A.
bShape	Shape of input tensor B.
options	The structure describing options on how the multiplication should be implemented.
cache	Optional pointer to a planning cache to use.

◆ matMulReportPlan()

void poplin::matMulReportPlan	(	std::ostream &	out,
		const poplar::Graph &	graph,
		const poplar::Type &	inputType,
		const poplar::Type &	outputType,
		const std::vector< std::size_t > &	aShape,
		const std::vector< std::size_t > &	bShape,
		const poplar::OptionFlags &	options = `{}`,
		PlanningCache *	cache = `nullptr`
	)

Report the convolution plan corresponding to the parameters and options provided.

Parameters

out	Stream to write report to.
graph	The Poplar graph.
inputType	Element type of input tensors.
outputType	Element type of output tensor.
aShape	Shape of input tensor A.
bShape	Shape of input tensor B.
options	The structure describing options on how the multiplication should be implemented.
cache	Optional pointer to a planning cache to use.

◆ matmulValidateOptions()

void poplin::matmulValidateOptions ( const poplar::OptionFlags & options )

Provides an interface to validate the matmul options.

An exception is thrown if the options contain an invalid key or an invalid value.

Parameters

options Flags describing options for how the multiplication should be implemented. See matMul().

◆ meshgrid2d()

std::vector< poplar::Tensor > poplin::meshgrid2d	(	poplar::Graph &	graph,
		poplar::Tensor	x,
		poplar::Tensor	y
	)

Create a coordinate grid for each axis by broadcasting the input tensors.

This 2D specialisation only supports two inputs that must be of rank 1 (vectors) and hence the output coordinate grids are always two matrices (so two outputs of rank 2).

Parameters

graph	Graph to which the variables are added.
x	Co-ordinates for the x-axis.
y	Co-ordinates for the y-axis.

Returns: A list of (two) tensors that form co-ordinate grids for each input axis. These output tensors will be views of the inputs (reshaped and broadcast).

◆ normalise()

poplar::Tensor poplin::normalise	(	poplar::Graph &	graph,
		const poplar::Tensor &	actsWhitened,
		const poplar::Tensor &	gamma,
		const poplar::Tensor &	beta,
		poplar::program::Sequence &	prog,
		const poplar::DebugContext &	debugContext = `{}`
	)

Computes the normalised output from whitened activations.

Parameters

graph	The graph to which the normalisation operation is added.
actsWhitened	Whitened activations.
gamma	Per-channel multiplicative normalisation parameter.
beta	Per-channel additive normalisation parameter.
prog	A program sequence that the code to perform the normalisation will be appended to.
debugContext	Optional debug information.

Returns: A tensor containing the normalised activations.

◆ normGradients()

poplar::Tensor poplin::normGradients	(	poplar::Graph &	graph,
		const poplar::Tensor &	gradsIn,
		const poplar::Tensor &	gamma,
		poplar::program::Sequence &	prog,
		const poplar::DebugContext &	debugContext = `{}`
	)

Propagate the gradients through the normalisation layer.

Parameters

graph	The graph to which the normalisation operation is added.
gradsIn	The gradient with respect to the output of this layer.
gamma	Multiplicative parameter used in the normalisation.
prog	A program sequence that the code to perform the normalisation will be appended to.
debugContext	Optional debug information.

Returns: The gradient with respect to the input of this layer.

◆ normParamGradients()

std::pair< poplar::Tensor, poplar::Tensor > poplin::normParamGradients	(	poplar::Graph &	graph,
		const poplar::Tensor &	actsWhitened,
		const poplar::Tensor &	gradsIn,
		poplar::program::Sequence &	prog,
		const poplar::Type &	partialsType = `poplar::FLOAT`,
		const poplar::DebugContext &	debugContext = `{}`
	)

Compute gradients with respect to parameters required for parameter update.

Parameters

graph	The graph to which the normalisation operation is added.
actsWhitened	Whitened activations.
gradsIn	The gradient with respect to the output of this layer.
prog	A program sequence that the code to perform the normalisation will be appended to.
partialsType	The intermediate type kept in the computation.
debugContext	Optional debug information.

Returns: A pair of tensors, gammaDelta and betaDelta which are the gradients with respect to gamma and beta.

◆ normStatistics()

std::pair< poplar::Tensor, poplar::Tensor > poplin::normStatistics	(	poplar::Graph &	graph,
		const poplar::Tensor &	actsUngrouped,
		float	eps,
		poplar::program::Sequence &	prog,
		bool	unbiasedVarEstimate,
		bool	stableAlgo = `false`,
		const poplar::Type &	partialsType = `poplar::FLOAT`,
		const poplar::DebugContext &	debugContext = `{}`
	)

Compute the normalisation statistics from the activations tensor.

The activations tensor is of shape [N][C][..F..]. The mean and inverse standard deviation are computed over dimensions {[N] [..F..]} and vectors of length C are returned as estimates.

The input activations tensor must be rearranged such that statistics are computed for C channels.

Parameters

graph	The graph in which the computation is performed.
actsUngrouped	The activations with shape `[N][C][..F..]` where `N` is the batch size, `C` is the number of channels, and `..F..` are the dimensions of an N-dimensional field.
eps	The epsilon added to the variance to avoid divide by zero.
prog	A program sequence that the code to perform the normalisation will be appended to.
unbiasedVarEstimate	Compute unbiased variance estimate.
stableAlgo	If true, computes the mean first and subtracts it from the activations before computing the variance. The implementation with this flag set to true is slower than when set to false.
partialsType	Poplar type used for partials.
debugContext	Optional debug information.

Returns: A vector pair with mean and inverse standard deviation.

◆ normStatisticsGradients()

poplar::Tensor poplin::normStatisticsGradients	(	poplar::Graph &	graph,
		const poplar::Tensor &	actsWhitened,
		const poplar::Tensor &	gradsIn,
		const poplar::Tensor &	invStdDev,
		poplar::program::Sequence &	prog,
		const poplar::Type &	partialsType = `poplar::FLOAT`,
		const poplar::DebugContext &	debugContext = `{}`
	)

Propagate the gradients through the norm statistics layer.

The input to the layer is the output gradients from the normalisation layer. The whitened activations and the input gradients must have undergone a prior rearrangement such that the channel dimension has the same elements as invStdDev.

Parameters

graph	The graph to which the normalisation operation is added.
actsWhitened	Forward whitened activations.
gradsIn	The gradient with respect to the output of this layer.
invStdDev	Inverse standard deviation from norm statistics.
prog	A program sequence that the code to perform the normalisation will be appended to.
debugContext	Optional debug information.

Returns: The gradient with respect to the input of this layer.

◆ normWhiten()

poplar::Tensor poplin::normWhiten	(	poplar::Graph &	graph,
		const poplar::Tensor &	acts,
		const poplar::Tensor &	mean,
		const poplar::Tensor &	iStdDev,
		poplar::program::Sequence &	prog,
		const poplar::DebugContext &	debugContext = `{}`
	)

Compute the whitened activations using the supplied mean and inverse standard deviation.

The input activations undergo a prior rearrangement such that C is the size of the statistics mean and iStdDev tensors.

Parameters

graph	The graph which the computation is in.
acts	The activations tensor of shape [N][C][..F..].
mean	Mean of the activations with dimension C.
iStdDev	Inverse standard deviation with dimension C.
prog	A program sequence that the code to perform the normalisation will be appended to.
debugContext	Optional debug information.

Returns: A new tensor with the whitened activations.

◆ preArrangeMatMulInputRHS()

poplar::Tensor poplin::preArrangeMatMulInputRHS	(	poplar::Graph &	graph,
		const std::vector< std::size_t > &	aShape,
		const poplar::Tensor &	B,
		poplar::program::Sequence &	prog,
		const poplar::Type &	outputType,
		const poplar::DebugContext &	debugContext = `{}`,
		const poplar::OptionFlags &	options = `{}`,
		PlanningCache *	cache = `nullptr`
	)

Pre-arrange right-hand side input.

Re-arrange memory for RHS operand to an upcoming matmul operation. This allows the rearrangement of the memory of a tensor that would otherwise be rearranged as part of the matmul operation for efficiency.

Use this function and the matMul*() functions with the inputRHSIsPreArranged option flag to do any re-arrangement necessary once and then re-use that input multiple times.

Only valid for fully connected layers.

Parameters

graph	The Poplar graph.
aShape	The shape of the left argument to the multiplication.
B	The right argument to the multiplication. This 2D tensor must be already mapped to tiles.
prog	A reference to a program sequence which will be appended with the code to perform the arrangement.
outputType	Optional via overloaded function. Element type of returned tensor. The default is `B.elementType()` if omitted.
debugContext	Optional debug information.
options	Flags describing options for how the multiplication should be implemented. See matMul().
cache	Optional pointer to planning cache to use.

Returns: New tensor holding the rearranged input. This tensor has the same shape as the given tensor. Pre-arrange input with explicitly defined output type.

◆ preplan()

void poplin::preplan	(	const std::set< ConvPlanParams > &	convs,
		const std::set< MatMulPlanParams > &	matmuls,
		PlanningCache &	cache
	)

Plan the specified convolutions & matmuls.

Parameters

convs

A set of tuples of:

conv-specific target for tile / IPU sizing
convolution parameters
implementation options. See createWeights().

All entries must have matching machine parameters.

Parameters

matmuls

A set of tuples of:

matmul-specific target for tile / IPU sizing
matrix multiplication parameters (MatMulParams)
implementation options. See matMul().

All entries must have matching machine parameters.

Parameters

cache The planning cache to update.

◆ preplanConvolutions() [1/2]

void poplin::preplanConvolutions	(	const std::set< ConvPlanParams > &	convs,
		PlanningCache &	cache
	)

Deprecated: Use preplan() instead.

Plan the specified convolutions.

Parameters

convs

A set of tuples of:

conv-specific target for tile / IPU sizing
convolution parameters
implementation options. See createWeights().

All entries must have matching machine parameters.

Parameters

cache The planning cache to update.

◆ preplanConvolutions() [2/2]

void poplin::preplanConvolutions	(	poplar::Graph &	graph,
		const std::set< ConvPlanParams > &	convs,
		PlanningCache &	cache
	)

Deprecated: Use preplan() instead.

Plan the specified convolutions.

Parameters

graph	The graph the convolutions will belong to
convs	A set of tuples of: conv-specific target for tile / IPU sizing; convolution parameters; implementation options (see createWeights()).

All entries must have matching machine parameters.

Parameters

cache The planning cache to update.

◆ preplanMatMuls()

void poplin::preplanMatMuls	(	const std::set< MatMulPlanParams > &	matmuls,
		matmul::PlanningCache &	cache
	)

Deprecated: Use preplan() instead.

Plan the specified matrix multiplications.

Parameters

matmuls	A set of parameters to preplan matmuls
cache	The planning cache to update

◆ reportPlanEstimatedCosts()

PlanCosts poplin::reportPlanEstimatedCosts	(	const poplar::Graph &	graph,
		const ConvParams &	params,
		const poplar::OptionFlags &	options = `{}`,
		PlanningCache *	cache = `nullptr`
	)

Report the estimated cycles and memory costs of the convolution plan corresponding to the params and options provided.

Parameters

graph	The graph that the convolution is planned with.
params	The same parameters as used by the convolution().
options	Options controlling the implementation. See createWeights().
cache	Optional pointer to planning cache to use.

Returns: Cycles and memory cost estimates for the planned convolution.

◆ reportPlanInfo()

void poplin::reportPlanInfo	(	std::ostream &	out,
		const poplar::Graph &	graph,
		const ConvParams &	params,
		const poplar::OptionFlags &	options = `{}`,
		PlanningCache *	cache = `nullptr`
	)

Report the convolution plan corresponding to the params and options provided.

Parameters

out	Output stream to report the plan to.
graph	The graph that the convolution is planned with.
params	The same parameters as used by the convolution().
options	Options controlling the implementation. See createWeights().
cache	Optional pointer to planning cache to use.

◆ reportWeightUpdatePlanInfo()

void poplin::reportWeightUpdatePlanInfo	(	std::ostream &	out,
		const poplar::Graph &	graph,
		const ConvParams &	fwdParams,
		const poplar::OptionFlags &	fwdOptions = `{}`,
		PlanningCache *	cache = `nullptr`
	)

Report the convolution plan corresponding to the weight update pass given the forward pass params and options.

Parameters

out	Output stream to report the plan to.
graph	The graph that the convolution is planned with.
fwdParams	Forward pass parameters as used by the convolution().
fwdOptions	Forward pass options controlling the implementation. See createWeights().
cache	Optional pointer to planning cache to use.

◆ transposeGroupedMatrix()

poplar::Tensor poplin::transposeGroupedMatrix ( const poplar::Tensor & A )

Transposes a grouped matrix tensor.

Parameters

A	Tensor to transpose

Returns: Transposed tensor

◆ triangularMask()

poplar::Tensor poplin::triangularMask	(	poplar::Graph &	graph,
		const poplar::Tensor &	a,
		bool	lower,
		bool	unitDiagonal,
		poplar::program::Sequence &	prog,
		const poplar::DebugContext &	debugContext = `{}`
	)

Masks the unused components of the input tensor with zeroes, optionally allowing for a unit diagonal.

Parameters

graph	The Poplar graph.
a	Tensor of floating-point type with shape [..., N,N].
lower	If true, use the lower triangle of `a`; otherwise use the upper triangle.
unitDiagonal	If true, the diagonal elements of `a` are assumed to be 1 and not accessed.
prog	A reference to a program sequence which the code to perform the arrangement will be appended to.
debugContext	Optional debug information.

Returns: A tensor with the same shape as a with all unused values masked.

◆ triangularSolve()

poplar::Tensor poplin::triangularSolve	(	poplar::Graph &	graph,
		const poplar::Tensor &	a,
		const poplar::Tensor &	b,
		bool	leftSide,
		bool	lower,
		bool	unitDiagonal,
		poplar::program::Sequence &	prog,
		const poplar::DebugContext &	debugContext = `{}`,
		const poplar::OptionFlags &	options = `{}`,
		PlanningCache *	cache = `nullptr`
	)

Solves systems of linear equations with lower or upper triangular coefficients.

Supported options:

blockSize

A hint for the size of block to be used. The triangularSolve() function uses a divide-and-conquer approach, where the input matrices are split into four quadrants that can be solved independently, and in parallel. This process is repeated until the quadrants are less than the block size. Although smaller blocks can enable more parallelism there is an overhead for combining the results from each step.

The default value is 64.

See matMul() for additional options.

Parameters

graph	The Poplar graph.
a	Tensor of floating-point type with shape [..., N,N].
b	Tensor of the same type with shape [..., N, K] if `leftSide` is true, [..., K, N] otherwise.
leftSide	Solve AX = B if true, XA = B otherwise.
lower	If true, use the lower triangle of `a`; otherwise use the upper triangle.
unitDiagonal	If true, the diagonal elements of `a` are assumed to be 1 and not accessed.
prog	A reference to a program sequence which the code to perform the arrangement will be appended to.
debugContext	Optional debug information.
options	The implementation options of the triangular solver.
cache	Optional pointer to a planning cache to use.

Returns: Tensor with shape of b with linear system solution.

◆ weightsTransposeChansFlipXY()

void poplin::weightsTransposeChansFlipXY	(	poplar::Graph &	graph,
		const poplar::Tensor &	weightsIn,
		const poplar::Tensor &	weightsOut,
		poplar::program::Sequence &	prog,
		const poplar::DebugContext &	debugContext = `{}`,
		const poplar::OptionFlags &	options = `{}`
	)

Copy the weights in weightsIn into weightsOut such that each element of the kernel is transposed with respect to the input and output channels, and each spatial dimension of the kernel is flipped.

See the transposeAndFlipWeights parameter in convolution().

Parameters

graph	The graph that the operation will be added to.
weightsIn	The input weights tensor.
weightsOut	The output weights tensor.
prog	Poplar program sequence to append the operation onto.
debugContext	Optional debug information.
options	Options controlling the implementation. See createWeights().

Classes

Typedefs

Functions

Detailed Description

Typedef Documentation

◆ DistributedNormReduceCallback

◆ MatMulPlanParams

Function Documentation

◆ addBias()

◆ calculateWeightDeltas()

◆ cholesky()

◆ choleskyInPlace()

◆ convolution()

◆ convolutionBiasUpdate() [1/2]

◆ convolutionBiasUpdate() [2/2]

◆ convolutionValidateOptions()

◆ convolutionWeightUpdate() [1/2]

◆ convolutionWeightUpdate() [2/2]

◆ convolutionWithOutput()

◆ createBiases() [1/2]

◆ createBiases() [2/2]

◆ createCholeskyInput()

◆ createConvOutput()

◆ createInput()

◆ createMatMulGroupedInputLHS()

◆ createMatMulGroupedInputRHS()

◆ createMatMulGroupedOutput()

◆ createMatMulInputLHS() [1/2]

◆ createMatMulInputLHS() [2/2]

◆ createMatMulInputRHS()

◆ createMatMulOutput()

◆ createNormBeta() [1/2]

◆ createNormBeta() [2/2]

◆ createNormGamma() [1/2]

◆ createNormGamma() [2/2]

◆ createNormParams()

◆ createTriangularSolveInputLHS()

◆ createTriangularSolveInputRHS()

◆ createWeights()

◆ distributedNormStatistics()

◆ distributedNormStatisticsGradients()

◆ fullyConnectedWeightTranspose()

◆ getBwdPerfectCycleCount()

◆ getCholeskyMatMulPrePlanParameters()

◆ getFwdPerfectCycleCount()

◆ getInputIndex()

◆ getKernelIndex()

◆ getTriangularSolveMatMulPrePlanParameters()

◆ getWuPerfectCycleCount()

◆ linspace()

◆ matMul()

◆ matMulAcc()

◆ matMulGetConvPlanParams()

◆ matMulGrouped()

◆ matMulGroupedAcc()

◆ matMulGroupedReportPlan()

◆ matMulReportPlan()

◆ matmulValidateOptions()

◆ meshgrid2d()

◆ normalise()

◆ normGradients()

◆ normParamGradients()

◆ normStatistics()

◆ normStatisticsGradients()

◆ normWhiten()

◆ preArrangeMatMulInputRHS()

◆ preplan()

◆ preplanConvolutions() [1/2]

◆ preplanConvolutions() [2/2]

◆ preplanMatMuls()

◆ reportPlanEstimatedCosts()

◆ reportPlanInfo()

◆ reportWeightUpdatePlanInfo()

◆ transposeGroupedMatrix()

◆ triangularMask()

◆ triangularSolve()

◆ weightsTransposeChansFlipXY()