General utility functions for building graphs. More...

Namespaces
namespace	graphfn
	Support for using poplar::Program objects like function calls.

Classes
struct	poplibs_error
	Class for PopLibs exceptions. More...

class	TensorMetaData
	Class used to represent some unspecified form of meta-data for a tensor. More...

class	TensorUseTracker
	Class that tracks the usage of data on different tiles. More...

struct	VertexTemplateToString< popnn::NonLinearityType >
	Specialise vertex template stringification for non-linearity type. More...

struct	VertexTemplateToString< popops::Operation >
	Specialise vertex template stringification for operation type. More...

Typedefs
using	GroupingInfo = std::pair< unsigned, unsigned >
	Grouped dimension info. More...

Functions
void	expandToMatchRanks (poplar::Tensor &a, poplar::Tensor &b)
	Match dimensions of two tensors using numpy-style expansion rules. More...

void	broadcastToMatch (poplar::Tensor &a, const std::vector< std::size_t > &shape)
	Match dimensions of a tensor to a shape using numpy-style broadcast rules: More...

void	broadcastToMatch (poplar::Tensor &a, poplar::Tensor &b)
	Match dimensions of two tensors using numpy-style broadcast rules: More...

void	broadcastToMatch (poplar::Tensor &a, poplar::Tensor &b, poplar::Tensor &c)
	Match dimensions of three tensors using numpy-style broadcast rules: More...

bool	canBroadcastToMatch (const poplar::Tensor &a, const poplar::Tensor &b)
	Test if the given tensors can be broadcast to match one another using the rules for broadcastToMatch(). More...

std::vector< std::vector< poplar::Interval > >	calcLinearTileMapping (const poplar::Graph &graph, std::vector< std::size_t > shape, unsigned minElementsPerTile, unsigned grainSize, unsigned offset=0, bool ascendingOrder=true)
	Calculate a tile mapping that spreads the tensor evenly over the tiles in a graph. More...

std::vector< std::vector< poplar::Interval > >	calcLinearTileMapping (const poplar::Graph &graph, const poplar::Tensor &t, unsigned offset=0, bool ascendingOrder=true)
	Calculate a tile mapping that spreads the tensor evenly over the tiles in a graph. More...

std::pair< poplar::Graph::TileToTensorMapping, unsigned >	calcLinearTileMappingAndNewOffset (const poplar::Graph &graph, const poplar::Tensor &t, unsigned offset=0)
	Calculate a tile mapping that spreads the tensor evenly over the tiles in a graph. More...

void	mapTensorLinearly (poplar::Graph &graph, const poplar::Tensor &t, unsigned minElementsPerTile, unsigned grainSize)
	Map the specified tensor, spreading the tensor evenly over the tiles in a graph. More...

void	mapTensorLinearlyWithOffset (poplar::Graph &graph, const poplar::Tensor &t, unsigned minElementsPerTile, unsigned grainSize, unsigned offset, bool ascendingOrder=true)
	Map the specified tensor, spreading the tensor evenly over the tiles in a graph. More...

void	mapTensorLinearly (poplar::Graph &graph, const poplar::Tensor &t)
	Map the specified tensor, spreading the tensor evenly over the tiles in a graph. More...

void	mapTensorLinearlyWithOffset (poplar::Graph &graph, const poplar::Tensor &t, unsigned offset=0, bool ascendingOrder=true)
	Map the specified tensor, spreading the tensor evenly over the tiles in a graph. More...

std::size_t	chooseMappingOffset (std::size_t numTiles, const std::vector< std::size_t > &shape)
	Choose an offset for use with tensor mapping functions using a hash of the shape provided. More...

std::size_t	chooseMappingOffset (std::size_t numTiles, const std::vector< std::size_t > &shape, std::size_t seed)
	Choose an offset for use with tensor mapping functions using a hash of the shape, and a seed. More...

unsigned	getTileImbalance (const poplar::Graph::TileToTensorMapping &mapping, unsigned minElementsPerTile=0, unsigned grainSize=1)
	Determine how unbalanced a tensor is when mapped over tiles in a graph. More...

unsigned	getTileImbalance (const poplar::Graph &graph, const poplar::Tensor &t, unsigned minElementsPerTile=0, unsigned grainSize=1)
	Determine how unbalanced a tensor is when mapped over tiles. More...

poplar::Tensor	cloneToIpu (poplar::Graph &graph, const poplar::Tensor &t, unsigned dstIPU, const poplar::DebugContext &debugContext={}, poplar::TensorCloneMethod method=poplar::TensorCloneMethod::PRESERVE_ORDER_UNLESS_ALIASES)
	Create a clone of the specified tensor on the specified IPU. More...

poplar::Tensor	cloneToGraph (poplar::Graph &srcGraph, poplar::Graph &dstGraph, const poplar::Tensor &t, const poplar::DebugContext &debugContext={}, poplar::TensorCloneMethod method=poplar::TensorCloneMethod::PRESERVE_ORDER_UNLESS_ALIASES)
	Create a clone of the specified tensor on the specified graph. More...

std::pair< poplar::Tensor, unsigned >	cloneAndExpandAliasing (poplar::Graph &graph, const poplar::Tensor &t, unsigned offset=0, const poplar::DebugContext &debugContext={})
	Create a clone of the specified tensor on the specified graph. More...

poplar::Tensor	copyToIpu (poplar::Graph &masterGraph, const poplar::Tensor &t, poplar::program::Sequence &prog, unsigned dstIPU, const poplar::DebugContext &debugContext={}, poplar::TensorCloneMethod method=poplar::TensorCloneMethod::PRESERVE_ORDER_UNLESS_ALIASES)
	Move a tensor from one IPU to another. More...

poplar::Tensor	createIpuCopy (poplar::Graph &graph, const poplar::Tensor &t, unsigned dstIpu, poplar::Tensor &copySrc, poplar::Tensor &copyDst, const poplar::DebugContext &debugContext={}, poplar::TensorCloneMethod method=poplar::TensorCloneMethod::PRESERVE_ORDER_AND_ALIASES)
	Prepare to move a tensor from one IPU to another. More...

bool	dimIsSplitOverTiles (const poplar::Graph &graph, const poplar::Tensor &t, unsigned dimension)
	Check if a dimension of a tensor is split over more than one tile. More...

bool	dimIsSplitOverIPUs (const poplar::Graph &graph, const poplar::Tensor &t, unsigned dimension)
	Check if a dimension of a tensor is split over more than one IPU. More...

poplar::Tensor	createBroadcastOperand (poplar::Graph &graph, const poplar::Tensor &fullTensor, const poplar::Type &type, unsigned dim, bool ditherMapping=false, const poplar::DebugContext &debugContext={})
	Create a simpler tensor that is mapped in the same way as another, full, tensor. More...

unsigned	transformTileIndex (unsigned tile, unsigned numTiles, unsigned offset, bool ascending)
	Transform a tile index such that the result begins at zero and increments. More...

unsigned	invTransformTileIndex (unsigned tile, unsigned numTiles, unsigned offset, bool ascending)
	Transform a tile index such that the result begins at an offset and increments or decrements. More...

std::vector< poplar::Interval >	flattenIntervals (const std::vector< std::vector< poplar::Interval > > &intervals)
	Flatten a vector of vectors of intervals to a vector, maintaining ordering.

std::vector< std::vector< poplar::Interval > >	splitRegions (const std::vector< poplar::Interval > &regions, unsigned grainSize, unsigned maxPartitions, unsigned minElementsPerPartition=0, unsigned maxElementsPerPartition=UINT_MAX, unsigned maxElementsPerRegion=UINT_MAX)
	Given a set of contiguous regions, partition these regions while trying to balance the number of elements in each partition and respecting the specified grain size. More...

std::vector< std::vector< poplar::Interval > >	splitRegionsBetweenWorkers (const poplar::Target &target, const std::vector< poplar::Interval > &regions, unsigned grainSize, unsigned minElementsPerPartition=0, unsigned maxElementsPerPartition=UINT_MAX, unsigned maxElementsPerRegion=UINT_MAX)
	Given a set of contiguous regions per tile, partition these regions between workers on that tile while respecting the specified grain size. More...

std::vector< std::vector< std::vector< poplar::Interval > > >	splitRegions (const std::vector< std::vector< poplar::Interval > > &regions, unsigned grainSize, unsigned maxPartitions, unsigned minElementsPerPartition=0, unsigned maxElementsPerPartition=UINT_MAX, unsigned maxElementsPerRegion=UINT_MAX)
	Given a set of sequences of regions, partition these sequences while trying to balance the number of elements in each partition and respecting the specified grain size. More...

std::vector< std::vector< std::vector< poplar::Interval > > >	splitRegionsBetweenWorkers (const poplar::Target &target, const std::vector< std::vector< poplar::Interval > > &regions, unsigned grainSize, unsigned minElementsPerPartition=0, unsigned maxElementsPerPartition=UINT_MAX, unsigned maxElementsPerRegion=UINT_MAX)
	Given a set of sequences of regions per tile, partition these sequences between workers on that tile while respecting the specified grain size. More...

template<class T >
std::vector< T >	unflattenIndex (const std::vector< T > &shape, std::size_t index)
	Given an index into a flattened tensor, returns the indices into the dimensions of the original tensor.

template<class T >
std::size_t	flattenIndex (const std::vector< T > &shape, const std::vector< T > &indices)
	Given a list of indices into a tensor, return the corresponding index in a flattened version of the tensor.

std::size_t	intervalSequenceNumElements (const std::vector< std::vector< poplar::Interval > > &seq)
	Return the total number of elements in the interval sequence.

poplar::Tensor	duplicate (poplar::Graph &graph, const poplar::Tensor &in, poplar::program::Sequence &p, const poplar::DebugContext &debugContext={}, poplar::TensorCloneMethod method=poplar::TensorCloneMethod::PRESERVE_ORDER_UNLESS_ALIASES)
	Copy a tensor's data to a new tensor. More...

poplar::Tensor	cloneN (poplar::Graph &graph, const poplar::Tensor &t, unsigned N, const poplar::DebugContext &debugContext={}, poplar::TensorCloneMethod method=poplar::TensorCloneMethod::PRESERVE_ORDER_UNLESS_ALIASES)
	Clone a tensor N times. More...

std::vector< int >	balancedPartition (int rangeUpperBound, int splitCount)
	Split a range. More...

double	castToDeviceHalfValue (const poplar::Target &target, double input)
	Cast a double precision value to a value exactly representable in device HALF type. More...

bool	checkAccuracyWhenCast (const poplar::Target &target, double input, poplar::Type inputType, poplar::Type outputType, double tolerance)
	Check accuracy of a cast operation. More...

poplar::Tensor	factorDims (const poplar::Tensor &t, const std::vector< std::size_t > &factors, unsigned startDim=0)
	Factors the outermost dimensions of tensor `t` by the values given in `factors`. More...

poplar::Tensor	unfactorDims (const poplar::Tensor &t, unsigned numDims, unsigned startDim=0)
	The opposite of factorDims(). More...

std::vector< poplar::Interval >	calculateUnshufflingIntervals (const std::vector< poplar::Interval > &intervals)
	Calculate the un-shuffling intervals based on the given intervals. More...

unsigned	detectInnermostGrouping (const poplar::Graph &graph, const poplar::Tensor &t)
	Detect if the tensor `t` has a grouping in its innermost dimension. More...

std::vector< GroupingInfo >	detectDimGroupings (const poplar::Graph &graph, const poplar::Tensor &t)
	Find all grouped dimensions from the innermost grouped dimension moving outwards, returning groupings for each. More...

poplar::Tensor	createPartitionableTensor (poplar::Graph &graph, const poplar::Type &type, const std::vector< std::size_t > &shape, const std::vector< std::size_t > &nPartitions, const poplar::DebugContext &debugContext={})
	Create a tensor with the given shape, so that when it is partitioned into slices according to the given number of partitions in each dimension, each slice is a single contiguous region. More...

void	iterateTensorPartitions (const poplar::Tensor &t, const std::vector< std::size_t > &nPartitions, const std::function< void(const std::vector< std::size_t > &i, const poplar::Tensor &s)> &f)
	Iterate a function over the partitions of a tensor. More...

template<typename... Args>
std::string	templateVertex (const std::string &name, Args &&...args)
	Generate a string representation of a Vertex type for use by poplar::Graph::addVertex(). More...

Detailed Description

General utility functions for building graphs.

Typedef Documentation

◆ GroupingInfo

using poputil::GroupingInfo = typedef std::pair<unsigned, unsigned>

Grouped dimension info.

The first in the pair is the grouped dimension. The second in the pair is the grouping in that dimension.

Function Documentation

◆ balancedPartition()

std::vector< int > poputil::balancedPartition	(	int	rangeUpperBound,
		int	splitCount
	)

Split a range.

Utility function to split a range [0, rangeUpperBound] into splitCount slices as evenly as possible. If splitCount does not divide rangeUpperBound evenly then output slices are assigned more units in round-robin.

◆ broadcastToMatch() [1/3]

void poputil::broadcastToMatch	(	poplar::Tensor &	a,
		const std::vector< std::size_t > &	shape
	)

Match dimensions of a tensor to a shape using numpy-style broadcast rules:

1) If the rank of the tensor is less than the required shape then expand to the left by adding dimensions of size 1 to match the rank required.

2) For each dimension, the size of the dimension in the tensor must be the same as the required shape or must be 1. In the case where it is of size 1, the tensor is broadcast in that dimension to match the shape. If neither of these conditions hold then an exception is thrown.

Parameters

a	The tensor to broadcast to match the shape. This will be updated in place with broadcast dimensions.
shape	The shape to match.

Exceptions

poputil::poplibs_error If a cannot be broadcast to match shape.

◆ broadcastToMatch() [2/3]

void poputil::broadcastToMatch	(	poplar::Tensor &	a,
		poplar::Tensor &	b
	)

Match dimensions of two tensors using numpy-style broadcast rules:

1) If the rank of one tensor is less than the other then extend the dimensions to the left with dimensions of size 1 to match the rank required.

2) For each dimension, the size of each dimension in both tensors must be the same or one of them must have size 1. In the case where one is of size 1, the tensor is broadcast in that dimension to match the other. If neither of these conditions hold then an exception is thrown.

Parameters

a	First tensor to match. This will be updated in place with broadcast dimensions.
b	Second tensor to match. This will be updated in place with broadcast dimensions.

Exceptions

poputil::poplibs_error If a cannot be broadcast to match a dimension.

◆ broadcastToMatch() [3/3]

void poputil::broadcastToMatch	(	poplar::Tensor &	a,
		poplar::Tensor &	b,
		poplar::Tensor &	c
	)

Match dimensions of three tensors using numpy-style broadcast rules:

1) If the rank of any tensor is less than that of the others then extend its dimensions to the left with dimensions of size 1 to match the rank required.

2) For each dimension, the size of that dimension in all three tensors must be the same or must be 1. In the case where a tensor has size 1 in a dimension, that tensor is broadcast in that dimension to match the others. If neither of these conditions hold then an exception is thrown.

Parameters

a	First tensor to match. This will be updated in place with broadcast dimensions.
b	Second tensor to match. This will be updated in place with broadcast dimensions.
c	Third tensor to match. This will be updated in place with broadcast dimensions.

Exceptions

poputil::poplibs_error If a cannot be broadcast to match a dimension.

◆ calcLinearTileMapping() [1/2]

std::vector< std::vector< poplar::Interval > > poputil::calcLinearTileMapping	(	const poplar::Graph &	graph,
		const poplar::Tensor &	t,
		unsigned	offset = `0`,
		bool	ascendingOrder = `true`
	)

Calculate a tile mapping that spreads the tensor evenly over the tiles in a graph.

By default the indices of the resulting mapping go from low to high tile numbers, however offset and direction can be specified.

In this case the elements are distributed so that groups of elements of the device's natural vector width will not be split. It effectively sets the grain size to the natural vector width for the data type. This means the number of elements on each tile will be a multiple of the natural vector width and the index of the first element is aligned to the natural vector width.

The natural vector width is the largest vector width supported in hardware for arithmetic operations on that data type.

It will also try to keep at least 128 bytes of data on each tile to avoid high exchange costs.

Parameters

graph	The graph to add the operation to.
t	The tensor to be mapped
offset	The offset to the first tile used for mapping
ascendingOrder	If true, the first tile used = offset and tiles are allocated in increasing order. If false, the first tile used = (number of device tiles - 1 - offset) and tiles are allocated in decreasing order.

Returns: A vector specifying the mapping

◆ calcLinearTileMapping() [2/2]

std::vector< std::vector< poplar::Interval > > poputil::calcLinearTileMapping	(	const poplar::Graph &	graph,
		std::vector< std::size_t >	shape,
		unsigned	minElementsPerTile,
		unsigned	grainSize,
		unsigned	offset = `0`,
		bool	ascendingOrder = `true`
	)

Calculate a tile mapping that spreads the tensor evenly over the tiles in a graph.

By default the indices of the resulting mapping go from low to high tile numbers, however offset and direction can be specified.

Parameters

graph	The graph to calculate the mapping for.
shape	The shape of the tensor to be mapped: a vector containing the size of each dimension of the tensor.
minElementsPerTile	The minimum number of tensor elements to be allocated to a tile.
grainSize	The number of elements mapped to each tile will be an integer multiple of the grain size.
offset	The offset to the first tile used for mapping
ascendingOrder	If true, the first tile used = offset and tiles are allocated in increasing order. If false, the first tile used = (number of device tiles - 1 - offset) and tiles are allocated in decreasing order.

Returns: A vector specifying the mapping

◆ calcLinearTileMappingAndNewOffset()

std::pair< poplar::Graph::TileToTensorMapping, unsigned > poputil::calcLinearTileMappingAndNewOffset	(	const poplar::Graph &	graph,
		const poplar::Tensor &	t,
		unsigned	offset = `0`
	)

Calculate a tile mapping that spreads the tensor evenly over the tiles in a graph.

This function is similar to poputil::calcLinearTileMapping but with an additional "new offset" output equal to the index of the last tile used for the mapping plus one, wrapping around the number of tiles. For example, consider a target with 8 tiles and a resulting mapping over 4 tiles. The value of the returned offset will be:

6 if offset = 2.
2 if offset = 6.

Parameters

graph	The graph to add the operation to.
t	The tensor to be mapped
offset	The offset to the first tile used for mapping

Returns: A pair consisting of a vector specifying the mapping and the new advanced offset.

◆ calculateUnshufflingIntervals()

std::vector< poplar::Interval > poputil::calculateUnshufflingIntervals ( const std::vector< poplar::Interval > & intervals )

Calculate the un-shuffling intervals based on the given intervals.

Given a vector of intervals, one could use these intervals to shuffle a tensor. For example:

poplar::Tensor shuffled = poplar::concat(tensor.slices(intervals));

Another vector of intervals exists that can be applied in the same way to the shuffled tensor to undo the shuffling. This function calculates these intervals. The time complexity is O(n log n), where n is the number of intervals.

Note: This function assumes that the intervals are non-overlapping and form one contiguous interval.

Parameters

intervals A vector of intervals that shuffle a tensor.

Returns: A vector of intervals that unshuffle a tensor.

◆ canBroadcastToMatch()

bool poputil::canBroadcastToMatch	(	const poplar::Tensor &	a,
		const poplar::Tensor &	b
	)

Test if the given tensors can be broadcast to match one another using the rules for broadcastToMatch().

Parameters

a	First tensor to match.
b	Second tensor to match.

Returns: True if the two tensors may be broadcast to match one another and false if they cannot be matched with the broadcastToMatch() rules.

◆ castToDeviceHalfValue()

double poputil::castToDeviceHalfValue	(	const poplar::Target &	target,
		double	input
	)

Cast a double precision value to a value exactly representable in device HALF type.

Parameters

target	The target device that the cast will be performed on.
input	Input value.

Returns: Value cast to HALF type on device.

◆ checkAccuracyWhenCast()

bool poputil::checkAccuracyWhenCast	(	const poplar::Target &	target,
		double	input,
		poplar::Type	inputType,
		poplar::Type	outputType,
		double	tolerance
	)

Check accuracy of a cast operation.

Utility function to check if input can be cast from inputType to outputType without an error in its accuracy, or causing an overflow.

Parameters

target	The target device that the cast will be performed on.
input	Input value.
inputType	Input type before the cast operation.
outputType	Output type after the cast operation.
tolerance	Allowed tolerance in error from cast operation.

Returns: Boolean value indicating whether the error will be less than tolerance.

Exceptions

poputil::poplibs_error If either inputType or outputType is neither half nor float.

◆ chooseMappingOffset() [1/2]

std::size_t poputil::chooseMappingOffset	(	std::size_t	numTiles,
		const std::vector< std::size_t > &	shape
	)

Choose an offset for use with tensor mapping functions using a hash of the shape provided.

Parameters

numTiles	The number of tiles of the intended target device.
shape	The shape to produce a hash of.

Returns: The selected offset in the range 0 to numTiles - 1

◆ chooseMappingOffset() [2/2]

std::size_t poputil::chooseMappingOffset	(	std::size_t	numTiles,
		const std::vector< std::size_t > &	shape,
		std::size_t	seed
	)

Choose an offset for use with tensor mapping functions using a hash of the shape, and a seed.

Parameters

numTiles	The number of tiles of the intended target device.
shape	The shape to produce a hash of.
seed	Optional seed to use in producing the hash.

Returns: The selected offset in the range 0 to numTiles - 1

◆ cloneAndExpandAliasing()

std::pair< poplar::Tensor, unsigned > poputil::cloneAndExpandAliasing	(	poplar::Graph &	graph,
		const poplar::Tensor &	t,
		unsigned	offset = `0`,
		const poplar::DebugContext &	debugContext = `{}`
	)

Create a clone of the specified tensor on the specified graph.

The cloned tensor is mapped to the graph in such a way that the mapping of tensor elements to tiles is preserved. If the source tensor consists of aliasing intervals, these will be made non-aliasing in the cloned tensor and mapped linearly across the tiles with the specified tile offset. The remapping is done as a precautionary measure to reduce the chance of getting out-of-memory issues on a tile which has many aliasing elements.

In addition to the cloned tensor, this function returns a "new offset" output equal to the index of the last tile used for the mapping of the expanded aliasing elements plus one. See poputil::calcLinearTileMappingAndNewOffset for more details.

Parameters

graph	The graph to add the operation to.
t	The tensor to clone.
offset	The offset to the first tile used for mapping the elements of the resulting tensor corresponding to aliasing elements of the source tensor.
debugContext	Optional debug information

Returns: A pair consisting of the cloned tensor and the new advanced offset.

◆ cloneN()

poplar::Tensor poputil::cloneN	(	poplar::Graph &	graph,
		const poplar::Tensor &	t,
		unsigned	N,
		const poplar::DebugContext &	debugContext = `{}`,
		poplar::TensorCloneMethod	method = `poplar::TensorCloneMethod::PRESERVE_ORDER_UNLESS_ALIASES`
	)

Clone a tensor N times.

Given a tensor of shape [D1, D2, ... Dn], this function will create a new tensor of shape [N, D1, D2, ..., Dn] where each of the N sub-tensors is a clone of the original tensor (that is, it has the same layout).

Parameters

graph	The Poplar graph.
t	The tensor to clone.
N	The replication factor to clone with.
debugContext	Optional debug information (used to name the new variables created).
method	The tensor cloning method (see Graph::clone()).

◆ cloneToGraph()

poplar::Tensor poputil::cloneToGraph	(	poplar::Graph &	srcGraph,
		poplar::Graph &	dstGraph,
		const poplar::Tensor &	t,
		const poplar::DebugContext &	debugContext = `{}`,
		poplar::TensorCloneMethod	method = `poplar::TensorCloneMethod::PRESERVE_ORDER_UNLESS_ALIASES`
	)

Create a clone of the specified tensor on the specified graph.

The cloned tensor is mapped to the destination graph in such a way that the mapping of tensor elements to tiles is preserved.

Parameters

srcGraph	The graph representing the source tiles.
dstGraph	The graph representing the destination tiles.
t	The tensor to clone.
debugContext	Optional debug information
method	The method to use for the cloning.

Returns: The cloned tensor.

Note: It is assumed that the destination graph has enough tiles to clone the input tensor. This includes any gaps in the tile mapping. This means the maximum mapped tile of t in the source graph must be less than dstGraph.getTarget().getNumTiles().

◆ cloneToIpu()

poplar::Tensor poputil::cloneToIpu	(	poplar::Graph &	graph,
		const poplar::Tensor &	t,
		unsigned	dstIPU,
		const poplar::DebugContext &	debugContext = `{}`,
		poplar::TensorCloneMethod	method = `poplar::TensorCloneMethod::PRESERVE_ORDER_UNLESS_ALIASES`
	)

Create a clone of the specified tensor on the specified IPU.

The cloned tensor is mapped to the IPU in such a way that the mapping of tensor elements to tiles is preserved.

Parameters

graph	The graph representing the entire multi-IPU device.
t	The tensor to clone.
dstIPU	The index of the IPU to clone the tensor onto.
debugContext	A debug name to give to any new tensors allocated in the graph during the clone. If this is empty then the debug names will be derived from existing tensor debug names.
method	The method to use for the cloning.

Exceptions

poputil::poplibs_error If dstIPU is greater than or equal to the number of IPUs targeted by the graph.

Returns: The cloned tensor.

◆ copyToIpu()

poplar::Tensor poputil::copyToIpu	(	poplar::Graph &	masterGraph,
		const poplar::Tensor &	t,
		poplar::program::Sequence &	prog,
		unsigned	dstIPU,
		const poplar::DebugContext &	debugContext = `{}`,
		poplar::TensorCloneMethod	method = `poplar::TensorCloneMethod::PRESERVE_ORDER_UNLESS_ALIASES`
	)

Move a tensor from one IPU to another.

The tensor is moved by duplicating it, mapping the clone onto another IPU, and copying the original tensor values to the new one.

Parameters

masterGraph	The graph representing the entire multi-IPU device.
t	The tensor to move from one IPU to another.
prog	A program sequence to add the Copy to.
dstIPU	The index of the IPU onto which the tensor will be moved.
debugContext	A debug name to give to the tensor created on dstIPU. If this is empty then the debug names will be derived from existing tensor debug names.
method	The method to use for cloning of the tensor on the destination IPU.

Exceptions

poputil::poplibs_error If dstIPU is greater than or equal to the number of IPUs targeted by the graph.

Returns: The new tensor on the specified IPU.

◆ createBroadcastOperand()

poplar::Tensor poputil::createBroadcastOperand	(	poplar::Graph &	graph,
		const poplar::Tensor &	fullTensor,
		const poplar::Type &	type,
		unsigned	dim,
		bool	ditherMapping = `false`,
		const poplar::DebugContext &	debugContext = `{}`
	)

Create a simpler tensor that is mapped in the same way as another, full, tensor.

The full tensor is typically a left hand side operand of an operation while the created tensor is the right hand side. The created tensor has one dimension, which is the same size as the specified dimension of the full tensor.

Because the created tensor has the same mapping as the full tensor, it reduces the amount of data exchange or copies that are required for an operation using the two tensors.

Parameters

graph	The graph which the output tensor is added to.
fullTensor	The tensor mapping for the output tensor is copied from this tensor.
type	The type of the output tensor.
dim	The dimension of the input tensor which is the size of the created tensor.
ditherMapping	Enable dithering to be applied to the mapping of the output tensor.
debugContext	Optional debug information.

Returns: The created output tensor.

◆ createIpuCopy()

poplar::Tensor poputil::createIpuCopy	(	poplar::Graph &	graph,
		const poplar::Tensor &	t,
		unsigned	dstIpu,
		poplar::Tensor &	copySrc,
		poplar::Tensor &	copyDst,
		const poplar::DebugContext &	debugContext = `{}`,
		poplar::TensorCloneMethod	method = `poplar::TensorCloneMethod::PRESERVE_ORDER_AND_ALIASES`
	)

Prepare to move a tensor from one IPU to another.

The tensor is duplicated and the clone is mapped onto another IPU. References to source and destination tensors are provided for use by an inter-IPU copy.

The necessary copy operation is not added to the program.

Parameters

graph	The graph representing the entire multi-IPU device.
t	The tensor to move from one IPU to another.
dstIpu	The index of the IPU onto which the tensor will be moved.
copySrc	A tensor that can be used as the source to do the copy.
copyDst	A tensor that can be used as the destination of the copy.
debugContext	A debug name to give to the tensor created on dstIPU. If this is empty then the debug names will be derived from existing tensor debug names.
method	The method to use for cloning of the tensor on the destination IPU.

Exceptions

poputil::poplibs_error If dstIpu is greater than or equal to the number of IPUs targeted by the graph.

Returns: The new tensor on the specified IPU.

◆ createPartitionableTensor()

poplar::Tensor poputil::createPartitionableTensor	(	poplar::Graph &	graph,
		const poplar::Type &	type,
		const std::vector< std::size_t > &	shape,
		const std::vector< std::size_t > &	nPartitions,
		const poplar::DebugContext &	debugContext = `{}`
	)

Create a tensor with the given shape, so that when it is partitioned into slices according to the given number of partitions in each dimension, each slice is a single contiguous region.

This partitions the tensor so that the maximum number of elements in each partition of a dimension is minimised as well as the number of partitions. That is, if a dimension has n elements, and the number of partitions in that dimension is d then:

a * ceil(n/d) + b * floor(n/d) = n

There will be a partitions with ceil(n/d) elements followed by b partitions with floor(n/d) elements and possibly some number of partitions with 0 elements.

The returned tensor has no tile mapping set.

Parameters

graph	The graph to add the variable to.
type	The type of the elements in the returned tensor.
shape	The shape of the returned tensor.
nPartitions	The number of partitions the shape will be partitioned into in each dimension.
debugContext	Optional debug information.

Returns: A tensor with the given shape where each partition is contiguous.

Exceptions

poputil::poplibs_error If the size of shape and nPartitions are not equal.

◆ detectDimGroupings()

std::vector< GroupingInfo > poputil::detectDimGroupings	(	const poplar::Graph &	graph,
		const poplar::Tensor &	t
	)

Find all grouped dimensions from the innermost grouped dimension moving outwards, returning groupings for each.

The same dimension may appear more than once. This uses detectInnermostGrouping() iteratively.

Parameters

graph	The graph to add the function to.
t	The tensor to check for grouping.

Returns: A list of the grouped dimensions starting with the innermost.

Exceptions

poputil::poplibs_error If the rank of t is zero.

◆ detectInnermostGrouping()

unsigned poputil::detectInnermostGrouping	(	const poplar::Graph &	graph,
		const poplar::Tensor &	t
	)

Detect if the tensor t has a grouping in its innermost dimension.

Parameters

graph	The graph to add the function to.
t	The tensor to check for grouping.

Returns: The size of the group. 1 if there is no grouping.

Exceptions

poputil::poplibs_error If the rank of t is zero.

◆ dimIsSplitOverIPUs()

bool poputil::dimIsSplitOverIPUs	(	const poplar::Graph &	graph,
		const poplar::Tensor &	t,
		unsigned	dimension
	)

Check if a dimension of a tensor is split over more than one IPU.

Examines the mapping of the specified tensor to see if the specified dimension is split over more than one IPU.

Parameters

graph	The graph to examine.
t	The tensor to check.
dimension	The dimension to check.

Returns: True if elements of the given dimension are spread over more than one IPU.

◆ dimIsSplitOverTiles()

bool poputil::dimIsSplitOverTiles	(	const poplar::Graph &	graph,
		const poplar::Tensor &	t,
		unsigned	dimension
	)

Check if a dimension of a tensor is split over more than one tile.

Examines the mapping of the specified tensor to see if the specified dimension is split over more than one tile.

Parameters

graph	The graph to examine.
t	The tensor to check.
dimension	The dimension to check.

Returns: True if elements of the given dimension are spread over more than one tile.

◆ duplicate()

poplar::Tensor poputil::duplicate	(	poplar::Graph &	graph,
		const poplar::Tensor &	in,
		poplar::program::Sequence &	p,
		const poplar::DebugContext &	debugContext = `{}`,
		poplar::TensorCloneMethod	method = `poplar::TensorCloneMethod::PRESERVE_ORDER_UNLESS_ALIASES`
	)

Copy a tensor's data to a new tensor.

The duplicated tensor has the same tile mapping as the original tensor.

◆ expandToMatchRanks()

void poputil::expandToMatchRanks	(	poplar::Tensor &	a,
		poplar::Tensor &	b
	)

Match dimensions of two tensors using numpy-style expansion rules.

Insert singleton dimensions into either of the two tensors so that their ranks match, following numpy-style expansion rules. The tensor with the lower rank has singleton dimensions inserted as the outermost dimensions.

Parameters

a	First tensor to match.
b	Second tensor to match.

◆ factorDims()

poplar::Tensor poputil::factorDims	(	const poplar::Tensor &	t,
		const std::vector< std::size_t > &	factors,
		unsigned	startDim = `0`
	)

Factors the outermost dimensions of tensor t by the values given in factors.

For each value f in factors, the corresponding outer dimension is split into two parts of sizes size(dim)/f and f. The second of these becomes a dimension inside all the factored dimensions. For example, given a tensor with shape [4,6,4] and factors [1,2], we first divide the shape into [4/1,1,6/2,2,4] and then shuffle it to [4/1,6/2,1,2,4].

Parameters

t	The tensor to be factored.
factors	The values to factor each dimension by.
startDim	The outermost dimension to start at.

Returns: The refactored tensor.

◆ getTileImbalance() [1/2]

unsigned poputil::getTileImbalance	(	const poplar::Graph &	graph,
		const poplar::Tensor &	t,
		unsigned	minElementsPerTile = `0`,
		unsigned	grainSize = `1`
	)

Determine how unbalanced a tensor is mapped over tiles.

This compares the way a tensor is mapped to a set of tiles to the mapping based on a given number of elements per tile.

Parameters

graph	The graph containing the mapped tensor.
t	The tensor currently mapped to tiles in the graph.
minElementsPerTile	The suggested minimum number of elements per tile.
grainSize	The number of elements mapped to each tile would be an integer multiple of the suggested grain size.

Returns: The maximum number of elements greater than expected on any tile.

◆ getTileImbalance() [2/2]

unsigned poputil::getTileImbalance	(	const poplar::Graph::TileToTensorMapping &	mapping,
		unsigned	minElementsPerTile = `0`,
		unsigned	grainSize = `1`
	)

Determine how unbalanced a tensor is when mapped over tiles in a graph.

This reports how well a tensor mapping compares with the mapping based on a given number of elements per tile.

Parameters

mapping	The current tile mapping of the tensor.
minElementsPerTile	The suggested minimum number of elements per tile.
grainSize	The number of elements mapped to each tile would be an integer multiple of the suggested grain size.

Returns: The maximum number of elements greater than expected on any tile.

◆ invTransformTileIndex()

unsigned poputil::invTransformTileIndex	(	unsigned	tile,
		unsigned	numTiles,
		unsigned	offset,
		bool	ascending
	)

Transform a tile index such that the result begins at an offset and increments or decrements.

Parameters

tile	The tile number to transform.
numTiles	The number of tiles on the target device.
offset	The offset to the first tile used for the mapping after the transform takes place.
ascending	Mapping order after the transform takes place: If true, the first tile used = offset and tiles are allocated in increasing order. If false, the first tile used = (number of device tiles - 1 - offset) and tiles are allocated in decreasing order.

Returns: Transformed tile number.

◆ iterateTensorPartitions()

void poputil::iterateTensorPartitions	(	const poplar::Tensor &	t,
		const std::vector< std::size_t > &	nPartitions,
		const std::function< void(const std::vector< std::size_t > &i, const poplar::Tensor &s)> &	f
	)

Iterate a function over the partitions of a tensor.

Partitioning follows the same definition as described for createPartitionableTensor().

Parameters

t	The tensor to iterate over.
nPartitions	The number of partitions the tensor is partitioned into in each dimension.
f	A function taking the indices `i` of the partition, each in the range [0, nPartitions[d]) for dimension d of the tensor, as well as the slice `s` of the tensor corresponding to that partition.

Exceptions

poputil::poplibs_error If the rank of t and the size of nPartitions are not equal.

◆ mapTensorLinearly() [1/2]

void poputil::mapTensorLinearly	(	poplar::Graph &	graph,
		const poplar::Tensor &	t
	)

Map the specified tensor, spreading the tensor evenly over the tiles in a graph.

The indices of the flattened tensor are mapped from low to high tile numbers.

In this case the elements are distributed so that groups of elements of the device's natural vector width will not be split. It effectively sets the grain size to the natural vector width for the data type. This means the number of elements on each tile will be a multiple of the natural vector width and the index of the first element is aligned to the natural vector width.

The natural vector width is the largest vector width supported in hardware for arithmetic operations on that data type.

It will also try to keep at least 128 bytes of data on each tile to avoid high exchange costs.

Parameters

graph	The graph to add the operation to.
t	The tensor to be mapped.

◆ mapTensorLinearly() [2/2]

void poputil::mapTensorLinearly	(	poplar::Graph &	graph,
		const poplar::Tensor &	t,
		unsigned	minElementsPerTile,
		unsigned	grainSize
	)

Map the specified tensor, spreading the tensor evenly over the tiles in a graph.

The indices of the flattened tensor are mapped from low to high tile numbers.

Parameters

graph	The graph to calculate the mapping for.
t	The tensor to be mapped.
minElementsPerTile	The minimum number of tensor elements to be allocated to a tile.
grainSize	The number of elements mapped to each tile will be an integer multiple of the grain size.

◆ mapTensorLinearlyWithOffset() [1/2]

void poputil::mapTensorLinearlyWithOffset	(	poplar::Graph &	graph,
		const poplar::Tensor &	t,
		unsigned	minElementsPerTile,
		unsigned	grainSize,
		unsigned	offset,
		bool	ascendingOrder = `true`
	)

Map the specified tensor, spreading the tensor evenly over the tiles in a graph.

The indices of the flattened tensor are mapped from low to high tile numbers; however, the offset and direction can be specified.

Parameters

graph	The graph to calculate the mapping for.
t	The tensor to be mapped.
minElementsPerTile	The minimum number of tensor elements to be allocated to a tile.
grainSize	The number of elements mapped to each tile will be an integer multiple of the grain size.
offset	The offset to the first tile used for mapping.
ascendingOrder	If true, the first tile used = offset and tiles are allocated in increasing order. If false, the first tile used = (number of device tiles - 1 - offset) and tiles are allocated in decreasing order.

◆ mapTensorLinearlyWithOffset() [2/2]

void poputil::mapTensorLinearlyWithOffset	(	poplar::Graph &	graph,
		const poplar::Tensor &	t,
		unsigned	offset = `0`,
		bool	ascendingOrder = `true`
	)

Map the specified tensor, spreading the tensor evenly over the tiles in a graph.

The indices of the flattened tensor are mapped from low to high tile numbers; however, the offset and direction can be specified. In this case the elements are distributed so that groups of elements of the device's natural vector width will not be split. It effectively sets the grain size to the natural vector width for the data type. This means the number of elements on each tile will be a multiple of the natural vector width and the index of the first element is aligned to the natural vector width.

The natural vector width is the largest vector width supported in hardware for arithmetic operations on that data type.

It will also try to keep at least 128 bytes of data on each tile to avoid high exchange costs.

Parameters

graph	The graph to calculate the mapping for.
t	The tensor to be mapped.
offset	The offset to the first tile used for mapping.
ascendingOrder	If true, the first tile used = offset and tiles are allocated in increasing order. If false, the first tile used = (number of device tiles - 1 - offset) and tiles are allocated in decreasing order.

◆ splitRegions() [1/2]

std::vector< std::vector< poplar::Interval > > poputil::splitRegions	(	const std::vector< poplar::Interval > &	regions,
		unsigned	grainSize,
		unsigned	maxPartitions,
		unsigned	minElementsPerPartition = `0`,
		unsigned	maxElementsPerPartition = `UINT_MAX`,
		unsigned	maxElementsPerRegion = `UINT_MAX`
	)

Given a set of contiguous regions, partition these regions while trying to balance the number of elements in each partition and respecting the specified grain size.

At most maxPartitions partitions are created. Regions may be split to achieve a better balance.

◆ splitRegions() [2/2]

std::vector< std::vector< std::vector< poplar::Interval > > > poputil::splitRegions	(	const std::vector< std::vector< poplar::Interval > > &	regions,
		unsigned	grainSize,
		unsigned	maxPartitions,
		unsigned	minElementsPerPartition = `0`,
		unsigned	maxElementsPerPartition = `UINT_MAX`,
		unsigned	maxElementsPerRegion = `UINT_MAX`
	)

Given a set of sequences of regions, partition these sequences while trying to balance the number of elements in each partition and respecting the specified grain size.

At most maxPartitions partitions are created. Sequences, and regions within them, may be split to achieve a better balance.

◆ splitRegionsBetweenWorkers() [1/2]

std::vector< std::vector< poplar::Interval > > poputil::splitRegionsBetweenWorkers	(	const poplar::Target &	target,
		const std::vector< poplar::Interval > &	regions,
		unsigned	grainSize,
		unsigned	minElementsPerPartition = `0`,
		unsigned	maxElementsPerPartition = `UINT_MAX`,
		unsigned	maxElementsPerRegion = `UINT_MAX`
	)

Given a set of contiguous regions per tile, partition these regions between workers on that tile while respecting the specified grain size.

Regions may be split to balance the work across workers.

◆ splitRegionsBetweenWorkers() [2/2]

std::vector< std::vector< std::vector< poplar::Interval > > > poputil::splitRegionsBetweenWorkers	(	const poplar::Target &	target,
		const std::vector< std::vector< poplar::Interval > > &	regions,
		unsigned	grainSize,
		unsigned	minElementsPerPartition = `0`,
		unsigned	maxElementsPerPartition = `UINT_MAX`,
		unsigned	maxElementsPerRegion = `UINT_MAX`
	)

Given a set of sequences of regions per tile, partition these sequences between workers on that tile while respecting the specified grain size.

Regions may be split to balance the work across workers.

◆ templateVertex()

template<typename... Args>

std::string poputil::templateVertex	(	const std::string &	name,
		Args &&...	args
	)

inline

Generate a string representation of a Vertex type for use by poplar::Graph::addVertex().

Parameters

name	The name of the vertex.
args	The types of the arguments to the vertex.

Returns: A string representation of the vertex type.

◆ transformTileIndex()

unsigned poputil::transformTileIndex	(	unsigned	tile,
		unsigned	numTiles,
		unsigned	offset,
		bool	ascending
	)

Transform a tile index such that the result begins at zero and increments.

Parameters

tile	The tile number to transform.
numTiles	The number of tiles on the target device.
offset	The offset to the first tile used for the mapping before the transform takes place.
ascending	Mapping order before the transform takes place: If true, the first tile used = offset and tiles are allocated in increasing order. If false, the first tile used = (number of device tiles - 1 - offset) and tiles are allocated in decreasing order.

Returns: Transformed tile number.

◆ unfactorDims()

poplar::Tensor poputil::unfactorDims	(	const poplar::Tensor &	t,
		unsigned	numDims,
		unsigned	startDim = `0`
	)

The opposite of factorDims().

This does not need information for each dimension because that is present in the tensor. It just needs the number of dimensions.

Parameters

t	The tensor to be refactored.
numDims	The number of dimensions to be refactored.
startDim	The outermost dimension to start at.

Returns: The refactored tensor.

Namespaces

Classes

Typedefs

Functions

Detailed Description

Typedef Documentation

◆ GroupingInfo

Function Documentation

◆ balancedPartition()

◆ broadcastToMatch() [1/3]

◆ broadcastToMatch() [2/3]

◆ broadcastToMatch() [3/3]

◆ calcLinearTileMapping() [1/2]

◆ calcLinearTileMapping() [2/2]

◆ calcLinearTileMappingAndNewOffset()

◆ calculateUnshufflingIntervals()

◆ canBroadcastToMatch()

◆ castToDeviceHalfValue()

◆ checkAccuracyWhenCast()

◆ chooseMappingOffset() [1/2]

◆ chooseMappingOffset() [2/2]

◆ cloneAndExpandAliasing()

◆ cloneN()

◆ cloneToGraph()

◆ cloneToIpu()

◆ copyToIpu()

◆ createBroadcastOperand()

◆ createIpuCopy()

◆ createPartitionableTensor()

◆ detectDimGroupings()

◆ detectInnermostGrouping()

◆ dimIsSplitOverIPUs()

◆ dimIsSplitOverTiles()

◆ duplicate()

◆ expandToMatchRanks()

◆ factorDims()

◆ getTileImbalance() [1/2]

◆ getTileImbalance() [2/2]

◆ invTransformTileIndex()

◆ iterateTensorPartitions()

◆ mapTensorLinearly() [1/2]

◆ mapTensorLinearly() [2/2]

◆ mapTensorLinearlyWithOffset() [1/2]

◆ mapTensorLinearlyWithOffset() [2/2]

◆ splitRegions() [1/2]

◆ splitRegions() [2/2]

◆ splitRegionsBetweenWorkers() [1/2]

◆ splitRegionsBetweenWorkers() [2/2]

◆ templateVertex()

◆ transformTileIndex()

◆ unfactorDims()