A target representation. More...

#include <Target.hpp>

Public Member Functions
	Target (std::istream &in)
	Load from a serialised target. More...

std::size_t	hash () const
	Hash of the target.

void	serialize (std::ostream &out) const
	Serialize a target to a stream. More...

TargetType	getTargetType () const
	The target type.

std::string	getTargetSystemString () const
	The target system.

StringRef	getTargetArchString () const
	The target architecture.

unsigned	getNumIPUs () const
	The number of IPUs.

unsigned	getTilesPerIPU () const
	The number of tiles per IPU.

unsigned	getNumWorkerContexts () const
	The number of worker contexts per tile.

unsigned	getBytesPerTile () const
	Bytes of memory per tile.

unsigned	getExchangeBytesPerCycle () const
	The bandwidth of internal IPU exchange in bytes per cycle.

unsigned	getMemcpyBytesPerCycle () const
	The maximum bandwidth for internal data copies on a tile.

unsigned	getMinIPUSyncDelay () const
	The IPU sync delay for the tile that is closest to the sync controller.

unsigned	getGlobalSyncCycles () const
	The number of clock cycles required to synchronize all IPUs.

const std::vector< unsigned > &	getMemoryElementOffsets () const
	Memory element offsets.

unsigned	getInterleavedMemoryElementIndex () const
	Memory element offset index for interleaved memory.

const std::vector< GlobalExchangeConstraint > &	getGlobalExchangeConstraints () const
	Set of constraints that provide a lower bound on the time it takes to send data between IPUs.

unsigned	getDataPathWidth () const
	The width of the load/store data path within the tile.

unsigned	getFp8ConvUnitMaxPipelineDepth () const
	The maximum pipeline depth of the convolution units within the tile for fp8.

unsigned	getFp16ConvUnitMaxPipelineDepth () const
	The maximum pipeline depth of the convolution units within the tile for fp16.

unsigned	getFp32ConvUnitMaxPipelineDepth () const
	The maximum pipeline depth of the convolution units within the tile for fp32.

unsigned	getFp8ConvUnitInputLoadElemsPerCycle () const
	The number of input elements loaded per cycle in the fp8 convolution unit.

unsigned	getFp16ConvUnitInputLoadElemsPerCycle () const
	The number of input elements loaded per cycle in the fp16 convolution unit.

unsigned	getFp32ConvUnitInputLoadElemsPerCycle () const
	The number of input elements loaded per cycle in the fp32 convolution unit.

unsigned	getFp16InFp16OutConvUnitsPerTile () const
	The number of convolution units in the tile that can be used when partial results are output as 16 bits and inputs are 16 bits.

unsigned	getFp16InFp32OutConvUnitsPerTile () const
	The number of convolution units in the tile that can be used when partial results are output as 32 bits and inputs are 16 bits.

unsigned	getFp32InFp32OutConvUnitsPerTile () const
	The number of convolution units in the tile that can be used when accumulating to 32 bit values.

unsigned	getFp8InFp16OutConvUnitsPerTile () const
	The number of convolution units in the tile that can be used when partial results are 16 bits and inputs are 8 bits.

unsigned	getConvUnitCoeffLoadBytesPerCycle () const
	The number of convolutional weights that can be loaded in a cycle.

bool	supportsExchangeBusSharing () const
	Whether tiles can share the local exchange bus during exchange.

unsigned	getTilesPerSharedExchangeBus () const
	The number of consecutive tiles that can share the exchange bus.

unsigned	getNumTiles () const
	Get the total number of tiles for this target (tiles per IPU * number of IPUs).

std::uint64_t	getMemoryBytes () const
	Get the total amount of memory on this target, across all IPUs.

unsigned	getFloatVectorWidth () const
	How many floats can be processed in one vector operation. More...

unsigned	getHalfVectorWidth () const
	How many halves can be processed in one vector operation. More...

unsigned	getVectorWidth (const poplar::Type &type) const
	How many of the given type can be processed in one vector operation.

unsigned	getMaxIPUSyncDelay () const
	Get the maximum number of cycles required for an IPU sync in the best case scenario (all tiles are immediately ready).

double	getTileClockFrequency () const
	Get the tile clock frequency in Hertz.

unsigned	getNumTilesPerXBContext () const
	Get the number of tiles per exchange-block context (with repair).

unsigned	getNumContextsPerXB () const
	Get the number of contexts per exchange-block.

unsigned	getTileHostExchangeXB (unsigned tile) const
	Get the XB of a tile.

unsigned	getTileHostExchangeContext (unsigned tile) const
	Get the context of a tile within an XB.

unsigned	getTileHostExchangeContextPosition (unsigned tile) const
	Get the position of a tile within a context.

std::size_t	getTypeSize (const Type &) const
	Get the size of a given type in bytes.

std::size_t	getAtomicStoreGranularity () const
	Get the granularity of atomic stores that can be made by independent parallel worker threads. More...

uint32_t	makeFpIctlValue (bool inv, bool div0, bool oflo, bool esr, bool nanoo) const
	Generate a value that could be written to Floating Point Initial Control Value register CSR_S.FP_ICTL in order to configure it with the specified options. More...

unsigned	getFpIctlRegIndex () const
	Return the register index of the Floating Point Initial Control Value register CSR_S.FP_ICTL.

unsigned	getDbgDataRegIndex () const
	Return the register index of CSR_C.DBG_DATA.

IpuLinkConfiguration	getIpuLinkConfiguration () const
	Return the IPU-Link configuration of this target.

IpuLinkTopology	getIpuLinkTopology () const
	Return the IPU-Link topology.

unsigned	getIpuLinkDomainSize () const
	Return the size of the IPU-Link domain. More...

Target	createVirtualTarget (unsigned numIPUs, unsigned tilesPerIPU) const
	Create a "virtual" target consisting of a subset of the target's tiles. More...

Static Public Member Functions
static Target	createCPUTarget (bool accurateHalf=false, unsigned numIPUs=1)
	Create a CPU target. More...

static Target	createIPUTarget (StringRef system, const OptionFlags &opts={})
	Create an IPU target. More...

static Target	createIPUTarget (unsigned numIPUs, StringRef system, const OptionFlags &opts={})
	Create an IPU target. More...

static Target	createIPUTarget (unsigned numIPUs, unsigned tilesPerIPU, StringRef system, const OptionFlags &opts={})
	Create an IPU target with a virtual number of tiles. More...

static Target	createIPUTarget (unsigned numIPUs, StringRef system, const core::TargetOptions &opts)
	Create an IPU target. More...

static Target	createIPUTarget (unsigned numIPUs, unsigned tilesPerIPU, StringRef system, const core::TargetOptions &opts)
	Create an IPU target with a virtual number of tiles, and target options. More...

Detailed Description

A target representation.

The Target class holds characteristics of a compilation target and enables interaction with it.

Target creation options

ipuLinkConfiguration (Default, BarleyTwist, SlidingWindow) [=Poplar decides]

The configuration used for the IPU-to-IPU connections. If it is not set, Poplar decides on a configuration based on the number of IPUs.

Note that Default is not the default.
syncConfiguration (intraReplicaAndAll, ipuAndAll) [=intraReplicaAndAll]

The configuration of the hardware synchronisation groups. Note that the target.syncReplicasIndependently engine option determines which of the synchronisation groups is used for host synchronisation.
- intraReplicaAndAll: The first sync group is used to sync IPUs within a replica and the second sync group is used to sync all IPUs.
- ipuAndAll: The first sync group is used to sync each IPU independently with the host (if the target.syncReplicasIndependently option is set) and the second sync group is used to sync all IPUs.
ipuLinkTopology (mesh, torus) [=mesh]

The topology of the IPU-Links. It describes how the IPUs in the system are connected.
- mesh: The IPUs are connected as a ladder.
- torus: The IPUs are connected as a ladder, with the top and bottom of the ladder linked together.
IpuLinkDomainSize Integer [=64]

The number of IPUs connected via IPU-Links. Two IPU-Link domains can be connected together via GW-Links.

Constructor & Destructor Documentation

◆ Target()

poplar::Target::Target ( std::istream & in )

Load from a serialised target.

Parameters

in	The stream to read from.

Member Function Documentation

◆ createCPUTarget()

static Target poplar::Target::createCPUTarget	(	bool	accurateHalf = `false`,
		unsigned	numIPUs = `1`
	)

static

Create a CPU target.

Create a target for executing a simple graph on the CPU.

This should only be used for simple functional testing.

Parameters

accurateHalf	By default, half is an alias for float, and `sizeof(half)` will be 4. If you set `accurateHalf` to true, half will be implemented in software as 16-bit IEEE floating point. This will be slower, but will produce the same results as the IPU.
numIPUs	The number of IPUs in the target. The IPUs will each have 1 tile with 1 worker thread.

Returns: A Target object that can be used to create a graph.

◆ createIPUTarget() [1/5]

static Target poplar::Target::createIPUTarget	(	StringRef	system,
		const OptionFlags &	opts = `{}`
	)

static

Create an IPU target.

Create an IPU target with all IPUs for the system based on the given system type.

Valid system types are:

IPU-POD16
IPU-POD64
IPU-POD128
IPU-POD256
IPU-POD4-DA
IPU-POD16-DA

Parameters

system	The type of the IPU system.
opts	The options passed to the target.

Returns: A Target object that can be used to create a graph.

◆ createIPUTarget() [2/5]

static Target poplar::Target::createIPUTarget	(	unsigned	numIPUs,
		StringRef	system,
		const core::TargetOptions &	opts
	)

static

Create an IPU target.

Create an IPU target with a specified number of IPUs based on the given system type.

Parameters

numIPUs	The number of IPUs the target should be for.
system	The type of the IPU system.
opts	The options passed to the target.

Returns: A Target object that can be used to create a graph.

Deprecated: Use createIPUTarget(unsigned numIPUs, StringRef system, const OptionFlags &opts) instead.

◆ createIPUTarget() [3/5]

static Target poplar::Target::createIPUTarget	(	unsigned	numIPUs,
		StringRef	system,
		const OptionFlags &	opts = `{}`
	)

static

Create an IPU target.

Create an IPU target with a specified number of IPUs based on the given system type.

Valid system types are:

IPU-POD16
IPU-POD64
IPU-POD128
IPU-POD256
IPU-POD4-DA
IPU-POD16-DA

Parameters

numIPUs	The number of IPUs the target should be for.
system	The type of the IPU system.
opts	The options passed to the target.

Returns: A Target object that can be used to create a graph.

◆ createIPUTarget() [4/5]

static Target poplar::Target::createIPUTarget	(	unsigned	numIPUs,
		unsigned	tilesPerIPU,
		StringRef	system,
		const core::TargetOptions &	opts
	)

static

Create an IPU target with a virtual number of tiles, and target options.

Create an IPU target with a specified number of IPUs based on the given system type. In addition, the number of tiles can be restricted to a smaller virtual number of observable tiles. This overload also accepts target options that can be obtained from another target.

Parameters

numIPUs	The number of IPUs the target should be for.
tilesPerIPU	The number of tiles per IPU.
system	The type of the IPU system.
opts	The options passed to the target.

Returns: A Target object that can be used to create a graph.

Deprecated: Use createIPUTarget(unsigned numIPUs, unsigned tilesPerIPU, StringRef system, const OptionFlags &opts) instead.

◆ createIPUTarget() [5/5]

static Target poplar::Target::createIPUTarget	(	unsigned	numIPUs,
		unsigned	tilesPerIPU,
		StringRef	system,
		const OptionFlags &	opts = `{}`
	)

static

Create an IPU target with a virtual number of tiles.

Create an IPU target with a specified number of IPUs based on the given system type. In addition, the number of tiles can be restricted to a smaller virtual number of observable tiles.

Valid system types are:

IPU-POD16
IPU-POD64
IPU-POD128
IPU-POD256
IPU-POD4-DA
IPU-POD16-DA

Parameters

numIPUs	The number of IPUs the target should be for.
tilesPerIPU	The number of tiles per IPU.
system	The type of the IPU system.
opts	The options passed to the target.

Returns: A Target object that can be used to create a graph.

◆ createVirtualTarget()

Target poplar::Target::createVirtualTarget	(	unsigned	numIPUs,
		unsigned	tilesPerIPU
	)		const

Create a "virtual" target consisting of a subset of the target's tiles.

This method returns a target object that references the same state as this target but only uses a subset of the target's tiles.

Parameters

numIPUs	The number of IPUs the target should be for.
tilesPerIPU	The number of tiles per IPU.

Returns: The virtual target object.

◆ getAtomicStoreGranularity()

std::size_t poplar::Target::getAtomicStoreGranularity ( ) const

Get the granularity of atomic stores that can be made by independent parallel worker threads.

Returns: The granularity in bytes.

◆ getFloatVectorWidth()

unsigned poplar::Target::getFloatVectorWidth ( ) const

How many floats can be processed in one vector operation.

Equivalent to getDataPathWidth() / 32.

◆ getHalfVectorWidth()

unsigned poplar::Target::getHalfVectorWidth ( ) const

How many halves can be processed in one vector operation.

Equivalent to getDataPathWidth() / 16.

◆ getIpuLinkDomainSize()

unsigned poplar::Target::getIpuLinkDomainSize ( ) const

Return the size of the IPU-Link domain.

That is, the number of IPUs that are connected via IPU-Links.

◆ makeFpIctlValue()

uint32_t poplar::Target::makeFpIctlValue	(	bool	inv,
		bool	div0,
		bool	oflo,
		bool	esr,
		bool	nanoo
	)		const

Generate a value that could be written to Floating Point Initial Control Value register CSR_S.FP_ICTL in order to configure it with the specified options.

Parameters

inv	If true, a floating-point invalid operation (defined by IEEE 754) will cause an exception. The invalid operations are: Addition or subtraction where the operands are + or - infinity (inf) and the operation results in the subtraction of two infs; for example: (-inf)+(+inf) or (+inf)-(+inf). Divisions: (+/-0)/(+/-0) and (+/-inf)/(+/-inf). Multiplications: (+/-0)*(+/-inf) and (+/-inf)*(+/-0). Remainder: x REM y where y=0 or x=(+/-inf). Real operations with complex results, such as the square root or logarithm of a negative number. Operations with Not-a-Number as at least one operand. Comparisons where one of the operands is Not-a-Number. See also nanoo below.
div0	If true, a floating-point divide-by-zero operation will cause an exception.
oflo	If true a floating point overflow will cause an exception
esr	Enable stochastic rounding
nanoo	Enable Not-a-Number on overflow mode. When enabled, half-precision calculations that have overflowed will produce a Not-a-Number result, rather than saturating to the half-precision max/min value, and the invalid operation (`inv`) flag will be set.

◆ serialize()

void poplar::Target::serialize ( std::ostream & out ) const

Serialize a target to a stream.

Currently the format is opaque, and compatibility between different versions of Poplar is not guaranteed.

Parameters

out	The stream to write to.

The documentation for this class was generated from the following file:

include/poplar/Target.hpp

Public Member Functions

Static Public Member Functions

Detailed Description

Constructor & Destructor Documentation

◆ Target()

Member Function Documentation

◆ createCPUTarget()

◆ createIPUTarget() [1/5]

◆ createIPUTarget() [2/5]

◆ createIPUTarget() [3/5]

◆ createIPUTarget() [4/5]

◆ createIPUTarget() [5/5]

◆ createVirtualTarget()

◆ getAtomicStoreGranularity()

◆ getFloatVectorWidth()

◆ getHalfVectorWidth()

◆ getIpuLinkDomainSize()

◆ makeFpIctlValue()

◆ serialize()