3#ifndef poplar_IPUModel_hpp
4#define poplar_IPUModel_hpp
5#include <poplar/Device.hpp>
6#include <poplar/OptionFlags.hpp>
7#include <poplar/Target.hpp>
112 unsigned rptCountMax;
118 bool operator==(
const IPUModel &)
const;
119 bool operator!=(
const IPUModel &)
const;
A device refers to a physical entity that can execute code.
Definition: Device.hpp:26
A set of option/value string flags to be used in various APIs.
Definition: OptionFlags.hpp:24
half2 max(half2 src0, half2 src1)
Targets the f16v2max instruction.
Definition: ipu_intrinsics:333
Poplar classes and functions.
Definition: ArrayRef.hpp:14
A model of an IPU to create an IPUModel Device. The IPU Model will simulate the behaviour of the IPU h...
Definition: IPUModel.hpp:14
unsigned fp16ConvUnitInputLoadElemsPerCycle
The input elements loaded per cycle for f16 conv.
Definition: IPUModel.hpp:84
unsigned atomicStoreGranularity
The atomic store granularity.
Definition: IPUModel.hpp:114
std::vector< GlobalExchangeConstraint > globalExchangeConstraints
Set of constraints that provide a lower bound on the time it takes to send data between IPUs.
Definition: IPUModel.hpp:59
unsigned fp8ConvUnitInputLoadElemsPerCycle
The input elements loaded per cycle for f8 conv.
Definition: IPUModel.hpp:82
Device createDevice(OptionFlags opts={}, bool accurateHalf=false, unsigned deviceManagerId=std::numeric_limits< unsigned >::max())
Create a device that runs code on the CPU and models the performance that would be achieved on an IPU...
unsigned fp16ConvUnitMaxPipelineDepth
The maximum pipeline depth of the convolution units within the tile for fp16.
Definition: IPUModel.hpp:77
unsigned workerInstrFetchDelay
Number of bytes worker context may be loading instructions from memory ahead of current PC.
Definition: IPUModel.hpp:106
unsigned memcpyBytesPerCycle
The number of bytes per cycle that can be copied from one location to another using a memcpy.
Definition: IPUModel.hpp:35
unsigned fp32InFp32OutConvUnitsPerTile
The number of convolution units in the tile that can be used when accumulating to 32 bit values.
Definition: IPUModel.hpp:95
unsigned dataPathWidth
The width of the load/store data path within the tile.
Definition: IPUModel.hpp:71
unsigned fp8InFp16OutConvUnitsPerTile
The number of convolution units in the tile that can be used when partial results are output as 16-b...
Definition: IPUModel.hpp:98
unsigned tilesPerSuperTile
The number of tiles per supertile.
Definition: IPUModel.hpp:22
std::string IPUVersion
Valid values for IPUVersion are "ipu1" and "ipu2" (for Mk1 and Mk2 IPU architectures respectively)
Definition: IPUModel.hpp:18
unsigned supervisorInstrFetchDelay
Number of bytes supervisor contexts may be loading instructions from memory ahead of current PC.
Definition: IPUModel.hpp:103
unsigned tileLocalSyncSyncDelay
Number of cycles from issuing a sync instruction to the earliest time that instructions can resume.
Definition: IPUModel.hpp:64
unsigned numStrideBits
Number of stride bits.
Definition: IPUModel.hpp:69
unsigned numIPUs
The number of IPUs.
Definition: IPUModel.hpp:20
unsigned memoryBytesPerTile
Memory bytes per tile.
Definition: IPUModel.hpp:28
RelativeSyncDelayType
A function that returns the number of cycles before the specified tile is released from sync relativ...
Definition: IPUModel.hpp:52
unsigned instructionBytes
The size of an instruction in bytes.
Definition: IPUModel.hpp:37
unsigned fp16InFp32OutConvUnitsPerTile
The number of convolution units in the tile that can be used when partial results are output as 32-b...
Definition: IPUModel.hpp:92
unsigned tileLocalSyncExitDelay
Number of cycles after a worker has issued its exit instruction that the supervisor can resume.
Definition: IPUModel.hpp:67
bool compileIPUCode
Whether or not to actually compile real IPU code for modelling.
Definition: IPUModel.hpp:116
double tileClockFrequency
Clock frequency in Hz.
Definition: IPUModel.hpp:30
unsigned maxImmediateOffsetInRunInstr
Max range of the immediate operand in the run instruction; the zimm16 operand is multiplied implicitly by 4 when adde...
Definition: IPUModel.hpp:109
unsigned fp32ConvUnitInputLoadElemsPerCycle
The input elements loaded per cycle for f32 conv.
Definition: IPUModel.hpp:86
unsigned convUnitCoeffLoadBytesPerCycle
The number of convolutional weights that can be loaded in a cycle.
Definition: IPUModel.hpp:100
unsigned fp32ConvUnitMaxPipelineDepth
The maximum pipeline depth of the convolution units within the tile for fp32.
Definition: IPUModel.hpp:80
unsigned tilesPerIPU
The number of tiles per IPU.
Definition: IPUModel.hpp:24
bool supportsSuperTileSendReceive
Whether a tile in a supertile can use all the exchange bandwidth of the supertile to send or receive,...
Definition: IPUModel.hpp:41
unsigned fp16InFp16OutConvUnitsPerTile
The number of convolution units in the tile that can be used when partial results are output as 16-b...
Definition: IPUModel.hpp:89
unsigned exchangeBytesPerCycle
The bandwidth of internal IPU exchange in bytes per cycle.
Definition: IPUModel.hpp:32
unsigned fp8ConvUnitMaxPipelineDepth
The maximum pipeline depth of the convolution units within the tile for fp8.
Definition: IPUModel.hpp:74
unsigned globalExchangePacketBytes
Size of the packet used to transfer data between tiles in bytes.
Definition: IPUModel.hpp:61
unsigned numWorkerContexts
The number of worker contexts per tile.
Definition: IPUModel.hpp:26
unsigned interleavedMemoryElementIndex
Index in the memoryElementOffsets table (returned by Target::getMemoryElementOffsets) which gives the...
Definition: IPUModel.hpp:48
unsigned globalSyncCycles
The number of clock cycles required to synchronize all IPUs.
Definition: IPUModel.hpp:56
unsigned minIPUSyncDelay
The IPU sync delay for the tile that is closest to the sync controller.
Definition: IPUModel.hpp:54