3 #ifndef poplar_Target_hpp
4 #define poplar_Target_hpp
6 #include <gccs/CompilerFeatures.hpp>
7 #include <poplar/GlobalExchangeConstraints.hpp>
8 #include <poplar/IpuLinkConfiguration.hpp>
9 #include <poplar/IpuLinkTopology.hpp>
10 #include <poplar/OptionFlags.hpp>
11 #include <poplar/StringRef.hpp>
12 #include <poplar/TargetType.hpp>
13 #include <poplar/Type.hpp>
84 bool operator==(const Target &) const;
85 bool operator!=(const Target &) const;
86 bool operator<(const Target &) const;
126 const std::vector<GlobalExchangeConstraint> &
129 unsigned getNumStrideBits() const;
162 unsigned getRptCountMax() const;
185 unsigned getQuarterVectorWidth() const;
187 unsigned getQuarterMetadataVectorWidth() const;
192 unsigned getWeightsPerConvUnit(const Type &type) const;
194 unsigned getConvUnitInputLoadElemsPerCycle(const Type &type) const;
196 unsigned getConvUnitMaxPipelineDepth(const Type &partialsType) const;
197 unsigned getNumConvUnits(const Type &activationsType,
198 const Type &partialsType) const;
284 unsigned getInstanceSize() const;
287 bool getGatewayMode() const;
317 unsigned numIPUs = 1);
401 const core::TargetOptions &opts);
421 unsigned tilesPerIPU,
423 const core::TargetOptions &opts);
425 Target(std::unique_ptr<core::Target> &&) noexcept;
426 core::Target &getImpl() const { return *impl; }
429 const core::TargetOptions &getTargetOptions() const;
432 std::unique_ptr<core::Target> impl;
446 std::size_t numElements);
459 std::size_t numElements);
472 std::size_t numElements);
485 std::size_t numElements);
491 template <> struct hash<poplar::Target> {
A set of option/value string flags to be used in various APIs.
Definition: OptionFlags.hpp:24
A target representation.
Definition: Target.hpp:69
unsigned getNumTiles() const
Get the total number of tiles for this target (tiles per IPU * number of IPUs).
static Target createCPUTarget(bool accurateHalf=false, unsigned numIPUs=1)
Create a CPU target.
static Target createIPUTarget(StringRef system, const OptionFlags &opts={})
Create an IPU target.
unsigned getVectorWidth(const poplar::Type &type) const
How many of the given type can be processed in one vector operation.
unsigned getFpIctlRegIndex() const
Return the register index of the Floating Point Initial Control Value register CSR_S....
const std::vector< unsigned > & getMemoryElementOffsets() const
Memory element offsets.
unsigned getIpuLinkDomainSize() const
Return the size of the IPU-Link domain.
unsigned getTilesPerSharedExchangeBus() const
The number of consecutive tiles that can share the exchange bus.
std::size_t getTypeSize(const Type &) const
Get the size of a given type in bytes.
unsigned getDbgDataRegIndex() const
Return the register index of CSR_C.DBG_DATA.
unsigned getDataPathWidth() const
The width of the load/store data path within the tile.
unsigned getMaxIPUSyncDelay() const
Get the maximum number of cycles required for an IPU sync in the best case scenario (all tiles are im...
unsigned getNumTilesPerXBContext() const
Get the number of tiles per exchange-block context (with repair).
unsigned getTilesPerIPU() const
The number of tiles per IPU.
unsigned getFp32InFp32OutConvUnitsPerTile() const
The number of convolution units in the tile that can be used when accumulating to 32 bit values.
std::uint64_t getMemoryBytes() const
Get the total amount of memory on this target, across all IPUs.
static Target createIPUTarget(unsigned numIPUs, StringRef system, const OptionFlags &opts={})
Create an IPU target.
void serialize(std::ostream &out) const
Serialize a target to a stream.
unsigned getFp16ConvUnitInputLoadElemsPerCycle() const
The number of input elements loaded per cycle in the f16 convolution unit.
unsigned getGlobalSyncCycles() const
The number of clock cycles required to synchronize all IPUs.
double getTileClockFrequency() const
Get the tile clock frequency in Hertz.
unsigned getMinIPUSyncDelay() const
The IPU sync delay for the tile that is closest to the sync controller.
std::size_t hash() const
Hash of the target.
unsigned getInterleavedMemoryElementIndex() const
Memory element offset index for interleaved memory.
unsigned getTileHostExchangeContext(unsigned tile) const
Get the context of a tile within an XB.
TargetType getTargetType() const
The target type.
unsigned getFp32ConvUnitInputLoadElemsPerCycle() const
The number of input elements loaded per cycle in the f32 convolution unit.
std::string getTargetSystemString() const
The target system.
unsigned getExchangeBytesPerCycle() const
The bandwidth of internal IPU exchange in bytes per cycle.
unsigned getFp16InFp16OutConvUnitsPerTile() const
The number of convolution units in the tile that can be used when partial results are output as 16-b...
unsigned getFp16InFp32OutConvUnitsPerTile() const
The number of convolution units in the tile that can be used when partial results are output as 32-b...
unsigned getNumIPUs() const
The number of IPUs.
unsigned getConvUnitCoeffLoadBytesPerCycle() const
The number of convolutional weights that can be loaded in a cycle.
std::size_t getAtomicStoreGranularity() const
Get the granularity of atomic stores that can be made by independent parallel worker threads.
uint32_t makeFpIctlValue(bool inv, bool div0, bool oflo, bool esr, bool nanoo) const
Generate a value that could be written to Floating Point Initial Control Value register CSR_S....
bool supportsExchangeBusSharing() const
Whether tiles can share the local exchange bus during exchange.
const std::vector< GlobalExchangeConstraint > & getGlobalExchangeConstraints() const
Set of constraints that provide a lower bound on the time it takes to send data between IPUs.
unsigned getHalfVectorWidth() const
How many halves can be processed in one vector operation.
unsigned getBytesPerTile() const
Bytes of memory per tile.
static Target createIPUTarget(unsigned numIPUs, unsigned tilesPerIPU, StringRef system, const core::TargetOptions &opts)
Create an IPU target with a virtual number of tiles, and target options.
StringRef getTargetArchString() const
The target architecture.
unsigned getFloatVectorWidth() const
How many floats can be processed in one vector operation.
unsigned getNumContextsPerXB() const
Get the number of contexts per exchange-block.
IpuLinkTopology getIpuLinkTopology() const
Return the IPU-Link topology.
unsigned getTileHostExchangeXB(unsigned tile) const
Get the XB of a tile.
static Target createIPUTarget(unsigned numIPUs, unsigned tilesPerIPU, StringRef system, const OptionFlags &opts={})
Create an IPU target with a virtual number of tiles.
Target(std::istream &in)
Load from a serialised target.
unsigned getTileHostExchangeContextPosition(unsigned tile) const
Get the position of a tile within a context.
Target createVirtualTarget(unsigned numIPUs, unsigned tilesPerIPU) const
Create a "virtual" target consisting of a subset of the target's tiles.
unsigned getFp8ConvUnitMaxPipelineDepth() const
The maximum pipeline depth of the convolution units within the tile for fp8.
unsigned getFp32ConvUnitMaxPipelineDepth() const
The maximum pipeline depth of the convolution units within the tile for fp32.
unsigned getFp8ConvUnitInputLoadElemsPerCycle() const
The number of input elements loaded per cycle in the f8 convolution unit.
unsigned getMemcpyBytesPerCycle() const
The maximum bandwidth for internal data copies on a tile.
unsigned getFp16ConvUnitMaxPipelineDepth() const
The maximum pipeline depth of the convolution units within the tile for fp16.
IpuLinkConfiguration getIpuLinkConfiguration() const
Return the IPU-Link configuration of this target.
unsigned getNumWorkerContexts() const
The number of worker contexts per tile.
unsigned getFp8InFp16OutConvUnitsPerTile() const
The number of convolution units in the tile that can be used when partial results are 16-bits and inp...
static Target createIPUTarget(unsigned numIPUs, StringRef system, const core::TargetOptions &opts)
Create an IPU target.
Class representing device data types.
Definition: Type.hpp:42
Poplar classes and functions.
Definition: ArrayRef.hpp:14
void copyDoubleToDeviceHalf(const Target &target, const double *src, void *dst, std::size_t numElements)
Convert double precision values to device half-precision values.
void copyDeviceHalfToFloat(const Target &target, const void *src, float *dst, std::size_t numElements)
Convert device half-precision values to floats.
IpuLinkTopology
Enum to represent the IPU interconnect layout.
Definition: IpuLinkTopology.hpp:10
IpuLinkConfiguration
Enum to represent the IPU interconnect layout.
Definition: IpuLinkConfiguration.hpp:10
void copyFloatToDeviceHalf(const Target &target, const float *src, void *dst, std::size_t numElements)
Convert float values to device half-precision values.
void copyDeviceHalfToDouble(const Target &target, const void *src, double *dst, std::size_t numElements)
Convert device half-precision values to doubles.
TargetType
Enum to represent the type of a device capable of running a graph.
Definition: TargetType.hpp:10
poplar::Tensor inv(poplar::Graph &graph, const poplar::Tensor &A, poplar::program::Sequence &prog, const poplar::DebugContext &debugContext={}, const poplar::OptionFlags &options={})
Compute the inverse of each element in A.
Definition: ElementWise.hpp:890