Poplar and PopLibs
Target.hpp
1// Copyright (c) 2017 Graphcore Ltd. All rights reserved.
2
3#ifndef poplar_Target_hpp
4#define poplar_Target_hpp
5
6#include <gccs/CompilerFeatures.hpp>
7#include <poplar/GlobalExchangeConstraints.hpp>
8#include <poplar/IpuLinkConfiguration.hpp>
9#include <poplar/IpuLinkTopology.hpp>
10#include <poplar/OptionFlags.hpp>
11#include <poplar/StringRef.hpp>
12#include <poplar/TargetType.hpp>
13#include <poplar/Type.hpp>
14
15#include <memory>
16#include <vector>
17
18namespace poplar {
19namespace core {
20class Target;
21struct TargetOptions;
22} // namespace core
23
/// A target representation.
///
/// NOTE(review): the leading numbers on each line are line numbers carried
/// over by this listing; gaps in the numbering presumably mark lines (such as
/// documentation comments) that the listing omitted - confirm against the
/// real header.
69class Target {
70public:
71 Target();
72 ~Target();
73 Target(const Target &);
74 Target(Target &&) noexcept;
75
  /// Load from a serialised target.
80 Target(std::istream &in);
81
82 Target &operator=(const Target &);
83 Target &operator=(Target &&) noexcept;
84 bool operator==(const Target &) const;
85 bool operator!=(const Target &) const;
86 bool operator<(const Target &) const;
87
  /// Hash of the target.
89 std::size_t hash() const;
90
  /// Serialize a target to a stream.
96 void serialize(std::ostream &out) const;
97
  /// The target system.
101 std::string getTargetSystemString() const;
  /// The target architecture.
103 StringRef getTargetArchString() const;
  /// The number of IPUs.
105 unsigned getNumIPUs() const;
  /// The number of tiles per IPU.
107 unsigned getTilesPerIPU() const;
  /// The number of worker contexts per tile.
109 unsigned getNumWorkerContexts() const;
  /// Bytes of memory per tile.
111 unsigned getBytesPerTile() const;
  /// The bandwidth of internal IPU exchange in bytes per cycle.
113 unsigned getExchangeBytesPerCycle() const;
  /// The maximum bandwidth for internal data copies on a tile.
115 unsigned getMemcpyBytesPerCycle() const;
  /// The IPU sync delay for the tile that is closest to the sync controller.
117 unsigned getMinIPUSyncDelay() const;
  /// The number of clock cycles required to synchronize all IPUs.
119 unsigned getGlobalSyncCycles() const;
  /// Memory element offsets.
121 const std::vector<unsigned> &getMemoryElementOffsets() const;
  /// Set of constraints that provide a lower bound on the time it takes to
  /// send data between IPUs.
  /// NOTE(review): the declarator line for this return type is missing from
  /// this listing; the index names it getGlobalExchangeConstraints() const -
  /// confirm against the real header.
126 const std::vector<GlobalExchangeConstraint> &
128 // The number of stride bits.
129 unsigned getNumStrideBits() const;
  /// The width of the load/store data path within the tile.
131 unsigned getDataPathWidth() const;
161 // rpt counters max value.
162 unsigned getRptCountMax() const;
163
166
169
  /// Get the total number of tiles for this target (tiles per IPU * number
  /// of IPUs).
172 unsigned getNumTiles() const;
173
  /// Get the total amount of memory on this target, across all IPUs.
175 std::uint64_t getMemoryBytes() const;
176
  /// How many floats can be processed in one vector operation.
179 unsigned getFloatVectorWidth() const;
180
  /// How many halves can be processed in one vector operation.
183 unsigned getHalfVectorWidth() const;
184
185 unsigned getQuarterVectorWidth() const;
186
187 unsigned getQuarterMetadataVectorWidth() const;
188
  /// How many of the given type can be processed in one vector operation.
190 unsigned getVectorWidth(const poplar::Type &type) const;
191
192 unsigned getWeightsPerConvUnit(const Type &type) const;
193
194 unsigned getConvUnitInputLoadElemsPerCycle(const Type &type) const;
195
196 unsigned getConvUnitMaxPipelineDepth(const Type &partialsType) const;
197 unsigned getNumConvUnits(const Type &activationsType,
198 const Type &partialsType) const;
199
  /// Get the maximum number of cycles required for an IPU sync in the best
  /// case scenario.
202 unsigned getMaxIPUSyncDelay() const;
203
  /// Get the tile clock frequency in Hertz.
205 double getTileClockFrequency() const;
206
  /// Get the number of tiles per exchange-block context (with repair).
208 unsigned getNumTilesPerXBContext() const;
209
  /// Get the number of contexts per exchange-block.
211 unsigned getNumContextsPerXB() const;
212
  /// Get the XB of a tile.
214 unsigned getTileHostExchangeXB(unsigned tile) const;
215
  /// Get the context of a tile within an XB.
217 unsigned getTileHostExchangeContext(unsigned tile) const;
218
  /// Get the position of a tile within a context.
220 unsigned getTileHostExchangeContextPosition(unsigned tile) const;
221
  /// Get the size of a given type in bytes.
223 std::size_t getTypeSize(const Type &) const;
224
  /// Get the granularity of atomic stores that can be made by independent
  /// parallel worker threads.
229 std::size_t getAtomicStoreGranularity() const;
230
  /// Generate a value that could be written to the Floating Point Initial
  /// Control Value register.
261 uint32_t makeFpIctlValue(bool inv, bool div0, bool oflo, bool esr,
262 bool nanoo) const;
263
  /// Return the register index of the Floating Point Initial Control Value
  /// register.
267 unsigned getFpIctlRegIndex() const;
268
  /// Return the register index of CSR_C.DBG_DATA.
270 unsigned getDbgDataRegIndex() const;
271
274
277
  /// Return the size of the IPU-Link domain.
281 unsigned getIpuLinkDomainSize() const;
282
283 // Return the number of IPUs in an instance. Internal use only.
284 unsigned getInstanceSize() const;
285
286 // Return the state of gateway mode in the PCI Complex. Internal use only.
287 bool getGatewayMode() const;
288
  /// Create a "virtual" target consisting of a subset of the target's tiles.
299 Target createVirtualTarget(unsigned numIPUs, unsigned tilesPerIPU) const;
300
  /// Create a CPU target.
316 static Target createCPUTarget(bool accurateHalf = false,
317 unsigned numIPUs = 1);
318
  /// Create an IPU target.
337 static Target createIPUTarget(StringRef system, const OptionFlags &opts = {});
338
  /// Create an IPU target.
358 static Target createIPUTarget(unsigned numIPUs, StringRef system,
359 const OptionFlags &opts = {});
360
  /// Create an IPU target with a virtual number of tiles.
382 static Target createIPUTarget(unsigned numIPUs, unsigned tilesPerIPU,
383 StringRef system, const OptionFlags &opts = {});
384
  /// Create an IPU target. Marked GC_DEPRECATED; takes core::TargetOptions
  /// rather than OptionFlags.
399 GC_DEPRECATED static Target createIPUTarget(unsigned numIPUs,
400 StringRef system,
401 const core::TargetOptions &opts);
402
  /// Create an IPU target with a virtual number of tiles, and target options.
  /// Marked GC_DEPRECATED.
420 GC_DEPRECATED static Target createIPUTarget(unsigned numIPUs,
421 unsigned tilesPerIPU,
422 StringRef system,
423 const core::TargetOptions &opts);
424 // Implementation
  // Takes ownership of an existing core::Target implementation object.
425 Target(std::unique_ptr<core::Target> &&) noexcept;
  // Dereferences impl without a null check.
426 core::Target &getImpl() const { return *impl; }
427
428 // Internal use only
429 const core::TargetOptions &getTargetOptions() const;
430
431private:
  // Owning pointer to the implementation object (core::Target is only
  // forward-declared in this header).
432 std::unique_ptr<core::Target> impl;
433};
434
/// Convert device half-precision values to floats.
// NOTE(review): presumably reads numElements values from src and writes them
// to dst - confirm against the implementation.
445void copyDeviceHalfToFloat(const Target &target, const void *src, float *dst,
446 std::size_t numElements);
447
/// Convert float values to device half-precision values.
// NOTE(review): presumably reads numElements values from src and writes them
// to dst - confirm against the implementation.
458void copyFloatToDeviceHalf(const Target &target, const float *src, void *dst,
459 std::size_t numElements);
460
/// Convert device half-precision values to doubles.
// NOTE(review): presumably reads numElements values from src and writes them
// to dst - confirm against the implementation.
471void copyDeviceHalfToDouble(const Target &target, const void *src, double *dst,
472 std::size_t numElements);
473
/// Convert double precision values to device half-precision values.
// NOTE(review): presumably reads numElements values from src and writes them
// to dst - confirm against the implementation.
484void copyDoubleToDeviceHalf(const Target &target, const double *src, void *dst,
485 std::size_t numElements);
486
487} // namespace poplar
488
489namespace std {
490
/// Specialization of std::hash for poplar::Target.
/// The call operator forwards to Target::hash().
491template <> struct hash<poplar::Target> {
492 size_t operator()(const poplar::Target &t) const { return t.hash(); };
493};
494
495} // namespace std
496
497#endif // poplar_Target_hpp
A set of option/value string flags to be used in various APIs.
Definition: OptionFlags.hpp:24
A target representation.
Definition: Target.hpp:69
unsigned getNumTiles() const
Get the total number of tiles for this target (tiles per IPU * number of IPUs).
static Target createCPUTarget(bool accurateHalf=false, unsigned numIPUs=1)
Create a CPU target.
static Target createIPUTarget(StringRef system, const OptionFlags &opts={})
Create an IPU target.
unsigned getVectorWidth(const poplar::Type &type) const
How many of the given type can be processed in one vector operation.
unsigned getFpIctlRegIndex() const
Return the register index of the Floating Point Initial Control Value register CSR_S....
const std::vector< unsigned > & getMemoryElementOffsets() const
Memory element offsets.
unsigned getIpuLinkDomainSize() const
Return the size of the IPU-Link domain.
unsigned getTilesPerSharedExchangeBus() const
The number of consecutive tiles that can share the exchange bus.
std::size_t getTypeSize(const Type &) const
Get the size of a given type in bytes.
unsigned getDbgDataRegIndex() const
Return the register index of CSR_C.DBG_DATA.
unsigned getDataPathWidth() const
The width of the load/store data path within the tile.
unsigned getMaxIPUSyncDelay() const
Get the maximum number of cycles required for an IPU sync in the best case scenario (all tiles are im...
unsigned getNumTilesPerXBContext() const
Get the number of tiles per exchange-block context (with repair).
unsigned getTilesPerIPU() const
The number of tiles per IPU.
unsigned getFp32InFp32OutConvUnitsPerTile() const
The number of convolution units in the tile that can be used when accumulating to 32 bit values.
std::uint64_t getMemoryBytes() const
Get the total amount of memory on this target, across all IPUs.
static Target createIPUTarget(unsigned numIPUs, StringRef system, const OptionFlags &opts={})
Create an IPU target.
void serialize(std::ostream &out) const
Serialize a target to a stream.
unsigned getFp16ConvUnitInputLoadElemsPerCycle() const
The number of input elements loaded per cycle in an f16 convolution unit.
unsigned getGlobalSyncCycles() const
The number of clock cycles required to synchronize all IPUs.
double getTileClockFrequency() const
Get the tile clock frequency in Hertz.
unsigned getMinIPUSyncDelay() const
The IPU sync delay for the tile that is closest to the sync controller.
std::size_t hash() const
Hash of the target.
unsigned getInterleavedMemoryElementIndex() const
Memory element offset index for interleaved memory.
unsigned getTileHostExchangeContext(unsigned tile) const
Get the context of a tile within an XB.
TargetType getTargetType() const
The target type.
unsigned getFp32ConvUnitInputLoadElemsPerCycle() const
The number of input elements loaded per cycle in an f32 convolution unit.
std::string getTargetSystemString() const
The target system.
unsigned getExchangeBytesPerCycle() const
The bandwidth of internal IPU exchange in bytes per cycle.
unsigned getFp16InFp16OutConvUnitsPerTile() const
The number of convolution units in the tile that can be used when partial results are output as 16-b...
unsigned getFp16InFp32OutConvUnitsPerTile() const
The number of convolution units in the tile that can be used when partial results are output as 32-b...
unsigned getNumIPUs() const
The number of IPUs.
unsigned getConvUnitCoeffLoadBytesPerCycle() const
The number of convolutional weights that can be loaded in a cycle.
std::size_t getAtomicStoreGranularity() const
Get the granularity of atomic stores that can be made by independent parallel worker threads.
uint32_t makeFpIctlValue(bool inv, bool div0, bool oflo, bool esr, bool nanoo) const
Generate a value that could be written to Floating Point Initial Control Value register CSR_S....
bool supportsExchangeBusSharing() const
Whether tiles can share the local exchange bus during exchange.
const std::vector< GlobalExchangeConstraint > & getGlobalExchangeConstraints() const
Set of constraints that provide a lower bound on the time it takes to send data between IPUs.
unsigned getHalfVectorWidth() const
How many halves can be processed in one vector operation.
unsigned getBytesPerTile() const
Bytes of memory per tile.
static Target createIPUTarget(unsigned numIPUs, unsigned tilesPerIPU, StringRef system, const core::TargetOptions &opts)
Create an IPU target with a virtual number of tiles, and target options.
StringRef getTargetArchString() const
The target architecture.
unsigned getFloatVectorWidth() const
How many floats can be processed in one vector operation.
unsigned getNumContextsPerXB() const
Get the number of contexts per exchange-block.
IpuLinkTopology getIpuLinkTopology() const
Return the IPU-Link topology.
unsigned getTileHostExchangeXB(unsigned tile) const
Get the XB of a tile.
static Target createIPUTarget(unsigned numIPUs, unsigned tilesPerIPU, StringRef system, const OptionFlags &opts={})
Create an IPU target with a virtual number of tiles.
Target(std::istream &in)
Load from a serialised target.
unsigned getTileHostExchangeContextPosition(unsigned tile) const
Get the position of a tile within a context.
Target createVirtualTarget(unsigned numIPUs, unsigned tilesPerIPU) const
Create a "virtual" target consisting of a subset of the target's tiles.
unsigned getFp8ConvUnitMaxPipelineDepth() const
The maximum pipeline depth of the convolution units within the tile for fp8.
unsigned getFp32ConvUnitMaxPipelineDepth() const
The maximum pipeline depth of the convolution units within the tile for fp32.
unsigned getFp8ConvUnitInputLoadElemsPerCycle() const
The number of input elements loaded per cycle in an f8 convolution unit.
unsigned getMemcpyBytesPerCycle() const
The maximum bandwidth for internal data copies on a tile.
unsigned getFp16ConvUnitMaxPipelineDepth() const
The maximum pipeline depth of the convolution units within the tile for fp16.
IpuLinkConfiguration getIpuLinkConfiguration() const
Return the IPU-Link configuration of this target.
unsigned getNumWorkerContexts() const
The number of worker contexts per tile.
unsigned getFp8InFp16OutConvUnitsPerTile() const
The number of convolution units in the tile that can be used when partial results are 16-bits and inp...
static Target createIPUTarget(unsigned numIPUs, StringRef system, const core::TargetOptions &opts)
Create an IPU target.
Class representing device data types.
Definition: Type.hpp:42
Poplar classes and functions.
Definition: ArrayRef.hpp:14
void copyDoubleToDeviceHalf(const Target &target, const double *src, void *dst, std::size_t numElements)
Convert double precision values to device half-precision values.
void copyDeviceHalfToFloat(const Target &target, const void *src, float *dst, std::size_t numElements)
Convert device half-precision values to floats.
IpuLinkTopology
Enum to represent the IPU interconnect layout.
Definition: IpuLinkTopology.hpp:10
IpuLinkConfiguration
Enum to represent the IPU interconnect layout.
Definition: IpuLinkConfiguration.hpp:10
void copyFloatToDeviceHalf(const Target &target, const float *src, void *dst, std::size_t numElements)
Convert float values to device half-precision values.
void copyDeviceHalfToDouble(const Target &target, const void *src, double *dst, std::size_t numElements)
Convert device half-precision values to doubles.
TargetType
Enum to represent the type of a device capable of running a graph.
Definition: TargetType.hpp:10
poplar::Tensor inv(poplar::Graph &graph, const poplar::Tensor &A, poplar::program::Sequence &prog, const poplar::DebugContext &debugContext={}, const poplar::OptionFlags &options={})
Compute the inverse of each element in A.
Definition: ElementWise.hpp:890