Poplar and PopLibs
|
A graph compute engine. More...
#include <Engine.hpp>
Classes | |
class | TimerTimePoint |
PImpl interface to core timing information. More... | |
Public Types | |
using | ProgressFunc = std::function< void(int, int)> |
Callback function used to indicate engine compilation progress. More... | |
Public Member Functions | |
Engine (Graph &&graph, ArrayRef< program::Program > progs, const OptionFlags &opt={}, ProgressFunc progressCallBack=ProgressFunc(), const DebugContext &debugContext={}) | |
Construct the engine from a graph and a list of programs. More... | |
Engine (Graph &&graph, program::Program prog, const OptionFlags &opt={}, ProgressFunc progressCallBack=ProgressFunc(), const DebugContext &debugContext={}) | |
Construct the engine from a graph and a program. More... | |
Engine (Executable &&exe, const OptionFlags &opt={}) | |
Construct the engine from a precompiled executable. More... | |
void | prepare (const Device &device) |
Prepare the device for loading. More... | |
void | prepare (const Device &device, const RuntimeOptions &runOptions) |
Prepare the device for loading. More... | |
void | deploy () |
Load the engine. More... | |
void | load (const Device &device) |
Load the compiled program/graph onto a device. More... | |
void | run (unsigned prog=0, const std::string &debugName="") |
Run the graph program. More... | |
void | stop () |
Stop the graph program. More... | |
void | run (unsigned prog, const std::string &debugName, const RuntimeOptions &options) |
Run the graph program. More... | |
void | loadAndRun (const Device &device, unsigned prog=0) |
Run the graph program. More... | |
TimerTimePoint | getTimeStamp () |
Get a record of the current host and device time. More... | |
void | resetExecutionProfile () |
Reset execution profile. More... | |
pva::Report | getReport (bool reportExecution=true) |
Get a PVA Report object that allows access to profiling data for the graph and the execution with this engine. More... | |
void | disableExecutionProfiling () |
Pause execution profiling. More... | |
void | enableExecutionProfiling () |
Enable execution profiling. More... | |
void | printProfileSummary (std::ostream &outputStream, const OptionFlags &opt={}) |
Get and print the summary of a report with the given options. More... | |
void | reportIntervals (std::ostream &outputStream) |
Write a CSV data file to a specified output stream. More... | |
void | readTensor (StringRef handle, void *buf, void *bufEnd) |
Synchronous copy of a buffer of non-Quarter type data from a specific tensor in the device into a host-side buffer. More... | |
void | readTensor (StringRef handle, QuarterMetadata &metadata, void *buf, void *bufEnd) |
Synchronous copy of a buffer of Quarter type data from a specific tensor in the device into a host-side buffer. More... | |
template<class T > | |
void | readTensor (StringRef handle, gccs::ArrayRef< T > buffer) |
Synchronous copy of a buffer of non-Quarter type data from a specific tensor in the device into a host-side buffer. More... | |
void | writeTensor (StringRef handle, const void *buf, const void *bufEnd) |
Synchronous copy of a buffer of non-Quarter type data from the host to a specific tensor in the device. More... | |
void | writeTensor (StringRef handle, const QuarterMetadata &metadata, const void *buf, const void *bufEnd) |
Synchronous copy of a buffer of Quarter type data from the host to a specific tensor in the device. More... | |
template<class T > | |
void | writeTensor (StringRef handle, ArrayRef< T > buffer) |
Synchronous copy of a buffer of non-Quarter type data from the host to a specific tensor in the device. More... | |
void | connectStream (StringRef handle, void *begin, void *end) |
Connect a stream of non-Quarter type to a circular buffer in memory. More... | |
void | connectStream (StringRef handle, const gccs::ArrayRef< QuarterMetadata > &metadata, void *begin, void *end) |
Connect a stream of type Quarter to a circular buffer in memory. More... | |
template<class T > | |
void | connectStream (StringRef handle, const gccs::ArrayRef< T > &buffer) |
Connect a stream of non-Quarter type to a circular buffer in memory. More... | |
void | connectStream (StringRef handle, void *p) |
Connect a stream of non-Quarter type to a fixed location in memory. More... | |
void | connectStreamToCallback (StringRef handle, StreamCallbackHandle f) |
Connect a stream to a callback taking a pointer to the location in memory to copy into/from. More... | |
void | connectStreamToCallback (StringRef handle, unsigned index, StreamCallbackHandle f) |
Connect a replicated stream to a callback taking a pointer to the location in memory to copy into/from. More... | |
void | connectHostFunction (StringRef handle, unsigned index, HostCallbackHandle f) |
Connect a HostFunction to a callback. More... | |
void | copyFromRemoteBuffer (StringRef handle, void *w, uint64_t repeatIndex, unsigned replicationIndex=0) |
Copy from a remote buffer to a user buffer w. More... | |
void | copyToRemoteBuffer (void *w, StringRef handle, uint64_t repeatIndex, unsigned replicationIndex=0) |
Copy to a remote buffer from a user buffer w. More... | |
std::vector< std::string > | listStreams () const |
Return a list of all streams in the engine. More... | |
void | setPrintStream (std::ostream &stream) |
Set output stream for printf commands. More... | |
void | setPrintTensorStream (std::ostream &stream) |
Set the output stream for PrintTensor programs. More... | |
OptionFlags | getEngineOptions () const |
Returns the options the engine was created with. | |
void | serializeExecutable (std::ostream &out) const |
Serialize the executable used by the engine. | |
void | insertSimulatedError (ErrorCode error, ErrorLocation const &location) |
Simulate an error. More... | |
void | eraseSimulatedError (ErrorLocation const &location) |
Undo the effects of Engine::insertSimulatedError(). More... | |
void | clearSimulatedErrors () |
Undo the effects of all Engine::insertSimulatedError() calls. More... | |
std::vector< ErrorLocation > | getSimulatedErrorLocations (unsigned programId, unsigned tile=~0) const |
Return the locations of a program from a program ID. More... | |
std::vector< ErrorLocation > | getSimulatedErrorLocations (StringRef vertexName, unsigned tile=~0) const |
Return the locations of a program from a vertex name. More... | |
Static Public Member Functions | |
static std::string | reportTiming (const TimerTimePoint &start, const TimerTimePoint &end) |
Get a timing report for the measured interval. More... | |
A graph compute engine.
The Engine class provides the ability to execute a graph program.
Engine creation options
Options can be overridden with the environment variable POPLAR_ENGINE_OPTIONS
. For example:
POPLAR_ENGINE_OPTIONS='{"target.deterministicWorkers":"true"}'
Engine creation options: Debug
debug.allowOutOfMemory
(true, false) [=false]
If true, allow out-of-memory while compiling and linking. This is automatically set to true if autoReport.outputGraphProfile
is set to true (directly or indirectly).
debug.computeInstrumentationLevel
(vertex, tile, ipu) [=tile]
The granularity of compute instrumentation. This option has no effect unless debug.instrumentCompute
is true.
debug.retainDebugInformation
(true, false) [=true]
Enable/disable the generation and retention of debug information. This can be set to false to reduce host memory consumption if no profiling is required, or only memory profiling is required and it is requested during compilation via the Engine option autoReport.outputGraphProfile
.
debug.cpuMultiThreadExecution
(true, false) [=true]
If true, operations are executed using multiple host threads for a CPU or IPU Model target. Setting to false may simplify debugging at the cost of reduced performance.
debug.instrument
(true, false) [=false]
If true, enable all instrument options (below). This will instruct the engine to add cycle counters to the compiled program to enable the execution profile to be retrieved after the program is run. This is only available for an IPU target (not an IPU Model target). Note that the more specific instrumentation options may override the default. For example,
{"debug.instrument":"true", "debug.instrumentExternalExchange":"false"}
will instrument everything apart from external exchange.
debug.instrumentCompute
(true, false) [=false]
If true, enable instrumentation of compute sets. See debug.instrument
.
debug.instrumentExternalExchange
(true, false) [=false]
If true, enable instrumentation of external exchanges. See debug.instrument
.
debug.instrumentControlFlow
(true, false) [=false]
If true, enable instrumentation of loops and conditionals. See debug.instrument
.
debug.outputAllSymbols
(true, false) [=false]
If true, output additional symbols to the ELF files that are not required but aid debugging.
debug.profilingTile
Integer [=Tiles per IPU - 1]
The tile on which to store the cycle counter for every compute set. This has no effect unless debug.computeInstrumentationLevel
is set to ipu.
debug.branchRecordTile
Integer [=NTILES-1]
The tile on which to store the branch record. This has no effect unless debug.instrumentControlFlow
flag is set. In a CPU target, this option has no effect. In an IPU Model, it only affects the memory profile.
debug.runtimeVerify
(true, false) [=false]
If true, expensive verification steps are enabled at runtime.
debug.trace
(true, false) [=false]
If true, a trace is printed to the error stream with the state of every edge before and after the execution of a compute set or exchange.
debug.traceFile
String
Only used if debug.trace
is true. If set, the debug trace is output to the specified file instead of the error stream.
debug.verify
(true, false) [=false]
If true, expensive verification steps are enabled at compile time. The checks mostly focus on exchange code, including the following:
In addition, after laying out memory we verify the memory constraints on variables are satisfied.
debug.supervisorStackSizeInBytes
Integer
If set, the automatically computed stack size for supervisor threads will be overridden with the specified value (in bytes) for all tiles.
debug.workerStackSizeInBytes
Integer
If set, the automatically computed stack size for worker threads will be overridden with the specified value (in bytes) for all tiles.
debug.floatPointOpException
(true, false) [=false]
If true, an invalid floating-point operation will cause an exception. You can also enable or disable invalid floating-point exceptions via the inv
flag using the function setFloatingPointBehaviour().
debug.nanOverflowMode
(true, false) [=false]
If true, enable Not-a-Number (NaN) on overflow mode. When enabled, half precision calculations that have overflowed will produce a NaN result, rather than saturating to the half precision max/min value, and the invalid operation (inv
) flag will be set. You can also enable or disable NaN-on-overflow mode via the nanoo
flag using the function setFloatingPointBehaviour().
debug.dumpDirectory
String
If set, debug dump files are stored in the specified directory; otherwise they are stored in the current working directory.
Engine creation options: Optimisations
opt.maxCompilationThreads
Integer [=0]
The maximum number of threads to use during compilation. A value of 0 means the hardware will be fully utilised.
opt.maxLinkerThreads
Integer [=0]
The maximum number of threads to use during linking. A value of 0 means the same number will be used as were used for compilation.
opt.internalExchangeOptimisationTarget
(balanced, cycles, memory) [=cycles]
What balance of heuristics to use when generating exchange code. Can be used to balance exchange memory usage against speed.
opt.enableMultiAccessCopies
(true, false) [=true]
Enable this option to make some of the copies faster at the expense of adding more constraints on variables used in the copies.
opt.limitVertexStateToLower256K
(true, false) [=false]
Enable this option to optimise the control code by allocating all of the vertex state in the first 256KB of memory. The disadvantage is that this is the same range of memory that the code must be placed in, so if the sum of the two is larger than 256KB then the model will fail to compile.
opt.useAutoloader
(true, false) [=true on Mk2 IPU, false otherwise]
If true, use the secondary loading mechanism to load the executable. This option is ignored on non-IPU targets.
Engine creation options: Target
target.deterministicWorkers
(true, false, portable) [=true]
Ensure that the mapping of vertices to worker threads is the same for repeated execution either on the same IPU (true), or on every IPU (portable). This guarantee does not hold following breakpoints or exceptions.
NOTE: The option portable
is deprecated and has the same functionality as setting the option to true
. This option will be removed in a future version.
target.saveArchive
String
If set, the binary archive will be saved to the specified filename during graph compilation. This archive contains the ELF files for each tile. No archive will be saved unless this option is set.
target.saveOutputVertexGraph
String
If set, the output vertex graph will be saved to the specified filepath during graph compilation.
target.gatewayWriteCombining
(true, false) [=target option gatewayMode]
Optimise write-to-host code to use IPU-Gateway write combining.
target.extendedMemory
(true, false) [=false]
When enabled, supports >16GiB for remote buffers. Only supported on IPU-M2000 systems.
Engine creation options: Report generation
The report generation options will automatically output the Poplar reports that can be viewed in the PopVision Graph Analyser.
These options provide a basic ability to capture the reports. For more complex use cases the reports should be generated programmatically via functions in the framework (TensorFlow, PopTorch, PopART or Poplar) in which the application is written.
autoReport.all
(true, false) [=false]
Output all the available reports described below.
You can exclude individual reports by combining options. For example, this will generate all reports apart from the serialized graph:
{"autoReport.all":"true", "autoReport.outputSerializedGraph":"false"}
autoReport.outputGraphProfile
(true, false) [=false]
Output the graph profile report to profile.pop
.
autoReport.outputLoweredVars
(true, false) [=false]
Generate lowered variables info in profile.pop
. This is equivalent to using the debug.loweredVarDumpFile
option with the filename set to profile.pop
.
To generate the old capnp format, set debug.loweredVarDumpFile
to vars.capnp
.
autoReport.outputArchive
(true, false) [=false]
Output the archive report: archive.a
. This is equivalent to using the target.saveArchive
option with the filename set to archive.a
.
autoReport.outputSerializedGraph
(true, false) [=false]
Output the serialized graph: serialized_graph.capnp.
autoReport.outputExecutionProfile
(true, false) [=false]
Output the execution profile report to profile.pop
. By default this setting will also set debug.instrument
to true. If you do not want instrumentation enabled you can set autoReport.outputExecutionProfile
or debug.instrument
to false.
autoReport.streamAtEachRun
(true, false) [=true]
Applies to profiler format V3 or higher. Enable or disable the streaming of the execution profile to disk at each run. If false, the whole execution will be written to disk on Engine destruction (note, some frameworks like TensorFlow may not properly destroy the Engine).
autoReport.outputDebugInfo
(true, false) [=false]
Output debug info: debug.json
. This file gathers the data in every DebugInfo object created. Elements in the graph report with debugIds can be related to these DebugInfo objects.
autoReport.executionProfileProgramRunCount
Integer [=2]
Specify how many runs of each program to capture in the execution profile.
autoReport.directory
String [=./]
Specify which directory you want the reports to be written to. By default they will be written to the current working directory.
Engine creation options: Other
prng.enableStochasticRounding
(true, false) [=false]
If true, stochastic rounding is enabled.
You can also enable or disable stochastic rounding using the functions setFloatingPointBehaviour() and setStochasticRounding(). For setFloatingPointBehaviour() the default behaviour is to enable stochastic rounding.
prng.seed
Integer [=0]
Base seed for PRNG initialisation.
Engine creation options: Experimental
experimental.minLoweringTiles
Integer [=5888]
Minimum number of tiles lowered at a time. Zero equates to all tiles. Lower sizes reduce the peak host memory required during engine lowering but may increase the time taken. This is only a hint. Fractions of an IPU may be rounded to a convenient boundary; other options may lead to this option being ignored.
Engine runtime options:
Any OptionFlags parameters defined for RuntimeOptions construction can also be used to construct an engine. These options will be used as defaults. However, they can be overridden by passing a RuntimeOptions argument to the member functions that accept one, or by defining the POPLAR_RUNTIME_OPTIONS
environment variable.
using poplar::Engine::ProgressFunc = std::function<void(int, int)> |
Callback function used to indicate engine compilation progress.
The function is passed two integers. The first is the progress value and the second is the maximum value for the progress.
If a progress callback is used, the function should not block. All calls to the callback function will be made in a single dedicated thread so blocking in the callback will block the receipt of further notifications (but will not block compilation from progressing). The callback should not use Poplar objects or functions relating to the Graph, Engine or Device that are being compiled.
poplar::Engine::Engine | ( | Graph && | graph, |
ArrayRef< program::Program > | progs, | ||
const OptionFlags & | opt = {} , |
||
ProgressFunc | progressCallBack = ProgressFunc() , |
||
const DebugContext & | debugContext = {} |
||
) |
Construct the engine from a graph and a list of programs.
Unless graph
is an rvalue a copy of some graph state will be made.
graph | The graph to compile into the engine. |
progs | The list of programs to run over the graph. Each program can be run separately by calling the run() method of the Engine with the argument being the index of the program to run in this list. |
opt | Options that can be used to control compilation and execution. The available options are listed under Engine. |
progressCallBack | A function that will be called to indicate engine compilation progress. See Engine::ProgressFunc for more information. |
debugContext | Optional Engine name and Debug Id. |
invalid_option | If any of the options passed in opt were not recognised or improperly formatted. |
link_error | If program linking fails; for example, due to undefined symbols or lack of memory on a tile. |
poplar::Engine::Engine | ( | Graph && | graph, |
program::Program | prog, | ||
const OptionFlags & | opt = {} , |
||
ProgressFunc | progressCallBack = ProgressFunc() , |
||
const DebugContext & | debugContext = {} |
||
) |
Construct the engine from a graph and a program.
Unless graph
is an rvalue a copy of some graph state will be made.
graph | The graph to compile into the engine. |
prog | The program to run over the graph. This program is run when the run() method is called on the Engine. |
opt | Options that can be used to control compilation and execution. The available options are listed under Engine. |
progressCallBack | A function that will be called to indicate engine compilation progress. See Engine::ProgressFunc for more information. |
debugContext | Optional Engine name and Debug Id. |
invalid_option | If any of the options passed in opt were not recognised or improperly formatted. |
link_error | If program linking fails; for example, due to undefined symbols or lack of memory on a tile. |
poplar::Engine::Engine | ( | Executable && | exe, |
const OptionFlags & | opt = {} |
||
) |
Construct the engine from a precompiled executable.
exe | The precompiled executable. This can be created using compileGraph(). |
opt | Options that can be used to control execution. These must be the same as the options passed to compileGraph(). The available options are listed under Engine. |
invalid_option | If any of the options passed in opt were not recognised or improperly formatted. |
void poplar::Engine::clearSimulatedErrors | ( | ) |
Undo the effects of all Engine::insertSimulatedError() calls.
To remove the effects of the simulated errors and run the program from the beginning again you can call Engine::load() after clearing the simulated errors:
void poplar::Engine::connectHostFunction | ( | StringRef | handle, |
unsigned | index, | ||
HostCallbackHandle | f | ||
) |
Connect a HostFunction to a callback.
The callback takes two arguments, which point to the locations in memory for each of the function's input and output arguments, respectively. During a host function call, first the device transfers the input data to the host, then the callback is invoked, and finally the output data is copied back to the device. The memory pointed to by the callback arguments must only be accessed for the duration of the callback.
handle | The name of the host function. |
index | The replicated index to connect to. |
f | Function to be called whenever new input data is available |
void poplar::Engine::connectStream | ( | StringRef | handle, |
const gccs::ArrayRef< QuarterMetadata > & | metadata, | ||
void * | begin, | ||
void * | end | ||
) |
Connect a stream of type Quarter to a circular buffer in memory.
Each time data is copied to/from the stream the pointer for the next transfer is incremented within the bounds of the buffer.
handle | The destination host copy handle. |
metadata | Array reference to host side variables per replica from or to which to copy metadata on the device. If the copy is from the device to the host the variable will be overwritten. |
begin | Pointer to the start of the circular buffer. |
end | Pointer to the end of the circular buffer. |
|
inline |
Connect a stream of non-Quarter type to a circular buffer in memory.
Each time data is copied to/from the stream the pointer for the next transfer is incremented within the bounds of the buffer.
handle | The destination host copy handle. |
buffer | A view of the circular buffer. |
void poplar::Engine::connectStream | ( | StringRef | handle, |
void * | begin, | ||
void * | end | ||
) |
Connect a stream of non-Quarter type to a circular buffer in memory.
Each time data is copied to/from the stream the pointer for the next transfer is incremented within the bounds given.
handle | The name of the stream to connect to. |
begin | Pointer to the start of the circular buffer. |
end | Pointer to the end of the circular buffer. |
void poplar::Engine::connectStream | ( | StringRef | handle, |
void * | p | ||
) |
Connect a stream of non-Quarter type to a fixed location in memory.
Each time data is copied to/from the stream this location will be read/written.
handle | The name of the stream to connect to. |
p | The pointer to the memory buffer. |
void poplar::Engine::connectStreamToCallback | ( | StringRef | handle, |
StreamCallbackHandle | f | ||
) |
Connect a stream to a callback taking a pointer to the location in memory to copy into/from.
This will be called whenever the stream will be read or was written by the device. The given memory location will only be valid to read from or write to for the duration of the callback.
handle | The name of the stream to connect to. |
f | Callback to be called whenever the stream is to be read or was written by the device. |
void poplar::Engine::connectStreamToCallback | ( | StringRef | handle, |
unsigned | index, | ||
StreamCallbackHandle | f | ||
) |
Connect a replicated stream to a callback taking a pointer to the location in memory to copy into/from.
This will be called whenever the stream will be read or was written by the device. The given memory location will only be valid to read from or write to for the duration of the callback.
handle | The name of the stream to connect to. |
index | The replicated index to connect to. |
f | Callback to be called whenever the stream is to be read or was written by the device. |
void poplar::Engine::copyFromRemoteBuffer | ( | StringRef | handle, |
void * | w, | ||
uint64_t | repeatIndex, | ||
unsigned | replicationIndex = 0 |
||
) |
Copy from a remote buffer to a user buffer w
.
handle | The name of the remote buffer to copy from. |
w | The user buffer to copy to. |
repeatIndex | The index in the remote buffer to copy from. |
replicationIndex | The replicated graph index. |
void poplar::Engine::copyToRemoteBuffer | ( | void * | w, |
StringRef | handle, | ||
uint64_t | repeatIndex, | ||
unsigned | replicationIndex = 0 |
||
) |
Copy to a remote buffer from a user buffer w
.
w | The user buffer to copy from. |
handle | The remote buffer to copy to. |
repeatIndex | The index in the remote buffer to copy to. |
replicationIndex | The replicated graph index. |
void poplar::Engine::deploy | ( | ) |
Load the engine.
This loads binary code. The device must have been prepared previously by calling prepare().
void poplar::Engine::disableExecutionProfiling | ( | ) |
Pause execution profiling.
Subsequent Engine::run() calls are executed without being profiled until a subsequent call to enableExecutionProfiling().
For example, you can exclude individual programs from a profile like this:
engine.disableExecutionProfiling(); engine.run(...); engine.enableExecutionProfiling();
void poplar::Engine::enableExecutionProfiling | ( | ) |
Enable execution profiling.
Subsequent Engine::run() calls are profiled when executed.
void poplar::Engine::eraseSimulatedError | ( | ErrorLocation const & | location | ) |
Undo the effects of Engine::insertSimulatedError().
location | Any one of the locations passed to Engine::insertSimulatedError(). |
poplar::poplar_error | If there is no simulated error at location. |
pva::Report poplar::Engine::getReport | ( | bool | reportExecution = true | ) |
Get a PVA Report object that allows access to profiling data for the graph and the execution with this engine.
Subsequent Engine::run() executions may not be accessible through the returned report due to caching.
reportExecution | Enables access to the execution report (since this engine was constructed/the execution report was last reset). Otherwise, only the graph profile is available. |
profiling_disabled | If the device is not an IPU or IPU Model. |
std::vector< ErrorLocation > poplar::Engine::getSimulatedErrorLocations | ( | StringRef | vertexName, |
unsigned | tile = ~0 |
||
) | const |
Return the locations of a program from a vertex name.
Similar to Engine::getSimulatedErrorLocations(programId, tile) but looks up the error locations using the name of a Vertex.
std::vector< ErrorLocation > poplar::Engine::getSimulatedErrorLocations | ( | unsigned | programId, |
unsigned | tile = ~0 |
||
) | const |
Return the locations of a program from a program ID.
It's possible a program exists on multiple tiles so tile
can be used to disambiguate the tile on which the error should occur. If tile
is not specified then a location will be returned for each tile the program exists on.
programId | The program id of the program. This can be looked up in the Graph Analyser tool (look for the "Id" field under "Details" in the "Program Tree" tab). |
tile | The tile in the device. For devices containing multiple IPUs the range of valid tiles is [0, numIpus * numTilesPerIpu). |
Note: If programId
specifies a poplar::Vertex then this will return the location of the launch routine for the poplar::Vertex and not the location of the code that comprises the poplar::Vertex. Use the vertexName overload of this function to specify an error inside a poplar::Vertex.
TimerTimePoint poplar::Engine::getTimeStamp | ( | ) |
Get a record of the current host and device time.
Details depend on the underlying device used.
void poplar::Engine::insertSimulatedError | ( | ErrorCode | error, |
ErrorLocation const & | location | ||
) |
Simulate an error.
This function causes the program to generate an error when it gets to the specified location. This can be useful for failure testing.
This function must be called after Engine::load() and before Engine::run().
You can simulate many errors at the same time, provided each error is simulated at a unique location. However, only a subset of the errors may be reported because the execution of the program will stop as soon as any one of the errors is detected.
Example usage:
error | The type of error to simulate. See poplar::ErrorCode for a list of possible errors to simulate. |
location | Where to simulate the error. The location of an error must be unique from other simulated errors. See Engine::getSimulatedErrorLocations for how to specify an error location based on some program information. |
poplar::poplar_error | If Engine::load() has not been called yet. |
poplar::poplar_error | If an error is already being simulated at location. |
std::vector< std::string > poplar::Engine::listStreams | ( | ) | const |
Return a list of all streams in the engine.
void poplar::Engine::load | ( | const Device & | device | ) |
void poplar::Engine::loadAndRun | ( | const Device & | device, |
unsigned | prog = 0 |
||
) |
Run the graph program.
This function will load the program/graph onto the device and then execute the graph program.
prog | The index of the program to run. If this is greater than or equal to the number of programs given in the constructor then an exception is thrown. |
void poplar::Engine::prepare | ( | const Device & | device | ) |
Prepare the device for loading.
This configures the device ready for loading binary code, which is done by calling deploy().
device | The device to load onto. |
void poplar::Engine::prepare | ( | const Device & | device, |
const RuntimeOptions & | runOptions | ||
) |
Prepare the device for loading.
This configures the device ready for loading binary code, which is done by calling deploy().
device | The device to load onto. |
runOptions | Set of parameters to adjust runtime behaviour. |
void poplar::Engine::printProfileSummary | ( | std::ostream & | outputStream, |
const OptionFlags & | opt = {} |
||
) |
Get and print the summary of a report with the given options.
This is equivalent to getting and printing the summary of both the graph and execution reports using poplar::printProfileSummary()
.
outputStream | A stream to write the summary to. |
opt | A set of option flags to configure the contents of the report. All can be "true" or "false". The default is "false". The available options are:
|
profiling_disabled | If the device is not an IPU or IPU Model. |
invalid_option | If any of the options passed in opt were not recognised or improperly formatted. |
|
inline |
Synchronous copy of a buffer of non-Quarter type data from a specific tensor in the device into a host-side buffer.
The tensor must have been marked as an output tensor. The buffer must have room for all of the tensor data. The handle should match the one passed to Graph::createHostRead().
handle | The source host copy handle. |
buffer | A view of the host-side buffer to copy the tensor data into. |
void poplar::Engine::readTensor | ( | StringRef | handle, |
QuarterMetadata & | metadata, | ||
void * | buf, | ||
void * | bufEnd | ||
) |
Synchronous copy of a buffer of Quarter type data from a specific tensor in the device into a host-side buffer.
The tensor must have been marked as an output tensor. The buffer must have room for all of the tensor data. The buffer end address is required for size verification. The handle should match the one passed to Graph::createHostRead().
handle | The source host copy handle. |
metadata | The host side variable to which metadata on the device is to be copied. |
buf | The destination of the read. |
bufEnd | The end address of destination buffer. |
void poplar::Engine::readTensor | ( | StringRef | handle, |
void * | buf, | ||
void * | bufEnd | ||
) |
Synchronous copy of a buffer of non-Quarter type data from a specific tensor in the device into a host-side buffer.
The tensor must have been marked as an output tensor. The buffer must have room for all of the tensor data. The buffer end address is required for size verification. The handle should match the one passed to Graph::createHostRead().
handle | The source host copy handle. |
buf | The destination of the read. |
bufEnd | The end address of destination buffer. |
void poplar::Engine::reportIntervals | ( | std::ostream & | outputStream | ) |
Write a CSV data file to a specified output stream.
The data files contain the number of tiles active over time in cycles for compute, synchronisation and exchange phases.
Each row contains the following entries:
Because tiles execute a number of threads (up to 6) in parallel, a single "thread cycle" may only be executed every 6 tile clock cycles. The cycles reported by this function are tile clock cycles rather than thread cycles.
outputStream | An output stream for the CSV data to be written to. |
profiling_disabled | If the device has no profiling enabled. |
|
static |
Get a timing report for the measured interval.
Details depend on the underlying device used.
start | Start time of report |
end | End time of report |
void poplar::Engine::resetExecutionProfile | ( | ) |
Reset execution profile.
When programs are run their profiles are appended to the execution profile. This discards profiling information for previously executed programs.
void poplar::Engine::run | ( | unsigned | prog, |
const std::string & | debugName, | ||
const RuntimeOptions & | options | ||
) |
Run the graph program.
This function is similar to run(prog, debugName) and allows the user to override runtime parameters via an instance of RuntimeOptions.
prog | The index of the program to run. If this is greater than or equal to the number of programs given in the constructor then an exception is thrown. |
debugName | Run name (for debugging/analysis). |
options | Multiple parameters that alter the execution behaviour. |
void poplar::Engine::run | ( | unsigned | prog = 0 , |
const std::string & | debugName = "" |
||
) |
Run the graph program.
This function will execute the graph program. Note that the program must already have been loaded onto a device; otherwise an exception will occur.
prog | The index of the program to run. If this is greater than or equal to the number of programs given in the constructor then an exception is thrown. |
debugName | Run name (for debugging/analysis). |
void poplar::Engine::setPrintStream | ( | std::ostream & | stream | ) |
Set output stream for printf commands.
stream | The output stream to use. |
void poplar::Engine::setPrintTensorStream | ( | std::ostream & | stream | ) |
Set the output stream for PrintTensor programs.
By default, tensors are printed to stderr.
stream | The output stream to use. |
void poplar::Engine::stop | ( | ) |
Stop the graph program.
This function attempts to stop a long-running well-behaved graph program asynchronously. After this call, the run() call will return. Only programs that execute host syncs can be stopped. The device will be left in an undefined state and no more programs can be run before the device is reset. Stream accesses may occur before the program actually stops.
|
inline |
Synchronous copy of a buffer of non-Quarter type data from the host to a specific tensor in the device.
The tensor must have been marked as an input tensor. The handle should match the one passed to Graph::createHostWrite().
handle | The destination host copy handle. |
buffer | A view of the circular buffer. |
void poplar::Engine::writeTensor | ( | StringRef | handle, |
const QuarterMetadata & | metadata, | ||
const void * | buf, | ||
const void * | bufEnd | ||
) |
Synchronous copy of a buffer of Quarter type data from the host to a specific tensor in the device.
The tensor must have been marked as an input tensor. The buffer end address is required for size verification. The handle should match the one passed to Graph::createHostWrite().
handle | The destination host copy handle. |
metadata | The host side variable from which metadata is to be copied to the device. |
buf | The source of the write. |
bufEnd | The end address of source buffer. |
void poplar::Engine::writeTensor | ( | StringRef | handle, |
const void * | buf, | ||
const void * | bufEnd | ||
) |
Synchronous copy of a buffer of non-Quarter type data from the host to a specific tensor in the device.
The tensor must have been marked as an input tensor. The buffer end address is required for size verification. The handle should match the one passed to Graph::createHostWrite().
handle | The destination host copy handle. |
buf | The source of the write. |
bufEnd | The end address of source buffer. |