A graph compute engine. More...

#include <Engine.hpp>

Classes
class	TimerTimePoint
	PImpl interface to core timing information. More...

Public Types
using	ProgressFunc = std::function< void(int, int)>
	Callback function used to indicate engine compilation progress. More...

Public Member Functions
	Engine (Graph &&graph, ArrayRef< program::Program > progs, const OptionFlags &opt={}, ProgressFunc progressCallBack=ProgressFunc(), const DebugContext &debugContext={})
	Construct the engine from a graph and a list of programs. More...

	Engine (Graph &&graph, program::Program prog, const OptionFlags &opt={}, ProgressFunc progressCallBack=ProgressFunc(), const DebugContext &debugContext={})
	Construct the engine from a graph and a program. More...

	Engine (Executable &&exe, const OptionFlags &opt={})
	Construct the engine from a precompiled executable. More...

void	prepare (const Device &device)
	Prepare the device for loading. More...

void	prepare (const Device &device, const RuntimeOptions &runOptions)
	Prepare the device for loading. More...

void	deploy ()
	Load the engine. More...

void	load (const Device &device)
	Load the compiled program/graph onto a device. More...

void	run (unsigned prog=0, const std::string &debugName="")
	Run the graph program. More...

void	stop ()
	Stop the graph program. More...

void	run (unsigned prog, const std::string &debugName, const RuntimeOptions &options)
	Run the graph program. More...

void	loadAndRun (const Device &device, unsigned prog=0)
	Run the graph program. More...

TimerTimePoint	getTimeStamp ()
	Get a record of the current host and device time. More...

void	resetExecutionProfile ()
	Reset execution profile. More...

pva::Report	getReport (bool reportExecution=true)
	Get a PVA Report object that allows access to profiling data for the graph and the execution with this engine. More...

void	disableExecutionProfiling ()
	Pause execution profiling. More...

void	enableExecutionProfiling ()
	Enable execution profiling. More...

void	printProfileSummary (std::ostream &outputStream, const OptionFlags &opt={})
	Get and print the summary of a report with the given options. More...

void	reportIntervals (std::ostream &outputStream)
	Write a CSV data file to a specified output stream. More...

void	readTensor (StringRef handle, void *buf, void *bufEnd)
	Synchronous copy of a buffer of non-Quarter type data from a specific tensor in the device into a host-side buffer. More...

void	readTensor (StringRef handle, QuarterMetadata &metadata, void *buf, void *bufEnd)
	Synchronous copy of a buffer of Quarter type data from a specific tensor in the device into a host-side buffer. More...

template<class T >
void	readTensor (StringRef handle, gccs::ArrayRef< T > buffer)
	Synchronous copy of a buffer of non-Quarter type data from a specific tensor in the device into a host-side buffer. More...

void	writeTensor (StringRef handle, const void *buf, const void *bufEnd)
	Synchronous copy of a buffer of non-Quarter type data from the host to a specific tensor in the device. More...

void	writeTensor (StringRef handle, const QuarterMetadata &metadata, const void *buf, const void *bufEnd)
	Synchronous copy of a buffer of Quarter type data from the host to a specific tensor in the device. More...

template<class T >
void	writeTensor (StringRef handle, ArrayRef< T > buffer)
	Synchronous copy of a buffer of non-Quarter type data from the host to a specific tensor in the device. More...

void	connectStream (StringRef handle, void *begin, void *end)
	Connect a stream of non-Quarter type to a circular buffer in memory. More...

void	connectStream (StringRef handle, const gccs::ArrayRef< QuarterMetadata > &metadata, void *begin, void *end)
	Connect a stream of type Quarter to a circular buffer in memory. More...

template<class T >
void	connectStream (StringRef handle, const gccs::ArrayRef< T > &buffer)
	Connect a stream of non-Quarter type to a circular buffer in memory. More...

void	connectStream (StringRef handle, void *p)
	Connect a stream of non-Quarter type to a fixed location in memory. More...

void	connectStreamToCallback (StringRef handle, StreamCallbackHandle f)
	Connect a stream to a callback taking a pointer to the location in memory to copy into/from. More...

void	connectStreamToCallback (StringRef handle, unsigned index, StreamCallbackHandle f)
	Connect a replicated stream to a callback taking a pointer to the location in memory to copy into/from. More...

void	connectHostFunction (StringRef handle, unsigned index, HostCallbackHandle f)
	Connect a HostFunction to a callback. More...

void	copyFromRemoteBuffer (StringRef handle, void *w, uint64_t repeatIndex, unsigned replicationIndex=0)
	Copy from a remote buffer to a user buffer `w`. More...

void	copyToRemoteBuffer (void *w, StringRef handle, uint64_t repeatIndex, unsigned replicationIndex=0)
	Copy to a remote buffer from a user buffer `w`. More...

std::vector< std::string >	listStreams () const
	Return a list of all streams in the engine. More...

void	setPrintStream (std::ostream &stream)
	Set output stream for printf commands. More...

void	setPrintTensorStream (std::ostream &stream)
	Set the output stream for `PrintTensor` programs. More...

OptionFlags	getEngineOptions () const
	Returns the options the engine was created with.

void	serializeExecutable (std::ostream &out) const
	Serialize the executable used by the engine.

void	insertSimulatedError (ErrorCode error, ErrorLocation const &location)
	Simulate an error. More...

void	eraseSimulatedError (ErrorLocation const &location)
	Undo the effects of Engine::insertSimulatedError(). More...

void	clearSimulatedErrors ()
	Undo the effects of all Engine::insertSimulatedError() calls. More...

std::vector< ErrorLocation >	getSimulatedErrorLocations (unsigned programId, unsigned tile=~0) const
	Return the locations of a program from a program ID. More...

std::vector< ErrorLocation >	getSimulatedErrorLocations (StringRef vertexName, unsigned tile=~0) const
	Return the locations of a program from a vertex name. More...

Static Public Member Functions
static std::string	reportTiming (const TimerTimePoint &start, const TimerTimePoint &end)
	Get a timing report for the measured interval. More...

Detailed Description

A graph compute engine.

The Engine class provides the ability to execute a graph program.

Engine creation options

Options can be overridden with the environment variable POPLAR_ENGINE_OPTIONS. For example:

POPLAR_ENGINE_OPTIONS='{"target.deterministicWorkers":"true"}'

Engine creation options: Debug

debug.allowOutOfMemory (true, false) [=false]

If true, allow out-of-memory while compiling and linking. This is automatically set to true if autoReport.outputGraphProfile is set to true (directly or indirectly).
debug.computeInstrumentationLevel (vertex, tile, ipu) [=tile]

The granularity of compute instrumentation. This option has no effect unless debug.instrumentCompute is true.
- vertex: Store the last cycle count of each vertex on every tile.
- tile: Store the last cycle count of each compute set on every tile.
- ipu: Store the last cycle count of each compute set on one tile per IPU. This saves memory compared to tile (since the cycle counts are always live and this needs to store them on only one tile), but it loses all per-tile cycle information. It works by adding a sync after each compute set and timing how long it takes to get to that sync. So, effectively, it measures the cycle time of the longest-running tile in the compute set.
debug.retainDebugInformation (true, false) [=true]

Enable/disable the generation and retention of debug information. This can be set to false to reduce host memory consumption if no profiling is required, or only memory profiling is required and it is requested during compilation via the Engine option autoReport.outputGraphProfile.
debug.cpuMultiThreadExecution (true, false) [=true]

If true, operations are executed using multiple host threads for a CPU or IPU Model target. Setting to false may simplify debugging at the cost of reduced performance.
debug.instrument (true, false) [=false]

If true, enable all instrument options (below). This will instruct the engine to add cycle counters to the compiled program to enable the execution profile to be retrieved after the program is run. This is only available for an IPU target (not an IPU Model target). Note that the more specific instrumentation options may override the default. For example,
```
{"debug.instrument":"true",
 "debug.instrumentExternalExchange":"false"}
```
will instrument everything apart from external exchange.
debug.instrumentCompute (true, false) [=false]

If true, enable instrumentation of compute sets. See debug.instrument.
debug.instrumentExternalExchange (true, false) [=false]

If true, enable instrumentation of external exchanges. See debug.instrument.
debug.instrumentControlFlow (true, false) [=false]

If true, enable instrumentation of loops and conditionals. See debug.instrument.
debug.outputAllSymbols (true, false) [=false]

If true, output additional symbols to the ELF files that are not required but aid debugging.
debug.profilingTile Integer [=Tiles per IPU - 1]

The tile on which to store the cycle counter for every compute set. This has no effect unless debug.computeInstrumentationLevel is set to ipu.
debug.branchRecordTile Integer [=NTILES-1]

The tile on which to store the branch record. This has no effect unless the debug.instrumentControlFlow flag is set. In a CPU target, this option has no effect. In an IPU Model, it only affects the memory profile.
debug.runtimeVerify (true, false) [=false]

If true, expensive verification steps are enabled at runtime.
debug.trace (true, false) [=false]

If true, a trace is printed to the error stream with the state of every edge before and after the execution of a compute set or exchange.
debug.traceFile String

Only used if debug.trace is true. If set, the debug trace is output to the specified file instead of the error stream.
debug.verify (true, false) [=false]

If true, expensive verification steps are enabled at compile time. The checks mostly focus on exchange code, including the following:
- ensuring variables have been set,
- ensuring section/instruction alignment is correct,
- and ensuring the total number of bytes received is as expected.
In addition, after laying out memory we verify the memory constraints on variables are satisfied.
debug.supervisorStackSizeInBytes Integer

If set, the automatically computed stack size for supervisor threads will be overridden with the specified value (in bytes) for all tiles.
debug.workerStackSizeInBytes Integer

If set, the automatically computed stack size for worker threads will be overridden with the specified value (in bytes) for all tiles.
debug.floatPointOpException (true, false) [=false]

If true, an invalid floating-point operation will cause an exception. You can also enable or disable the invalid floating-point exception via the inv flag using the function setFloatingPointBehaviour().
debug.nanOverflowMode (true, false) [=false]

If true, enable Not-a-Number (NaN) on overflow mode. When enabled, half precision calculations that have overflowed will produce a NaN result, rather than saturating to the half precision max/min value, and the invalid operation (inv) flag will be set. You can also enable or disable NaN-on-overflow mode via the nanoo flag using the function setFloatingPointBehaviour().
debug.dumpDirectory String

If set, debug dump files will be stored in the specified directory; otherwise they will be stored in the current working directory.

Engine creation options: Optimisations

opt.maxCompilationThreads Integer [=0]

The maximum number of threads to use during compilation. A value of 0 means the hardware will be fully utilised.
opt.maxLinkerThreads Integer [=0]

The maximum number of threads to use during linking. A value of 0 means the same number will be used as were used for compilation.
opt.internalExchangeOptimisationTarget (balanced, cycles, memory) [=cycles]

What balance of heuristics to use when generating exchange code. Can be used to balance exchange memory usage against speed.
- cycles: Focus completely on speed at the expense of always-live memory
- memory: Focus completely on minimising the memory footprint, at the expense of speed
- balanced: Sacrifice some speed to attempt to reduce the amount of always live memory produced.
opt.enableMultiAccessCopies (true, false) [=true]

Enable this option to make some of the copies faster at the expense of adding more constraints on variables used in the copies.
opt.limitVertexStateToLower256K (true, false) [=false]

Enable this option to optimise the control code by allocating all of the vertex state in the first 256KB of memory. The disadvantage is that the code must also be placed in this range of memory, so if the sum of the two is larger than 256KB then the model will fail to compile.
opt.useAutoloader (true, false) [=true on Mk2 IPU, false otherwise]

If true, use the secondary loading mechanism to load the executable. This option is ignored on non-IPU targets.

Engine creation options: Target

target.deterministicWorkers (true, false, portable) [=true]

Ensure that the mapping of vertices to worker threads is the same for repeated execution either on the same IPU (true), or on every IPU (portable). This guarantee does not hold following breakpoints or exceptions.

NOTE: The option portable is deprecated and has the same functionality as setting the option to true. This option will be removed in a future version.
target.saveArchive String

If set, the binary archive will be saved to the specified filename during graph compilation. This archive contains the ELF files for each tile. No archive will be saved unless this option is set.
target.saveOutputVertexGraph String

If set, the output vertex graph will be saved to the specified filepath during graph compilation.
target.gatewayWriteCombining (true, false) [=target option gatewayMode]

Optimise write-to-host code to use IPU-Gateway write combining.
target.extendedMemory (true, false) [=false]

When enabled, supports >16GiB for remote buffers. Only supported on IPU-M2000 systems.

Engine creation options: Report generation

The report generation options will automatically output the Poplar reports that can be viewed in the PopVision Graph Analyser.

These options provide a basic ability to capture the reports. For more complex use cases the reports should be generated programmatically via functions in the framework (TensorFlow, PopTorch, PopART or Poplar) in which the application is written.

autoReport.all (true, false) [=false]

Output all the available reports described below.

You can exclude individual reports by combining options. For example, this will generate all reports apart from the serialized graph:
```
{"autoReport.all":"true",
 "autoReport.outputSerializedGraph":"false"}
```
autoReport.outputGraphProfile (true, false) [=false]

Output the graph profile report to profile.pop.
autoReport.outputLoweredVars (true, false) [=false]

Generate lowered variables info in profile.pop. This is equivalent to using the debug.loweredVarDumpFile option with the filename set to profile.pop.

To generate the old capnp format, set debug.loweredVarDumpFile to vars.capnp.
autoReport.outputArchive (true, false) [=false]

Output the archive report: archive.a. This is equivalent to using the target.saveArchive option with the filename set to archive.a.
autoReport.outputSerializedGraph (true, false) [=false]

Output the serialized graph: serialized_graph.capnp.
autoReport.outputExecutionProfile (true, false) [=false]

Output the execution profile report to profile.pop.

By default this setting will also set debug.instrument to true. If you do not want instrumentation enabled you can set autoReport.outputExecutionProfile or debug.instrument to false.
autoReport.streamAtEachRun (true, false) [=true]

Applies to profiler format V3 or higher. Enable or disable the streaming of the execution profile to disk at each run. If false, the whole execution will be written to disk on Engine destruction (note, some frameworks like TensorFlow may not properly destroy the Engine).
autoReport.outputDebugInfo (true, false) [=false]

Output debug info: debug.json. This file gathers the data in every DebugInfo object created. Elements in the graph report with debugIds can be related to these DebugInfo objects.
autoReport.executionProfileProgramRunCount Integer [=2]

Specify how many runs of each program to capture in the execution profile.
autoReport.directory String [=./]

Specify which directory you want the reports to be written to. By default they will be written to the current working directory.

Engine creation options: Other

prng.enableStochasticRounding (true, false) [=false]

If true, stochastic rounding is enabled.

You can also enable or disable stochastic rounding using the functions setFloatingPointBehaviour() and setStochasticRounding(). For setFloatingPointBehaviour() the default behaviour is to enable stochastic rounding.
prng.seed Integer [=0]

Base seed for PRNG initialisation.

Engine creation options: Experimental

experimental.minLoweringTiles Integer [=5888]

Minimum number of tiles lowered at a time. Zero equates to all tiles. Lower sizes reduce the peak host memory required during engine lowering but may increase the time taken. This is only a hint. Fractions of an IPU may be rounded to a convenient boundary; other options may lead to this option being ignored.

Engine runtime options:

Any OptionFlags parameters defined for RuntimeOptions construction can be used to construct an engine too. These options will be used as defaults. However, they can be overridden by passing a RuntimeOptions argument to the member functions that accept one, or by defining the POPLAR_RUNTIME_OPTIONS environment variable.

Member Typedef Documentation

◆ ProgressFunc

using poplar::Engine::ProgressFunc = std::function<void(int, int)>

Callback function used to indicate engine compilation progress.

The function is passed two integers. The first is the progress value and the second is the maximum value for the progress.

If a progress callback is used, the function should not block. All calls to the callback function will be made in a single dedicated thread so blocking in the callback will block the receipt of further notifications (but will not block compilation from progressing). The callback should not use Poplar objects or functions relating to the Graph, Engine or Device that are being compiled.

Constructor & Destructor Documentation

◆ Engine() [1/3]

poplar::Engine::Engine	(	Graph &&	graph,
		ArrayRef< program::Program >	progs,
		const OptionFlags &	opt = `{}`,
		ProgressFunc	progressCallBack = `ProgressFunc()`,
		const DebugContext &	debugContext = `{}`
	)

Construct the engine from a graph and a list of programs.

Unless graph is an rvalue a copy of some graph state will be made.

Parameters

graph	The graph to compile into the engine.
progs	The list of programs to run over the graph. Each program can be run separately by calling the run() method of the Engine with the argument being the index of the program to run in this list.
opt	Options that can be used to control compilation and execution. The available options are listed under Engine.
progressCallBack	A function that will be called to indicate engine compilation progress. See Engine::ProgressFunc for more information.
debugContext	Optional Engine name and Debug Id.

Exceptions

invalid_option	If any of the options passed in `opt` were not recognised or improperly formatted.
link_error	If program linking fails; for example, due to undefined symbols or lack of memory on a tile.

◆ Engine() [2/3]

poplar::Engine::Engine	(	Graph &&	graph,
		program::Program	prog,
		const OptionFlags &	opt = `{}`,
		ProgressFunc	progressCallBack = `ProgressFunc()`,
		const DebugContext &	debugContext = `{}`
	)

Construct the engine from a graph and a program.

Unless graph is an rvalue a copy of some graph state will be made.

Parameters

graph	The graph to compile into the engine.
prog	The program to run over the graph. This program is run when the run() method is called on the Engine.
opt	Options that can be used to control compilation and execution. The available options are listed under Engine.
progressCallBack	A function that will be called to indicate engine compilation progress. See Engine::ProgressFunc for more information.
debugContext	Optional Engine name and Debug Id.

Exceptions

invalid_option	If any of the options passed in `opt` were not recognised or improperly formatted.
link_error	If the program linking fails; for example, due to undefined symbols or lack of memory on a tile.

◆ Engine() [3/3]

poplar::Engine::Engine	(	Executable &&	exe,
		const OptionFlags &	opt = `{}`
	)

Construct the engine from a precompiled executable.

Parameters

exe	The precompiled executable. This can be created using compileGraph().
opt	Options that can be used to control execution. These must be the same as the options passed to compileGraph(). The available options are listed under Engine.

Exceptions

invalid_option If any of the options passed in opt were not recognised or improperly formatted.

Member Function Documentation

◆ clearSimulatedErrors()

void poplar::Engine::clearSimulatedErrors ( )

Undo the effects of all Engine::insertSimulatedError() calls.

To remove the effects of the simulated errors and run the program from the beginning again you can call Engine::load() after clearing the simulated errors:

engine.clearSimulatedErrors();
engine.load(device);
engine.run(); // won't hit any simulated errors.

◆ connectHostFunction()

void poplar::Engine::connectHostFunction	(	StringRef	handle,
		unsigned	index,
		HostCallbackHandle	f
	)

Connect a HostFunction to a callback.

The callback takes two arguments, which point to the locations in memory for each of the function's input and output arguments, respectively. During a host function call, first the device transfers the input data to the host, then the callback is invoked, and finally the output data is copied back to the device. The memory pointed to by the callback arguments must only be accessed for the duration of the callback.

Parameters

handle	The name of the host function.
index	The replicated index to connect to.
f	Function to be called whenever new input data is available

◆ connectStream() [1/4]

void poplar::Engine::connectStream	(	StringRef	handle,
		const gccs::ArrayRef< QuarterMetadata > &	metadata,
		void *	begin,
		void *	end
	)

Connect a stream of type Quarter to a circular buffer in memory.

Each time data is copied to/from the stream the pointer for the next transfer is incremented within the bounds of the buffer.

See also: Graph::createHostWrite()

Parameters

handle	The destination host copy handle.
metadata	Array reference to host side variables per replica from or to which to copy metadata on the device. If the copy is from the device to the host the variable will be overwritten.
begin	Pointer to the start of the circular buffer.
end	Pointer to the end of the circular buffer.

◆ connectStream() [2/4]

template<class T >

void poplar::Engine::connectStream	(	StringRef	handle,
		const gccs::ArrayRef< T > &	buffer
	)

inline

Connect a stream of non-Quarter type to a circular buffer in memory.

Each time data is copied to/from the stream the pointer for the next transfer is incremented within the bounds of the buffer.

See also: Graph::createHostWrite()

Parameters

handle	The destination host copy handle.
buffer	A view of the circular buffer.

◆ connectStream() [3/4]

void poplar::Engine::connectStream	(	StringRef	handle,
		void *	begin,
		void *	end
	)

Connect a stream of non-Quarter type to a circular buffer in memory.

Each time data is copied to/from the stream the pointer for the next transfer is incremented within the bounds given.

Parameters

handle	The name of the stream to connect to.
begin	Pointer to the start of the circular buffer.
end	Pointer to the end of the circular buffer.

Deprecated: Use Engine::connectStream(StringRef,gccs::ArrayRef<T>) instead.

◆ connectStream() [4/4]

void poplar::Engine::connectStream	(	StringRef	handle,
		void *	p
	)

Connect a stream of non-Quarter type to a fixed location in memory.

Each time data is copied to/from the stream this location will be read/written.

Parameters

handle	The name of the stream to connect to.
p	The pointer to the memory buffer.

Deprecated: Use Engine::connectStream(StringRef,gccs::ArrayRef<T>) instead.

◆ connectStreamToCallback() [1/2]

void poplar::Engine::connectStreamToCallback	(	StringRef	handle,
		StreamCallbackHandle	f
	)

Connect a stream to a callback taking a pointer to the location in memory to copy into/from.

This will be called whenever the stream will be read or was written by the device. The given memory location will only be valid to read from or write to for the duration of the callback.

Parameters

handle	The name of the stream to connect to.
f	Callback to be called whenever the stream is to be read or was written by the device.

◆ connectStreamToCallback() [2/2]

void poplar::Engine::connectStreamToCallback	(	StringRef	handle,
		unsigned	index,
		StreamCallbackHandle	f
	)

Connect a replicated stream to a callback taking a pointer to the location in memory to copy into/from.

This will be called whenever the stream will be read or was written by the device. The given memory location will only be valid to read from or write to for the duration of the callback.

Parameters

handle	The name of the stream to connect to.
index	The replicated index to connect to.
f	Callback to be called whenever the stream is to be read or was written by the device.

◆ copyFromRemoteBuffer()

void poplar::Engine::copyFromRemoteBuffer	(	StringRef	handle,
		void *	w,
		uint64_t	repeatIndex,
		unsigned	replicationIndex = `0`
	)

Copy from a remote buffer to a user buffer w.

Parameters

handle	The name of the remote buffer to copy from.
w	The user buffer to copy to.
repeatIndex	The index in the remote buffer to copy from.
replicationIndex	The replicated graph index.

Deprecated: Use Engine::copyFromRemoteBuffer(StringRef,gccs::ArrayRef<T>,unsigned,unsigned) instead.

◆ copyToRemoteBuffer()

void poplar::Engine::copyToRemoteBuffer	(	void *	w,
		StringRef	handle,
		uint64_t	repeatIndex,
		unsigned	replicationIndex = `0`
	)

Copy to a remote buffer from a user buffer w.

Parameters

w	The user buffer to copy from.
handle	The remote buffer to copy to.
repeatIndex	The index in the remote buffer to copy to.
replicationIndex	The replicated graph index.

Deprecated: Use Engine::copyToRemoteBuffer(StringRef,gccs::ArrayRef<T>,unsigned,unsigned) instead.

◆ deploy()

void poplar::Engine::deploy ( )

Load the engine.

This loads binary code. The device must have been prepared previously by calling prepare().

◆ disableExecutionProfiling()

void poplar::Engine::disableExecutionProfiling ( )

Pause execution profiling.

Subsequent Engine::run() calls are executed without being profiled until a subsequent call to enableExecutionProfiling().

For example, you can exclude individual programs from a profile like this:

 engine.disableExecutionProfiling();
 engine.run(...);
 engine.enableExecutionProfiling();

◆ enableExecutionProfiling()

void poplar::Engine::enableExecutionProfiling ( )

Enable execution profiling.

Subsequent Engine::run() calls are profiled when executed.

◆ eraseSimulatedError()

void poplar::Engine::eraseSimulatedError ( ErrorLocation const & location )

Undo the effects of Engine::insertSimulatedError().

Parameters

location Any one of the locations passed to Engine::insertSimulatedError().

Exceptions

poplar::poplar_error If there is no simulated error at location.

◆ getReport()

pva::Report poplar::Engine::getReport ( bool reportExecution = true )

Get a PVA Report object that allows access to profiling data for the graph and the execution with this engine.

Subsequent Engine::run() executions may not be accessible through the returned report due to caching.

Parameters

reportExecution Enables access to the execution report (since this engine was constructed/the execution report was last reset). Otherwise, only the graph profile is available.

Exceptions

profiling_disabled If the device is not an IPU or IPU Model.

Returns: A PVA Report object (declared in libpva <pva/pva.hpp>).

◆ getSimulatedErrorLocations() [1/2]

std::vector< ErrorLocation > poplar::Engine::getSimulatedErrorLocations	(	StringRef	vertexName,
		unsigned	tile = `~0`
	)		const

Return the locations of a program from a vertex name.

Similar to Engine::getSimulatedErrorLocations(programId, tile) but looks up the error locations using the name of a Vertex.

Returns: The locations of the vertex code. Any error simulated at these locations may be executed by any worker thread on the tiles.

◆ getSimulatedErrorLocations() [2/2]

std::vector< ErrorLocation > poplar::Engine::getSimulatedErrorLocations	(	unsigned	programId,
		unsigned	tile = `~0`
	)		const

Return the locations of a program from a program ID.

It's possible a program exists on multiple tiles so tile can be used to disambiguate the tile on which the error should occur. If tile is not specified then a location will be returned for each tile the program exists on.

Parameters

programId	The program id of the program. This can be looked up in the Graph Analyser tool (look for the "Id" field under "Details" in the "Program Tree" tab).
tile	The tile in the device. For devices containing multiple IPUs the range of valid tiles is [0, numIpus * numTilesPerIpu).

Note: If programId specifies a poplar::Vertex then this will return the location of the launch routine for the poplar::Vertex and not the location of the code that comprises the poplar::Vertex. Use the vertexName overload of this function to specify an error inside a poplar::Vertex.

Returns: An object that uniquely identifies the location of the error within the device. An ErrorLocation is only valid for the program for which it is generated.

◆ getTimeStamp()

TimerTimePoint poplar::Engine::getTimeStamp ( )

Get a record of the current host and device time.

Details depend on the underlying device used.

◆ insertSimulatedError()

void poplar::Engine::insertSimulatedError	(	ErrorCode	error,
		ErrorLocation const &	location
	)

Simulate an error.

This function causes the program to generate an error when it gets to the specified location. This can be useful for failure testing.

This function must be called after Engine::load() and before Engine::run().

You can simulate many errors at the same time, provided each error is simulated at a unique location. However, only a subset of the errors may be reported because the execution of the program will stop as soon as any one of the errors is detected.

Example usage:

const unsigned tile = 0;
const poplar::ErrorCode error = poplar::ErrorCode::IPUSOFTERR;
const std::vector<poplar::ErrorLocation> locations =
    engine.getSimulatedErrorLocations("MyComputeVertex", tile);
 
engine.load(device);
engine.insertSimulatedError(error, locations[0]);
try {
  engine.run();
} catch (poplar::runtime_error const& exception) {
  assert(exception.errors.size() == 1);
  assert(exception.errors[0].isSimulated);
  assert(exception.errors[0].code == error);
  assert(exception.errors[0].location == locations[0]);
  std::cerr << "ErrorCode: " << exception.what() << '\n';
}

Parameters

error	The type of error to simulate. See poplar::ErrorCode for a list of possible errors to simulate.
location	Where to simulate the error. The location of an error must be unique from other simulated errors. See Engine::getSimulatedErrorLocations for how to specify an error location based on some program information.

Exceptions

poplar::poplar_error	If Engine::load() has not been called yet.
poplar::poplar_error	If an error is already being simulated at `location`.

◆ listStreams()

std::vector< std::string > poplar::Engine::listStreams ( ) const

Return a list of all streams in the engine.

Returns: Vector of strings, each of which is a stream's handle postfixed with '+' or '-' indicating whether the stream is a host-write or a host-read respectively.

◆ load()

void poplar::Engine::load ( const Device & device )

Load the compiled program/graph onto a device.

This function will load all binary code and data onto the device ready for execution. This is a shortcut to call the prepare() and deploy() functions in succession.

Parameters

device The device to load onto.

◆ loadAndRun()

void poplar::Engine::loadAndRun	(	const Device &	device,
		unsigned	prog = `0`
	)

Run the graph program.

This function will load the program/graph onto the device and then execute the graph program.

Parameters

device	The device to load onto.
prog	The index of the program to run. If this is greater than or equal to the number of programs given in the constructor then an exception is thrown.

◆ prepare() [1/2]

void poplar::Engine::prepare ( const Device & device )

Prepare the device for loading.

This configures the device ready for loading binary code, which is done by calling deploy().

Parameters

device The device to load onto.

◆ prepare() [2/2]

void poplar::Engine::prepare	(	const Device &	device,
		const RuntimeOptions &	runOptions
	)

Prepare the device for loading.

This configures the device ready for loading binary code, which is done by calling deploy().

Parameters

device	The device to load onto.
runOptions	Set of parameters to adjust runtime behaviour.

◆ printProfileSummary()

void poplar::Engine::printProfileSummary	(	std::ostream &	outputStream,
		const OptionFlags &	opt = `{}`
	)

Get and print the summary of a report with the given options.

This is equivalent to getting and printing the summary of both the graph and execution reports using poplar::printProfileSummary().

Parameters

outputStream A stream to write the summary to.

opt

A set of option flags to configure the contents of the report. All can be "true" or "false". The default is "false".
The available options are:

colours: Control the use of colours in the summary output
showVarStorage: Show liveness information for each program
showOptimizations: Show compile optimisation details
showExecutionSteps: Show the execution steps
showPerIpuMemoryUsage: Show memory usage for each IPU

Exceptions

profiling_disabled	If the device is not an IPU or IPU Model.
invalid_option	If any of the options passed in `opt` were not recognised or improperly formatted.

◆ readTensor() [1/3]

template<class T >

void poplar::Engine::readTensor	(	StringRef	handle,
		gccs::ArrayRef< T >	buffer
	)

inline

Synchronous copy of a buffer of non-Quarter type data from a specific tensor in the device into a host-side buffer.

The tensor must have been marked as an output tensor. The buffer must have room for all of the tensor data. The handle should match the one passed to Graph::createHostRead().

See also: Graph::createHostRead()

Parameters

handle	The source host copy handle.
buffer	A view of the host-side buffer that the tensor data is copied into.

◆ readTensor() [2/3]

void poplar::Engine::readTensor	(	StringRef	handle,
		QuarterMetadata &	metadata,
		void *	buf,
		void *	bufEnd
	)

Synchronous copy of a buffer of Quarter type data from a specific tensor in the device into a host-side buffer.

The tensor must have been marked as an output tensor. The buffer must have room for all of the tensor data. The buffer end address is required for size verification. The handle should match the one passed to Graph::createHostRead().

See also: Graph::createHostRead()

Parameters

handle	The source host copy handle.
metadata	The host side variable to which metadata on the device is to be copied.
buf	The destination of the read.
bufEnd	The end address of the destination buffer.

◆ readTensor() [3/3]

void poplar::Engine::readTensor	(	StringRef	handle,
		void *	buf,
		void *	bufEnd
	)

Synchronous copy of a buffer of non-Quarter type data from a specific tensor in the device into a host-side buffer.

The tensor must have been marked as an output tensor. The buffer must have room for all of the tensor data. The buffer end address is required for size verification. The handle should match the one passed to Graph::createHostRead().

See also: Graph::createHostRead()

Parameters

handle	The source host copy handle.
buf	The destination of the read.
bufEnd	The end address of the destination buffer.

Deprecated: For all types except Half and Quarter use Engine::readTensor(StringRef,gccs::ArrayRef<T>) instead.

◆ reportIntervals()

void poplar::Engine::reportIntervals ( std::ostream & outputStream )

Write a CSV data file to a specified output stream.

The data file contains the number of tiles active over time in cycles for compute, synchronisation and exchange phases.
Each row contains the following entries:

begin time in cycles
end time in cycles
number of tiles participating in compute
number of tiles participating in exchange
number of tiles participating in synchronisation

Because tiles execute a number of threads (up to 6) in parallel, a single "thread cycle" may only be executed every 6 tile clock cycles. The cycles reported by this function are tile clock cycles rather than thread cycles.

Parameters

outputStream An output stream for the CSV data to be written to.

Exceptions

profiling_disabled If the device has no profiling enabled.

Deprecated:
Use libpva instead.

◆ reportTiming()

static std::string poplar::Engine::reportTiming	(	const TimerTimePoint &	start,
		const TimerTimePoint &	end
	)

static

Get a timing report for the measured interval.

Details depend on the underlying device used.

Parameters

start	Start time of report
end	End time of report

◆ resetExecutionProfile()

void poplar::Engine::resetExecutionProfile ( )

Reset execution profile.

When programs are run their profiles are appended to the execution profile. This discards profiling information for previously executed programs.

Deprecated: Use the PVA library instead.

◆ run() [1/2]

void poplar::Engine::run	(	unsigned	prog,
		const std::string &	debugName,
		const RuntimeOptions &	options
	)

Run the graph program.

This function is similar to run(prog, debugName) and allows the user to override runtime parameters via an instance of RuntimeOptions.

Parameters

prog	The index of the program to run. If this is greater than or equal to the number of programs given in the constructor then an exception is thrown.
debugName	Run name (for debugging/analysis).
options	Multiple parameters that alter the execution behaviour.

◆ run() [2/2]

void poplar::Engine::run	(	unsigned	prog = `0`,
		const std::string &	debugName = `""`
	)

Run the graph program.

This function will execute the graph program. Note that the program needs to have already been loaded onto a device otherwise an exception will occur.

Parameters

prog	The index of the program to run. If this is greater than or equal to the number of programs given in the constructor then an exception is thrown.
debugName	Run name (for debugging/analysis).

◆ setPrintStream()

void poplar::Engine::setPrintStream ( std::ostream & stream )

Set output stream for printf commands.

Parameters

stream The output stream to use.

◆ setPrintTensorStream()

void poplar::Engine::setPrintTensorStream ( std::ostream & stream )

Set the output stream for PrintTensor programs.

By default, tensors are printed to stderr.

Parameters

stream The output stream to use.

◆ stop()

void poplar::Engine::stop ( )

Stop the graph program.

This function attempts to stop a long-running well-behaved graph program asynchronously. After this call, run() will return. Only programs that execute host syncs can be stopped. The device will be left in an undefined state and no more programs can be run before the device is reset. Stream accesses may occur before the program actually stops.

◆ writeTensor() [1/3]

template<class T >

void poplar::Engine::writeTensor	(	StringRef	handle,
		ArrayRef< T >	buffer
	)

inline

Synchronous copy of a buffer of non-Quarter type data from the host to a specific tensor in the device.

The tensor must have been marked as an input tensor. The handle should match the one passed to Graph::createHostWrite().

See also: Graph::createHostWrite()

Parameters

handle	The destination host copy handle.
buffer	A view of the host-side buffer that the tensor data is copied from.

◆ writeTensor() [2/3]

void poplar::Engine::writeTensor	(	StringRef	handle,
		const QuarterMetadata &	metadata,
		const void *	buf,
		const void *	bufEnd
	)

Synchronous copy of a buffer of Quarter type data from the host to a specific tensor in the device.

The tensor must have been marked as an input tensor. The buffer end address is required for size verification. The handle should match the one passed to Graph::createHostWrite().

See also: Graph::createHostWrite()

Parameters

handle	The destination host copy handle.
metadata	The host side variable from which metadata is to be copied to the device.
buf	The source of the write.
bufEnd	The end address of the source buffer.

◆ writeTensor() [3/3]

void poplar::Engine::writeTensor	(	StringRef	handle,
		const void *	buf,
		const void *	bufEnd
	)

Synchronous copy of a buffer of non-Quarter type data from the host to a specific tensor in the device.

The tensor must have been marked as an input tensor. The buffer end address is required for size verification. The handle should match the one passed to Graph::createHostWrite().

See also: Graph::createHostWrite()

Parameters

handle	The destination host copy handle.
buf	The source of the write.
bufEnd	The end address of the source buffer.

Deprecated: For all types except Half and Quarter use Engine::writeTensor(StringRef,gccs::ArrayRef<T>) instead.

The documentation for this class was generated from the following file:

include/poplar/Engine.hpp

Classes

Public Types

Public Member Functions

Static Public Member Functions

Detailed Description

Member Typedef Documentation

◆ ProgressFunc

Constructor & Destructor Documentation

◆ Engine() [1/3]

◆ Engine() [2/3]

◆ Engine() [3/3]

Member Function Documentation

◆ clearSimulatedErrors()

◆ connectHostFunction()

◆ connectStream() [1/4]

◆ connectStream() [2/4]

◆ connectStream() [3/4]

◆ connectStream() [4/4]

◆ connectStreamToCallback() [1/2]

◆ connectStreamToCallback() [2/2]

◆ copyFromRemoteBuffer()

◆ copyToRemoteBuffer()

◆ deploy()

◆ disableExecutionProfiling()

◆ enableExecutionProfiling()

◆ eraseSimulatedError()

◆ getReport()

◆ getSimulatedErrorLocations() [1/2]

◆ getSimulatedErrorLocations() [2/2]

◆ getTimeStamp()

◆ insertSimulatedError()

◆ listStreams()

◆ load()

◆ loadAndRun()

◆ prepare() [1/2]

◆ prepare() [2/2]

◆ printProfileSummary()

◆ readTensor() [1/3]

◆ readTensor() [2/3]

◆ readTensor() [3/3]

◆ reportIntervals()

◆ reportTiming()

◆ resetExecutionProfile()

◆ run() [1/2]

◆ run() [2/2]

◆ setPrintStream()

◆ setPrintTensorStream()

◆ stop()

◆ writeTensor() [1/3]

◆ writeTensor() [2/3]

◆ writeTensor() [3/3]