3#ifndef poplar_Engine_hpp
4#define poplar_Engine_hpp
6#include <gccs/CompilerFeatures.hpp>
8#include <poplar/Executable.hpp>
9#include <poplar/Graph.hpp>
10#include <poplar/HostFunctionCallback.hpp>
11#include <poplar/Module.hpp>
12#include <poplar/OptionFlags.hpp>
13#include <poplar/ProfileValue.hpp>
14#include <poplar/Program.hpp>
15#include <poplar/Quarter.hpp>
16#include <poplar/RuntimeOptions.hpp>
17#include <poplar/StreamCallback.hpp>
18#include <poplar/StringRef.hpp>
19#include <poplar/Type.hpp>
/// Signature of a progress-reporting callback: a void callable that receives
/// two int arguments. It is the type of the `progressCallBack` parameter of
/// the compileGraph() / compileModule() functions declared in this header.
/// NOTE(review): the meaning of the two ints is not visible in this excerpt
/// (presumably "current step" and "total steps") -- confirm before documenting.
38using ProgressFunc = std::function<void(
int,
int)>;
/// Constant with the fixed value 48.
/// NOTE(review): neither the unit nor the consumer of this value is visible
/// in this excerpt (presumably a per-worker scratch area size) -- confirm.
43const static unsigned WORKER_SCRATCH_SIZE = 48;
438 Engine(
const Graph &graph, ArrayRef<program::Program> progs,
441 const DebugContext &debugContext = {});
473 const DebugContext &debugContext = {});
/// Run the graph program.
///
/// \param prog      Identifies the program to run; defaults to 0.
///                  NOTE(review): presumably an index into the list of
///                  programs the engine was constructed with -- confirm.
/// \param debugName Optional name associated with this run; defaults to the
///                  empty string.
541 void run(
unsigned prog = 0, const std::
string &debugName = "");
567 void run(
unsigned prog, const std::
string &debugName,
/// Shared handle to the core timing information object that this PImpl
/// wrapper forwards to.
589 std::shared_ptr<core::TimerTimePoint> impl;
741 void readTensor(StringRef handle, gccs::ArrayRef<T> buffer) {
742 using HostType =
typename std::decay<T>::type;
743 static_assert(not std::is_const<T>::value,
744 "readTensor requires a writable buffer");
745 auto cast = [](T *p) {
746 return const_cast<void *
>(
static_cast<const void *
>(p));
749 cast(buffer.begin()),
cast(buffer.end()));
/// Synchronous copy of a buffer of non-Quarter type data from the host to a
/// specific tensor in the device.
///
/// \param handle Handle naming the destination tensor.
/// \param buf    Pointer to the start of the host buffer to copy from.
/// \param bufEnd Pointer marking the end of the host buffer.
///               NOTE(review): the templated wrapper passes `buffer.end()`
///               for the end pointer, so this is presumably one-past-the-end
///               -- confirm.
766 void writeTensor(StringRef handle,
const void *buf,
const void *bufEnd);
782 const void *buf,
const void *bufEnd);
794 template <
class T>
void writeTensor(StringRef handle, ArrayRef<T> buffer) {
795 using HostType =
typename std::decay<T>::type;
796 auto cast = [](
const T *p) {
797 return const_cast<void *
>(
static_cast<const void *
>(p));
800 cast(buffer.begin()),
cast(buffer.end()));
830 const gccs::ArrayRef<QuarterMetadata> &metadata,
831 void *begin,
void *end);
844 using HostType =
typename std::decay<T>::type;
845 auto cast = [](T *p) {
846 return const_cast<void *
>(
static_cast<const void *
>(p));
849 std::is_const<T>::value,
cast(buffer.begin()),
918 unsigned replicationIndex = 0);
922 uint64_t repeatIndex,
923 unsigned replicationIndex = 0) {
924 static_assert(not std::is_const<T>::value,
925 "Engine::copyFromRemoteBuffer requires a writable buffer");
926 auto cast = [](T *p) {
927 return const_cast<void *
>(
static_cast<const void *
>(p));
929 using HostType =
typename std::decay<T>::type;
930 copyRemoteBuffer(handle, repeatIndex, replicationIndex,
932 cast(buffer.begin()), buffer.size());
947 unsigned replicationIndex = 0);
951 uint64_t repeatIndex,
unsigned replicationIndex = 0) {
952 auto cast = [](T *p) {
953 return const_cast<void *
>(
static_cast<const void *
>(p));
955 using HostType =
typename std::decay<T>::type;
956 copyRemoteBuffer(handle, repeatIndex, replicationIndex,
958 cast(buffer.begin()), buffer.size());
1084 std::vector<ErrorLocation>
1095 std::vector<ErrorLocation>
/// Construct an Engine directly from an existing core implementation object,
/// taking ownership of it via the unique_ptr argument.
/// NOTE(review): this single-argument constructor is not marked `explicit`;
/// check whether the implicit conversion from unique_ptr is intended.
1099 Engine(std::unique_ptr<core::Engine>);
/// Return a const reference to the underlying core engine.
/// Dereferences `impl` without a null check, so `impl` must be non-null when
/// this is called.
1100 const core::Engine &getImpl()
const {
return *impl; }
1104 const gccs::ArrayRef<QuarterMetadata> &metadatas,
1105 bool readOnly,
void *begin,
void *end);
1107 void copyTensor(StringRef handle,
const Type &type,
bool read,
void *begin,
1111 bool read,
void *begin,
void *end);
/// Untyped readTensor overload that additionally takes a metadata pointer.
///
/// \param handle   Handle naming the tensor to read.
/// \param metadata Untyped pointer to metadata.
///                 NOTE(review): presumably points at a QuarterMetadata for
///                 Quarter tensors and is null otherwise -- confirm against
///                 the definition.
/// \param buf      Start of the writable host buffer.
/// \param bufEnd   End of the writable host buffer.
1113 void readTensor(StringRef handle,
void *metadata,
void *buf,
void *bufEnd);
/// Untyped writeTensor overload that additionally takes a metadata pointer.
///
/// \param handle   Handle naming the tensor to write.
/// \param metadata Untyped pointer to metadata.
///                 NOTE(review): presumably points at a QuarterMetadata for
///                 Quarter tensors and is null otherwise -- confirm against
///                 the definition.
/// \param buf      Start of the read-only host buffer.
/// \param bufEnd   End of the read-only host buffer.
1115 void writeTensor(StringRef handle,
void *metadata,
const void *buf,
1116 const void *bufEnd);
/// Common entry point used by the templated copyFromRemoteBuffer() and
/// copyToRemoteBuffer() wrappers in this header, which forward their handle,
/// repeat index, replication index, the buffer's begin pointer and the
/// buffer's element count.
///
/// \param handle           Handle naming the remote buffer.
/// \param repeatIndex      Repeat index within the remote buffer.
///                         NOTE(review): declared `unsigned` here, but the
///                         visible templated callers hold a `uint64_t`
///                         repeatIndex, so the value is implicitly narrowed
///                         at the call -- confirm this is intended.
/// \param replicationIndex Replication index to operate on.
/// \param type             Pointer to the element type.
///                         NOTE(review): whether null is accepted is not
///                         visible here -- confirm.
/// \param read             Selects the copy direction.
///                         NOTE(review): presumably true means copying from
///                         the remote buffer into host memory -- confirm.
/// \param begin            Start of the host buffer.
/// \param numElements      Number of elements in the host buffer.
1118 void copyRemoteBuffer(StringRef handle,
unsigned repeatIndex,
1119 unsigned replicationIndex,
const Type *type,
bool read,
1120 void *begin,
size_t numElements);
/// Uniquely-owned core engine implementation; getImpl() returns a reference
/// to the object this points at.
1122 std::unique_ptr<core::Engine> impl;
1153 ProgressFunc progressCallBack = ProgressFunc(),
1154 const DebugContext &debugContext = {});
1155Executable
compileGraph(
const Graph &graph, ArrayRef<program::Program> progs,
1157 ProgressFunc progressCallBack = ProgressFunc(),
1158 const DebugContext &debugContext = {});
1186 ProgressFunc progressCallBack = ProgressFunc(),
1187 const DebugContext &debugContext = {});
1188Module
compileModule(
const Graph &graph, program::Program prog,
1190 ProgressFunc progressCallBack = ProgressFunc(),
1191 const DebugContext &debugContext = {});
1223 ProgressFunc progressCallBack = ProgressFunc(),
1224 const DebugContext &debugContext = {});
1225Module
compileModule(
const Graph &graph, program::Program prog,
1226 const Preallocations &preallocations,
1228 ProgressFunc progressCallBack = ProgressFunc(),
1229 const DebugContext &debugContext = {});
1263 ProgressFunc progressCallBack = ProgressFunc(),
1264 const DebugContext &debugContext = {});
1265Executable
compileGraph(
const Graph &graph, ArrayRef<program::Program> progs,
1266 const Preallocations &preallocations,
1268 ProgressFunc progressCallBack = ProgressFunc(),
1269 const DebugContext &debugContext = {});
DebugContext gathers the common external parameters of the context of an operation.
Definition: DebugContext.hpp:221
A device refers to a physical entity that can execute code.
Definition: Device.hpp:26
PImpl interface to core timing information.
Definition: Engine.hpp:584
A graph compute engine.
Definition: Engine.hpp:390
Engine(Graph &&graph, program::Program prog, const OptionFlags &opt={}, ProgressFunc progressCallBack=ProgressFunc(), const DebugContext &debugContext={})
Construct the engine from a graph and a program.
void resetExecutionProfile()
Reset execution profile.
void reportIntervals(std::ostream &outputStream)
Write a CSV data file to a specified output stream.
void setPrintStream(std::ostream &stream)
Set output stream for printf commands.
std::vector< ErrorLocation > getSimulatedErrorLocations(unsigned programId, unsigned tile=~0) const
Return the locations of a program from a program ID.
void clearSimulatedErrors()
Undo the effects of all Engine::insertSimulatedError() calls.
std::function< void(int, int)> ProgressFunc
Callback function used to indicate engine compilation progress.
Definition: Engine.hpp:407
void run(unsigned prog=0, const std::string &debugName="")
Run the graph program.
Engine(Graph &&graph, ArrayRef< program::Program > progs, const OptionFlags &opt={}, ProgressFunc progressCallBack=ProgressFunc(), const DebugContext &debugContext={})
Construct the engine from a graph and a list of programs.
void readTensor(StringRef handle, gccs::ArrayRef< T > buffer)
Synchronous copy of a buffer of non-Quarter type data from a specific tensor in the device into a hos...
Definition: Engine.hpp:741
void deploy()
Load the engine.
void writeTensor(StringRef handle, const QuarterMetadata &metadata, const void *buf, const void *bufEnd)
Synchronous copy of a buffer of Quarter type data from the host to a specific tensor in the device.
void connectStream(StringRef handle, const gccs::ArrayRef< T > &buffer)
Connect a stream of non-Quarter type to a circular buffer in memory.
Definition: Engine.hpp:843
void readTensor(StringRef handle, void *buf, void *bufEnd)
Synchronous copy of a buffer of non-Quarter type data from a specific tensor in the device into a hos...
void stop()
Stop the graph program.
void connectStreamToCallback(StringRef handle, StreamCallbackHandle f)
Connect a stream to a callback taking a pointer to the location in memory to copy into/from.
void load(const Device &device)
Load the compiled program/graph onto a device.
void connectStream(StringRef handle, void *p)
Connect a stream of non-Quarter type to a fixed location in memory.
void eraseSimulatedError(ErrorLocation const &location)
Undo the effects of Engine::insertSimulatedError().
void copyFromRemoteBuffer(StringRef handle, void *w, uint64_t repeatIndex, unsigned replicationIndex=0)
Copy from a remote buffer to a user buffer w.
void connectHostFunction(StringRef handle, unsigned index, HostCallbackHandle f)
Connect a HostFunction to a callback.
void connectStream(StringRef handle, const gccs::ArrayRef< QuarterMetadata > &metadata, void *begin, void *end)
Connect a stream of type Quarter to a circular buffer in memory.
void insertSimulatedError(ErrorCode error, ErrorLocation const &location)
Simulate an error.
void readTensor(StringRef handle, QuarterMetadata &metadata, void *buf, void *bufEnd)
Synchronous copy of a buffer of Quarter type data from a specific tensor in the device into a host-si...
void connectStreamToCallback(StringRef handle, unsigned index, StreamCallbackHandle f)
Connect a replicated stream to a callback taking a pointer to the location in memory to copy into/fro...
void prepare(const Device &device)
Prepare the device for loading.
void writeTensor(StringRef handle, ArrayRef< T > buffer)
Synchronous copy of a buffer of non-Quarter type data from the host to a specific tensor in the devic...
Definition: Engine.hpp:794
std::vector< std::string > listStreams() const
Return a list of all streams in the engine.
static std::string reportTiming(const TimerTimePoint &start, const TimerTimePoint &end)
Get a timing report for the measured interval.
void copyToRemoteBuffer(void *w, StringRef handle, uint64_t repeatIndex, unsigned replicationIndex=0)
Copy to a remote buffer from a user buffer w.
pva::Report getReport(bool reportExecution=true)
Get a PVA Report object that allows access to profiling data for the graph and the execution with thi...
void setPrintTensorStream(std::ostream &stream)
Set the output stream for PrintTensor programs.
void printProfileSummary(std::ostream &outputStream, const OptionFlags &opt={})
Get and print the summary of a report with the given options.
void connectStream(StringRef handle, void *begin, void *end)
Connect a stream of non-Quarter to a circular buffer in memory.
Engine(Executable &&exe, const OptionFlags &opt={})
Construct the engine from a precompiled executable.
void writeTensor(StringRef handle, const void *buf, const void *bufEnd)
Synchronous copy of a buffer of non-Quarter type data from the host to a specific tensor in the devic...
std::vector< ErrorLocation > getSimulatedErrorLocations(StringRef vertexName, unsigned tile=~0) const
Return the locations of a program from a vertex name.
void disableExecutionProfiling()
Pause execution profiling.
void enableExecutionProfiling()
Enable execution profiling.
void serializeExecutable(std::ostream &out) const
Serialize the executable used by the engine.
TimerTimePoint getTimeStamp()
Get a record of the current host and device time.
OptionFlags getEngineOptions() const
Returns the options the engine was created with.
void loadAndRun(const Device &device, unsigned prog=0)
Run the graph program.
An instance of poplar::Executable contains all of the information needed to run a program on an IPU d...
Definition: Executable.hpp:17
This class represents a graph program to be executed on the IPU.
Definition: Graph.hpp:52
Wrapper for HostCallback instances.
Definition: HostFunctionCallback.hpp:31
An instance of poplar::Module contains all of the information needed to run a program on an IPU devic...
Definition: Module.hpp:17
A set of option/value string flags to be used in various APIs.
Definition: OptionFlags.hpp:24
This class describes where external variables and other pre-existing allocations are in memory.
Definition: Preallocations.hpp:21
A group of properties that are reconfigurable in each engine execution.
Definition: RuntimeOptions.hpp:124
Wrapper for StreamCallback instances.
Definition: StreamCallback.hpp:161
Class representing device data types.
Definition: Type.hpp:42
This class represents a control program that executes operations on the graph.
Definition: Program.hpp:30
Poplar classes and functions.
Definition: ArrayRef.hpp:14
Executable compileGraph(Graph &&graph, ArrayRef< program::Program > progs, const OptionFlags &opt={}, ProgressFunc progressCallBack=ProgressFunc(), const DebugContext &debugContext={})
Compile the given graph and programs to make an executable that can be executed using a poplar::Engin...
ErrorCode
Unique error codes for all the possible errors.
Definition: Error.hpp:31
Module compileModule(Graph &&graph, program::Program prog, const OptionFlags &opt={}, ProgressFunc progressCallBack=ProgressFunc(), const DebugContext &debugContext={})
Compile the given graph and single program to make a module.
poplar::Tensor cast(poplar::Graph &graph, const poplar::Tensor &src, const poplar::Type &dstType, poplar::program::Sequence &prog, const poplar::DebugContext &debugContext={})
Cast elements of the specified src tensor to dstType, returning the result as a new tensor.
Uniquely identify the location of an error within a Poplar Device.
Definition: Error.hpp:97
Template structure to relate a host type to a device type.
Definition: Type.hpp:192