#!/usr/bin/env python3
# Copyright (c) 2022 Graphcore Ltd. All rights reserved.

"""
This example shows how to load a model from PopEF files and send
inference requests using all available ModelRunner execution modes.
"""

import argparse
import sys
from datetime import timedelta

import numpy as np

import model_runtime


def main():
    parser = argparse.ArgumentParser("Model runner simple example.")
    parser.add_argument(
        "-p",
        "--popef",
        type=str,
        metavar='popef_file_path',
        help="A collection of PopEF files containing the model.",
        nargs='+',
        required=True)
    args = parser.parse_args()

    # Create model runner
    config = model_runtime.ModelRunnerConfig()
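    # Block until an IPU device becomes available, for at most 10 minutes,
    # checking once per second.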
    config.device_wait_config = model_runtime.DeviceWaitConfig(
        model_runtime.DeviceWaitStrategy.WAIT_WITH_TIMEOUT,
        timeout=timedelta(seconds=600),
        sleepTime=timedelta(seconds=1))

    print("Creating ModelRunner with", config)
    model_runner = model_runtime.ModelRunner(
        model_runtime.PopefPaths(args.popef), config=config)

    print("Preparing input tensors:")
    input_descriptions = model_runner.getExecuteInputs()
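    # Create one random NumPy array per model input, using the shape and
    # dtype reported by each input description.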
    input_tensors = [
        np.random.randn(*input_desc.shape).astype(input_desc.numpy_data_type())
        for input_desc in input_descriptions
    ]
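    # An InputMemoryView associates each input tensor name with the memory
    # of one of the NumPy arrays prepared above.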
    input_view = model_runtime.InputMemoryView()

    for input_desc, input_tensor in zip(input_descriptions, input_tensors):
        print("\tname:", input_desc.name, "shape:", input_tensor.shape,
              "dtype:", input_tensor.dtype)
        input_view[input_desc.name] = input_tensor

    print("Running synchronous execution mode. The memory of the output "
          "tensors is allocated by the ModelRunner object.")
    synchronousExecutionModeLibraryAllocatedOutput(model_runner, input_view)

    print("Running synchronous execution mode. The memory of the output "
          "tensors is allocated by the user.")
    synchronousExecutionModeUserAllocatedOutput(model_runner, input_view)

    print("Running asynchronous execution mode. The memory of the output "
          "tensors is allocated by the ModelRunner object.")
    asynchronousExecutionModeLibraryAllocatedOutput(model_runner, input_view)

    print("Running asynchronous execution mode. The memory of the output "
          "tensors is allocated by the user.")
    asynchronousExecutionModeUserAllocatedOutput(model_runner, input_view)

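    # The same requests can also be issued with plain NumPy arrays held in
    # a dict keyed by input tensor name.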
    input_numpy = {
        input_desc.name: input_tensor
        for input_desc, input_tensor in zip(input_descriptions, input_tensors)
    }

    print("Running synchronous execution mode. The input is a numpy array. "
          "The memory of the output tensors is allocated by the ModelRunner "
          "object.")
    synchronousExecutionModeLibraryAllocatedNumpyInputOutput(
        model_runner, input_numpy)

    print("Running synchronous execution mode. The input and the output are "
          "numpy arrays. The memory of the output tensors is allocated by the "
          "user. ")
    synchronousExecutionModeUserAllocatedNumpyInputOutput(
        model_runner, input_numpy)

    print("Running asynchronous execution mode. The input and the output are "
          "numpy arrays. The memory of the output tensors is allocated by "
          "the ModelRunner object.")
    asynchronousExecutionModeLibraryAllocatedNumpyOutput(
        model_runner, input_numpy)

    print("Running asynchronous execution mode. The input and the output are "
          "numpy arrays. The memory of the output tensors is allocated by "
          "the user.")
    asynchronousExecutionModeUserAllocatedNumpyOutput(model_runner,
                                                      input_numpy)

    print("Success: exiting")
    return 0


def synchronousExecutionModeLibraryAllocatedOutput(model_runner, input_view):
    print("Sending single synchronous request with random data. Output "
          "allocated by ModelRunner.")
    result = model_runner.execute(input_view)

    output_descriptions = model_runner.getExecuteOutputs()
    print("Processing output tensors:")
    for output_desc in output_descriptions:
        output_tensor = np.frombuffer(
            result[output_desc.name],
            dtype=output_desc.numpy_data_type()).reshape(output_desc.shape)
        print("\tname:", output_desc.name, "shape:", output_tensor.shape,
              "dtype:", output_tensor.dtype, "\n", output_tensor)


def synchronousExecutionModeUserAllocatedOutput(model_runner, input_view):
    output_descriptions = model_runner.getExecuteOutputs()
    print("Preparing memory for output tensors")
    output_tensors = [
        np.zeros(output_desc.shape, dtype=output_desc.numpy_data_type())
        for output_desc in output_descriptions
    ]

    print("Creating model_runtime.OutputMemoryView()")
    output_view = model_runtime.OutputMemoryView()
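    # Bind each pre-allocated NumPy array to its output name; execute()
    # writes the results directly into these buffers.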
    for desc, tensor in zip(output_descriptions, output_tensors):
        print("\tname:", desc.name, "shape:", tensor.shape, "dtype:",
              tensor.dtype)
        output_view[desc.name] = tensor

    print("Sending single synchronous request with random data")
    model_runner.execute(input_view, output_view)
    print("Processing output tensors:")
    for desc, tensor in zip(output_descriptions, output_tensors):
        print("\tname:", desc.name, "shape", tensor.shape, "dtype",
              tensor.dtype, "\n", tensor)


def synchronousExecutionModeLibraryAllocatedNumpyInputOutput(
        model_runner, numpy_input):
    output_descriptions = model_runner.getExecuteOutputs()

    print("Sending single synchronous request random data (numpy array)")
    output_tensors = model_runner.execute(numpy_input)
    print("Processing output tensors (numpy dict):")
    for desc in output_descriptions:
        tensor = output_tensors[desc.name]
        print("\tname:", desc.name, "shape", tensor.shape, "dtype",
              tensor.dtype, "\n", tensor)


def synchronousExecutionModeUserAllocatedNumpyInputOutput(
        model_runner, numpy_input):
    output_descriptions = model_runner.getExecuteOutputs()
    print("Preparing memory for output tensors")
    numpy_output = {}
    for output_desc in output_descriptions:
        numpy_output[output_desc.name] = np.zeros(
            output_desc.shape, dtype=output_desc.numpy_data_type())

    print("Sending single synchronous request with random data")
    model_runner.execute(numpy_input, numpy_output)
    print("Processing output tensors (numpy dict):")
    for desc in output_descriptions:
        tensor = numpy_output[desc.name]
        print("\tname:", desc.name, "shape", tensor.shape, "dtype",
              tensor.dtype, "\n", tensor)


def asynchronousExecutionModeLibraryAllocatedOutput(model_runner, input_view):
    print("Sending single asynchronous request with random data. Output "
          "allocated by ModelRunner.")
    result = model_runner.executeAsync(input_view)
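    # executeAsync() returns immediately; the output buffers are only safe
    # to read after wait() confirms the request has completed.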

    print("Waiting for output allocated by ModelRunner:")
    result.wait()
    print("Results available")

    output_descriptions = model_runner.getExecuteOutputs()
    print("Processing output tensors:")
    for output_desc in output_descriptions:
        output_tensor = np.frombuffer(
            result[output_desc.name],
            dtype=output_desc.numpy_data_type()).reshape(output_desc.shape)
        print("\tname:", output_desc.name, "shape:", output_tensor.shape,
              "dtype:", output_tensor.dtype, "\n", output_tensor)


def asynchronousExecutionModeUserAllocatedOutput(model_runner, input_view):
    output_descriptions = model_runner.getExecuteOutputs()
    print("Preparing memory for output tensors")
    output_tensors = [
        np.zeros(output_desc.shape, dtype=output_desc.numpy_data_type())
        for output_desc in output_descriptions
    ]

    print("Creating model_runtime.OutputMemoryView()")
    output_view = model_runtime.OutputMemoryView()
    for desc, tensor in zip(output_descriptions, output_tensors):
        print("\tname:", desc.name, "shape:", tensor.shape, "dtype:",
              tensor.dtype)
        output_view[desc.name] = tensor

    print("Sending single asynchronous request with random data")
    future = model_runner.executeAsync(input_view, output_view)

    print("Waiting for the output.")
    future.wait()
    print("Results available.")
    print("Processing output tensors:")
    for desc, tensor in zip(output_descriptions, output_tensors):
        print("\tname:", desc.name, "shape", tensor.shape, "dtype",
              tensor.dtype, "\n", tensor)


def asynchronousExecutionModeLibraryAllocatedNumpyOutput(
        model_runner, numpy_input):
    print("Sending single asynchronous request with random data")
    future = model_runner.executeAsync(numpy_input)

    print("Waiting for the output.")
    future.wait()
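    # The result buffers are owned by the ModelRunner. Three access patterns
    # are shown below: numpy() and np.array(copy=True) both copy the data,
    # while np.array(copy=False) creates a zero-copy view of the buffer.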
    for desc in model_runner.getExecuteOutputs():
        future_py_array = future[desc.name]

        # Create a np.array copy from the future_py_array buffer using the
        # numpy() method.
        tensor = future_py_array.numpy()
        print("\tname:", desc.name, "shape", tensor.shape, "dtype",
              tensor.dtype, "tensor id", id(tensor), "\n", tensor)

        # Create a np.array copy from the future_py_array buffer
        # (allocated by the ModelRunner instance).
        tensor_copy = np.array(future_py_array, copy=True)
        print("Tensor copy", tensor_copy, "tensor id", id(tensor_copy))

        # Avoid copying: create a np.array view over the future_py_array
        # buffer (allocated by the ModelRunner instance).
        tensor_view = np.array(future_py_array, copy=False)
        print("Tensor view", tensor_view, "tensor id", id(tensor_view))

        assert not np.shares_memory(tensor_view, tensor_copy)
        assert not np.shares_memory(tensor, tensor_copy)
        assert not np.shares_memory(tensor, tensor_view)


def asynchronousExecutionModeUserAllocatedNumpyOutput(model_runner,
                                                      numpy_input):
    output_descriptions = model_runner.getExecuteOutputs()
    print("Preparing memory for output tensors")
    numpy_output = {}
    for output_desc in output_descriptions:
        numpy_output[output_desc.name] = np.zeros(
            output_desc.shape, dtype=output_desc.numpy_data_type())

    print("Sending single asynchronous request with random data")
    future = model_runner.executeAsync(numpy_input, numpy_output)
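    # The runtime writes the results directly into the arrays held in
    # numpy_output; the future exposes views over those same buffers.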

    print("Waiting for the output.")
    future.wait()
    print("Results available.")
    print("Processing output tensors:")
    for desc in output_descriptions:
        output_tensor = numpy_output[desc.name]
        future_py_array_view = future[desc.name]

        # Create a np.array view from future_py_array_view using the numpy()
        # method; the view points to the np.array held in the numpy_output
        # dict.
        tensor_from_future_object = future_py_array_view.numpy()
        print("\tname:", desc.name, "shape", tensor_from_future_object.shape,
              "dtype", tensor_from_future_object.dtype, "\n",
              tensor_from_future_object)
        assert np.shares_memory(output_tensor, tensor_from_future_object)

        # Create a np.array view from the future_py_array_view buffer; the
        # view also points to the np.array held in the numpy_output dict.
        tensor_view = np.array(future_py_array_view, copy=False)
        assert np.shares_memory(output_tensor, tensor_view)
        assert np.shares_memory(tensor_from_future_object, tensor_view)

        # Create a np.array copy from the future_py_array_view buffer
        tensor_copy = np.array(future_py_array_view, copy=True)
        assert not np.shares_memory(tensor_from_future_object, tensor_copy)
        assert not np.shares_memory(output_tensor, tensor_copy)


if __name__ == "__main__":
    main()
