/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ANDROID_PACKAGES_MODULES_NEURALNETWORKS_RUNTIME_MANAGER_H
#define ANDROID_PACKAGES_MODULES_NEURALNETWORKS_RUNTIME_MANAGER_H

#include <LegacyUtils.h>
#include <android-base/macros.h>
#include <nnapi/IBurst.h>
#include <nnapi/IDevice.h>
#include <nnapi/Types.h>

#include <functional>
#include <memory>
#include <optional>
#include <string>
#include <tuple>
#include <utility>
#include <variant>
#include <vector>

#include "ExecutionCallback.h"
#include "Memory.h"

namespace android {
namespace nn {

// Forward declaration
class Device;
class MetaModel;
class ModelArgumentInfo;

// A unified interface for a reusable execution with cached resources.
// This object provides no thread-safety guarantee. The caller must guarantee there is at most one
// call to RuntimeExecution::compute or RuntimeExecution::computeFenced on the same RuntimeExecution
// object in flight at a time.
class RuntimeExecution {
    DISALLOW_COPY_AND_ASSIGN(RuntimeExecution);

   public:
    RuntimeExecution() = default;
    virtual ~RuntimeExecution() = default;

    virtual std::tuple<int, std::vector<OutputShape>, Timing> compute(
            const SharedBurst& burstController, const OptionalTimePoint& deadline) const = 0;

    // The returned timing information is only valid if the callback is nullptr.
    // Returns error_code, sync_fence, callback and timing.
    virtual std::tuple<int, int, ExecuteFencedInfoCallback, Timing> computeFenced(
            const std::vector<int>& waitFor, const OptionalTimePoint& deadline,
            const OptionalDuration& timeoutDurationAfterFence) const = 0;
};
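
// Illustrative sketch (not part of this header): a RuntimeExecution is obtained from
// RuntimePreparedModel::createReusableExecution() (declared below) and can then be computed
// repeatedly while reusing its cached resources. The names 'preparedModel', 'inputs', 'outputs',
// and 'memories' are caller-provided placeholders, and the int result is assumed to follow the
// ANEURALNETWORKS_* result-code convention used by the runtime.
//
//   auto [n, execution] = preparedModel->createReusableExecution(
//           inputs, outputs, memories, MeasureTiming::NO,
//           /*loopTimeoutDuration=*/{}, /*metaData=*/{});
//   if (n == ANEURALNETWORKS_NO_ERROR) {
//       // At most one compute()/computeFenced() call may be in flight on this object at a time.
//       const auto [status, outputShapes, timing] =
//               execution->compute(/*burstController=*/nullptr, /*deadline=*/{});
//   }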

// A unified interface for actual driver prepared model as well as the CPU.
class RuntimePreparedModel {
    DISALLOW_COPY_AND_ASSIGN(RuntimePreparedModel);

   public:
    RuntimePreparedModel() = default;
    virtual ~RuntimePreparedModel() = default;

    virtual const Device* getDevice() const = 0;
    virtual SharedPreparedModel getInterface() const = 0;

    // Perform computation with given input/output argument info and memory pools.
    virtual std::tuple<int, std::vector<OutputShape>, Timing> execute(
            const std::vector<ModelArgumentInfo>& inputs,
            const std::vector<ModelArgumentInfo>& outputs,
            const std::vector<const RuntimeMemory*>& memories, const SharedBurst& burstController,
            MeasureTiming measure, const OptionalTimePoint& deadline,
            const OptionalDuration& loopTimeoutDuration,
            const std::vector<TokenValuePair>& metaData) const = 0;

    // Perform fenced computation with given input/output argument info and memory pools.
    // The returned timing information is only valid if the callback is nullptr.
    // Returns error_code, sync_fence, callback and timing.
    virtual std::tuple<int, int, ExecuteFencedInfoCallback, Timing> executeFenced(
            const std::vector<ModelArgumentInfo>& inputs,
            const std::vector<ModelArgumentInfo>& outputs,
            const std::vector<const RuntimeMemory*>& memories, const std::vector<int>& waitFor,
            MeasureTiming measure, const OptionalTimePoint& deadline,
            const OptionalDuration& loopTimeoutDuration,
            const OptionalDuration& timeoutDurationAfterFence,
            const std::vector<TokenValuePair>& metaData) const = 0;

    // Create a reusable execution with given input/output argument info and memory pools.
    virtual std::pair<int, std::shared_ptr<RuntimeExecution>> createReusableExecution(
            const std::vector<ModelArgumentInfo>& inputs,
            const std::vector<ModelArgumentInfo>& outputs,
            const std::vector<const RuntimeMemory*>& memories, MeasureTiming measure,
            const OptionalDuration& loopTimeoutDuration,
            const std::vector<TokenValuePair>& metaData) const = 0;

    virtual GeneralResult<SharedBurst> configureExecutionBurst() const = 0;

    virtual MemoryPreference getMemoryPreference() const = 0;
};

using ModelFactory = std::function<Model()>;

struct CacheHandles {
    std::vector<SharedHandle> modelCache;
    std::vector<SharedHandle> dataCache;
};

using CacheDir = std::string;

struct CacheInfo {
    std::variant<CacheDir, CacheHandles> variant;
};

// A unified interface for actual driver devices as well as the CPU
class Device {
    DISALLOW_COPY_AND_ASSIGN(Device);

   public:
    Device() = default;
    virtual ~Device() = default;

    // Introspection methods returning device information
    virtual const std::string& getName() const = 0;
    virtual const std::string& getVersionString() const = 0;
    virtual Version getFeatureLevel() const = 0;
    virtual int32_t getType() const = 0;
    virtual const std::vector<Extension>& getSupportedExtensions() const = 0;

    // See the MetaModel class in MetaModel.h for more details.
    virtual std::vector<bool> getSupportedOperations(const MetaModel& metaModel) const = 0;

    virtual const Capabilities& getCapabilities() const = 0;
    virtual Capabilities::PerformanceInfo getPerformance(OperandType type) const = 0;
    virtual Capabilities::PerformanceInfo getRelaxedFloat32toFloat16PerformanceScalar() const = 0;
    virtual Capabilities::PerformanceInfo getRelaxedFloat32toFloat16PerformanceTensor() const = 0;
    virtual Capabilities::PerformanceInfo getIfPerformance() const = 0;
    virtual Capabilities::PerformanceInfo getWhilePerformance() const = 0;
    virtual std::pair<uint32_t, uint32_t> getNumberOfCacheFilesNeeded() const = 0;
    virtual bool isCachingSupported() const = 0;
    virtual int wait() const = 0;

    virtual std::pair<int, std::shared_ptr<RuntimePreparedModel>> prepareModel(
            const ModelFactory& makeModel, ExecutionPreference preference, Priority priority,
            const OptionalTimePoint& deadline, const CacheInfo& cacheInfo,
            const std::optional<CacheToken>& maybeToken,
            const std::vector<TokenValuePair>& metaData,
            const std::vector<ExtensionNameAndPrefix>& extensionNameAndPrefix) const = 0;

    // The caller is responsible for making sure the MemoryDescriptor only contains
    // PreparedModels from the same Device.
    virtual std::pair<int, std::unique_ptr<RuntimeMemory>> allocate(const MemoryDescriptor& desc,
                                                                    OperandType type) const = 0;
};
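
// Illustrative sketch (not part of this header): how a Device, the DeviceManager (declared
// below), and a RuntimePreparedModel typically fit together. The model, device-selection policy,
// and cache directory are caller-provided placeholders; error handling is elided, and the int
// result is assumed to follow the ANEURALNETWORKS_* result-code convention.
//
//   const auto& devices = DeviceManager::get()->getDrivers();
//   const std::shared_ptr<Device>& device = devices.front();  // selection policy is the caller's
//   ModelFactory makeModel = [&] { return model; };            // 'model' is a caller-provided nn::Model
//   CacheInfo cacheInfo = {.variant = CacheDir{"/data/local/tmp/"}};
//   auto [n, preparedModel] = device->prepareModel(
//           makeModel, ExecutionPreference::FAST_SINGLE_ANSWER, Priority::MEDIUM,
//           /*deadline=*/{}, cacheInfo, /*maybeToken=*/std::nullopt,
//           /*metaData=*/{}, /*extensionNameAndPrefix=*/{});
//   if (n == ANEURALNETWORKS_NO_ERROR) {
//       // 'preparedModel' can now be executed directly via execute()/executeFenced(), or turned
//       // into a reusable execution via createReusableExecution().
//   }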

// Manages the NN HAL devices. Only one instance of this class will exist.
// Use get() to retrieve it.
class DeviceManager {
   public:
    const std::vector<std::shared_ptr<Device>>& getDrivers() const {
        if (mSetCpuOnly || mDebugNNCpuOnly) {
            return mDevicesCpuOnly;
        }
        return mDevices;
    }

    // Gets the runtime version corresponding to getServerFeatureLevelFlag (in ServerFlag.h).
    Version getRuntimeVersion() const { return mRuntimeVersion; }

    // Gets the runtime feature level corresponding to getServerFeatureLevelFlag (in ServerFlag.h).
    int64_t getRuntimeFeatureLevel() const;

    // Convert the internal Version level representation to the NDK representation.
    static int64_t versionToFeatureLevel(Version::Level versionLevel);

    // Returns whether platform telemetry is enabled.
    bool isPlatformTelemetryEnabled() const { return mIsPlatformTelemetryEnabled; }

    // For testing only:
    void setUseCpuOnly(bool useCpuOnly) { mSetCpuOnly = useCpuOnly; }
    bool getUseCpuOnly() const { return mSetCpuOnly; }

    bool syncExecCpu() const { return mSyncExecCpu; }
    bool syncExecRuntime() const { return mSyncExecRuntime; }

    // How to handle graph partitioning?
    // 0 - Don't do graph partitioning.
    // 1 - Do graph partitioning; but fall back to non-partitioned
    //     execution if there is a partitioning failure.
    // 2 - Do graph partitioning, and rely on it; there is no fallback.
    enum { kPartitioningNo = 0, kPartitioningWithFallback = 1, kPartitioningWithoutFallback = 2 };
    uint32_t getPartitioning() const { return mPartitioning; }
    static bool partitioningAllowsFallback(uint32_t partitioning) {
        return partitioning == kPartitioningWithFallback;
    }

    bool strictSlicing() const { return mStrictSlicing; }

    // Returns the singleton manager.
    static DeviceManager* get();

    // Returns the singleton Cpu device.
    static std::shared_ptr<Device> getCpuDevice();

    // The forTest_* functions below are solely intended for use by unit tests.

    // Returns all devices (ignores the cpu-only flags).
    std::vector<std::shared_ptr<Device>> forTest_getDevices() const { return mDevices; }

    // Sets the device list (does not affect cpu-only queries).
    void forTest_setDevices(std::vector<std::shared_ptr<Device>> devices) {
        mDevices = std::move(devices);
    }

    // Register a test device.
    void forTest_registerDevice(const SharedDevice& device) { registerDevice(device); }

    // Re-initialize the list of available devices.
    void forTest_reInitializeDeviceList() {
        mDevices.clear();
        mDevicesCpuOnly.clear();
        findAvailableDevices();
    }

    // Make a test device
    static std::shared_ptr<Device> forTest_makeDriverDevice(const SharedDevice& device);

    bool forTest_isCpuDevice(const ANeuralNetworksDevice* device) const {
        return reinterpret_cast<const Device*>(device) == getCpuDevice().get();
    }

   private:
    // Builds the list of available drivers and queries their capabilities.
    DeviceManager();

    // Adds a device for the manager to use.
    void registerDevice(const SharedDevice& device);

    void findAvailableDevices();

    // Runtime version corresponding to getServerFeatureLevelFlag (in ServerFlag.h).
    Version mRuntimeVersion;

    // Holds whether platform telemetry is enabled, as indicated by getServerTelemetryEnableFlag
    // (in ServerFlag.h).
    bool mIsPlatformTelemetryEnabled;

    // List of all the devices we discovered (including CpuDevice).
    std::vector<std::shared_ptr<Device>> mDevices;

    // We set this one to have CpuDevice only. To be used when m*CpuOnly is true.
    std::vector<std::shared_ptr<Device>> mDevicesCpuOnly;

    // If either of these is true, we'll ignore the drivers that are
    // on the device and run everything on the CPU.
    bool mSetCpuOnly = false;      // set by setUseCpuOnly()
    bool mDebugNNCpuOnly = false;  // derived from system property debug.nn.cpuonly

    // synchronous execution
    bool mSyncExecCpu = true;
    bool mSyncExecRuntime = false;

    static const uint32_t kPartitioningDefault = kPartitioningWithFallback;
    uint32_t mPartitioning = kPartitioningDefault;

    bool mStrictSlicing = false;
};

std::vector<SharedDevice> getDevices();

}  // namespace nn
}  // namespace android

#endif  // ANDROID_PACKAGES_MODULES_NEURALNETWORKS_RUNTIME_MANAGER_H