Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add changes for allocating same offset for input and output tensors #945

Open
wants to merge 4 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion third_party/lib_tflite_micro
Submodule lib_tflite_micro updated 0 files
153 changes: 147 additions & 6 deletions xformer/Analysis/MemoryPlan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,9 @@ int MemoryPlan::getOffset(Value v, int size,

if ((valueInfo[allocatedVal].firstUsed > valueInfo[v].lastUsed) ||
(valueInfo[v].firstUsed > valueInfo[allocatedVal].lastUsed)) {
// No overlap
// There is no overlap with this buffer. We move on until we have a clash.
// When there is a clash, we know we can allocate before that one if there
// is space as we don't overlap with any of those buffers.
continue;
}

Expand All @@ -149,6 +151,70 @@ int MemoryPlan::getOffset(Value v, int size,
return offset;
}

// Builds maps from the model's public tensor names to their MLIR Values so
// that tensor names given on the command line (the
// same-allocation-input-output option) can be resolved to planner buffers.
// - inputTensorMap:  input tensor name  -> function argument Value
// - outputTensorMap: output tensor name -> result Value (matched via NameLoc)
void MemoryPlan::buildInputOutputTensorMaps(
    llvm::StringMap<Value> &inputTensorMap,
    llvm::StringMap<Value> &outputTensorMap) {
  // Helper mapping entry-function tensor names to their user-visible names.
  // argAttr selects "arg_attrs" (inputs) or "res_attrs" (outputs); nameAttr
  // selects the "inputs"/"outputs" key of "tf.entry_function". The resolved
  // names are also appended, in order, to attrsInOrder.
  auto buildMap = [&](StringRef argAttr, StringRef nameAttr,
                      llvm::SmallVector<std::string> &attrsInOrder) {
    llvm::StringMap<std::string> map;
    llvm::SmallVector<std::string> argNames;
    auto funcOp = dyn_cast<func::FuncOp>(op);

    // Tensor names are stored as a comma-separated list in the
    // "tf.entry_function" dictionary attribute. Guard against the attribute
    // being absent (previously this would dereference a null DictionaryAttr).
    llvm::SmallVector<llvm::StringRef, 2> tensorNames;
    if (auto dictAttr =
            funcOp->getAttrOfType<mlir::DictionaryAttr>("tf.entry_function")) {
      if (auto str =
              dictAttr.get(nameAttr).dyn_cast_or_null<mlir::StringAttr>()) {
        str.getValue().split(tensorNames, ',', /*MaxSplit=*/-1,
                             /*KeepEmpty=*/false);
      }
    }

    // Prefer the saved-model index-path names when arg/res attributes are
    // present; otherwise fall back to the entry-function tensor names.
    auto argAttrs = funcOp->getAttrOfType<mlir::ArrayAttr>(argAttr);
    if (argAttrs) {
      for (auto attr : argAttrs) {
        auto d = attr.dyn_cast_or_null<mlir::DictionaryAttr>();

        const ArrayRef<Attribute> indexPathAttrs =
            d.get("tf_saved_model.index_path").cast<ArrayAttr>().getValue();
        auto stringAttr =
            indexPathAttrs[0].dyn_cast_or_null<mlir::StringAttr>();
        if (!stringAttr)
          continue;
        argNames.push_back(stringAttr.getValue().str());
      }
    } else {
      for (size_t i = 0; i < tensorNames.size(); i++) {
        argNames.push_back(tensorNames[i].str());
      }
    }

    // The positional pairing below is only valid if every tensor name has a
    // corresponding resolved name.
    assert(argNames.size() == tensorNames.size());
    for (size_t i = 0; i < tensorNames.size(); i++) {
      map[tensorNames[i].str()] = argNames[i];
      attrsInOrder.push_back(argNames[i]);
    }
    return map;
  };

  llvm::StringMap<std::string> inNameToAttrMap, outNameToAttrMap;
  llvm::SmallVector<std::string> attrsInOrder;

  inNameToAttrMap = buildMap("arg_attrs", "inputs", attrsInOrder);
  outNameToAttrMap = buildMap("res_attrs", "outputs", attrsInOrder);

  // Inputs occupy the leading entries of "values" in the same order as the
  // entry-function input names (attrsInOrder holds inputs first).
  for (size_t i = 0; i < inNameToAttrMap.size(); i++) {
    inputTensorMap[attrsInOrder[i]] = values[i];
  }

  // Outputs are identified by matching each value's NameLoc against the
  // entry-function output names.
  for (auto v : values) {
    if (auto loc = v.getLoc()->dyn_cast_or_null<NameLoc>()) {
      if (outNameToAttrMap.count(loc.getName())) {
        outputTensorMap[outNameToAttrMap[loc.getName()]] = v;
      }
    }
  }
}

std::vector<int> MemoryPlan::getAllocatedOffsets(const bool overlapOps,
int &peakMemoryUsed,
int &peakOpId) {
Expand Down Expand Up @@ -245,6 +311,22 @@ std::vector<int> MemoryPlan::getAllocatedOffsets(const bool overlapOps,
}
}

// Handle input output tensor same allocations
llvm::DenseSet<Value> inputTensorSet;
llvm::DenseSet<Value> outputTensorSet;
llvm::StringMap<Value> inputTensorMap, outputTensorMap;

if (sameAllocationInputOutputTensorOption.size() > 0) {
buildInputOutputTensorMaps(inputTensorMap, outputTensorMap);
for (int i = 0; i < sameAllocationInputOutputTensorOption.size();
i = i + 2) {
inputTensorSet.insert(
inputTensorMap[sameAllocationInputOutputTensorOption[i]]);
outputTensorSet.insert(
outputTensorMap[sameAllocationInputOutputTensorOption[i + 1]]);
}
}

// The comparator keeps the buffers ordered by id if their sizes are the
// same
auto DecreasingSizesComparator = [&](QueueItem &lhs, QueueItem &rhs) {
Expand All @@ -259,23 +341,51 @@ std::vector<int> MemoryPlan::getAllocatedOffsets(const bool overlapOps,
queue(DecreasingSizesComparator);

// Insert values and their sizes into priority queue
// InOutmap prevents adding in values which are overlapped
// In a chain of overlapped values, only the last value is allocated and the
// rest are patched up and add in allocated values list later
// Don't insert same allocation input and output tensors into queue as they
// are allocated separately
for (auto v : values) {
if (!inOutMap.count(v) && !vInfo[v].isConstant) {
if (!inOutMap.count(v) && !vInfo[v].isConstant &&
!outputTensorSet.contains(v) && !inputTensorSet.contains(v)) {
queue.push({v, vInfo[v].size});
}
}

ValuesOrderedByOffset allocatedValues;
auto v = queue.top().first;
queue.pop();
allocatedValues.insert({v, 0});

// If there are same allocation input and output tensors, allocate those first
if (sameAllocationInputOutputTensorOption.size() > 0) {
// Allocate first input and output tensor with offsets of zero
allocatedValues.insert(
{inputTensorMap[sameAllocationInputOutputTensorOption[0]], 0});
allocatedValues.insert(
{outputTensorMap[sameAllocationInputOutputTensorOption[1]], 0});

for (int i = 2; i < sameAllocationInputOutputTensorOption.size();
i = i + 2) {
auto inputTensor =
inputTensorMap[sameAllocationInputOutputTensorOption[i]];
int newOffset = getOffset(inputTensor, vInfo[inputTensor].size, vInfo,
allocatedValues);
allocatedValues.insert({inputTensor, newOffset});
allocatedValues.insert(
{outputTensorMap[sameAllocationInputOutputTensorOption[i + 1]],
newOffset});
}
} else {
// Else allocate the largest tensor at offset zero
auto v = queue.top().first;
queue.pop();
allocatedValues.insert({v, 0});
}

while (!queue.empty()) {
auto v = queue.top().first;
auto size = queue.top().second;
queue.pop();

// check with allocatedValues list
int newOffset = getOffset(v, size, vInfo, allocatedValues);
allocatedValues.insert({v, newOffset});
}
Expand Down Expand Up @@ -313,6 +423,37 @@ std::vector<int> MemoryPlan::getAllocatedOffsets(const bool overlapOps,
allocatedValuesOrderedByID.insert(i);
}

// Check if buffers clash
// for (auto i : allocatedValuesOrderedByID) {
// for (auto j : allocatedValuesOrderedByID) {
// if (vInfo[i.first].id < vInfo[j.first].id) {
// if ((vInfo[i.first].firstUsed > vInfo[j.first].firstUsed &&
// vInfo[i.first].firstUsed < vInfo[j.first].lastUsed) ||
// (vInfo[j.first].firstUsed > vInfo[i.first].firstUsed &&
// vInfo[j.first].firstUsed < vInfo[i.first].lastUsed)) {
// auto iBegin = i.second;
// auto iEnd = i.second + vInfo[i.first].size;
// auto jBegin = j.second;
// auto jEnd = j.second + vInfo[j.first].size;
// if ((iBegin > jBegin && iBegin < jEnd) ||
// (jBegin > iBegin && jBegin < iEnd)) {
// printf("\n\nProblem!");
// std::cout << "\nValue one " << vInfo[i.first].id
// << ", size = " << vInfo[i.first].size
// << ", offset = " << i.second
// << ", first = " << vInfo[i.first].firstUsed
// << ", last = " << vInfo[i.first].lastUsed;
// std::cout << "\nValue two " << vInfo[j.first].id
// << ", size = " << vInfo[j.first].size
// << ", offset = " << j.second
// << ", first = " << vInfo[j.first].firstUsed
// << ", last = " << vInfo[j.first].lastUsed;
// }
// }
// }
// }
// }

size_t peakUsed = 0;
size_t peakUsedValueID = 0;
size_t maxId = 0;
Expand Down
22 changes: 14 additions & 8 deletions xformer/Analysis/MemoryPlan.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,21 @@
#include "mlir/Analysis/Liveness.h"
#include "mlir/IR/Value.h"
#include "llvm/ADT/PriorityQueue.h"
#include "llvm/ADT/StringMap.h"

#include <set>

namespace mlir {
namespace xcore {

// Per-buffer metadata used by the memory planner to decide placement.
struct ValueInfo {
  size_t id;       // stable ordering id; used to break ties between
                   // equally-sized buffers in the allocation priority queue
  size_t size;     // buffer size to allocate for this value
  bool isConstant; // constants are excluded from arena allocation
  int firstUsed;   // op-sequence index where the value's lifetime begins
  int lastUsed;    // op-sequence index where the value's lifetime ends;
                   // two values may share address ranges only if their
                   // [firstUsed, lastUsed] intervals do not overlap
};

// Represents an analysis for memory planning of a given FuncOp for a model.
// - Uses liveness analysis and a greedy algorithm to arrange buffers in memory.
// - Tries to overlap input and output buffers based on the op characteristics.
Expand Down Expand Up @@ -51,6 +60,11 @@ class MemoryPlan {

std::vector<Operation *> getOperationsSequence() { return operations; }

DenseMap<Value, ValueInfo> getValuesInfoMap() { return valueInfo; }

void buildInputOutputTensorMaps(llvm::StringMap<Value> &inputTensorMap,
llvm::StringMap<Value> &outputTensorMap);

// OpSplitPlan getOpSplitPlan();

void printMemoryPlan();
Expand All @@ -70,14 +84,6 @@ class MemoryPlan {
using ValuesOrderedByOffset =
std::multiset<QueueItem, IncreasingOffsetsComparator>;

struct ValueInfo {
size_t id;
size_t size;
bool isConstant;
int firstUsed;
int lastUsed;
};

int getOffset(Value v, int size, DenseMap<Value, ValueInfo> &valueInfo,
ValuesOrderedByOffset &allocatedOffsets);

Expand Down
2 changes: 2 additions & 0 deletions xformer/Transforms/Options.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ extern llvm::cl::opt<bool> convDebugOption;
extern llvm::cl::opt<bool> overlapConvOption;
extern llvm::cl::opt<bool> offlineOffsetsOption;
extern llvm::cl::opt<unsigned> convChannelwiseSplitSizeOption;
extern llvm::cl::list<std::string> sameAllocationInputOutputTensorOption;

} // namespace xcore
} // namespace mlir

Expand Down
1 change: 1 addition & 0 deletions xformer/Transforms/Passes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ void buildXCorePreOpSplitPassPipeline(OpPassManager &pm) {
// Run pass from LCE to convert Larq ops which are in TFL custom op format to
// Larq dialect
pm.addPass(mlir::TFL::CreateTranslateToLCEPass());
pm.addPass(createVerifySameAllocationTensorsPass());
// Convert dynamic shapes in batch dimension to static
pm.addPass(createRemoveDynamicShapePass());
}
Expand Down
2 changes: 2 additions & 0 deletions xformer/Transforms/Passes.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ std::unique_ptr<OperationPass<func::FuncOp>> createReplaceFCWithConv2DPass();
std::unique_ptr<OperationPass<func::FuncOp>> createOptimizeConv2DPass();
std::unique_ptr<OperationPass<func::FuncOp>> createOpSplitPass();
std::unique_ptr<OperationPass<func::FuncOp>> createApplyTFLPatternsPass();
std::unique_ptr<OperationPass<func::FuncOp>>
createVerifySameAllocationTensorsPass();
std::unique_ptr<OperationPass<func::FuncOp>> createRemoveDynamicShapePass();
std::unique_ptr<OperationPass<func::FuncOp>> createReplaceAddSubPass();
std::unique_ptr<OperationPass<func::FuncOp>> createReplaceMulPass();
Expand Down
Loading