Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[DRAFT][luci] Introduce requantize U8 to S8 pass #14319

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions compiler/luci/pass/src/CircleQuantizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -697,8 +697,8 @@ void CircleQuantizer::requantize(loco::Graph *g) const
// Requantize
if (_options->query(Options::Algorithm::Requantize))
{
static const std::vector<std::string> rq_supported_input_model_dtype{"int8"};
static const std::vector<std::string> rq_supported_output_model_dtype{"uint8"};
static const std::vector<std::string> rq_supported_input_model_dtype{"int8", "uint8"};
static const std::vector<std::string> rq_supported_output_model_dtype{"uint8", "int8"};

auto input_model_dtype =
_options->param(Options::AlgorithmParameters::Quantize_input_model_dtype);
Expand Down
133 changes: 133 additions & 0 deletions compiler/luci/pass/src/RequantizePass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,93 @@ void requant_const_int8_to_uint8(CircleConst *node)
}
}

// Requantize a non-const node from uint8 to int8
//
// Only the quantization parameters and the dtype of the node are rewritten here.
// For each entry of zerop:
//  - the real-valued range covered by the uint8 codes 0 ~ 255 is derived from the
//    current (scale, zero point),
//  - compute_sym_scale() is invoked for that range with S8 as the target type,
//  - the zero point is reset to 0.
// NOTE(review): compute_sym_scale() presumably writes the new scale through its
// third argument - confirm against its declaration.
void requant_nonconst_uint8_to_int8(CircleNode *circle_node)
{
  assert(circle_node->dtype() == loco::DataType::U8);

  auto qparam = circle_node->quantparam();
  assert(qparam != nullptr);

  // Lowest / highest code of the uint8 encoding, kept as int32_t for the arithmetic below
  const int32_t u8_lowest = static_cast<int32_t>(std::numeric_limits<uint8_t>::min());
  const int32_t u8_highest = static_cast<int32_t>(std::numeric_limits<uint8_t>::max());

  const size_t num_entries = qparam->zerop.size();
  for (size_t ch = 0; ch < num_entries; ++ch)
  {
    auto &zero_point = qparam->zerop[ch];
    auto &scale = qparam->scale[ch];

    // Both ends of the range are evaluated with the current scale, before it is recomputed
    const float real_min = static_cast<float>(u8_lowest - zero_point) * scale;
    const float real_max = static_cast<float>(u8_highest - zero_point) * scale;

    // The nudged range is required by the helper's signature but is not used here
    float nudged_min;
    float nudged_max;
    compute_sym_scale(real_min, real_max, scale, nudged_min, nudged_max, loco::DataType::S8);

    zero_point = 0;
  }

  circle_node->dtype(loco::DataType::S8);
}

// Requantize CircleConst from asymmetric uint8 to symmetric int8
//
// For every channel (one entry of zerop per channel):
//  1. the real-valued range covered by the uint8 codes 0 ~ 255 is derived from the
//     current (scale, zero point),
//  2. compute_sym_scale() is invoked for that range with S8 as the target type and
//     the zero point is reset to 0,
//  3. each stored value is de-quantized with the old (scale, zero point) and
//     re-quantized with the new scale.
// After requantization the stored values are expected to be in -127 ~ 127.
//
// Preconditions: node is U8, has quantparam, and zerop is not empty.
// NOTE(review): compute_sym_scale() presumably writes the new scale through its
// third argument - confirm against its declaration.
// NOTE(review): elements are addressed as `channel * channel_size + i`, i.e. the
// values of one channel are assumed to be contiguous - confirm for the layouts
// this pass is applied to.
void requant_const_uint8_to_int8(CircleConst *node)
{
  assert(node->dtype() == loco::DataType::U8);

  auto quantparam = node->quantparam();
  assert(quantparam != nullptr);

  const uint32_t size = node->size<loco::DataType::U8>();
  const size_t num_channels = quantparam->zerop.size();
  assert(num_channels > 0); // guards the division below
  const size_t channel_size = size / num_channels;

  const int32_t u8_min = static_cast<int32_t>(std::numeric_limits<uint8_t>::min());
  const int32_t u8_max = static_cast<int32_t>(std::numeric_limits<uint8_t>::max());

  // Elements not covered by any channel (size not divisible by num_channels) stay 0
  std::vector<int32_t> requantized_values(size);

  for (size_t ch = 0; ch < num_channels; ++ch)
  {
    // Keep the old parameters: they are needed to de-quantize the stored values
    // after the quantparam entries have been overwritten.
    const auto old_zerop = quantparam->zerop[ch];
    const float old_scale = quantparam->scale[ch];

    const float min = static_cast<float>(u8_min - old_zerop) * old_scale;
    const float max = static_cast<float>(u8_max - old_zerop) * old_scale;

    // The nudged range is required by the helper's signature but is not used here
    float nudged_min = 0.0f;
    float nudged_max = 0.0f;
    compute_sym_scale(min, max, quantparam->scale[ch], nudged_min, nudged_max,
                      loco::DataType::S8);
    quantparam->zerop[ch] = 0;

    const float new_scale = quantparam->scale[ch];
    for (size_t i = 0; i < channel_size; ++i)
    {
      const size_t idx = ch * channel_size + i;
      const int32_t data = node->at<loco::DataType::U8>(idx);
      const float real_value = (data - old_zerop) * old_scale;
      requantized_values[idx] = std::round(real_value / new_scale);
    }
  }

  // All U8 reads are done; switch the tensor type and store the new values
  node->dtype(loco::DataType::S8);
  node->size<loco::DataType::S8>(size);
  for (uint32_t i = 0; i < size; ++i)
  {
    assert(-127 <= requantized_values[i] && requantized_values[i] <= 127);
    node->at<loco::DataType::S8>(i) = requantized_values[i];
  }
}

// Guard helper: leaves the enclosing function with a bare `return;` when `cond`
// evaluates to false, so it is only usable inside functions returning void.
#define RETURN_UNLESS(cond) \
if (not(cond)) \
return;
Expand Down Expand Up @@ -119,6 +206,41 @@ struct RequantizeS8ToU8 final : public luci::CircleNodeMutableVisitor<void>
}
};

/**
 * @brief Visitor that requantizes uint8 quantized tensors to int8 tensors
 *
 * A node is skipped when it carries no quantization parameters or when its
 * dtype is not U8.
 */
struct RequantizeU8ToS8 final : public luci::CircleNodeMutableVisitor<void>
{
  // Const tensors: both the stored values and the quantization parameters are converted
  void visit(luci::CircleConst *node)
  {
    LOGGER(l);
    INFO(l) << "RequantizeU8ToS8 visit const node: " << node->name() << std::endl;

    // Nothing to do for tensors that are not quantized
    if (node->quantparam() == nullptr)
      return;

    // Only uint8 tensors are requantized
    if (node->dtype() != loco::DataType::U8)
      return;

    requant_const_uint8_to_int8(node);
  }

  // Every other (non-const) node
  void visit(luci::CircleNode *node)
  {
    LOGGER(l);
    INFO(l) << "RequantizeU8ToS8 visit non-const node: " << node->name() << std::endl;

    // Nothing to do for tensors that are not quantized
    if (node->quantparam() == nullptr)
      return;

    // Only uint8 tensors are requantized
    if (node->dtype() != loco::DataType::U8)
      return;

    requant_nonconst_uint8_to_int8(node);
  }
};
#undef RETURN_UNLESS

} // namespace
Expand All @@ -139,6 +261,17 @@ bool RequantizePass::run(loco::Graph *g)
circle_node->accept(&rq);
}
}
// Input: uint8 model
// Output: int8 model
else if (_input_dtype == loco::DataType::U8 and _output_dtype == loco::DataType::S8)
{
for (auto node : loco::active_nodes(loco::output_nodes(g)))
{
RequantizeU8ToS8 rq;
auto circle_node = loco::must_cast<luci::CircleNode *>(node);
circle_node->accept(&rq);
}
}
else
{
// Ignore other cases
Expand Down
Loading