Created
April 28, 2019 03:19
-
-
Save hewumars/f3acc698a3b982121a7104456ddaaa63 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Quantization module for generating the calibration tables will be used by | |
quantized (INT8) models from FP32 models.with bucket split,[k, k, cin, cout] | |
cut into "cout" buckets. | |
This tool is based on Caffe Framework. | |
""" | |
from __future__ import division | |
from __future__ import print_function | |
import argparse | |
import numpy as np | |
import math, copy | |
import matplotlib.pyplot as plt | |
import sys,os | |
import caffe | |
import caffe.proto.caffe_pb2 as caffe_pb2 | |
import time | |
import datetime | |
from google.protobuf import text_format | |
from scipy import stats | |
np.set_printoptions(threshold='nan') | |
np.set_printoptions(suppress=True) | |
def parse_args(): | |
parser = argparse.ArgumentParser( | |
description='find the pretrained caffe models int8 quantize scale value') | |
parser.add_argument('--proto', dest='proto', | |
help="path to deploy prototxt.", type=str) | |
parser.add_argument('--model', dest='model', | |
help='path to pretrained weights', type=str) | |
parser.add_argument('--mean', dest='mean', | |
help='value of mean', type=float, nargs=3) | |
parser.add_argument('--norm', dest='norm', | |
help='value of normalize', type=float, nargs=1, default=1.0) | |
parser.add_argument('--images', dest='images', | |
help='path to calibration images', type=str) | |
parser.add_argument('--output', dest='output', | |
help='path to output calibration table file', type=str, default='calibration-dev.table') | |
parser.add_argument('--group', dest='group', | |
help='enable the group scale', type=int, default=1) | |
parser.add_argument('--gpu', dest='gpu', | |
help='use gpu to forward', type=int, default=0) | |
args = parser.parse_args() | |
return args, parser | |
global args, parser | |
args, parser = parse_args() | |
# global params | |
QUANTIZE_NUM = 127 | |
QUANTIZE_WINOGRAND_NUM = 31 | |
STATISTIC = 1 | |
INTERVAL_NUM = 2048 | |
# ugly global params | |
quantize_layer_lists = [] | |
class QuantizeLayer: | |
def __init__(self, name, blob_name, group_num): | |
self.name = name | |
self.blob_name = blob_name | |
self.group_num = group_num | |
self.weight_scale = np.zeros(group_num) | |
self.blob_max = 0.0 | |
self.blob_distubution_interval = 0.0 | |
self.blob_distubution = np.zeros(INTERVAL_NUM) | |
self.blob_threshold = 0 | |
self.blob_scale = 1.0 | |
self.group_zero = np.zeros(group_num) | |
def quantize_weight(self, weight_data, flag): | |
# spilt the weight data by cout num | |
blob_group_data = np.array_split(weight_data, self.group_num) | |
for i, group_data in enumerate(blob_group_data): | |
max_val = np.max(group_data) | |
min_val = np.min(group_data) | |
threshold = max(abs(max_val), abs(min_val)) | |
if threshold < 0.0001: | |
self.weight_scale[i] = 0 | |
self.group_zero[i] = 1 | |
else: | |
if(flag == True): | |
self.weight_scale[i] = QUANTIZE_WINOGRAND_NUM / threshold | |
else: | |
self.weight_scale[i] = QUANTIZE_NUM / threshold | |
print("%-20s group : %-5d max_val : %-10f scale_val : %-10f" % (self.name + "_param0", i, threshold, self.weight_scale[i])) | |
def initial_blob_max(self, blob_data): | |
# get the max value of blob | |
max_val = np.max(blob_data) | |
min_val = np.min(blob_data) | |
self.blob_max = max(self.blob_max, max(abs(max_val), abs(min_val))) | |
def initial_blob_distubution_interval(self): | |
self.blob_distubution_interval = STATISTIC * self.blob_max / INTERVAL_NUM | |
print("%-20s max_val : %-10.8f distribution_intervals : %-10.8f" % (self.name, self.blob_max, self.blob_distubution_interval)) | |
def initial_histograms(self, blob_data): | |
# collect histogram of every group channel blob | |
th = self.blob_max | |
hist, hist_edge = np.histogram(blob_data, bins=INTERVAL_NUM, range=(0, th)) | |
self.blob_distubution += hist | |
def quantize_blob(self): | |
# calculate threshold | |
distribution = np.array(self.blob_distubution) | |
# pick threshold which minimizes KL divergence | |
threshold_bin = threshold_distribution(distribution) | |
self.blob_threshold = threshold_bin | |
threshold = (threshold_bin + 0.5) * self.blob_distubution_interval | |
# get the activation calibration value | |
self.blob_scale = QUANTIZE_NUM / threshold | |
print("%-20s bin : %-8d threshold : %-10f interval : %-10f scale : %-10f" % (self.name, threshold_bin, threshold, self.blob_distubution_interval, self.blob_scale)) | |
def _smooth_distribution(p, eps=0.0001): | |
"""Given a discrete distribution (may have not been normalized to 1), | |
smooth it by replacing zeros with eps multiplied by a scaling factor and taking the | |
corresponding amount off the non-zero values. | |
Ref: http://web.engr.illinois.edu/~hanj/cs412/bk3/KL-divergence.pdf | |
""" | |
is_zeros = (p == 0).astype(np.float32) | |
is_nonzeros = (p != 0).astype(np.float32) | |
n_zeros = is_zeros.sum() | |
n_nonzeros = p.size - n_zeros | |
if not n_nonzeros: | |
raise ValueError('The discrete probability distribution is malformed. All entries are 0.') | |
eps1 = eps * float(n_zeros) / float(n_nonzeros) | |
assert eps1 < 1.0, 'n_zeros=%d, n_nonzeros=%d, eps1=%f' % (n_zeros, n_nonzeros, eps1) | |
hist = p.astype(np.float32) | |
hist += eps * is_zeros + (-eps1) * is_nonzeros | |
assert (hist <= 0).sum() == 0 | |
return hist | |
def threshold_distribution(distribution, target_bin=128): | |
""" | |
Return the best threshold value. | |
Ref: https://github.com//apache/incubator-mxnet/blob/master/python/mxnet/contrib/quantization.py | |
Args: | |
distribution: list, activations has been processed by histogram and normalize,size is 2048 | |
target_bin: int, the num of bin that is used by quantize, Int8 default value is 128 | |
Returns: | |
target_threshold: int, num of bin with the minimum KL | |
""" | |
distribution = distribution[1:] | |
length = distribution.size | |
threshold_sum = sum(distribution[target_bin:]) | |
kl_divergence = np.zeros(length - target_bin) | |
for threshold in range(target_bin, length): | |
sliced_nd_hist = copy.deepcopy(distribution[:threshold]) | |
# generate reference distribution p | |
p = sliced_nd_hist.copy() | |
p[threshold-1] += threshold_sum | |
threshold_sum = threshold_sum - distribution[threshold] | |
# is_nonzeros[k] indicates whether hist[k] is nonzero | |
is_nonzeros = (p != 0).astype(np.int64) | |
# | |
quantized_bins = np.zeros(target_bin, dtype=np.int64) | |
# calculate how many bins should be merged to generate quantized distribution q | |
num_merged_bins = sliced_nd_hist.size // target_bin | |
# merge hist into num_quantized_bins bins | |
for j in range(target_bin): | |
start = j * num_merged_bins | |
stop = start + num_merged_bins | |
quantized_bins[j] = sliced_nd_hist[start:stop].sum() | |
quantized_bins[-1] += sliced_nd_hist[target_bin * num_merged_bins:].sum() | |
# expand quantized_bins into p.size bins | |
q = np.zeros(sliced_nd_hist.size, dtype=np.float64) | |
for j in range(target_bin): | |
start = j * num_merged_bins | |
if j == target_bin - 1: | |
stop = -1 | |
else: | |
stop = start + num_merged_bins | |
norm = is_nonzeros[start:stop].sum() | |
if norm != 0: | |
q[start:stop] = float(quantized_bins[j]) / float(norm) | |
q[p == 0] = 0 | |
# p = _smooth_distribution(p) # with some bugs, need to fix | |
# q = _smooth_distribution(q) | |
p[p == 0] = 0.0001 | |
q[q == 0] = 0.0001 | |
# calculate kl_divergence between q and p | |
kl_divergence[threshold - target_bin] = stats.entropy(p, q) | |
min_kl_divergence = np.argmin(kl_divergence) | |
threshold_value = min_kl_divergence + target_bin | |
return threshold_value | |
def net_forward(net, image_path, transformer): | |
""" | |
network inference and statistics the cost time | |
Args: | |
net: the instance of Caffe inference | |
image_path: a image need to be inference | |
transformer: | |
Returns: | |
none | |
""" | |
# load image | |
image = caffe.io.load_image(image_path) | |
# transformer.preprocess the image | |
net.blobs['data'].data[...] = transformer.preprocess('data',image) | |
# net forward | |
output = net.forward() | |
def file_name(file_dir): | |
""" | |
Find the all file path with the directory | |
Args: | |
file_dir: The source file directory | |
Returns: | |
files_path: all the file path into a list | |
""" | |
files_path = [] | |
for root, dir, files in os.walk(file_dir): | |
for name in files: | |
file_path = root + "/" + name | |
print(file_path) | |
files_path.append(file_path) | |
return files_path | |
def network_prepare(net, mean, norm): | |
""" | |
instance the prepare process param of caffe network inference | |
Args: | |
net: the instance of Caffe inference | |
mean: the value of mean | |
norm: the value of normalize | |
Returns: | |
none | |
""" | |
print("Network initial") | |
img_mean = np.array(mean) | |
# initial transformer | |
transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape}) | |
# convert hwc to cwh | |
transformer.set_transpose('data', (2,0,1)) | |
# load meanfile | |
transformer.set_mean('data', img_mean) | |
# resize image data from [0,1] to [0,255] | |
transformer.set_raw_scale('data', 255) | |
# convert RGB -> BGR | |
transformer.set_channel_swap('data', (2,1,0)) | |
# normalize | |
transformer.set_input_scale('data', norm) | |
return transformer | |
def weight_quantize(net, net_file, group_on): | |
""" | |
CaffeModel convolution weight blob Int8 quantize | |
Args: | |
net: the instance of Caffe inference | |
net_file: deploy caffe prototxt | |
Returns: | |
none | |
""" | |
print("\nQuantize the kernel weight:") | |
# parse the net param from deploy prototxt | |
params = caffe_pb2.NetParameter() | |
with open(net_file) as f: | |
text_format.Merge(f.read(), params) | |
for i, layer in enumerate(params.layer): | |
# find the convolution layers to get out the weight_scale | |
if(layer.type == "Convolution" or layer.type == "ConvolutionDepthwise"): | |
weight_blob = net.params[layer.name][0].data | |
# initial the instance of QuantizeLayer Class lists,you can use enable group quantize to generate int8 scale for each group layer.convolution_param.group | |
if (group_on == 1): | |
quanitze_layer = QuantizeLayer(layer.name, layer.bottom[0], layer.convolution_param.num_output) | |
else: | |
quanitze_layer = QuantizeLayer(layer.name, layer.bottom[0], 1) | |
# quantize the weight value using 6bit for conv3x3s1 layer to winograd F(4,3) | |
if(layer.type == "Convolution" and layer.convolution_param.kernel_size[0] == 3 and ((len(layer.convolution_param.stride) == 0) or layer.convolution_param.stride[0] == 1)): | |
if(layer.convolution_param.group != layer.convolution_param.num_output): | |
quanitze_layer.quantize_weight(weight_blob, True) | |
else: | |
quanitze_layer.quantize_weight(weight_blob, False) | |
# quantize the weight value using 8bit for another conv layers | |
else: | |
quanitze_layer.quantize_weight(weight_blob, False) | |
# add the quantize_layer into the save list | |
quantize_layer_lists.append(quanitze_layer) | |
return None | |
def activation_quantize(net, transformer, images_files): | |
""" | |
Activation Int8 quantize, optimaize threshold selection with KL divergence, | |
given a dataset, find the optimal threshold for quantizing it. | |
Ref: http://on-demand.gputechconf.com/gtc/2017/presentation/s7310-8-bit-inference-with-tensorrt.pdf | |
Args: | |
net: the instance of Caffe inference | |
transformer: | |
images_files: calibration dataset | |
Returns: | |
none | |
""" | |
print("\nQuantize the Activation:") | |
# run float32 inference on calibration dataset to find the activations range | |
for i , image in enumerate(images_files): | |
# inference | |
net_forward(net, image, transformer) | |
# find max threshold | |
for layer in quantize_layer_lists: | |
blob = net.blobs[layer.blob_name].data[0].flatten() | |
layer.initial_blob_max(blob) | |
if i % 100 == 0: | |
print("loop stage 1 : %d/%d" % (i, len(images_files))) | |
# calculate statistic blob scope and interval distribution | |
for layer in quantize_layer_lists: | |
layer.initial_blob_distubution_interval() | |
# for each layers | |
# collect histograms of activations | |
print("\nCollect histograms of activations:") | |
for i, image in enumerate(images_files): | |
net_forward(net, image, transformer) | |
for layer in quantize_layer_lists: | |
blob = net.blobs[layer.blob_name].data[0].flatten() | |
layer.initial_histograms(blob) | |
if i % 100 == 0: | |
print("loop stage 2 : %d/%d" % (i, len(images_files))) | |
# calculate threshold with KL divergence | |
for layer in quantize_layer_lists: | |
layer.quantize_blob() | |
return None | |
def save_calibration_file(calibration_path): | |
calibration_file = open(calibration_path, 'w') | |
# save temp | |
save_temp = [] | |
# save weight scale | |
for layer in quantize_layer_lists: | |
save_string = layer.name + "_param_0" | |
for i in range(layer.group_num): | |
save_string = save_string + " " + str(layer.weight_scale[i]) | |
save_temp.append(save_string) | |
# save bottom blob scales | |
for layer in quantize_layer_lists: | |
save_string = layer.name + " " + str(layer.blob_scale) | |
save_temp.append(save_string) | |
# save into txt file | |
for data in save_temp: | |
calibration_file.write(data + "\n") | |
calibration_file.close() | |
# save calibration logs | |
save_temp_log = [] | |
calibration_file_log = open(calibration_path + ".log", 'w') | |
for layer in quantize_layer_lists: | |
save_string = layer.name + ": value range 0 - " + str(layer.blob_max) \ | |
+ ", interval " + str(layer.blob_distubution_interval) \ | |
+ ", interval num " + str(INTERVAL_NUM) \ | |
+ ", threshold num " + str(layer.blob_threshold) + "\n" \ | |
+ str(layer.blob_distubution.astype(dtype=np.int64)) | |
save_temp_log.append(save_string) | |
# save into txt file | |
for data in save_temp_log: | |
calibration_file_log.write(data + "\n") | |
def usage_info(): | |
""" | |
usage info | |
""" | |
print("Input params is illegal...╮(╯3╰)╭") | |
print("try it again:\n python caffe-int8-scale-tools-dev.py -h") | |
def main(): | |
""" | |
main function | |
""" | |
# time start | |
time_start = datetime.datetime.now() | |
print(args) | |
if args.proto == None or args.model == None or args.mean == None or args.images == None: | |
usage_info() | |
return None | |
# deploy caffe prototxt path | |
net_file = args.proto | |
# trained caffemodel path | |
caffe_model = args.model | |
# mean value | |
mean = args.mean | |
# norm value | |
norm = 1.0 | |
if args.norm != 1.0: | |
norm = args.norm[0] | |
# calibration dataset | |
images_path = args.images | |
# the output calibration file | |
calibration_path = args.output | |
# enable the group scale | |
group_on = args.group | |
# default use CPU to forwark | |
if args.gpu != 0: | |
caffe.set_device(0) | |
caffe.set_mode_gpu() | |
# initial caffe net and the forword model(GPU or CPU) | |
net = caffe.Net(net_file,caffe_model,caffe.TEST) | |
# prepare the cnn network | |
transformer = network_prepare(net, mean, norm) | |
# get the calibration datasets images files path | |
images_files = file_name(images_path) | |
# quanitze kernel weight of the caffemodel to find it's calibration table | |
weight_quantize(net, net_file, group_on) | |
# quantize activation value of the caffemodel to find it's calibration table | |
activation_quantize(net, transformer, images_files) | |
# save the calibration tables,best wish for your INT8 inference have low accuracy loss :) | |
save_calibration_file(calibration_path) | |
# time end | |
time_end = datetime.datetime.now() | |
print("\nCaffe Int8 Calibration table create success, it's cost %s, best wish for your INT8 inference has a low accuracy loss...\(^▽^)/...2333..." % (time_end - time_start)) | |
if __name__ == "__main__": | |
main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment