1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75
| import torch import torch.nn as nn
class Quantizer:
    """Model quantizer.

    Collects per-layer activation ranges for the ``nn.Conv2d`` / ``nn.Linear``
    layers of a model and applies PyTorch dynamic quantization to the layers
    that were calibrated.
    """

    def __init__(self):
        # NOTE(review): `calibrate` is not defined in the visible part of this
        # file; it is assumed to be provided elsewhere in the module — confirm.
        self.calibrator = calibrate()
        # Only 'weight_dtype' is forwarded to torch (see `quantize`); the other
        # entries are kept for callers that read this dict.
        self.quant_config = {
            'weight_dtype': torch.qint8,
            'activation_dtype': torch.qint8,
            'scheme': 'per_channel',
        }

    def quantize(self, model, calibrate_data):
        """Quantize ``model`` and return the quantized model.

        Args:
            model: The ``nn.Module`` to quantize.
            calibrate_data: Iterable of batches; each batch is passed to
                ``model`` as its single positional argument.

        Returns:
            The model returned by ``torch.quantization.quantize_dynamic``.

        Raises:
            ValueError: If the model has calibratable layers but
                ``calibrate_data`` yields no batches.
        """
        calibration_stats = self.collect_calibration_stats(
            model, calibrate_data
        )
        # quantize_dynamic only accepts (model, qconfig_spec, dtype, mapping,
        # inplace). The raw config dict cannot be splatted into it, and the
        # stats dict is not a valid qconfig mapping, so select the calibrated
        # layers by name and forward just the weight dtype.
        # NOTE(review): layer types without a dynamic-quantization mapping in
        # the installed torch (presumably Conv2d) are expected to be returned
        # unchanged — verify against the torch version in use.
        quantized_model = torch.quantization.quantize_dynamic(
            model,
            qconfig_spec=set(calibration_stats),
            dtype=self.quant_config['weight_dtype'],
        )
        return quantized_model

    def collect_calibration_stats(self, model, calibrate_data):
        """Collect activation ranges for every Conv2d / Linear layer.

        The whole model is run once per batch and forward hooks record each
        target layer's output, so every layer is measured on the activations
        it really receives and ``calibrate_data`` is iterated only once (it
        may be a generator).

        The model is switched to eval mode for the duration of calibration
        and every module's original ``training`` flag is restored afterwards.

        Args:
            model: The ``nn.Module`` to inspect.
            calibrate_data: Iterable of batches passed to ``model``.

        Returns:
            Dict mapping layer name (as given by ``model.named_modules()``)
            to ``{'min': ..., 'max': ..., 'range': ...}``. Layers that never
            produced a non-empty output during calibration are omitted.
            Empty when the model has no Conv2d / Linear layers.

        Raises:
            ValueError: If there are target layers but ``calibrate_data``
                yields no batches.
        """
        targets = {
            name: module
            for name, module in model.named_modules()
            if isinstance(module, (nn.Conv2d, nn.Linear))
        }
        if not targets:
            return {}

        observed = {}

        def make_hook(layer_name):
            # Bind the name per layer; a bare closure over the loop variable
            # would see only the last name.
            def hook(_module, _inputs, output):
                observed[layer_name] = self._merge_range(
                    observed.get(layer_name), output
                )
            return hook

        saved_modes = [(module, module.training) for module in model.modules()]
        handles = [
            module.register_forward_hook(make_hook(name))
            for name, module in targets.items()
        ]
        batches_seen = 0
        try:
            # Eval mode keeps dropout deterministic and stops normalization
            # layers from updating their running statistics.
            model.eval()
            with torch.no_grad():
                for batch in calibrate_data:
                    model(batch)
                    batches_seen += 1
        finally:
            for handle in handles:
                handle.remove()
            for module, was_training in saved_modes:
                module.training = was_training

        if batches_seen == 0:
            raise ValueError("calibrate_data yielded no batches")

        return {
            name: self._summarize_range(observed[name])
            for name in targets
            if observed.get(name) is not None
        }

    def collect_activation_range(self, module, calibrate_data):
        """Collect the output range of a single module.

        Each batch is passed directly to ``module``, so ``calibrate_data``
        must already be valid input for that module.

        Args:
            module: The module to evaluate.
            calibrate_data: Iterable of batches passed to ``module``.

        Returns:
            ``{'min': ..., 'max': ..., 'range': ...}`` as plain Python numbers.

        Raises:
            ValueError: If no non-empty activation was produced.
        """
        bounds = None
        with torch.no_grad():
            for batch in calibrate_data:
                bounds = self._merge_range(bounds, module(batch))
        if bounds is None:
            raise ValueError(
                "calibrate_data produced no activations to measure"
            )
        return self._summarize_range(bounds)

    @staticmethod
    def _merge_range(bounds, output):
        """Fold one activation tensor into a running ``(min, max)`` pair."""
        if output.numel() == 0:
            # min/max of an empty tensor is undefined; keep what we have.
            return bounds
        values = output.detach()
        low, high = values.min(), values.max()
        if bounds is None:
            return low, high
        return torch.minimum(bounds[0], low), torch.maximum(bounds[1], high)

    @staticmethod
    def _summarize_range(bounds):
        """Convert a ``(min, max)`` tensor pair into the public stats dict."""
        low, high = bounds
        return {
            'min': low.item(),
            'max': high.item(),
            'range': (high - low).item(),
        }
|