PyTorch模型部署:从训练到生产环境

张开发
2026/5/8 16:19:06 15 分钟阅读

分享文章

PyTorch模型部署:从训练到生产环境
# PyTorch模型部署:从训练到生产环境

## 1. 技术分析

### 1.1 部署方式对比

| 方式 | 延迟 | 吞吐量 | 资源消耗 | 适用场景 |
|------|------|--------|----------|----------|
| PyTorch JIT | 低 | 高 | 中 | 生产环境部署 |
| ONNX Runtime | 很低 | 很高 | 低 | 跨平台部署 |
| TorchServe | 中 | 高 | 中 | 云端服务 |
| TensorRT | 极低 | 极高 | 低 | GPU加速推理 |

### 1.2 模型格式对比

| 格式 | 优点 | 缺点 |
|------|------|------|
| .pt | PyTorch原生 | 仅PyTorch可用 |
| .onnx | 跨框架兼容 | 可能丢失动态图特性 |
| TorchScript | 生产优化 | 需要JIT编译 |

## 2. 核心功能实现

### 2.1 模型导出为TorchScript

```python
import torch
import torch.nn as nn

class SimpleModel(nn.Module):
    def __init__(self, input_dim=10, hidden_dim=32, num_classes=2):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim, num_classes)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        return self.softmax(x)

def export_to_torchscript():
    model = SimpleModel()
    model.eval()

    # Tracing方式
    example_input = torch.randn(1, 10)
    traced_model = torch.jit.trace(model, example_input)
    traced_model.save("model_traced.pt")

    # Scripting方式(保留控制流)
    scripted_model = torch.jit.script(model)
    scripted_model.save("model_scripted.pt")

    print("模型已导出")
    return traced_model

def load_and_inference():
    model = torch.jit.load("model_traced.pt")
    model.eval()
    with torch.no_grad():
        input_data = torch.randn(1, 10)
        output = model(input_data)
    return output
```

### 2.2 ONNX导出与优化

```python
import torch.onnx
import onnx
import onnxruntime as ort

class ResNetLikeModel(nn.Module):
    def __init__(self, num_classes=1000):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, 3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(64, 128, 3, padding=1),
            nn.AdaptiveAvgPool2d((1, 1))
        )
        self.classifier = nn.Linear(128, num_classes)

    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), -1)
        return self.classifier(x)

def export_to_onnx():
    model = ResNetLikeModel()
    model.eval()

    dummy_input = torch.randn(1, 3, 224, 224)
    torch.onnx.export(
        model,
        dummy_input,
        "model.onnx",
        export_params=True,
        opset_version=11,
        input_names=["input"],
        output_names=["output"],
        dynamic_axes={
            "input": {0: "batch_size"},
            "output": {0: "batch_size"}
        }
    )

    onnx_model = onnx.load("model.onnx")
    onnx.checker.check_model(onnx_model)
    print("ONNX模型验证通过")
    return "model.onnx"

def inference_onnx(onnx_model_path):
    sess_options = ort.SessionOptions()
    sess_options.intra_op_num_threads = 4
    ort_session = ort.InferenceSession(onnx_model_path, sess_options)

    import numpy as np
    input_data = np.random.randn(1, 3, 224, 224).astype(np.float32)
    outputs = ort_session.run(None, {"input": input_data})
    return outputs[0]
```

### 2.3 TorchServe部署

```python
# handler.py
import numpy as np
import torch
from ts.torch_handler.base_handler import BaseHandler

class ImageClassifier(BaseHandler):
    def __init__(self):
        super().__init__()
        self.model = None
        self.mapping = None

    def initialize(self, ctx):
        super().initialize(ctx)
        self.mapping = ctx.model_yaml_config.get("mapping", {})

    def preprocess(self, data):
        images = []
        for row in data:
            image = row.get("data") or row.get("body")
            if isinstance(image, bytes):
                image = torch.from_numpy(
                    np.frombuffer(image, dtype=np.float32)
                ).reshape(3, 224, 224)
            images.append(image)
        return torch.stack(images)

    def inference(self, data):
        with torch.no_grad():
            outputs = self.model(data)
        return outputs

    def postprocess(self, data):
        results = []
        for output in data:
            probs = torch.softmax(output, dim=0)
            top_prob, top_class = torch.topk(probs, 5)
            results.append([
                {"class": int(c), "probability": float(p)}
                for c, p in zip(top_class, top_prob)
            ])
        return results
```

## 3. 性能优化

### 3.1 量化推理

```python
import torch.quantization

def quantize_model():
    model = SimpleModel()
    model.eval()

    # 动态量化
    quantized_model = torch.quantization.quantize_dynamic(
        model, {nn.Linear, nn.ReLU}, dtype=torch.qint8
    )
    torch.save(quantized_model.state_dict(), "model_quantized.pt")
    return quantized_model

def static_quantization():
    model = SimpleModel()
    model.train()

    # Fuse模块
    model = torch.quantization.fuse_modules(model, [["fc1", "relu"]])

    # 设置量化配置
    model.qconfig = torch.quantization.get_default_qconfig("fbgemm")
    torch.quantization.prepare(model, inplace=True)

    # 转换
    quantized_model = torch.quantization.convert(model, inplace=False)
    return quantized_model
```

### 3.2 性能测试

```python
import time

def benchmark_inference():
    model = SimpleModel()
    model.eval()
    model_traced = torch.jit.trace(model, torch.randn(1, 10))
    model_quantized = quantize_model()

    input_data = torch.randn(100, 10)
    num_iterations = 1000
    results = {}

    # PyTorch原生
    times = []
    with torch.no_grad():
        for _ in range(num_iterations):
            start = time.perf_counter()
            _ = model(input_data)
            times.append(time.perf_counter() - start)
    results["PyTorch"] = sum(times) / len(times) * 1000

    # TorchScript
    times = []
    for _ in range(num_iterations):
        start = time.perf_counter()
        _ = model_traced(input_data)
        times.append(time.perf_counter() - start)
    results["TorchScript"] = sum(times) / len(times) * 1000

    print("推理性能对比 (ms):")
    for name, ms in results.items():
        print(f"  {name}: {ms:.3f}ms")
    return results
```

## 4. 最佳实践

### 4.1 部署架构选择

| 场景 | 推荐方案 | 理由 |
|------|----------|------|
| 小规模部署 | Flask + TorchScript | 简单易用 |
| 中等规模 | TorchServe | 官方支持 |
| 大规模/云端 | ONNX + ONNX Runtime | 跨平台高性能 |
| 边缘设备 | TensorRT | GPU加速优化 |

### 4.2 优化建议

```python
# ✅ 推荐:使用torch.no_grad()进行推理
with torch.no_grad():
    output = model(input_data)

# ✅ 推荐:使用eval()模式
model.eval()

# ✅ 推荐:使用torch.jit.optimize_for_inference
@torch.jit.optimize_for_inference
def fast_inference(model, input_data):
    return model(input_data)
```

## 5. 总结

PyTorch模型部署要点:

- **TorchScript**:生产环境首选,平衡性能和易用性
- **ONNX**:跨平台部署的标准格式
- **量化**:显著降低延迟和内存占用

更多文章