PyTorch 训练时,有时会因为加载的内容过多而爆显存(CUDA out of memory);这种情况有时可以通过清理 CUDA 缓存来缓解。
使用 torch.cuda.empty_cache() 释放缓存分配器中当前未被占用的显存(它不会删除仍被引用的变量),代码示例如下:
# Try a forward pass and recover from a CUDA out-of-memory error.
# The cleanup runs only after the ``except`` clause has finished: while the
# handler is active, the exception's traceback keeps the failing stack frames
# (and the tensors they reference) alive, so that memory cannot be released.
oom_hit = False
try:
    output = model(input)
except RuntimeError as exception:
    # Only out-of-memory errors are handled here; anything else propagates
    # unchanged with its original traceback.
    if "out of memory" not in str(exception):
        raise
    oom_hit = True

if oom_hit:
    # NOTE: `output` is not assigned on this path.
    print("WARNING: out of memory")
    # Presumably guards against PyTorch builds that lack empty_cache.
    if hasattr(torch.cuda, 'empty_cache'):
        torch.cuda.empty_cache()
预测(推理)时爆显存,有可能是忘记使用 torch.no_grad() 关闭梯度计算,示例代码如下:
# Iterate over the test loader with autograd disabled.
with torch.no_grad():
    # Wrap the loader itself rather than enumerate(): the index was discarded,
    # and tqdm cannot read a length from an enumerate object, so the progress
    # bar had no total.
    for inputs, filelist in tqdm(test_loader, desc='predict'):
        if opt.use_gpu:
            inputs = inputs.cuda()
            # Inputs with fewer than 4 dimensions get a new axis at index 1
            # (presumably the channel axis -- confirm against the model).
            if len(inputs.shape) < 4:
                inputs = inputs.unsqueeze(1)
        elif len(inputs.shape) < 4:
            # NOTE(review): axes 1 and 2 are swapped only on the CPU path;
            # confirm this asymmetry with the GPU branch is intended.
            inputs = torch.transpose(inputs, 1, 2)
            inputs = inputs.unsqueeze(1)
上述方法都不生效时,可以尝试先把优化器状态移到 CPU,再删除优化器并清理缓存:
def wipe_memory(self):
    """Drop the optimizer and ask CUDA to release cached memory.

    The optimizer state is first moved to the CPU via ``_optimizer_to``,
    then the ``optimizer`` attribute is deleted, a garbage-collection pass
    is run, and the CUDA cache is emptied.
    """
    cpu_device = torch.device('cpu')
    self._optimizer_to(cpu_device)

    # Remove the attribute before collecting so the optimizer is no longer
    # reachable through this object.
    del self.optimizer
    gc.collect()

    torch.cuda.empty_cache()
def _optimizer_to(self, device):
    """Move the tensors found in ``self.optimizer.state`` to ``device``.

    Each value of the state mapping is handled as follows: a tensor is moved
    directly; a dict has each of its tensor values moved; anything else is
    left untouched. A tensor's gradient is moved too when it is not None.

    Args:
        device: Target passed to ``Tensor.to`` for every tensor moved.
    """

    def relocate(tensor):
        # Reassign ``.data`` so the existing tensor object is kept and only
        # its underlying data changes device.
        tensor.data = tensor.data.to(device)
        grad = tensor._grad
        if grad is not None:
            grad.data = grad.data.to(device)

    for entry in self.optimizer.state.values():
        # It is unclear whether the state ever holds tensors directly at the
        # top level; the case is handled regardless.
        if isinstance(entry, torch.Tensor):
            relocate(entry)
            continue
        if not isinstance(entry, dict):
            continue
        for value in entry.values():
            if isinstance(value, torch.Tensor):
                relocate(value)