一、AttentionUNet速度
AttentionUNet是一个新颖的网络结构,它有效地将U-Net(一种流行的医学图像分割框架)与注意力机制相结合,可以在更少的时间内实现高质量的医学图像分割。
与传统的U-Net模型相比,AttentionUNet引入的注意力机制本身会带来一定的额外计算,但它能让网络只关注有用的特征、抑制无关区域的响应,因此训练时往往收敛更快,也可以用更少的通道数达到相近的分割精度,从而在整体上节省时间。
下面是使用AttentionUNet进行医学图像分割的示例代码:
import torch
import torch.nn as nn
import torch.nn.functional as F


class ConvBlock(nn.Module):
    """Two 3x3 convolutions, each followed by batch norm and ReLU.

    Padding of 1 keeps the spatial size unchanged; only the channel count
    goes from ``in_channels`` to ``out_channels``.
    """

    def __init__(self, in_channels, out_channels):
        super(ConvBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, 3, padding=1)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(out_channels, out_channels, 3, padding=1)
        self.bn2 = nn.BatchNorm2d(out_channels)

    def forward(self, x):
        x = F.relu(self.bn1(self.conv1(x)))
        x = F.relu(self.bn2(self.conv2(x)))
        return x


class AttentionBlock(nn.Module):
    """Self-attention block with a residual connection.

    A 1x1 convolution first maps the input to ``out_channels``. Query
    (``theta``), key (``phi``) and value (``g``) projections are then
    computed with 1x1 convolutions; keys and values are 2x2 max-pooled to
    shrink the attention matrix. The attended values are projected back to
    ``out_channels`` by ``W`` and added to the projected input.

    Args:
        in_channels: channels of the incoming feature map.
        out_channels: channels of the returned feature map.
    """

    def __init__(self, in_channels, out_channels):
        super(AttentionBlock, self).__init__()
        # Clamp to at least one channel so narrow blocks (out_channels < 8)
        # do not end up with zero-width projections.
        qk_channels = max(1, out_channels // 8)
        value_channels = max(1, out_channels // 2)
        self.conv = nn.Conv2d(in_channels, out_channels, 1)
        self.bn = nn.BatchNorm2d(out_channels)
        self.theta = nn.Conv2d(out_channels, qk_channels, 1)
        self.phi = nn.Conv2d(out_channels, qk_channels, 1)
        self.g = nn.Conv2d(out_channels, value_channels, 1)
        self.W = nn.Conv2d(value_channels, out_channels, 1)

    def forward(self, x):
        """Return a tensor of shape (B, out_channels, H, W) for x of (B, in_channels, H, W)."""
        h = F.relu(self.bn(self.conv(x)))
        batch, _, height, width = h.shape

        theta = self.theta(h).flatten(2).transpose(1, 2)  # (B, H*W, Cqk)
        phi = self.phi(h)
        g = self.g(h)
        # A 2x2 pool needs at least 2 pixels per side; skip it on tiny maps.
        if height >= 2 and width >= 2:
            phi = F.max_pool2d(phi, [2, 2])
            g = F.max_pool2d(g, [2, 2])
        phi = phi.flatten(2)                  # (B, Cqk, N)
        g = g.flatten(2).transpose(1, 2)      # (B, N, Cv)

        # One attention distribution over the N key positions per query pixel.
        f = F.softmax(torch.matmul(theta, phi), dim=-1)  # (B, H*W, N)
        out = torch.matmul(f, g)                         # (B, H*W, Cv)
        out = out.transpose(1, 2).reshape(batch, -1, height, width)
        return self.W(out) + h


class AttentionUNet(nn.Module):
    """U-Net style encoder/decoder whose decoder stages are attention blocks.

    Encoder: a two-convolution stem followed by four stages, each a 2x2
    max-pool and a ``ConvBlock`` that doubles the channel count.
    Decoder: four stages, each an ``AttentionBlock`` that halves the channel
    count, a bilinear upsample to the matching encoder resolution, and a
    concatenation with that encoder feature map (skip connection).
    A final 1x1 convolution produces ``out_channels`` logits per pixel.

    The input height and width must be at least 16 so that four 2x2 pools
    are possible. Attention memory grows with (H*W) * (H*W / 4) at each
    decoder stage, so large inputs are expensive.

    Args:
        in_channels: channels of the input image.
        out_channels: channels of the output map.
        init_features: channel width of the stem; deeper stages scale from it.
    """

    def __init__(self, in_channels=3, out_channels=1, init_features=32):
        super(AttentionUNet, self).__init__()
        self.downsamples = nn.ModuleList([])
        self.upsamples = nn.ModuleList([])
        features = init_features

        self.conv1 = nn.Conv2d(in_channels, features, 3, padding=1)
        self.bn1 = nn.BatchNorm2d(features)
        self.conv2 = nn.Conv2d(features, features, 3, padding=1)
        self.bn2 = nn.BatchNorm2d(features)

        for _ in range(4):
            self.downsamples.append(ConvBlock(features, features * 2))
            features = features * 2

        self.bridge = ConvBlock(features, features)

        # The first decoder stage sees the bridge output; every later stage
        # sees the previous stage's output concatenated with a skip of equal
        # width, i.e. twice the current feature count.
        stage_in = features
        for _ in range(4):
            self.upsamples.append(AttentionBlock(stage_in, features // 2))
            features = features // 2
            stage_in = features * 2

        # Last decoder output (init_features) + stem skip (init_features).
        self.conv3 = nn.Conv2d(features * 2, out_channels, 1)

    def forward(self, x):
        """Return logits of shape (B, out_channels, H, W) for x of (B, in_channels, H, W)."""
        out = F.relu(self.bn1(self.conv1(x)))
        out = F.relu(self.bn2(self.conv2(out)))
        residuals = [out]

        for downsample in self.downsamples:
            out = downsample(F.max_pool2d(out, 2))
            residuals.append(out)

        # The deepest encoder map feeds the bridge directly; it is not a skip.
        residuals.pop()
        out = self.bridge(out)

        for upsample in self.upsamples:
            skip = residuals.pop()
            out = upsample(out)
            # Resize to the skip's exact size so odd input sizes still line up.
            out = F.interpolate(out, size=skip.shape[-2:], mode='bilinear', align_corners=True)
            out = torch.cat([skip, out], dim=1)

        return self.conv3(out)


if __name__ == '__main__':
    model = AttentionUNet(in_channels=3, out_channels=1, init_features=32)
    print(model)
二、AttentionUNet代码
从上面的示例可以看出,AttentionUNet模型由若干个卷积块和注意力块组成:编码器部分在卷积块之间进行下采样,逐步扩大感受野并提取高层语义特征;解码器部分通过注意力块和上采样逐步恢复空间分辨率,最终得到与输入图像大小一致的分割结果。
注意力块在这里起到了非常重要的作用,能够专注于有用的特征,从而帮助网络更快地学习到有意义的信息。
代码中的模型结构在训练医学图像分割模型时特别有用,下面是数据准备和模型训练的示例代码:
from torch.utils.data import DataLoader
from torchvision import transforms

# Augmentation for training: random flips, then conversion to a tensor.
train_transforms = transforms.Compose([
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomVerticalFlip(p=0.5),
    transforms.ToTensor(),
])
# Validation only converts to a tensor.
val_transforms = transforms.Compose([
    transforms.ToTensor(),
])

train_data = MedicalImageSegmentationDataset(data_dir='train', transforms=train_transforms)
val_data = MedicalImageSegmentationDataset(data_dir='val', transforms=val_transforms)

train_loader = DataLoader(train_data, batch_size=4, shuffle=True, num_workers=2)
val_loader = DataLoader(val_data, batch_size=1, shuffle=False, num_workers=1)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = AttentionUNet(in_channels=3, out_channels=1, init_features=32).to(device)

criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

for epoch in range(10):
    # ---- training pass ----
    model.train()
    train_loss = 0
    for batch in train_loader:
        inputs = batch['input'].to(device)
        labels = batch['label'].to(device)

        optimizer.zero_grad()
        loss = criterion(model(inputs), labels)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
    train_loss = train_loss / len(train_loader)

    # ---- validation pass (no gradients) ----
    model.eval()
    val_loss = 0
    with torch.no_grad():
        for batch in val_loader:
            inputs = batch['input'].to(device)
            labels = batch['label'].to(device)
            val_loss += criterion(model(inputs), labels).item()
    val_loss = val_loss / len(val_loader)

    print(f'Epoch {epoch + 1}, Training Loss: {train_loss:.4f}, Validation Loss: {val_loss:.4f}')
三、AttentionUNet参数量
AttentionUNet相对于传统的U-Net模型来说有更多的参数,但是这些参数是经过仔细设计的,能够帮助网络更好地学习长距离依赖关系。注意力机制虽然会带来一定的额外计算,但它可以抑制无关区域的响应、突出目标区域,使得AttentionUNet在医学图像分割方面的实际表现更加出色。
AttentionUNet网络的总参数量随着模型深度的增加而逐渐增加,但是相比于其他一些现有的医学图像分割方法,AttentionUNet的参数量并不是非常大,训练也可以在合理的时间内完成。
下面是获取AttentionUNet模型的总参数量的代码:
from torchsummary import summary

# Pick the device first, then build the model directly on it.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = AttentionUNet(in_channels=3, out_channels=1, init_features=32)
model = model.to(device)

# Print the per-layer summary for a 3-channel 256x256 input.
summary(model, (3, 256, 256))
四、AttentionUNet中的注意力选取
AttentionUNet中的注意力选取是通过一系列的卷积操作来实现的,这个过程被称为自注意力机制。自注意力机制可以帮助网络专注于有用的特征,从而加速模型的学习过程。在AttentionUNet中,自注意力机制被应用于每个注意力块中,以选择最有用的特征并将其提供给下一层。
下面是在AttentionBlock中实现注意力选取的代码:
class AttentionBlock(nn.Module):
    """Self-attention block with a residual connection.

    A 1x1 convolution first maps the input to ``out_channels``. Query
    (``theta``), key (``phi``) and value (``g``) projections are then
    computed with 1x1 convolutions; keys and values are 2x2 max-pooled to
    shrink the attention matrix. The attended values are projected back to
    ``out_channels`` by ``W`` and added to the projected input.

    Args:
        in_channels: channels of the incoming feature map.
        out_channels: channels of the returned feature map.
    """

    def __init__(self, in_channels, out_channels):
        super(AttentionBlock, self).__init__()
        # Clamp to at least one channel so narrow blocks (out_channels < 8)
        # do not end up with zero-width projections.
        qk_channels = max(1, out_channels // 8)
        value_channels = max(1, out_channels // 2)
        self.conv = nn.Conv2d(in_channels, out_channels, 1)
        self.bn = nn.BatchNorm2d(out_channels)
        self.theta = nn.Conv2d(out_channels, qk_channels, 1)
        self.phi = nn.Conv2d(out_channels, qk_channels, 1)
        self.g = nn.Conv2d(out_channels, value_channels, 1)
        self.W = nn.Conv2d(value_channels, out_channels, 1)

    def forward(self, x):
        """Return a tensor of shape (B, out_channels, H, W) for x of (B, in_channels, H, W)."""
        h = F.relu(self.bn(self.conv(x)))
        batch, _, height, width = h.shape

        theta = self.theta(h).flatten(2).transpose(1, 2)  # (B, H*W, Cqk)
        phi = self.phi(h)
        g = self.g(h)
        # A 2x2 pool needs at least 2 pixels per side; skip it on tiny maps.
        if height >= 2 and width >= 2:
            phi = F.max_pool2d(phi, [2, 2])
            g = F.max_pool2d(g, [2, 2])
        phi = phi.flatten(2)                  # (B, Cqk, N)
        # Values must be (B, N, Cv) so that the (B, H*W, N) weights can
        # multiply them; the original left g as (B, Cv, N).
        g = g.flatten(2).transpose(1, 2)

        # One attention distribution over the N key positions per query pixel.
        f = F.softmax(torch.matmul(theta, phi), dim=-1)  # (B, H*W, N)
        out = torch.matmul(f, g)                         # (B, H*W, Cv)
        # Channel count is taken from the tensor itself; nn.Conv2d has no
        # .size() to query.
        out = out.transpose(1, 2).reshape(batch, -1, height, width)
        return self.W(out) + h
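通过上面的代码,我们可以很清楚地看到注意力选取是如何在AttentionBlock中实现的。具体来说,它使用 theta、phi、g 三个 1×1 卷积分别生成查询(query)、键(key)和值(value):theta 与 phi 的乘积经过 softmax 后得到每个像素点对其他位置的注意力权重,再用这些权重对 g 的输出加权求和,最后通过 W 卷积并与输入特征相加(残差连接)。这样,网络就能把有价值的信息传递给下一层,从而更好地分割医学图像。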