| | import torch |
| | import torch.nn as nn |
| | from transformers import PreTrainedModel |
| | from .configuration_pixel import TopAIImageConfig |
| |
|
class ResidualBlock(nn.Module):
    """Residual block: two 3x3 conv + batch-norm stages with a skip connection.

    Spatial size and channel count are unchanged, so the input can be added
    directly to the branch output (classic identity shortcut).
    """

    def __init__(self, channels):
        super().__init__()
        # Layer order matters: it fixes the state_dict keys (block.0 .. block.4)
        # and the parameter initialization order.
        stages = [
            nn.Conv2d(channels, channels, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(channels, channels, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(channels),
        ]
        self.block = nn.Sequential(*stages)

    def forward(self, x):
        # Identity shortcut around the convolutional branch.
        residual = self.block(x)
        return residual + x
| |
|
class TopAIImageGenerator(PreTrainedModel):
    """Convolutional image decoder conditioned on a text embedding.

    Projects a text embedding onto a 4x4 feature grid, then upsamples it
    through transposed-convolution stages (4 -> 8 -> 16 -> 32 -> 64 -> 128
    spatial) before a final 3x3 conv + tanh producing pixel values in [-1, 1].
    """

    config_class = TopAIImageConfig

    all_tied_weights_keys = {}

    def __init__(self, config):
        super().__init__(config)

        hidden = config.hidden_dim

        # Map the embedding to hidden_dim channels on a 4x4 starting grid.
        self.text_projection = nn.Linear(config.input_dim, 4 * 4 * hidden)

        # Module order is fixed: it determines both the checkpoint keys
        # (decoder.0 .. decoder.10) and the parameter init order.
        stages = [
            self._upsample(hidden, hidden),          # 4x4   -> 8x8
            ResidualBlock(hidden),
            self._upsample(hidden, 256),             # 8x8   -> 16x16
            ResidualBlock(256),
            self._upsample(256, 128),                # 16x16 -> 32x32
            self._upsample(128, 64),                 # 32x32 -> 64x64
            nn.ConvTranspose2d(64, 32, 4, 2, 1, bias=False),  # 64x64 -> 128x128
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
            nn.Conv2d(32, config.image_channels, kernel_size=3, padding=1),
            nn.Tanh(),                               # pixel range [-1, 1]
        ]
        self.decoder = nn.Sequential(*stages)

    def _upsample(self, in_channels, out_channels):
        """One 2x upsampling stage: transposed conv + batch norm + ReLU."""
        return nn.Sequential(
            nn.ConvTranspose2d(in_channels, out_channels, 4, 2, 1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
        )

    def forward(self, text_embeddings):
        """Decode text embeddings of shape (batch, input_dim) into images."""
        grid = self.text_projection(text_embeddings)
        # Reshape the flat projection into a (batch, hidden_dim, 4, 4) grid.
        grid = grid.view(-1, self.config.hidden_dim, 4, 4)
        return self.decoder(grid)