1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71
class MoCo(nn.Module):
    """MoCo v1: Momentum Contrast for unsupervised representation learning.

    Maintains a query branch trained by backprop and a key branch updated as
    an exponential moving average (EMA) of the query branch, plus a fixed-size
    FIFO queue of past key embeddings used as negatives for InfoNCE.

    Args:
        base_encoder: backbone module for the query branch. Its class must be
            constructible as ``base_encoder.__class__(pretrained=False)`` to
            build the key branch, and is assumed to output 2048-d features
            (e.g. ResNet-50) -- TODO confirm against the actual backbone.
        projection_dim: dimensionality of the projected embeddings.
        queue_size: number of negative keys kept in the queue.
        momentum: EMA coefficient for the key-branch update.
        temperature: softmax temperature for the InfoNCE logits
            (previously hard-coded to 0.07; default preserves old behavior).
    """

    def __init__(self, base_encoder, projection_dim=128, queue_size=65536,
                 momentum=0.999, temperature=0.07):
        super().__init__()
        self.queue_size = queue_size
        self.momentum = momentum
        self.temperature = temperature

        # Query branch (trained by gradient) and key branch (EMA only).
        self.encoder_q = base_encoder
        self.projection_q = nn.Linear(2048, projection_dim)
        self.encoder_k = base_encoder.__class__(pretrained=False)
        self.projection_k = nn.Linear(2048, projection_dim)

        # Initialize the key branch as an exact copy of the query branch and
        # freeze it: it must only ever change via the momentum update.
        # BUG FIX: the original copied only the encoder weights, leaving
        # projection_k randomly initialized, and never set requires_grad=False
        # on the key parameters.
        for param_q, param_k in zip(self._query_params(), self._key_params()):
            param_k.data.copy_(param_q.data)
            param_k.requires_grad = False

        # Negative-key queue, stored row-wise as (queue_size, projection_dim).
        # Normalized so untouched initial entries have the same scale as real
        # (L2-normalized) keys.
        self.register_buffer(
            'queue', F.normalize(torch.randn(queue_size, projection_dim), dim=1))
        self.register_buffer('queue_ptr', torch.zeros(1, dtype=torch.long))

    def _query_params(self):
        """Yield all query-branch parameters (encoder, then projection head)."""
        yield from self.encoder_q.parameters()
        yield from self.projection_q.parameters()

    def _key_params(self):
        """Yield all key-branch parameters (encoder, then projection head)."""
        yield from self.encoder_k.parameters()
        yield from self.projection_k.parameters()

    @torch.no_grad()
    def _momentum_update_key_encoder(self):
        """EMA update of the key branch: k <- m * k + (1 - m) * q.

        BUG FIX: the original updated only the encoder parameters; the key
        projection head is now included so it tracks projection_q.
        """
        m = self.momentum
        for param_q, param_k in zip(self._query_params(), self._key_params()):
            param_k.data.mul_(m).add_(param_q.data, alpha=1.0 - m)

    @torch.no_grad()
    def _dequeue_and_enqueue(self, keys):
        """Enqueue the newest keys, overwriting the oldest queue entries.

        BUG FIX: handles batches that wrap past the end of the queue; the
        original slice assignment raised a size-mismatch error unless
        queue_size was an exact multiple of the batch size.
        """
        batch_size = keys.shape[0]
        ptr = int(self.queue_ptr)
        if ptr + batch_size <= self.queue_size:
            self.queue[ptr:ptr + batch_size] = keys
        else:
            tail = self.queue_size - ptr
            self.queue[ptr:] = keys[:tail]
            self.queue[:batch_size - tail] = keys[tail:]
        self.queue_ptr[0] = (ptr + batch_size) % self.queue_size

    def forward(self, im_q, im_k):
        """Compute the InfoNCE loss for a batch of query/key image pairs.

        Args:
            im_q: query-view batch fed to the query branch.
            im_k: key-view batch fed to the (no-grad) key branch.

        Returns:
            Scalar contrastive loss: positives are the matched (q, k) pairs,
            negatives are the current queue contents.
        """
        q = F.normalize(self.projection_q(self.encoder_q(im_q)), dim=1)

        with torch.no_grad():
            self._momentum_update_key_encoder()
            k = F.normalize(self.projection_k(self.encoder_k(im_k)), dim=1)

        # Positive logits: (N, 1). Negative logits against the queue: (N, K).
        l_pos = torch.einsum('nc,nc->n', [q, k]).unsqueeze(-1)
        # BUG FIX: the queue buffer is (K, C); the original subscripts
        # 'nc,ck->nk' required a (C, K) layout and failed unless K == C.
        l_neg = torch.einsum('nc,kc->nk', [q, self.queue.clone().detach()])

        logits = torch.cat([l_pos, l_neg], dim=1) / self.temperature
        # The positive is always at column 0, hence all-zero labels.
        labels = torch.zeros(logits.shape[0], dtype=torch.long,
                             device=logits.device)
        loss = F.cross_entropy(logits, labels)

        self._dequeue_and_enqueue(k)
        return loss
|