Luisgust committed on
Commit
4b4ac07
·
verified ·
1 Parent(s): c6d57da

Create vtoonify/model/encoder/psp.py

Browse files
Files changed (1) hide show
  1. vtoonify/model/encoder/psp.py +127 -0
vtoonify/model/encoder/psp.py ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ """
3
+ This file defines the core research contribution
4
+ """
5
+ import matplotlib
6
+ matplotlib.use('Agg')
7
+ import math
8
+
9
+ import torch
10
+ from torch import nn
11
+ from model.encoder.encoders import psp_encoders
12
+ from model.stylegan.model import Generator
13
+
14
def get_keys(d, name):
    """Return the entries of a state dict that live under the ``name.`` prefix.

    If ``d`` has a ``'state_dict'`` entry, that nested mapping is searched
    instead of ``d`` itself. The ``name.`` prefix is stripped from every
    returned key; ``d`` is not modified.

    Only keys that start with ``name`` followed by a dot are selected, so a
    sibling prefix such as ``encoder_extra.`` is not mistaken for ``encoder.``
    (a bare ``startswith(name)`` test would pick it up and yield a mangled
    key).

    Args:
        d: mapping of parameter names to values, optionally wrapped as
            ``{'state_dict': {...}}``.
        name: sub-module prefix to extract, without the trailing dot.

    Returns:
        A new dict mapping the un-prefixed keys to their original values.
    """
    if 'state_dict' in d:
        d = d['state_dict']
    prefix = name + '.'
    offset = len(prefix)
    return {key[offset:]: value for key, value in d.items() if key.startswith(prefix)}
19
+
20
+
21
class pSp(nn.Module):
    """Encoder/decoder pair: a ``psp_encoders`` encoder feeding a StyleGAN ``Generator``.

    ``opts`` is stored as-is (and gains an ``n_styles`` attribute). It must
    provide ``output_size``, ``encoder_type``, ``checkpoint_path``,
    ``start_from_latent_avg`` and ``learn_in_w``; ``device`` is read as well
    when a loaded checkpoint carries a ``latent_avg`` entry.
    """

    # Encoder class names accepted in ``opts.encoder_type``.
    _ENCODER_TYPES = (
        'GradualStyleEncoder',
        'BackboneEncoderUsingLastLayerIntoW',
        'BackboneEncoderUsingLastLayerIntoWPlus',
    )

    def __init__(self, opts):
        """Build encoder, decoder and pooling layer, then load weights if a checkpoint is set."""
        super().__init__()
        self.set_opts(opts)
        # Number of style inputs derived from the output resolution.
        # math.log2 is exact for powers of two, unlike math.log(x, 2).
        self.opts.n_styles = int(math.log2(self.opts.output_size)) * 2 - 2
        # Define architecture
        self.encoder = self.set_encoder()
        self.decoder = Generator(self.opts.output_size, 512, 8)
        self.face_pool = torch.nn.AdaptiveAvgPool2d((256, 256))
        # Load weights if needed
        self.load_weights()

    def set_encoder(self):
        """Instantiate the encoder class named by ``opts.encoder_type``.

        Returns:
            The encoder built as ``EncoderClass(50, 'ir_se', self.opts)``.

        Raises:
            ValueError: if ``opts.encoder_type`` is not a supported name.
                ``ValueError`` is an ``Exception`` subclass, so handlers
                written for the previous bare ``Exception`` still catch it.
        """
        encoder_type = self.opts.encoder_type
        if encoder_type not in self._ENCODER_TYPES:
            raise ValueError('{} is not a valid encoders'.format(encoder_type))
        encoder_cls = getattr(psp_encoders, encoder_type)
        return encoder_cls(50, 'ir_se', self.opts)

    def load_weights(self):
        """Load encoder/decoder weights and the latent average from ``opts.checkpoint_path``.

        When no checkpoint path is configured nothing is loaded and
        ``latent_avg`` is set to ``None`` so the attribute always exists.
        """
        checkpoint_path = self.opts.checkpoint_path
        if checkpoint_path is None:
            # Loading of separately pretrained encoder/decoder weights is
            # disabled in this file; just make sure the attribute is defined.
            self.latent_avg = None
            return

        print('Loading pSp from checkpoint: {}'.format(checkpoint_path))
        # NOTE(security): torch.load unpickles the file; only load checkpoints
        # from trusted sources.
        ckpt = torch.load(checkpoint_path, map_location='cpu')
        self.encoder.load_state_dict(get_keys(ckpt, 'encoder'), strict=True)
        self.decoder.load_state_dict(get_keys(ckpt, 'decoder'), strict=True)
        self.__load_latent_avg(ckpt)

    def forward(self, x, resize=True, latent_mask=None, input_code=False, randomize_noise=True,
                inject_latent=None, return_latents=False, alpha=None, z_plus_latent=False, return_z_plus_latent=True):
        """Encode ``x`` (or take it as codes) and decode it with the generator.

        Args:
            x: encoder input, or the style codes themselves when
                ``input_code`` is true. This tensor is never modified.
            resize: if true, pass the decoded images through ``face_pool``
                (adaptive average pooling to 256x256).
            latent_mask: optional iterable of indices along dim 1 of the
                codes to overwrite (see ``inject_latent``).
            input_code: treat ``x`` as codes and skip the encoder and the
                latent-average offset.
            randomize_noise: forwarded to the decoder.
            inject_latent: optional tensor indexed like the codes. Masked
                entries are replaced by it, or blended with it when
                ``alpha`` is given. Without it, masked entries are zeroed.
            return_latents: also return a latent tensor (see Returns).
            alpha: blend weight for ``inject_latent``; ``None`` means full
                replacement.
            z_plus_latent: forwarded to the decoder; also forces
                ``input_is_latent=False``.
            return_z_plus_latent: with ``return_latents`` and
                ``z_plus_latent`` both true, return the codes fed to the
                decoder instead of the decoder's own latent output.

        Returns:
            ``images`` alone, or ``(images, latents)`` when
            ``return_latents`` is true.

        Raises:
            AttributeError: if ``opts.start_from_latent_avg`` is set but no
                latent average was loaded.
        """
        if input_code:
            codes = x
        else:
            codes = self.encoder(x)
            # normalize with respect to the center of an average face
            if self.opts.start_from_latent_avg:
                if self.latent_avg is None:
                    raise AttributeError(
                        'latent_avg is not loaded but opts.start_from_latent_avg is set')
                batch_size = codes.shape[0]
                if self.opts.learn_in_w:
                    codes = codes + self.latent_avg.repeat(batch_size, 1)
                else:
                    codes = codes + self.latent_avg.repeat(batch_size, 1, 1)

        if latent_mask is not None:
            # Work on a copy: with input_code=True ``codes`` is the caller's
            # tensor and the indexed writes below would modify it in place.
            codes = codes.clone()
            for i in latent_mask:
                if inject_latent is None:
                    codes[:, i] = 0
                elif alpha is None:
                    codes[:, i] = inject_latent[:, i]
                else:
                    codes[:, i] = alpha * inject_latent[:, i] + (1 - alpha) * codes[:, i]

        # Encoder output is passed as a latent unless z+ mode is requested;
        # externally supplied codes are never flagged as latent.
        input_is_latent = (not input_code) and (not z_plus_latent)
        images, result_latent = self.decoder([codes],
                                             input_is_latent=input_is_latent,
                                             randomize_noise=randomize_noise,
                                             return_latents=return_latents,
                                             z_plus_latent=z_plus_latent)

        if resize:
            images = self.face_pool(images)

        if not return_latents:
            return images
        if z_plus_latent and return_z_plus_latent:
            return images, codes
        return images, result_latent

    def set_opts(self, opts):
        """Store the options object on the module (no copy is made)."""
        self.opts = opts

    def __load_latent_avg(self, ckpt, repeat=None):
        """Set ``self.latent_avg`` from ``ckpt['latent_avg']``, or ``None`` if absent.

        The tensor is moved to ``opts.device`` and, when ``repeat`` is given,
        tiled ``repeat`` times along its first dimension.
        """
        if 'latent_avg' not in ckpt:
            self.latent_avg = None
            return
        latent_avg = ckpt['latent_avg'].to(self.opts.device)
        if repeat is not None:
            latent_avg = latent_avg.repeat(repeat, 1)
        self.latent_avg = latent_avg
127
+