diff --git a/extensions-builtin/Lora/network_oft.py b/extensions-builtin/Lora/network_oft.py index e0672ba6d..e462ccb1b 100644 --- a/extensions-builtin/Lora/network_oft.py +++ b/extensions-builtin/Lora/network_oft.py @@ -1,6 +1,5 @@ import torch import network -from modules import devices class ModuleTypeOFT(network.ModuleType): @@ -31,33 +30,24 @@ class NetworkModuleOFT(network.NetworkModule): self.org_module: list[torch.Module] = [self.sd_module] self.org_weight = self.org_module[0].weight.to(self.org_module[0].weight.device, copy=True) - #self.org_weight = self.org_module[0].weight.to(devices.cpu, copy=True) + init_multiplier = self.multiplier() * self.calc_scale() self.last_multiplier = init_multiplier + self.R = self.get_weight(self.oft_blocks, init_multiplier) self.merged_weight = self.merge_weight() self.apply_to() self.merged = False - # weights_backup = getattr(self.org_module[0], 'network_weights_backup', None) - # if weights_backup is None: - # self.org_module[0].network_weights_backup = self.org_weight - - def merge_weight(self): - #org_sd = self.org_module[0].state_dict() R = self.R.to(self.org_weight.device, dtype=self.org_weight.dtype) if self.org_weight.dim() == 4: weight = torch.einsum("oihw, op -> pihw", self.org_weight, R) else: weight = torch.einsum("oi, op -> pi", self.org_weight, R) - #org_sd['weight'] = weight - # replace weight - #self.org_module[0].load_state_dict(org_sd) return weight - pass - + def replace_weight(self, new_weight): org_sd = self.org_module[0].state_dict() org_sd['weight'] = new_weight @@ -70,9 +60,7 @@ class NetworkModuleOFT(network.NetworkModule): self.org_module[0].load_state_dict(org_sd) self.merged = False - - # replace forward method of original linear rather than replacing the module - # how do we revert this to unload the weights? + # FIXME: hook forward method of original linear, but how do we undo the hook when we are done? def apply_to(self): self.org_forward = self.org_module[0].forward #self.org_module[0].forward = self.forward @@ -90,82 +78,26 @@ class NetworkModuleOFT(network.NetworkModule): block_R = torch.matmul(m_I + block_Q, (m_I - block_Q).inverse()) block_R_weighted = multiplier * block_R + (1 - multiplier) * m_I R = torch.block_diag(*block_R_weighted) - #R = torch.block_diag(*block_R) return R def calc_updown(self, orig_weight): - #oft_blocks = self.oft_blocks.to(orig_weight.device, dtype=orig_weight.dtype) - - #R = self.R.to(orig_weight.device, dtype=orig_weight.dtype) - ##self.R = R - - #R = self.R.to(orig_weight.device, dtype=orig_weight.dtype) - ##self.R = R - #if orig_weight.dim() == 4: - # weight = torch.einsum("oihw, op -> pihw", orig_weight, R) - #else: - # weight = torch.einsum("oi, op -> pi", orig_weight, R) - - #updown = orig_weight @ R - #updown = weight updown = torch.zeros_like(orig_weight, device=orig_weight.device, dtype=orig_weight.dtype) - #updown = orig_weight output_shape = orig_weight.shape orig_weight = self.merged_weight.to(orig_weight.device, dtype=orig_weight.dtype) #output_shape = self.oft_blocks.shape return self.finalize_updown(updown, orig_weight, output_shape) - + def pre_forward_hook(self, module, input): multiplier = self.multiplier() * self.calc_scale() - if not multiplier==self.last_multiplier or not self.merged: - #if multiplier != self.last_multiplier or not self.merged: + if not multiplier==self.last_multiplier or not self.merged: self.R = self.get_weight(self.oft_blocks, multiplier) self.last_multiplier = multiplier self.merged_weight = self.merge_weight() self.replace_weight(self.merged_weight) - #elif not self.merged: - # self.replace_weight(self.merged_weight) - + def forward_hook(self, module, args, output): pass - #output = output * self.multiplier() * self.calc_scale() - #if len(args) > 0: - # y = args[0] - # output = output + y - #return output - #if self.merged: - # pass - #self.restore_weight() - #print(f'Forward hook in {self.network_key} called') - - #x = output - #R = self.R.to(x.device, dtype=x.dtype) - - #if x.dim() == 4: - # x = x.permute(0, 2, 3, 1) - # x = torch.matmul(x, R) - # x = x.permute(0, 3, 1, 2) - #else: - # x = torch.matmul(x, R) - #return x - - # def forward(self, x, y=None): - # x = self.org_forward(x) - # if self.multiplier() == 0.0: - # return x - - # # calculating R here is excruciatingly slow - # #R = self.get_weight().to(x.device, dtype=x.dtype) - # R = self.R.to(x.device, dtype=x.dtype) - - # if x.dim() == 4: - # x = x.permute(0, 2, 3, 1) - # x = torch.matmul(x, R) - # x = x.permute(0, 3, 1, 2) - # else: - # x = torch.matmul(x, R) - # return x