jinaai
/

jina-bert-flash-implementation

Markus28 committed on Mar 25, 2024

Commit

ad76444

1 Parent(s): e55e319

feat: for converting v2, added lines to save model weights and print config

Files changed (1) hide show

convert_v2_weights.py CHANGED Viewed

@@ -131,6 +131,12 @@ new_state_dict = remap_state_dict(state_dict, config)
 flash_model = BertModel(config)
 flash_model.load_state_dict(new_state_dict)
 tokenizer = AutoTokenizer.from_pretrained('jinaai/jina-embeddings-v2-base-en')
 inp = tokenizer.batch_encode_plus(['Hello world', 'How is the weather today?', 'It is raining a lot in  Berlin'], return_tensors='pt', padding=True).to('cuda')
 v2_model.eval()
@@ -141,4 +147,5 @@ output_v2 = v2_model(**inp)
 output_flash = flash_model(**inp)
 x = output_v2.last_hidden_state
 y = output_flash.last_hidden_state
-print(torch.abs(x - y))

 flash_model = BertModel(config)
 flash_model.load_state_dict(new_state_dict)
+torch.save(new_state_dict, 'converted_weights.bin')
+print(config.to_json_string())
+"""
 tokenizer = AutoTokenizer.from_pretrained('jinaai/jina-embeddings-v2-base-en')
 inp = tokenizer.batch_encode_plus(['Hello world', 'How is the weather today?', 'It is raining a lot in  Berlin'], return_tensors='pt', padding=True).to('cuda')
 v2_model.eval()
 output_flash = flash_model(**inp)
 x = output_v2.last_hidden_state
 y = output_flash.last_hidden_state
+print(torch.abs(x - y))
+"""