#!/usr/bin/env python3
# Example usage for videoloc/seamless-langpairs
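# This script downloads the repository's custom modeling code and data collator
# from the Hugging Face Hub, builds the Time To Edit (TTE) regression model,
# and runs a single example through it. The first run requires network access
# to fetch the files from the Hub.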
from huggingface_hub import hf_hub_download
import torch
import numpy as np
import importlib.util
def load_model_and_collator():
    # Load model - the custom architecture requires importing the model class
    # from the repository before calling from_pretrained
    model_file = hf_hub_download(repo_id="videoloc/seamless-langpairs", filename="modeling_seamless_langpairs.py")
    spec = importlib.util.spec_from_file_location("modeling_seamless_langpairs", model_file)
    modeling_module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(modeling_module)

    # Now load the config and model using the custom classes
    config = modeling_module.SeamlessLanguagePairsConfig.from_pretrained("videoloc/seamless-langpairs")
    model = modeling_module.HFSeamlessLanguagePairs.from_pretrained("videoloc/seamless-langpairs")

    # Load the data collator the same way
    collator_file = hf_hub_download(repo_id="videoloc/seamless-langpairs", filename="data_collator.py")
    spec = importlib.util.spec_from_file_location("data_collator", collator_file)
    collator_module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(collator_module)
    data_collator = collator_module.DataCollatorSimpleSeamless(
        processor="facebook/hf-seamless-m4t-medium",
        max_audio_length_sec=8.0,
        max_text_length=256,
    )
    return model, data_collator
def example_inference():
    model, collator = load_model_and_collator()

    # Example data with translation and language-pair awareness
    data = [{
        'raw_audio': np.random.randn(16000 * 3),  # 3 seconds of audio at 16 kHz
        'raw_text': "Example subtitle text for TTE prediction",
        'is_translation': 1,    # 1 for translated content, 0 for original
        'language_pair_id': 5,  # 0-20 for specific language pairs
    }]

    batch = collator(data)
    model.eval()
    with torch.no_grad():
        outputs = model(**batch)

    tte_prediction = outputs.logits.item()
    print(f"Predicted Time To Edit (TTE): {tte_prediction:.2f} seconds")
    return tte_prediction
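# Hypothetical batched variant (not part of the original example): the collator
# is called on a list of examples above, so it presumably accepts several
# subtitle segments at once and pads audio up to max_audio_length_sec. Treat
# this as a sketch under that assumption, not documented API behavior; the
# output shape handling below is likewise an assumption.
def example_batched_inference():
    model, collator = load_model_and_collator()
    segments = [
        {
            'raw_audio': np.random.randn(16000 * 2),  # 2 seconds of audio at 16 kHz
            'raw_text': "First subtitle segment",
            'is_translation': 0,    # original-language content
            'language_pair_id': 0,
        },
        {
            'raw_audio': np.random.randn(16000 * 4),  # 4 seconds of audio at 16 kHz
            'raw_text': "Second subtitle segment",
            'is_translation': 1,    # translated content
            'language_pair_id': 5,
        },
    ]

    batch = collator(segments)
    model.eval()
    with torch.no_grad():
        outputs = model(**batch)

    # One TTE prediction per segment (assumes logits of shape [batch] or [batch, 1])
    predictions = outputs.logits.squeeze(-1).tolist()
    for segment, tte in zip(segments, predictions):
        print(f"{segment['raw_text']!r}: predicted TTE {tte:.2f} seconds")
    return predictions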
if __name__ == "__main__":
    example_inference()