---
license: mit
base_model:
- stabilityai/sdxl-turbo
language:
- hi
- bn
- as
- gu
- kn
- ml
- mr
- ne
- or
- pa
- sa
- ta
- te
- ur
- ks
- es
- fr
- ja
- zh
- tr
- de
- ar
- pt
- ru
- vi
- it
- ko
---

**Use with the Stable Diffusion Pipeline**

```python
# Example: generate an image from a Hindi prompt by swapping a multilingual
# tokenizer and a language-specific fine-tuned text encoder into SDXL-Turbo.
import torch
from diffusers import AutoPipelineForText2Image
from transformers import CLIPTokenizer, CLIPTextModel

device = "cuda" if torch.cuda.is_available() else "cpu"
lang = "hin_Deva"  # Hindi

# Load pipeline
pipe = AutoPipelineForText2Image.from_pretrained("stabilityai/sdxl-turbo")

# Load the multilingual tokenizer
tokenizer = CLIPTokenizer.from_pretrained("tokenizers/multilingual")
pipe.tokenizer = tokenizer
# Resize the embedding table to the new vocabulary so the config used below
# to build the replacement encoder matches the tokenizer size.
pipe.text_encoder.resize_token_embeddings(len(tokenizer))

# Load the fine-tuned text encoder
# map_location keeps the load working on CPU-only hosts even when the
# checkpoint was saved with CUDA tensors.
# NOTE(review): torch.load unpickles the file; only load checkpoints from a
# trusted source.
state_dict = torch.load(
    f"models/{lang}/{lang}_text_encoder.pth",
    map_location=device,
)
new_text_encoder = CLIPTextModel(config=pipe.text_encoder.config)
new_text_encoder.load_state_dict(state_dict)
# A freshly constructed module starts in training mode; switch to inference.
new_text_encoder.eval()
new_text_encoder = new_text_encoder.to(device)
pipe.text_encoder = new_text_encoder
pipe = pipe.to(device)

# Generate and save image
# NOTE(review): the SDXL-Turbo model card recommends num_inference_steps=1 and
# guidance_scale=0.0; the pipeline defaults are kept here -- confirm which
# settings the fine-tuned encoders were evaluated with.
caption = "गाँव का शांतिपूर्ण दृश्य|"
image = pipe(caption).images[0]
image.save("example.png")