gijs
/

audsemthinker-qa

qwen2_5_omni_thinker

auditory-semantics

Model card · Files and versions

gijs committed 30 days ago

Commit

c640e4a

·

verified ·

1 Parent(s): 2d3f2ac

Update README.md

Files changed (1) hide show

README.md +9 -11

README.md CHANGED Viewed

@@ -24,30 +24,28 @@ To use `AudSemThinker-QA` for audio question answering, you can load it using th
 ```python
 import soundfile as sf
-from transformers import Qwen2_5OmniForConditionalGeneration, Qwen2_5OmniProcessor
 from qwen_omni_utils import process_mm_info
 import torchaudio
 # default: Load the model on the available device(s)
-model = Qwen2_5OmniForConditionalGeneration.from_pretrained(
-    "gijs/audsemthinker-qa",
     torch_dtype="auto",
     device_map="auto",
-    trust_remote_code=True,
-    low_cpu_mem_usage=True
 )
 # We recommend enabling flash_attention_2 for better acceleration and memory saving.
-# model = Qwen2_5OmniForConditionalGeneration.from_pretrained(
-#     "gijs/audsemthinker-qa",
 #     torch_dtype="auto",
 #     device_map="auto",
 #     attn_implementation="flash_attention_2",
-#     trust_remote_code=True,
-#     low_cpu_mem_usage=True
 # )
-processor = Qwen2_5OmniProcessor.from_pretrained("gijs/audsemthinker-qa", trust_remote_code=True)
 # Load and preprocess audio
 audio_file = "path/to/your/audio.wav"
@@ -82,7 +80,7 @@ conversation = [
 # Preparation for inference
 text = processor.apply_chat_template(conversation, add_generation_prompt=True, tokenize=False)
-audios, images, videos = process_mm_info(conversation)
 inputs = processor(
     text=text,
     audio=audios,

 ```python
 import soundfile as sf
+from transformers import Qwen2_5OmniThinkerForConditionalGeneration, Qwen2_5OmniProcessor
 from qwen_omni_utils import process_mm_info
 import torchaudio
 # default: Load the model on the available device(s)
+model = Qwen2_5OmniThinkerForConditionalGeneration.from_pretrained(
+    "gijs/audsemthinker",
     torch_dtype="auto",
     device_map="auto",
+    trust_remote_code=True
 )
 # We recommend enabling flash_attention_2 for better acceleration and memory saving.
+# model = Qwen2_5OmniThinkerForConditionalGeneration.from_pretrained(
+#     "gijs/audsemthinker",
 #     torch_dtype="auto",
 #     device_map="auto",
 #     attn_implementation="flash_attention_2",
+#     trust_remote_code=True
 # )
+processor = Qwen2_5OmniProcessor.from_pretrained("Qwen/Qwen2.5-Omni-7B", trust_remote_code=True)
 # Load and preprocess audio
 audio_file = "path/to/your/audio.wav"
 # Preparation for inference
 text = processor.apply_chat_template(conversation, add_generation_prompt=True, tokenize=False)
+audios, images, videos = process_mm_info(conversation, use_audio_in_video=False)
 inputs = processor(
     text=text,
     audio=audios,