gijs committed (verified)
Commit c640e4a · 1 Parent(s): 2d3f2ac

Update README.md

Files changed (1):
  1. README.md (+9 -11)
README.md CHANGED
@@ -24,30 +24,28 @@ To use `AudSemThinker-QA` for audio question answering, you can load it using th
 
 ```python
 import soundfile as sf
-from transformers import Qwen2_5OmniForConditionalGeneration, Qwen2_5OmniProcessor
+from transformers import Qwen2_5OmniThinkerForConditionalGeneration, Qwen2_5OmniProcessor
 from qwen_omni_utils import process_mm_info
 import torchaudio
 
 # default: Load the model on the available device(s)
-model = Qwen2_5OmniForConditionalGeneration.from_pretrained(
-    "gijs/audsemthinker-qa",
+model = Qwen2_5OmniThinkerForConditionalGeneration.from_pretrained(
+    "gijs/audsemthinker",
     torch_dtype="auto",
     device_map="auto",
-    trust_remote_code=True,
-    low_cpu_mem_usage=True
+    trust_remote_code=True
 )
 
 # We recommend enabling flash_attention_2 for better acceleration and memory saving.
-# model = Qwen2_5OmniForConditionalGeneration.from_pretrained(
-#     "gijs/audsemthinker-qa",
+# model = Qwen2_5OmniThinkerForConditionalGeneration.from_pretrained(
+#     "gijs/audsemthinker",
 #     torch_dtype="auto",
 #     device_map="auto",
 #     attn_implementation="flash_attention_2",
-#     trust_remote_code=True,
-#     low_cpu_mem_usage=True
+#     trust_remote_code=True
 # )
 
-processor = Qwen2_5OmniProcessor.from_pretrained("gijs/audsemthinker-qa", trust_remote_code=True)
+processor = Qwen2_5OmniProcessor.from_pretrained("Qwen/Qwen2.5-Omni-7B", trust_remote_code=True)
 
 # Load and preprocess audio
 audio_file = "path/to/your/audio.wav"
@@ -82,7 +80,7 @@
 
 # Preparation for inference
 text = processor.apply_chat_template(conversation, add_generation_prompt=True, tokenize=False)
-audios, images, videos = process_mm_info(conversation)
+audios, images, videos = process_mm_info(conversation, use_audio_in_video=False)
 inputs = processor(
     text=text,
     audio=audios,
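
The net effect of this commit is to switch the example to the thinker-only `Qwen2_5OmniThinkerForConditionalGeneration` class and the `gijs/audsemthinker` checkpoint, load the processor from the base `Qwen/Qwen2.5-Omni-7B` repo, drop `low_cpu_mem_usage`, and pass `use_audio_in_video=False` to `process_mm_info`. For reference, below is a minimal sketch of how the updated snippet could fit together end to end. Only the imports, checkpoint names, and lines visible in the hunks come from the diff; the conversation contents, the remaining processor arguments, the generation settings, and the decoding step are assumptions following the standard Qwen2.5-Omni usage pattern. The README also imports `soundfile` and `torchaudio` (presumably for manual loading/resampling), which this sketch skips by passing the audio file path directly to `process_mm_info`.

```python
# Minimal sketch assembled from the post-commit hunks (assumptions noted inline).
from transformers import Qwen2_5OmniThinkerForConditionalGeneration, Qwen2_5OmniProcessor
from qwen_omni_utils import process_mm_info

# Thinker-only checkpoint, as in the updated README.
model = Qwen2_5OmniThinkerForConditionalGeneration.from_pretrained(
    "gijs/audsemthinker",
    torch_dtype="auto",
    device_map="auto",
    trust_remote_code=True,
)

# Processor is loaded from the base Qwen2.5-Omni repo, as in the updated README.
processor = Qwen2_5OmniProcessor.from_pretrained("Qwen/Qwen2.5-Omni-7B", trust_remote_code=True)

audio_file = "path/to/your/audio.wav"

# Assumed conversation layout and prompt; the actual README prompt is not visible in the hunks.
conversation = [
    {
        "role": "user",
        "content": [
            {"type": "audio", "audio": audio_file},
            {"type": "text", "text": "What is happening in this audio clip?"},
        ],
    },
]

# Preparation for inference (matches the post-commit lines of the second hunk).
text = processor.apply_chat_template(conversation, add_generation_prompt=True, tokenize=False)
audios, images, videos = process_mm_info(conversation, use_audio_in_video=False)
inputs = processor(
    text=text,
    audio=audios,
    images=images,   # assumed remaining processor arguments (not visible in the diff)
    videos=videos,
    return_tensors="pt",
    padding=True,
)
inputs = inputs.to(model.device).to(model.dtype)

# Assumed generation and decoding step (standard Hugging Face generate API).
output_ids = model.generate(**inputs, max_new_tokens=512)
response = processor.batch_decode(
    output_ids[:, inputs["input_ids"].shape[1]:], skip_special_tokens=True
)[0]
print(response)
```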