gijs committed · verified
Commit 6010a51 · Parent: 77b51a5

Update README.md

Files changed (1)
  1. README.md +9 -11
README.md CHANGED
@@ -21,30 +21,28 @@ To use `AudSemThinker-QA-GRPO` for audio question answering, you can load it usi
 
 ```python
 import soundfile as sf
-from transformers import Qwen2_5OmniForConditionalGeneration, Qwen2_5OmniProcessor
+from transformers import Qwen2_5OmniThinkerForConditionalGeneration, Qwen2_5OmniProcessor
 from qwen_omni_utils import process_mm_info
 import torchaudio
 
 # default: Load the model on the available device(s)
-model = Qwen2_5OmniForConditionalGeneration.from_pretrained(
-    "gijs/audsemthinker-qa-grpo",
+model = Qwen2_5OmniThinkerForConditionalGeneration.from_pretrained(
+    "gijs/audsemthinker",
     torch_dtype="auto",
     device_map="auto",
-    trust_remote_code=True,
-    low_cpu_mem_usage=True
+    trust_remote_code=True
 )
 
 # We recommend enabling flash_attention_2 for better acceleration and memory saving.
-# model = Qwen2_5OmniForConditionalGeneration.from_pretrained(
-#     "gijs/audsemthinker-qa-grpo",
+# model = Qwen2_5OmniThinkerForConditionalGeneration.from_pretrained(
+#     "gijs/audsemthinker",
 #     torch_dtype="auto",
 #     device_map="auto",
 #     attn_implementation="flash_attention_2",
-#     trust_remote_code=True,
-#     low_cpu_mem_usage=True
+#     trust_remote_code=True
 # )
 
-processor = Qwen2_5OmniProcessor.from_pretrained("gijs/audsemthinker-qa-grpo", trust_remote_code=True)
+processor = Qwen2_5OmniProcessor.from_pretrained("Qwen/Qwen2.5-Omni-7B", trust_remote_code=True)
 
 # Load and preprocess audio
 audio_file = "path/to/your/audio.wav"
@@ -79,7 +77,7 @@ conversation = [
 
 # Preparation for inference
 text = processor.apply_chat_template(conversation, add_generation_prompt=True, tokenize=False)
-audios, images, videos = process_mm_info(conversation)
+audios, images, videos = process_mm_info(conversation, use_audio_in_video=False)
 inputs = processor(
     text=text,
     audio=audios,
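
The updated snippet in the diff breaks off at the `processor(...)` call. For context, here is a minimal sketch of how inference typically continues with this processor/model pair; the remaining keyword arguments, the `max_new_tokens` budget, and the decoding step are illustrative assumptions, not part of this commit:

```python
# Sketch only (assumed continuation, not shown in this commit's diff):
# finish building the inputs and run generation with the Thinker model loaded above.
inputs = processor(
    text=text,
    audio=audios,
    return_tensors="pt",
    padding=True,
)
inputs = inputs.to(model.device)

# Generate an answer; 512 new tokens is an arbitrary illustrative budget.
output_ids = model.generate(**inputs, max_new_tokens=512)

# Strip the prompt tokens so only the newly generated answer is decoded.
new_tokens = output_ids[:, inputs["input_ids"].shape[1]:]
answer = processor.batch_decode(new_tokens, skip_special_tokens=True)[0]
print(answer)
```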