gijs committed
Commit 34defae · verified · 1 Parent(s): b397a36

Update README.md

Files changed (1): README.md (+5 -5)
README.md CHANGED
@@ -25,12 +25,12 @@ To use `AudSemThinker` for audio understanding and captioning tasks, you can loa
 
 ```python
 import soundfile as sf
-from transformers import Qwen2_5OmniForConditionalGeneration, Qwen2_5OmniProcessor
+from transformers import Qwen2_5OmniThinkerForConditionalGeneration, Qwen2_5OmniProcessor
 from qwen_omni_utils import process_mm_info
 import torchaudio
 
 # default: Load the model on the available device(s)
-model = Qwen2_5OmniForConditionalGeneration.from_pretrained(
+model = Qwen2_5OmniThinkerForConditionalGeneration.from_pretrained(
     "gijs/audsemthinker",
     torch_dtype="auto",
     device_map="auto",
@@ -38,7 +38,7 @@ model = Qwen2_5OmniForConditionalGeneration.from_pretrained(
 )
 
 # We recommend enabling flash_attention_2 for better acceleration and memory saving.
-# model = Qwen2_5OmniForConditionalGeneration.from_pretrained(
+# model = Qwen2_5OmniThinkerForConditionalGeneration.from_pretrained(
 #     "gijs/audsemthinker",
 #     torch_dtype="auto",
 #     device_map="auto",
@@ -46,7 +46,7 @@ model = Qwen2_5OmniForConditionalGeneration.from_pretrained(
 #     trust_remote_code=True
 # )
 
-processor = Qwen2_5OmniProcessor.from_pretrained("gijs/audsemthinker", trust_remote_code=True)
+processor = Qwen2_5OmniProcessor.from_pretrained("Qwen/Qwen2.5-Omni-7B", trust_remote_code=True)
 
 # Load and preprocess audio
 audio_file = "path/to/your/audio.wav"
@@ -77,7 +77,7 @@ conversation = [
 
 # Preparation for inference
 text = processor.apply_chat_template(conversation, add_generation_prompt=True, tokenize=False)
-audios, images, videos = process_mm_info(conversation)
+audios, images, videos = process_mm_info(conversation, use_audio_in_video=False)
 inputs = processor(
     text=text,
     audio=audios,
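
For context: the commit swaps in `Qwen2_5OmniThinkerForConditionalGeneration`, the text-generating "Thinker" sub-model of Qwen2.5-Omni, so the snippet produces text output only. The diff itself cuts off inside the `processor(...)` call; a minimal sketch of how the remaining inference steps could continue, assuming the standard `generate`/`batch_decode` flow from the Qwen2.5-Omni examples (the exact arguments and variable names in the full README may differ):

```python
# Hypothetical continuation -- not part of this commit's diff; values are assumptions.
inputs = processor(
    text=text,
    audio=audios,
    return_tensors="pt",
    padding=True,
)
inputs = inputs.to(model.device)

# Generate a response and decode only the newly generated tokens.
output_ids = model.generate(**inputs, max_new_tokens=512)
response = processor.batch_decode(
    output_ids[:, inputs["input_ids"].shape[1]:],
    skip_special_tokens=True,
)[0]
print(response)
```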