imranali291 committed on
Commit
27a166f
·
verified ·
1 Parent(s): fbd3f06

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +31 -5
README.md CHANGED
@@ -1,5 +1,31 @@
1
- ---
2
- license: mit
3
- tags:
4
- - unsloth
5
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: mit
3
+ tags:
4
+ - unsloth
5
+ ---
6
+
7
+ # Streaming Inference
8
+ ```python
9
+ from unsloth import FastLanguageModel
10
+ model, tokenizer = FastLanguageModel.from_pretrained(
11
+ model_name = "imranali291/gpt-base-prompt-generator",
12
+ max_seq_length = 2048,
13
+ dtype = None,  # None = auto-detect (float16 / bfloat16)
14
+ load_in_4bit = True,
15
+ )
16
+ FastLanguageModel.for_inference(model) # Enable native 2x faster inference
17
+
18
+ messages = [
19
+ {"role": "user", "content": "php developer"},
20
+ ]
21
+ inputs = tokenizer.apply_chat_template(
22
+ messages,
23
+ tokenize = True,
24
+ add_generation_prompt = True, # Must add for generation
25
+ return_tensors = "pt",
26
+ ).to("cuda")
27
+
28
+ from transformers import TextStreamer
29
+ text_streamer = TextStreamer(tokenizer)
30
+ _ = model.generate(input_ids = inputs, streamer = text_streamer, max_new_tokens = 128, use_cache = True)
31
+ ```