alexmarques committed
Commit bc4997d · verified · 1 Parent(s): b93a144

Update README.md

Files changed (1)
  1. README.md +1 -2
README.md CHANGED
@@ -39,7 +39,6 @@ Once your server is started, you can query the model using the OpenAI API:
 ```python
 from openai import OpenAI
 
-# Modify OpenAI's API key and API base to use vLLM's API server.
 openai_api_key = "EMPTY"
 openai_api_base = "http://localhost:8000/v1"
 client = OpenAI(
@@ -62,7 +61,7 @@ And there's more. You can run 2:4 sparse models on vLLM and get significant spee
 prompt = f"Give a TL;DR of the following Reddit post.\n<|user|>{post}\nTL;DR:\n<|assistant|>\n"
 
 completion = client.completions.create(
-    model="RedHatAI/Llama-3.1-8B-tldr",
+    model="RedHatAI/Llama-3.1-8B-tldr-FP8-dynamic",
     prompt=prompt,
     max_tokens=256,
 )
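
For reference, the new side of this diff assembles into roughly the snippet below. It is a sketch, not the verbatim README: the `OpenAI(...)` arguments, the placeholder `post` value, and the final `print` are not shown in the diff and are assumed here, following the usual pattern for vLLM's OpenAI-compatible server.

```python
from openai import OpenAI

# vLLM's OpenAI-compatible server does not validate the key; "EMPTY" is a stand-in.
openai_api_key = "EMPTY"
openai_api_base = "http://localhost:8000/v1"

# Point the OpenAI client at the local vLLM server (these keyword arguments
# are assumed; the diff truncates the OpenAI(...) call).
client = OpenAI(
    api_key=openai_api_key,
    base_url=openai_api_base,
)

# Placeholder; the README supplies an actual Reddit post here.
post = "..."
prompt = f"Give a TL;DR of the following Reddit post.\n<|user|>{post}\nTL;DR:\n<|assistant|>\n"

# Query the FP8-dynamic checkpoint this commit switches the example to.
completion = client.completions.create(
    model="RedHatAI/Llama-3.1-8B-tldr-FP8-dynamic",
    prompt=prompt,
    max_tokens=256,
)
print(completion.choices[0].text)
```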