Update README.md

README.md (CHANGED)
````diff
@@ -23,9 +23,23 @@ set a seed for reproducibility:
 
 ```python
 >>> from transformers import pipeline, set_seed
+>>> # It is important to include bad_words_ids=[[0,2]] if you want this model to stay on topic.
+>>> # Otherwise, the model may generate start and end tokens followed by text that is not relevant to
+>>> # the previous text.
 >>> generator = pipeline('text-generation', model='olm/olm-gpt2-oct-2022')
 >>> set_seed(42)
+>>> # This example also illustrates that sometimes our model generates
+>>> # bloggy/spammy/web-y things, even though it gets higher evaluation results
+>>> # than the original GPT-2 across a variety of benchmarks. See the first output.
 >>> generator("Hello, I'm a language model,", max_length=30, num_return_sequences=5)
+Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
+[
+{'generated_text': "Hello, I'm a language model, but you can take me if I want.\nReplyDelete\nReplies\nReply\nAnonymous October 17, 2011"},
+{'generated_text': "Hello, I'm a language model, and here's some useful news for you all: The release date for the new release of"},
+{'generated_text': "Hello, I'm a language model, I'm not a developer or anybody who's working on those. I'm a freelancer... I"},
+{'generated_text': "Hello, I'm a language model, a language analyst, and a language system designer. I'm just curious about the"},
+{'generated_text': "Hello, I'm a language model, I'm passionate about languages, but I don't understand how my system works, the interaction"}
+]
 ```
 
 Here is how to use this model to get the features of a given text in PyTorch:
````
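The new comment recommends `bad_words_ids=[[0,2]]`, but the example call itself still never passes it. For reference, here is a minimal sketch (not part of the commit) of actually supplying the filter; it relies on the `transformers` text-generation pipeline forwarding extra generation keyword arguments to `model.generate`:

```python
from transformers import pipeline, set_seed

generator = pipeline('text-generation', model='olm/olm-gpt2-oct-2022')
set_seed(42)

# Sketch: bad_words_ids=[[0,2]] bans the start/end token ids named in the
# README comment, making generations less likely to drift off topic.
# Generation kwargs given here are forwarded to model.generate().
outputs = generator(
    "Hello, I'm a language model,",
    max_length=30,
    num_return_sequences=5,
    bad_words_ids=[[0, 2]],
)
for sample in outputs:
    print(sample['generated_text'])
```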
````diff
@@ -33,7 +47,7 @@ Here is how to use this model to get the features of a given text in PyTorch:
 ```python
 from transformers import AutoTokenizer, AutoModelForCausalLM
 tokenizer = AutoTokenizer.from_pretrained('olm/olm-gpt2-oct-2022')
-model = AutoModelForCausalLM.from_pretrained('gpt2')
+model = AutoModelForCausalLM.from_pretrained('olm/olm-gpt2-oct-2022')
 text = "Replace me by any text you'd like."
 encoded_input = tokenizer(text, return_tensors='pt')
 output = model(**encoded_input)
 ```
````
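The snippet above runs a plain forward pass, whose primary output is next-token logits. If the "features" you want are the transformer's hidden states, they have to be requested explicitly; the following sketch (an assumption about intent, not part of the commit) shows one way to do that:

```python
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained('olm/olm-gpt2-oct-2022')
model = AutoModelForCausalLM.from_pretrained('olm/olm-gpt2-oct-2022')

text = "Replace me by any text you'd like."
encoded_input = tokenizer(text, return_tensors='pt')

# Ask the model to return per-layer hidden states alongside the logits.
with torch.no_grad():
    output = model(**encoded_input, output_hidden_states=True)

# output.hidden_states holds one (batch, seq_len, hidden_size) tensor per
# layer, with the embedding output first; the last entry is the usual
# "features" of the text.
features = output.hidden_states[-1]
print(features.shape)  # e.g. torch.Size([1, 9, 768]) for a GPT-2-sized model
```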