ai-forever committed on
Commit
d425199
·
verified ·
1 Parent(s): 5123cdd

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +17 -19
README.md CHANGED
@@ -100,25 +100,20 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
100
 
101
  torch.manual_seed(42)
102
 
103
- PROMPT_TEMPLATE = '''instruction: |
104
- ### Задание для оценки:
105
- {instruction}
106
 
107
- reference_answer: |
108
- ### Эталонный ответ:
109
- {reference_answer}
110
 
111
- response: |
112
- ### Ответ для оценки:
113
- {answer}
114
 
115
- score_name: |
116
- ### Критерий оценки:
117
- {criteria_name}
118
 
119
- score_rubrics: |
120
- ### Шкала оценивания по критерию:
121
- {criteria_rubrics}
122
  '''
123
 
124
  instruction = 'Сколько будет 2+2?'
@@ -142,7 +137,8 @@ tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
142
  model = AutoModelForCausalLM.from_pretrained(
143
  MODEL_PATH,
144
  torch_dtype="auto",
145
- device_map="auto"
 
146
  )
147
 
148
  messages = [
@@ -155,17 +151,19 @@ text = tokenizer.apply_chat_template(
155
  )
156
  model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
157
 
158
- generated_ids = model.generate(
159
  **model_inputs,
160
  max_new_tokens=4096
161
  )
162
  generated_ids = [
163
- output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
164
  ]
165
 
166
  response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
167
 
168
- print(response)
 
 
169
  ```
170
 
171
  ## Training Details
 
100
 
101
  torch.manual_seed(42)
102
 
103
+ PROMPT_TEMPLATE = '''### Задание для оценки:
104
+ {instruction}
 
105
 
106
+ ### Эталонный ответ:
107
+ {reference_answer}
 
108
 
109
+ ### Ответ для оценки:
110
+ {answer}
 
111
 
112
+ ### Критерий оценки:
113
+ {criteria_name}
 
114
 
115
+ ### Шкала оценивания по критерию:
116
+ {criteria_rubrics}
 
117
  '''
118
 
119
  instruction = 'Сколько будет 2+2?'
 
137
  model = AutoModelForCausalLM.from_pretrained(
138
  MODEL_PATH,
139
  torch_dtype="auto",
140
+ device_map="auto",
141
+ trust_remote_code=True
142
  )
143
 
144
  messages = [
 
151
  )
152
  model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
153
 
154
+ sequence_ids = model.generate(
155
  **model_inputs,
156
  max_new_tokens=4096
157
  )
158
  generated_ids = [
159
+ output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, sequence_ids)
160
  ]
161
 
162
  response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
163
 
164
+ score = model(input_ids=sequence_ids).regr_output.item()
165
+
166
+ print(response, score)
167
  ```
168
 
169
  ## Training Details