---
license: apache-2.0
---
## Usage Code

```python
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import numpy as np
from scipy.special import softmax

# Usage example: estimate the complexity score of one user query as the
# probability-weighted average of the candidate scores 1-6.

# Load the complexity scorer (a causal language model) and its tokenizer.
model_name = "hkust-nlp/Deita-Complexity-Scorer"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Prompt template; `{instruction}` is replaced with the query to be scored.
complexity_template = ("You are a helpful assistant. Please identify the complexity score of the following user query. \n##Query: {instruction} \n##Complexity: ")

# Input text (the query whose complexity is being scored).
input_text = "write a performance review for a junior data scientist"
user_input = complexity_template.format(instruction=input_text)

# Encode the prompt into token ids (PyTorch tensor).
input_ids = tokenizer.encode(user_input, return_tensors="pt")

# Only the scores of the first generated token are read below, so a single new
# token is enough. `max_new_tokens` does not count the prompt tokens, unlike
# `max_length`, so a long prompt cannot use up the generation budget.
max_new_tokens = 1
outputs = model.generate(
    input_ids,
    max_new_tokens=max_new_tokens,
    num_return_sequences=1,
    return_dict_in_generate=True,
    output_scores=True,
)

# `outputs.scores[0]` holds the vocabulary scores of the first generation step;
# the trailing `[0]` selects the only sequence in the batch.
# NOTE: despite the name, these are raw scores, not log-probabilities; the
# softmax below normalises them over the six candidate tokens.
logprobs_list = outputs.scores[0][0]

# Token id -> score label.
# NOTE(review): presumably these are the tokenizer's ids for the digit tokens
# "1".."6" - verify with `tokenizer.convert_tokens_to_ids` if the tokenizer changes.
id2score = {
    29896: "1",
    29906: "2",
    29941: "3",
    29946: "4",
    29945: "5",
    29953: "6",
}

# Numeric score values, derived from the mapping so both always stay in sync.
score_template = np.array([int(label) for label in id2score.values()])

# Gather the six candidate scores in one indexing operation and move them to
# NumPy; detach/float/cpu keeps this working for GPU or half-precision models.
score_logits = logprobs_list[list(id2score)].detach().float().cpu().numpy()

# Probability distribution over the six candidate scores.
score_npy = softmax(score_logits, axis=0)

# Expected score: sum of (probability * score value).
score_npy = np.sum(score_npy * score_template, axis=0)
```