upload-8-bit
- .ipynb_checkpoints/README-checkpoint.md +195 -0
- .ipynb_checkpoints/modeling_custom-checkpoint.py +166 -0
- README.md +16 -4
- config.json +15 -0
- model-00001-of-00002.safetensors +3 -0
- model-00002-of-00002.safetensors +3 -0
- model.safetensors.index.json +0 -0
.ipynb_checkpoints/README-checkpoint.md
ADDED
@@ -0,0 +1,195 @@
---
license: llama3
---

# Absolute-Rating Multi-Objective Reward Model (ArmoRM) with Mixture-of-Experts (MoE) Aggregation of Reward Objectives

+ **Authors** (* indicates equal contribution)

  [Haoxiang Wang*](https://haoxiang-wang.github.io/), [Wei Xiong*](https://weixiongust.github.io/WeiXiongUST/index.html), [Tengyang Xie](https://tengyangxie.github.io/), [Han Zhao](https://hanzhaoml.github.io/), [Tong Zhang](https://tongzhang-ml.org/)

+ **Blog**: https://rlhflow.github.io/posts/2024-05-29-multi-objective-reward-modeling/
+ **Tech Report**: https://arxiv.org/abs/2406.12845
+ **Model**: [ArmoRM-Llama3-8B-v0.1](https://huggingface.co/RLHFlow/ArmoRM-Llama3-8B-v0.1)
  + Finetuned from model: [FsfairX-LLaMA3-RM-v0.1](https://huggingface.co/sfairXC/FsfairX-LLaMA3-RM-v0.1)
- **Code Repository:** https://github.com/RLHFlow/RLHF-Reward-Modeling/
+ **Architecture**

<p align="center">
<img width="800" alt="image" src="https://github.com/RLHFlow/RLHFlow.github.io/blob/main/assets/ArmoRM-MoE.png?raw=true">
</p>

## RewardBench Leaderboard

| Model | Base Model | Method | Score | Chat | Chat Hard | Safety | Reasoning | Prior Sets (0.5 weight) |
|:------|:-----------|:------:|:------|:-----|:----------|:-------|:----------|:------------------------|
| ArmoRM-Llama3-8B-v0.1 | Llama-3 8B | ArmoRM + MoE | **89.0** | 96.9 | **76.8** | **92.2** | **97.3** | 74.3 |
| Cohere May 2024 | Unknown | Unknown | 88.3 | 96.4 | 71.3 | **92.7** | **97.7** | **78.2** |
| [pair-preference-model](https://huggingface.co/RLHFlow/pair-preference-model-LLaMA3-8B) | Llama-3 8B | [SliC-HF](https://arxiv.org/abs/2305.10425) | 85.7 | 98.3 | 65.8 | 89.7 | 94.7 | 74.6 |
| GPT-4 Turbo (0125 version) | GPT-4 Turbo | LLM-as-a-Judge | 84.3 | 95.3 | 74.3 | 87.2 | 86.9 | 70.9 |
| [FsfairX-LLaMA3-RM-v0.1](https://huggingface.co/sfairXC/FsfairX-LLaMA3-RM-v0.1) | Llama-3 8B | Bradley-Terry | 83.6 | **99.4** | 65.1 | 87.8 | 86.4 | 74.9 |
| [Starling-RM-34B](https://huggingface.co/Nexusflow/Starling-RM-34B) | Yi-34B | Bradley-Terry | 81.4 | 96.9 | 57.2 | 88.2 | 88.5 | 71.4 |

## Demo Code
```python
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

device = "cuda"
path = "RLHFlow/ArmoRM-Llama3-8B-v0.1"
model = AutoModelForSequenceClassification.from_pretrained(path, device_map=device,
                                                           trust_remote_code=True, torch_dtype=torch.bfloat16)
tokenizer = AutoTokenizer.from_pretrained(path, use_fast=True)
# We load a random sample from the validation set of the HelpSteer dataset
prompt = 'What are some synonyms for the word "beautiful"?'
response = "Nicely, Beautifully, Handsome, Stunning, Wonderful, Gorgeous, Pretty, Stunning, Elegant"
messages = [{"role": "user", "content": prompt},
            {"role": "assistant", "content": response}]
input_ids = tokenizer.apply_chat_template(messages, return_tensors="pt").to(device)
with torch.no_grad():
    output = model(input_ids)
    # Multi-objective rewards for the response
    multi_obj_rewards = output.rewards.cpu().float()
    # The gating layer's output is conditioned on the prompt
    gating_output = output.gating_output.cpu().float()
    # The preference score for the response, aggregated from the
    # multi-objective rewards with the gating layer
    preference_score = output.score.cpu().float()
# We apply a transformation matrix to the multi-objective rewards
# before multiplying with the gating layer's output. This mainly aims
# at reducing the verbosity bias of the original reward objectives.
obj_transform = model.reward_transform_matrix.data.cpu().float()
# The final coefficients assigned to each reward objective
multi_obj_coeffs = gating_output @ obj_transform.T
# The preference score is the linear combination of the multi-objective rewards with
# the multi-objective coefficients, which can be verified by the following assertion
assert torch.isclose(torch.sum(multi_obj_rewards * multi_obj_coeffs, dim=1), preference_score, atol=1e-3)
# Find the top-K reward objectives with coefficients of the highest magnitude
K = 3
top_obj_dims = torch.argsort(torch.abs(multi_obj_coeffs), dim=1, descending=True)[:, :K]
top_obj_coeffs = torch.gather(multi_obj_coeffs, dim=1, index=top_obj_dims)

# The attributes of the 19 reward objectives
attributes = ['helpsteer-helpfulness', 'helpsteer-correctness', 'helpsteer-coherence',
              'helpsteer-complexity', 'helpsteer-verbosity', 'ultrafeedback-overall_score',
              'ultrafeedback-instruction_following', 'ultrafeedback-truthfulness',
              'ultrafeedback-honesty', 'ultrafeedback-helpfulness', 'beavertails-is_safe',
              'prometheus-score', 'argilla-overall_quality', 'argilla-judge_lm', 'code-complexity',
              'code-style', 'code-explanation', 'code-instruction-following', 'code-readability']

example_index = 0
for i in range(K):
    attribute = attributes[top_obj_dims[example_index, i].item()]
    coeff = top_obj_coeffs[example_index, i].item()
    print(f"{attribute}: {round(coeff, 5)}")
# code-complexity: 0.19922
# helpsteer-verbosity: -0.10864
# ultrafeedback-instruction_following: 0.07861

# The actual rewards of this example from the HelpSteer dataset
# are [3,3,4,2,2] for the five HelpSteer objectives:
# helpfulness, correctness, coherence, complexity, verbosity.
# We can linearly transform our predicted rewards to the
# original reward space to compare with the ground truth.
helpsteer_rewards_pred = multi_obj_rewards[0, :5] * 5 - 0.5
print(helpsteer_rewards_pred)
# [2.78125 2.859375 3.484375 1.3847656 1.296875 ]
```

## Easy-to-use Pipeline

```python
from typing import Dict, List

import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer


class ArmoRMPipeline:
    def __init__(self, model_id, device_map="auto", torch_dtype=torch.bfloat16, truncation=True, trust_remote_code=False, max_length=4096):
        self.model = AutoModelForSequenceClassification.from_pretrained(
            model_id,
            device_map=device_map,
            trust_remote_code=trust_remote_code,
            torch_dtype=torch_dtype,
        )
        self.tokenizer = AutoTokenizer.from_pretrained(
            model_id,
            use_fast=True,
        )
        self.truncation = truncation
        self.device = self.model.device
        self.max_length = max_length

    def __call__(self, messages: List[Dict[str, str]]) -> Dict[str, float]:
        """
        messages: OpenAI chat messages to be scored
        Note: no batching; because of length differences, the model would have to pad every example to the max length, which is inefficient
        Returns: a dictionary with the score between 0 and 1
        """
        input_ids = self.tokenizer.apply_chat_template(
            messages,
            return_tensors="pt",
            padding=True,
            truncation=self.truncation,
            max_length=self.max_length,
        ).to(self.device)
        with torch.no_grad():
            output = self.model(input_ids)
            score = output.score.float().item()
        return {"score": score}


# Create the reward model pipeline
prompt = 'What are some synonyms for the word "beautiful"?'
rm = ArmoRMPipeline("RLHFlow/ArmoRM-Llama3-8B-v0.1", trust_remote_code=True)

# Score the messages
response1 = 'Nicely, Beautifully, Handsome, Stunning, Wonderful, Gorgeous, Pretty, Stunning, Elegant'
score1 = rm([{"role": "user", "content": prompt}, {"role": "assistant", "content": response1}])
print(score1)

response2 = '''Certainly! Here are some synonyms for the word "beautiful":

1. Gorgeous
2. Lovely
3. Stunning
4. Attractive
5. Pretty
6. Elegant
7. Exquisite
8. Handsome
9. Charming
10. Alluring
11. Radiant
12. Magnificent
13. Graceful
14. Enchanting
15. Dazzling

These synonyms can be used in various contexts to convey the idea of beauty.'''
score2 = rm([{"role": "user", "content": prompt}, {"role": "assistant", "content": response2}])
print(score2)

response3 = 'Sorry i cannot answer this.'
score3 = rm([{"role": "user", "content": prompt}, {"role": "assistant", "content": response3}])
print(score3)
```

## Citation

If you find this work useful for your research, please consider citing:
```
@article{ArmoRM,
  title={Interpretable Preferences via Multi-Objective Reward Modeling and Mixture-of-Experts},
  author={Haoxiang Wang and Wei Xiong and Tengyang Xie and Han Zhao and Tong Zhang},
  journal={arXiv preprint arXiv:2406.12845},
}

@inproceedings{wang2024arithmetic,
  title={Arithmetic Control of LLMs for Diverse User Preferences: Directional Preference Alignment with Multi-Objective Rewards},
  author={Haoxiang Wang and Yong Lin and Wei Xiong and Rui Yang and Shizhe Diao and Shuang Qiu and Han Zhao and Tong Zhang},
  year={2024},
  booktitle={ACL},
}
```
The second entry, "[Arithmetic Control of LLMs for Diverse User Preferences: Directional Preference Alignment with Multi-Objective Rewards](https://arxiv.org/abs/2402.18571)", is another recent work of ours that trained a multi-objective reward model and adopted it for LLM alignment, which motivated us to develop the current work.
.ipynb_checkpoints/modeling_custom-checkpoint.py
ADDED
@@ -0,0 +1,166 @@
from dataclasses import dataclass
from typing import Optional, List, Tuple

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.checkpoint
from transformers import LlamaModel, LlamaPreTrainedModel
from transformers.models.llama.modeling_llama import LLAMA_INPUTS_DOCSTRING
from transformers.utils import ModelOutput
from transformers.utils import add_start_docstrings_to_model_forward


class GatingNetwork(nn.Module):
    def __init__(self, in_features: int, out_features: int, bias: bool = True, temperature: float = 10,
                 logit_scale: float = 1., hidden_dim: int = 1024, n_hidden: int = 3):
        super().__init__()
        self.temperature = temperature
        self.logit_scale = nn.Parameter(torch.ones(1) * logit_scale)
        layers = []
        for _ in range(n_hidden):
            layers.append(nn.Linear(in_features, hidden_dim))
            in_features = hidden_dim
        layers.append(nn.Linear(in_features, out_features, bias=bias))
        self.layers = nn.ModuleList(layers)

    def forward(self, x: torch.FloatTensor) -> torch.FloatTensor:
        # Apply the linear layers with ReLU on all but the last layer
        for i, layer in enumerate(self.layers):
            x = F.relu(layer(x)) if i < len(self.layers) - 1 else layer(x)
        # Softmax over the objectives, scaled by the temperature
        x = F.softmax(x / self.temperature, dim=1)
        return x * self.logit_scale[0]


# token_pattern = tokenizer.encode("<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n", add_special_tokens=False, )
token_pattern = [128009, 128006, 78191, 128007, 271]


def find_token_for_gating(lst, ):
    """Find the last occurrence of token_pattern in a list."""
    token_pattern_len = len(token_pattern)
    search_end = len(lst)
    for j in range(search_end - token_pattern_len, -1, -1):
        if lst[j:j + token_pattern_len] == token_pattern:
            return j
    raise ValueError("Token pattern not found in the list.")


@dataclass
class CustomOutput(ModelOutput):
    """
    Output class for the reward model with gating.

    Args:
        rewards (`torch.FloatTensor` of shape `(batch_size, config.num_objectives)`):
            The multi-objective rewards predicted by the regression layer.
        hidden_state (`torch.FloatTensor` of shape `(batch_size, hidden_size)`):
            The hidden state of the last non-padding token, used by the regression layer.
        prompt_embedding (`torch.FloatTensor` of shape `(batch_size, hidden_size)`):
            The embeddings of the prompt tokens.
        gating_output (`torch.FloatTensor` of shape `(batch_size, config.num_objectives)`):
            The output weights of the gating network.
        score (`torch.FloatTensor` of shape `(batch_size,)`):
            The final reward score.
        logits (`torch.FloatTensor` of shape `(batch_size,)`):
            Same as score.
    """

    rewards: torch.FloatTensor = None
    hidden_state: Optional[Tuple[torch.FloatTensor, ...]] = None
    prompt_embedding: Optional[torch.FloatTensor] = None
    gating_output: Optional[torch.FloatTensor] = None
    score: Optional[torch.FloatTensor] = None
    logits: Optional[torch.FloatTensor] = None


class LlamaForRewardModelWithGating(LlamaPreTrainedModel):
    def __init__(self, config):
        super().__init__(config)
        self.num_labels = config.num_labels
        self.model = LlamaModel(config)
        config_dict = config.to_dict()
        self.num_objectives = config_dict.get("num_objectives", 19)
        self.regression_layer = nn.Linear(config.hidden_size, self.num_objectives, bias=False)
        self.post_init()
        # Not using torch.eye because it is not supported in BF16
        I = torch.zeros(self.num_objectives, self.num_objectives)
        I[range(self.num_objectives), range(self.num_objectives)] = 1.
        self.reward_transform_matrix = nn.Parameter(I)
        self.reward_transform_matrix.requires_grad = False

        # Gating network that maps the prompt embedding to per-objective weights
        self.gating = GatingNetwork(config.hidden_size, config.num_objectives,
                                    temperature=config_dict.get("gating_temperature", 10),
                                    hidden_dim=config_dict.get("gating_hidden_dim", 1024),
                                    n_hidden=config_dict.get("gating_n_hidden", 3))

    @add_start_docstrings_to_model_forward(LLAMA_INPUTS_DOCSTRING)
    def forward(
        self,
        input_ids: torch.LongTensor = None,
        attention_mask: Optional[torch.Tensor] = None,
        position_ids: Optional[torch.LongTensor] = None,
        past_key_values: Optional[List[torch.FloatTensor]] = None,
        inputs_embeds: Optional[torch.FloatTensor] = None,
        labels: Optional[torch.FloatTensor] = None,
        use_cache: Optional[bool] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> CustomOutput:
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        transformer_outputs = self.model(
            input_ids,
            attention_mask=attention_mask,
            position_ids=position_ids,
            past_key_values=past_key_values,
            inputs_embeds=inputs_embeds,
            use_cache=use_cache,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )
        tokens_hidden_states = transformer_outputs[0]

        if input_ids is not None:
            batch_size = input_ids.shape[0]
        else:
            batch_size = inputs_embeds.shape[0]

        if self.config.pad_token_id is None and batch_size != 1:
            raise ValueError("Cannot handle batch sizes > 1 if no padding token is defined.")
        if self.config.pad_token_id is None:
            sequence_lengths = -1
        else:
            if input_ids is not None:
                # if no pad token found, use modulo instead of reverse indexing for ONNX compatibility
                sequence_lengths = torch.eq(input_ids, self.config.pad_token_id).int().argmax(-1) - 1
                sequence_lengths = sequence_lengths % input_ids.shape[-1]
                sequence_lengths = sequence_lengths.to(tokens_hidden_states.device)
            else:
                sequence_lengths = -1

        dummy_iterator = torch.arange(batch_size, device=tokens_hidden_states.device)
        hidden_states = tokens_hidden_states[dummy_iterator, sequence_lengths]
        assert hidden_states.shape == (batch_size, self.config.hidden_size)
        rewards = self.regression_layer(hidden_states)

        gating_token_positions = [find_token_for_gating(ids.tolist()) for ids in input_ids]
        prompt_embedding = tokens_hidden_states[dummy_iterator, gating_token_positions, :]
        gating_output = self.gating(prompt_embedding)

        rewards_adjusted = rewards @ self.reward_transform_matrix
        score = torch.sum(gating_output * rewards_adjusted, dim=1)

        return CustomOutput(
            rewards=rewards,
            hidden_state=hidden_states,
            prompt_embedding=prompt_embedding,
            gating_output=gating_output,
            score=score,
            logits=score,
        )
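As a rough, self-contained sketch of the shape flow this module implements (random tensors stand in for the trained weights; hidden_size=4096 and 19 objectives are assumed from the Llama-3-8B configuration), the gating weights computed from the prompt embedding combine the transformed per-objective rewards into a single preference score:

```python
import torch
import torch.nn as nn

# Illustrative stand-ins for the trained layers loaded from the checkpoint
batch, hidden, n_obj = 2, 4096, 19
regression_layer = nn.Linear(hidden, n_obj, bias=False)
reward_transform = torch.eye(n_obj)  # identity, matching the initialization above
gating_mlp = nn.Sequential(nn.Linear(hidden, 1024), nn.ReLU(), nn.Linear(1024, n_obj))  # rough stand-in for GatingNetwork

last_token_hidden = torch.randn(batch, hidden)  # hidden state at the final non-padding token
prompt_embedding = torch.randn(batch, hidden)   # hidden state at the gating token (end of the prompt)

gating_output = torch.softmax(gating_mlp(prompt_embedding) / 10, dim=1)  # (batch, 19) objective weights
rewards = regression_layer(last_token_hidden)                            # (batch, 19) per-objective rewards
score = (gating_output * (rewards @ reward_transform)).sum(dim=1)        # (batch,) preference score
print(rewards.shape, score.shape)  # torch.Size([2, 19]) torch.Size([2])
```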
README.md
CHANGED
@@ -35,12 +35,24 @@ license: llama3
 ## Demo Code
 ```python
 import torch
-from transformers import AutoModelForSequenceClassification, AutoTokenizer
+from transformers import AutoConfig, AutoModelForSequenceClassification
+from transformers import BitsAndBytesConfig
+from transformers import AutoTokenizer, pipeline
+
 device = "cuda"
-path = "RLHFlow/ArmoRM-Llama3-8B-v0.1"
-model = AutoModelForSequenceClassification.from_pretrained(path, device_map=device,
-                                                           trust_remote_code=True, torch_dtype=torch.bfloat16)
+path = "SteveTran/ArmoRM-Llama3-8B-v0.1-4bit"
 tokenizer = AutoTokenizer.from_pretrained(path, use_fast=True)
+bnb_quantization_config = BitsAndBytesConfig(load_in_4bit=True,
+                                             bnb_4bit_compute_dtype=torch.bfloat16,
+                                             bnb_4bit_quant_type="fp4",
+                                             bnb_4bit_use_double_quant=True)
+model = AutoModelForSequenceClassification.from_pretrained(
+    path,
+    quantization_config=bnb_quantization_config,
+    device_map="auto",
+    torch_dtype=torch.bfloat16,
+    trust_remote_code=True,
+)
 # We load a random sample from the validation set of the HelpSteer dataset
 prompt = 'What are some synonyms for the word "beautiful"?'
 response = "Nicely, Beautifully, Handsome, Stunning, Wonderful, Gorgeous, Pretty, Stunning, Elegant"
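After loading the quantized checkpoint as in the updated snippet above, scoring proceeds exactly as in the original demo. A minimal sketch (not part of this diff), assuming the `model`, `tokenizer`, `prompt`, and `response` defined above:

```python
messages = [{"role": "user", "content": prompt},
            {"role": "assistant", "content": response}]
input_ids = tokenizer.apply_chat_template(messages, return_tensors="pt").to(model.device)
with torch.no_grad():
    output = model(input_ids)
    # Per-objective rewards and the aggregated preference score, as in the original README
    multi_obj_rewards = output.rewards.cpu().float()
    preference_score = output.score.cpu().float()
print(preference_score)
```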
config.json
CHANGED
@@ -31,6 +31,21 @@
   "num_objectives": 19,
   "pad_token_id": 128256,
   "pretraining_tp": 1,
+  "quantization_config": {
+    "_load_in_4bit": true,
+    "_load_in_8bit": false,
+    "bnb_4bit_compute_dtype": "bfloat16",
+    "bnb_4bit_quant_storage": "uint8",
+    "bnb_4bit_quant_type": "fp4",
+    "bnb_4bit_use_double_quant": true,
+    "llm_int8_enable_fp32_cpu_offload": false,
+    "llm_int8_has_fp16_weight": false,
+    "llm_int8_skip_modules": null,
+    "llm_int8_threshold": 6.0,
+    "load_in_4bit": true,
+    "load_in_8bit": false,
+    "quant_method": "bitsandbytes"
+  },
   "rms_norm_eps": 1e-05,
   "rope_scaling": null,
   "rope_theta": 500000.0,
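Since the quantization settings ship inside the checkpoint's config.json, they can be inspected before loading. A small illustrative sketch, assuming the `quantization_config` block is stored exactly as shown in this diff:

```python
from transformers import AutoConfig

path = "SteveTran/ArmoRM-Llama3-8B-v0.1-4bit"
config = AutoConfig.from_pretrained(path, trust_remote_code=True)

# The settings mirror the quantization_config block added above
quant = config.to_dict().get("quantization_config", {})
print(quant.get("quant_method"))         # "bitsandbytes"
print(quant.get("load_in_4bit"))         # True
print(quant.get("bnb_4bit_quant_type"))  # "fp4"
```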
model-00001-of-00002.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:7002cddb2ab8d0f289596cc337c78b5f4c482b0e31229f88ff4efee8762c9f13
size 2985501395
model-00002-of-00002.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:42b4ca78d243ecb3c5917ec35ca07b3256e70ddf1baa79096f37fe74b2efcfb6
size 1669796944
model.safetensors.index.json
CHANGED
The diff for this file is too large to render.
See raw diff