upload-8-bit
- .ipynb_checkpoints/README-checkpoint.md +195 -0
- .ipynb_checkpoints/modeling_custom-checkpoint.py +166 -0
- README.md +16 -4
- config.json +15 -0
- model-00001-of-00002.safetensors +3 -0
- model-00002-of-00002.safetensors +3 -0
- model.safetensors.index.json +0 -0
.ipynb_checkpoints/README-checkpoint.md
ADDED
@@ -0,0 +1,195 @@
---
license: llama3
---

# Absolute-Rating Multi-Objective Reward Model (ArmoRM) with Mixture-of-Experts (MoE) Aggregation of Reward Objectives

+ **Authors** (* indicates equal contribution)

  [Haoxiang Wang*](https://haoxiang-wang.github.io/), [Wei Xiong*](https://weixiongust.github.io/WeiXiongUST/index.html), [Tengyang Xie](https://tengyangxie.github.io/), [Han Zhao](https://hanzhaoml.github.io/), [Tong Zhang](https://tongzhang-ml.org/)

+ **Blog**: https://rlhflow.github.io/posts/2024-05-29-multi-objective-reward-modeling/
+ **Tech Report**: https://arxiv.org/abs/2406.12845
+ **Model**: [ArmoRM-Llama3-8B-v0.1](https://huggingface.co/RLHFlow/ArmoRM-Llama3-8B-v0.1)
  + Finetuned from model: [FsfairX-LLaMA3-RM-v0.1](https://huggingface.co/sfairXC/FsfairX-LLaMA3-RM-v0.1)
- **Code Repository:** https://github.com/RLHFlow/RLHF-Reward-Modeling/
+ **Architecture**

<p align="center">
<img width="800" alt="image" src="https://github.com/RLHFlow/RLHFlow.github.io/blob/main/assets/ArmoRM-MoE.png?raw=true">
</p>

## RewardBench Leaderboard

| Model | Base Model | Method | Score | Chat | Chat Hard | Safety | Reasoning | Prior Sets (0.5 weight) |
|:------|:-----------|:------:|:------|:-----|:----------|:-------|:----------|:------------------------|
| ArmoRM-Llama3-8B-v0.1 | Llama-3 8B | ArmoRM + MoE | **89.0** | 96.9 | **76.8** | **92.2** | **97.3** | 74.3 |
| Cohere May 2024 | Unknown | Unknown | 88.3 | 96.4 | 71.3 | **92.7** | **97.7** | **78.2** |
| [pair-preference-model](https://huggingface.co/RLHFlow/pair-preference-model-LLaMA3-8B) | Llama-3 8B | [SliC-HF](https://arxiv.org/abs/2305.10425) | 85.7 | 98.3 | 65.8 | 89.7 | 94.7 | 74.6 |
| GPT-4 Turbo (0125 version) | GPT-4 Turbo | LLM-as-a-Judge | 84.3 | 95.3 | 74.3 | 87.2 | 86.9 | 70.9 |
| [FsfairX-LLaMA3-RM-v0.1](https://huggingface.co/sfairXC/FsfairX-LLaMA3-RM-v0.1) | Llama-3 8B | Bradley-Terry | 83.6 | **99.4** | 65.1 | 87.8 | 86.4 | 74.9 |
| [Starling-RM-34B](https://huggingface.co/Nexusflow/Starling-RM-34B) | Yi-34B | Bradley-Terry | 81.4 | 96.9 | 57.2 | 88.2 | 88.5 | 71.4 |

## Demo Code
```python
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

device = "cuda"
path = "RLHFlow/ArmoRM-Llama3-8B-v0.1"
model = AutoModelForSequenceClassification.from_pretrained(path, device_map=device,
                                                           trust_remote_code=True, torch_dtype=torch.bfloat16)
tokenizer = AutoTokenizer.from_pretrained(path, use_fast=True)
# We load a random sample from the validation set of the HelpSteer dataset
prompt = 'What are some synonyms for the word "beautiful"?'
response = "Nicely, Beautifully, Handsome, Stunning, Wonderful, Gorgeous, Pretty, Stunning, Elegant"
messages = [{"role": "user", "content": prompt},
            {"role": "assistant", "content": response}]
input_ids = tokenizer.apply_chat_template(messages, return_tensors="pt").to(device)
with torch.no_grad():
    output = model(input_ids)
    # Multi-objective rewards for the response
    multi_obj_rewards = output.rewards.cpu().float()
    # The gating layer's output is conditioned on the prompt
    gating_output = output.gating_output.cpu().float()
    # The preference score for the response, aggregated from the
    # multi-objective rewards with the gating layer
    preference_score = output.score.cpu().float()
# We apply a transformation matrix to the multi-objective rewards
# before multiplying with the gating layer's output. This mainly aims
# at reducing the verbosity bias of the original reward objectives.
obj_transform = model.reward_transform_matrix.data.cpu().float()
# The final coefficients assigned to each reward objective
multi_obj_coeffs = gating_output @ obj_transform.T
# The preference score is the linear combination of the multi-objective rewards with
# the multi-objective coefficients, which can be verified by the following assertion
assert torch.isclose(torch.sum(multi_obj_rewards * multi_obj_coeffs, dim=1), preference_score, atol=1e-3)
# Find the top-K reward objectives with coefficients of the highest magnitude
K = 3
top_obj_dims = torch.argsort(torch.abs(multi_obj_coeffs), dim=1, descending=True)[:, :K]
top_obj_coeffs = torch.gather(multi_obj_coeffs, dim=1, index=top_obj_dims)

# The attributes of the 19 reward objectives
attributes = ['helpsteer-helpfulness', 'helpsteer-correctness', 'helpsteer-coherence',
              'helpsteer-complexity', 'helpsteer-verbosity', 'ultrafeedback-overall_score',
              'ultrafeedback-instruction_following', 'ultrafeedback-truthfulness',
              'ultrafeedback-honesty', 'ultrafeedback-helpfulness', 'beavertails-is_safe',
              'prometheus-score', 'argilla-overall_quality', 'argilla-judge_lm', 'code-complexity',
              'code-style', 'code-explanation', 'code-instruction-following', 'code-readability']

example_index = 0
for i in range(K):
    attribute = attributes[top_obj_dims[example_index, i].item()]
    coeff = top_obj_coeffs[example_index, i].item()
    print(f"{attribute}: {round(coeff, 5)}")
# code-complexity: 0.19922
# helpsteer-verbosity: -0.10864
# ultrafeedback-instruction_following: 0.07861

# The actual rewards of this example from the HelpSteer dataset
# are [3,3,4,2,2] for the five HelpSteer objectives:
# helpfulness, correctness, coherence, complexity, verbosity.
# We can linearly transform our predicted rewards to the
# original reward space to compare with the ground truth.
helpsteer_rewards_pred = multi_obj_rewards[0, :5] * 5 - 0.5
print(helpsteer_rewards_pred)
# [2.78125 2.859375 3.484375 1.3847656 1.296875 ]
```

## Easy-to-use Pipeline

```python
from typing import Dict, List

import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer


class ArmoRMPipeline:
    def __init__(self, model_id, device_map="auto", torch_dtype=torch.bfloat16, truncation=True, trust_remote_code=False, max_length=4096):
        self.model = AutoModelForSequenceClassification.from_pretrained(
            model_id,
            device_map=device_map,
            trust_remote_code=trust_remote_code,
            torch_dtype=torch_dtype,
        )
        self.tokenizer = AutoTokenizer.from_pretrained(
            model_id,
            use_fast=True,
        )
        self.truncation = truncation
        self.device = self.model.device
        self.max_length = max_length

    def __call__(self, messages: List[Dict[str, str]]) -> Dict[str, float]:
        """
        messages: OpenAI chat messages to be scored
        Note: no batching; because of length differences, the model would have to pad every example to the max length, which is inefficient
        Returns: a dictionary with the score between 0 and 1
        """
        input_ids = self.tokenizer.apply_chat_template(
            messages,
            return_tensors="pt",
            padding=True,
            truncation=self.truncation,
            max_length=self.max_length,
        ).to(self.device)
        with torch.no_grad():
            output = self.model(input_ids)
            score = output.score.float().item()
        return {"score": score}


# Create the reward model pipeline
prompt = 'What are some synonyms for the word "beautiful"?'
rm = ArmoRMPipeline("RLHFlow/ArmoRM-Llama3-8B-v0.1", trust_remote_code=True)

# Score the messages
response1 = 'Nicely, Beautifully, Handsome, Stunning, Wonderful, Gorgeous, Pretty, Stunning, Elegant'
score1 = rm([{"role": "user", "content": prompt}, {"role": "assistant", "content": response1}])
print(score1)

response2 = '''Certainly! Here are some synonyms for the word "beautiful":

1. Gorgeous
2. Lovely
3. Stunning
4. Attractive
5. Pretty
6. Elegant
7. Exquisite
8. Handsome
9. Charming
10. Alluring
11. Radiant
12. Magnificent
13. Graceful
14. Enchanting
15. Dazzling

These synonyms can be used in various contexts to convey the idea of beauty.'''
score2 = rm([{"role": "user", "content": prompt}, {"role": "assistant", "content": response2}])
print(score2)

response3 = 'Sorry i cannot answer this.'
score3 = rm([{"role": "user", "content": prompt}, {"role": "assistant", "content": response3}])
print(score3)
```

## Citation

If you find this work useful for your research, please consider citing:
```
@article{ArmoRM,
  title={Interpretable Preferences via Multi-Objective Reward Modeling and Mixture-of-Experts},
  author={Haoxiang Wang and Wei Xiong and Tengyang Xie and Han Zhao and Tong Zhang},
  journal={arXiv preprint arXiv:2406.12845},
}

@inproceedings{wang2024arithmetic,
  title={Arithmetic Control of LLMs for Diverse User Preferences: Directional Preference Alignment with Multi-Objective Rewards},
  author={Haoxiang Wang and Yong Lin and Wei Xiong and Rui Yang and Shizhe Diao and Shuang Qiu and Han Zhao and Tong Zhang},
  year={2024},
  booktitle={ACL},
}
```
The second entry, "[Arithmetic Control of LLMs for Diverse User Preferences: Directional Preference Alignment with Multi-Objective Rewards](https://arxiv.org/abs/2402.18571)", is another recent work of ours that trained a multi-objective reward model and adopted it for LLM alignment, which motivated us to develop the current work.
.ipynb_checkpoints/modeling_custom-checkpoint.py
ADDED
@@ -0,0 +1,166 @@
from dataclasses import dataclass
from typing import Optional, List, Tuple

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.checkpoint
from transformers import LlamaModel, LlamaPreTrainedModel
from transformers.models.llama.modeling_llama import LLAMA_INPUTS_DOCSTRING
from transformers.utils import ModelOutput
from transformers.utils import add_start_docstrings_to_model_forward


class GatingNetwork(nn.Module):
    def __init__(self, in_features: int, out_features: int, bias: bool = True, temperature: float = 10,
                 logit_scale: float = 1., hidden_dim: int = 1024, n_hidden: int = 3):
        super().__init__()
        self.temperature = temperature
        self.logit_scale = nn.Parameter(torch.ones(1) * logit_scale)
        layers = []
        for _ in range(n_hidden):
            layers.append(nn.Linear(in_features, hidden_dim))
            in_features = hidden_dim
        layers.append(nn.Linear(in_features, out_features, bias=bias))
        self.layers = nn.ModuleList(layers)

    def forward(self, x: torch.FloatTensor) -> torch.FloatTensor:
        # Apply the linear layers with ReLU on all but the last layer
        for i, layer in enumerate(self.layers):
            x = F.relu(layer(x)) if i < len(self.layers) - 1 else layer(x)
        # Softmax over the objectives, scaled by the temperature
        x = F.softmax(x / self.temperature, dim=1)
        return x * self.logit_scale[0]


# token_pattern = tokenizer.encode("<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n", add_special_tokens=False, )
token_pattern = [128009, 128006, 78191, 128007, 271]


def find_token_for_gating(lst, ):
    """Find the last occurrence of token_pattern in a list."""
    token_pattern_len = len(token_pattern)
    search_end = len(lst)
    for j in range(search_end - token_pattern_len, -1, -1):
        if lst[j:j + token_pattern_len] == token_pattern:
            return j
    raise ValueError("Token pattern not found in the list.")


@dataclass
class CustomOutput(ModelOutput):
    """
    Output class for the reward model with gating.

    Args:
        rewards (`torch.FloatTensor` of shape `(batch_size, config.num_objectives)`):
            The multi-objective rewards predicted by the regression layer.
        hidden_state (`torch.FloatTensor` of shape `(batch_size, hidden_size)`):
            The hidden state of the last non-padding token, used by the regression layer.
        prompt_embedding (`torch.FloatTensor` of shape `(batch_size, hidden_size)`):
            The embeddings of the prompt tokens.
        gating_output (`torch.FloatTensor` of shape `(batch_size, config.num_objectives)`):
            The output weights of the gating network.
        score (`torch.FloatTensor` of shape `(batch_size,)`):
            The final reward score.
        logits (`torch.FloatTensor` of shape `(batch_size,)`):
            Same as score.
    """

    rewards: torch.FloatTensor = None
    hidden_state: Optional[Tuple[torch.FloatTensor, ...]] = None
    prompt_embedding: Optional[torch.FloatTensor] = None
    gating_output: Optional[torch.FloatTensor] = None
    score: Optional[torch.FloatTensor] = None
    logits: Optional[torch.FloatTensor] = None


class LlamaForRewardModelWithGating(LlamaPreTrainedModel):
    def __init__(self, config):
        super().__init__(config)
        self.num_labels = config.num_labels
        self.model = LlamaModel(config)
        config_dict = config.to_dict()
        self.num_objectives = config_dict.get("num_objectives", 19)
        self.regression_layer = nn.Linear(config.hidden_size, self.num_objectives, bias=False)
        self.post_init()
        # Not using torch.eye because it is not supported in BF16
        I = torch.zeros(self.num_objectives, self.num_objectives)
        I[range(self.num_objectives), range(self.num_objectives)] = 1.
        self.reward_transform_matrix = nn.Parameter(I)
        self.reward_transform_matrix.requires_grad = False

        # Gating network that maps the prompt embedding to per-objective weights
        self.gating = GatingNetwork(config.hidden_size, config.num_objectives,
                                    temperature=config_dict.get("gating_temperature", 10),
                                    hidden_dim=config_dict.get("gating_hidden_dim", 1024),
                                    n_hidden=config_dict.get("gating_n_hidden", 3))

    @add_start_docstrings_to_model_forward(LLAMA_INPUTS_DOCSTRING)
    def forward(
        self,
        input_ids: torch.LongTensor = None,
        attention_mask: Optional[torch.Tensor] = None,
        position_ids: Optional[torch.LongTensor] = None,
        past_key_values: Optional[List[torch.FloatTensor]] = None,
        inputs_embeds: Optional[torch.FloatTensor] = None,
        labels: Optional[torch.FloatTensor] = None,
        use_cache: Optional[bool] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> CustomOutput:
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        transformer_outputs = self.model(
            input_ids,
            attention_mask=attention_mask,
            position_ids=position_ids,
            past_key_values=past_key_values,
            inputs_embeds=inputs_embeds,
            use_cache=use_cache,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )
        tokens_hidden_states = transformer_outputs[0]

        if input_ids is not None:
            batch_size = input_ids.shape[0]
        else:
            batch_size = inputs_embeds.shape[0]

        if self.config.pad_token_id is None and batch_size != 1:
            raise ValueError("Cannot handle batch sizes > 1 if no padding token is defined.")
        if self.config.pad_token_id is None:
            sequence_lengths = -1
        else:
            if input_ids is not None:
                # if no pad token found, use modulo instead of reverse indexing for ONNX compatibility
                sequence_lengths = torch.eq(input_ids, self.config.pad_token_id).int().argmax(-1) - 1
                sequence_lengths = sequence_lengths % input_ids.shape[-1]
                sequence_lengths = sequence_lengths.to(tokens_hidden_states.device)
            else:
                sequence_lengths = -1

        dummy_iterator = torch.arange(batch_size, device=tokens_hidden_states.device)
        hidden_states = tokens_hidden_states[dummy_iterator, sequence_lengths]
        assert hidden_states.shape == (batch_size, self.config.hidden_size)
        rewards = self.regression_layer(hidden_states)

        gating_token_positions = [find_token_for_gating(ids.tolist()) for ids in input_ids]
        prompt_embedding = tokens_hidden_states[dummy_iterator, gating_token_positions, :]
        gating_output = self.gating(prompt_embedding)

        rewards_adjusted = rewards @ self.reward_transform_matrix
        score = torch.sum(gating_output * rewards_adjusted, dim=1)

        return CustomOutput(
            rewards=rewards,
            hidden_state=hidden_states,
            prompt_embedding=prompt_embedding,
            gating_output=gating_output,
            score=score,
            logits=score,
        )
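As a rough, self-contained sketch of the shape flow this module implements (random tensors stand in for the trained weights; hidden_size=4096 and 19 objectives are assumed from the Llama-3-8B configuration), the gating weights computed from the prompt embedding combine the transformed per-objective rewards into a single preference score:

```python
import torch
import torch.nn as nn

# Illustrative stand-ins for the trained layers loaded from the checkpoint
batch, hidden, n_obj = 2, 4096, 19
regression_layer = nn.Linear(hidden, n_obj, bias=False)
reward_transform = torch.eye(n_obj)  # identity, matching the initialization above
gating_mlp = nn.Sequential(nn.Linear(hidden, 1024), nn.ReLU(), nn.Linear(1024, n_obj))  # rough stand-in for GatingNetwork

last_token_hidden = torch.randn(batch, hidden)  # hidden state at the final non-padding token
prompt_embedding = torch.randn(batch, hidden)   # hidden state at the gating token (end of the prompt)

gating_output = torch.softmax(gating_mlp(prompt_embedding) / 10, dim=1)  # (batch, 19) objective weights
rewards = regression_layer(last_token_hidden)                            # (batch, 19) per-objective rewards
score = (gating_output * (rewards @ reward_transform)).sum(dim=1)        # (batch,) preference score
print(rewards.shape, score.shape)  # torch.Size([2, 19]) torch.Size([2])
```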
README.md
CHANGED
@@ -35,12 +35,24 @@ license: llama3
 ## Demo Code
 ```python
 import torch
-from transformers import AutoModelForSequenceClassification, AutoTokenizer
+from transformers import AutoConfig, AutoModelForSequenceClassification
+from transformers import BitsAndBytesConfig
+from transformers import AutoTokenizer, pipeline
+
 device = "cuda"
-path = "RLHFlow/ArmoRM-Llama3-8B-v0.1"
-model = AutoModelForSequenceClassification.from_pretrained(path, device_map=device,
-                                                           trust_remote_code=True, torch_dtype=torch.bfloat16)
+path = "SteveTran/ArmoRM-Llama3-8B-v0.1-4bit"
 tokenizer = AutoTokenizer.from_pretrained(path, use_fast=True)
+bnb_quantization_config = BitsAndBytesConfig(load_in_4bit=True,
+                                             bnb_4bit_compute_dtype=torch.bfloat16,
+                                             bnb_4bit_quant_type="fp4",
+                                             bnb_4bit_use_double_quant=True)
+model = AutoModelForSequenceClassification.from_pretrained(
+    path,
+    quantization_config=bnb_quantization_config,
+    device_map="auto",
+    torch_dtype=torch.bfloat16,
+    trust_remote_code=True,
+)
 # We load a random sample from the validation set of the HelpSteer dataset
 prompt = 'What are some synonyms for the word "beautiful"?'
 response = "Nicely, Beautifully, Handsome, Stunning, Wonderful, Gorgeous, Pretty, Stunning, Elegant"
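After loading the quantized checkpoint as in the updated snippet above, scoring proceeds exactly as in the original demo. A minimal sketch (not part of this diff), assuming the `model`, `tokenizer`, `prompt`, and `response` defined above:

```python
messages = [{"role": "user", "content": prompt},
            {"role": "assistant", "content": response}]
input_ids = tokenizer.apply_chat_template(messages, return_tensors="pt").to(model.device)
with torch.no_grad():
    output = model(input_ids)
    # Per-objective rewards and the aggregated preference score, as in the original README
    multi_obj_rewards = output.rewards.cpu().float()
    preference_score = output.score.cpu().float()
print(preference_score)
```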
config.json
CHANGED
@@ -31,6 +31,21 @@
   "num_objectives": 19,
   "pad_token_id": 128256,
   "pretraining_tp": 1,
+  "quantization_config": {
+    "_load_in_4bit": true,
+    "_load_in_8bit": false,
+    "bnb_4bit_compute_dtype": "bfloat16",
+    "bnb_4bit_quant_storage": "uint8",
+    "bnb_4bit_quant_type": "fp4",
+    "bnb_4bit_use_double_quant": true,
+    "llm_int8_enable_fp32_cpu_offload": false,
+    "llm_int8_has_fp16_weight": false,
+    "llm_int8_skip_modules": null,
+    "llm_int8_threshold": 6.0,
+    "load_in_4bit": true,
+    "load_in_8bit": false,
+    "quant_method": "bitsandbytes"
+  },
   "rms_norm_eps": 1e-05,
   "rope_scaling": null,
   "rope_theta": 500000.0,
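Since the quantization settings ship inside the checkpoint's config.json, they can be inspected before loading. A small illustrative sketch, assuming the `quantization_config` block is stored exactly as shown in this diff:

```python
from transformers import AutoConfig

path = "SteveTran/ArmoRM-Llama3-8B-v0.1-4bit"
config = AutoConfig.from_pretrained(path, trust_remote_code=True)

# The settings mirror the quantization_config block added above
quant = config.to_dict().get("quantization_config", {})
print(quant.get("quant_method"))         # "bitsandbytes"
print(quant.get("load_in_4bit"))         # True
print(quant.get("bnb_4bit_quant_type"))  # "fp4"
```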
model-00001-of-00002.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:7002cddb2ab8d0f289596cc337c78b5f4c482b0e31229f88ff4efee8762c9f13
size 2985501395
model-00002-of-00002.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:42b4ca78d243ecb3c5917ec35ca07b3256e70ddf1baa79096f37fe74b2efcfb6
size 1669796944
model.safetensors.index.json
CHANGED
The diff for this file is too large to render.
See raw diff