Abhaykoul committed (verified)
Commit dd7d18d · 1 Parent(s): a914ade

Rename README (2).md to README.md

Files changed (1)
  1. README (2).md → README.md +35 -11
README (2).md → README.md RENAMED
@@ -5,13 +5,13 @@ library_name: transformers
---
CURRENTLY IN TRAINING :)

- Currently, only the LLM section of this model is fully ready.
+ Currently, only the LLM and Classification sections of this model are fully ready.
```py
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
import torch

# Load model and tokenizer
- model_name = "Abhaykoul/hai3.1-pretrainedv3"
+ model_name = "HelpingAI/hai3.1-checkpoint-0002"

# Set device to CUDA if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
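
(The diff elides README lines 18-48, which hold the chat setup and the `model.generate(` call named in the next hunk header. Below is a minimal sketch of what that kind of streaming generation typically looks like with the renamed checkpoint; it assumes standard `transformers` chat-template and `TextStreamer` usage and is an illustration, not the README's exact code.)

```py
# Illustrative only: typical streaming generation with the checkpoint named in this diff.
# Assumes the tokenizer ships a chat template; adjust the sampling settings to taste.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer

model_name = "HelpingAI/hai3.1-checkpoint-0002"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True).to(device).eval()
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

messages = [{"role": "user", "content": "Hello! Who are you?"}]
input_ids = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt").to(device)

# Stream tokens to stdout as they are generated
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
model.generate(input_ids, max_new_tokens=256, do_sample=True, temperature=0.7, streamer=streamer)
```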
@@ -49,32 +49,56 @@ model.generate(
```
Classification section under training
```py
+ import os
+ import json
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

- ckpt = "Abhaykoul/hai3.1-pretrainedv3"
- device = "cuda" if torch.cuda.is_available() else "cpu"
- model = AutoModelForCausalLM.from_pretrained(ckpt, trust_remote_code=True).to(device).eval()
+ # Path to saved model (change if needed)
+ ckpt = "HelpingAI/hai3.1-checkpoint-0002"  # or the HF name like HelpingAI/hai3.1-checkpoint-0001
+ device = "cpu"
+
+ print("Device:", device)
+ model = AutoModelForCausalLM.from_pretrained(ckpt, trust_remote_code=True)
+ model.to(device).eval()

tok = AutoTokenizer.from_pretrained(ckpt, trust_remote_code=True)
if tok.pad_token is None:
    tok.pad_token = tok.eos_token

+ # Optional: try to load id2label from saved metadata
+ id2label = None
+ meta_path = os.path.join(ckpt, "label_map.json")
+ if os.path.exists(meta_path):
+     try:
+         with open(meta_path, "r") as f:
+             meta = json.load(f)
+         id2label = meta.get("id2label")
+         print("Loaded id2label from", meta_path)
+     except Exception as e:
+         print("Failed to read label_map.json:", e)
+
+ # Fallback id2label (only used if no metadata)
+ if id2label is None:
+     id2label = ["HARMFUL_SEXUAL", "HARMFUL_HATE", "HARMFUL_VIOLENCE", "HARMFUL_HARASSMENT", "HARMFUL_LANGUAGE", "HARMFUL_MISINFORMATION", "SAFE"]
+
text = "I am thrilled about my new job!"
enc = tok([text], padding=True, truncation=True, max_length=2048, return_tensors="pt")
enc = {k: v.to(device) for k, v in enc.items()}

with torch.no_grad():
    out = model(input_ids=enc["input_ids"], attention_mask=enc.get("attention_mask"), output_hidden_states=True, return_dict=True, use_cache=False)
- last = out.hidden_states[-1]
- idx = (enc["attention_mask"].sum(dim=1) - 1).clamp(min=0)
- pooled = last[torch.arange(last.size(0)), idx]
+ last = out.hidden_states[-1]  # [B, T, H]
+ # compute last-token index using attention_mask if available
+ if enc.get("attention_mask") is not None:
+     idx = (enc["attention_mask"].sum(dim=1) - 1).clamp(min=0)
+     pooled = last[torch.arange(last.size(0)), idx]
+ else:
+     pooled = last[:, -1, :]
logits = model.structured_lm_head(pooled)
- pred_id = logits.argmax(dim=-1).item()
+ pred_id = int(logits.argmax(dim=-1).item())

print("Predicted class id:", pred_id)
- # Map id -> label using your dataset’s label list, e.g.:
- id2label = ["sadness", "joy", "love", "anger", "fear", "surprise"]  # dair-ai/emotion
print("Predicted label:", id2label[pred_id] if pred_id < len(id2label) else "unknown")
```
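
The snippet added in this hunk optionally reads `id2label` from a `label_map.json` stored alongside a local checkpoint. The exact schema is not shown in this diff; a minimal sketch of writing such a file, assuming it only needs to carry the `id2label` list, could be:

```py
# Illustrative only: produce a label_map.json in the shape the snippet above reads,
# i.e. a JSON object with an "id2label" list. The schema is assumed, not documented here.
import json
import os

id2label = ["HARMFUL_SEXUAL", "HARMFUL_HATE", "HARMFUL_VIOLENCE", "HARMFUL_HARASSMENT",
            "HARMFUL_LANGUAGE", "HARMFUL_MISINFORMATION", "SAFE"]

ckpt_dir = "path/to/local/checkpoint"  # hypothetical local save directory
os.makedirs(ckpt_dir, exist_ok=True)
with open(os.path.join(ckpt_dir, "label_map.json"), "w") as f:
    json.dump({"id2label": id2label}, f, indent=2)
```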
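
End to end, the classification path shown here is last-token pooling of the final hidden state followed by the checkpoint's `structured_lm_head`. A small batch helper that wraps those steps, assuming `model`, `tok`, `device`, and `id2label` are set up as in the snippet above, might look like:

```py
# Illustrative only: batch classification helper built from the snippet in the diff.
# Assumes `model`, `tok`, `device`, and `id2label` already exist as shown above;
# `structured_lm_head` is the classification head exposed by this checkpoint's remote code.
import torch

def classify(texts, model, tok, device, id2label):
    enc = tok(texts, padding=True, truncation=True, max_length=2048, return_tensors="pt")
    enc = {k: v.to(device) for k, v in enc.items()}
    with torch.no_grad():
        out = model(input_ids=enc["input_ids"], attention_mask=enc["attention_mask"],
                    output_hidden_states=True, return_dict=True, use_cache=False)
        last = out.hidden_states[-1]                               # [B, T, H]
        idx = (enc["attention_mask"].sum(dim=1) - 1).clamp(min=0)  # index of last real token per row
        pooled = last[torch.arange(last.size(0)), idx]             # [B, H]
        logits = model.structured_lm_head(pooled)                  # [B, num_labels]
    pred_ids = logits.argmax(dim=-1).tolist()
    return [id2label[i] if i < len(id2label) else "unknown" for i in pred_ids]

# Example usage:
# print(classify(["I am thrilled about my new job!", "This is awful."], model, tok, device, id2label))
```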