bunyaminergen committed
Commit 249613c · 1 Parent(s): f30ef71
Files changed (1):
  1. README.md +52 -13
README.md CHANGED
@@ -50,19 +50,58 @@ please read the [CONTRIBUTING](CONTRIBUTING.md) first._
 #### Hugging Face

 ```python
-from datasets import load_dataset
-
-# 141k:
-dataset_141k = load_dataset("bunyaminergen/cornstack-python-v1-filtered", revision="v3", split="train")
-print(dataset_141k[0])
-
-# 282k:
-dataset_282k = load_dataset("bunyaminergen/cornstack-python-v1-filtered", revision="v5", split="train")
-print(dataset_282k[0])
-
-# 423k:
-dataset_423k = load_dataset("bunyaminergen/cornstack-python-v1-filtered", revision="v7", split="train")
-print(dataset_423k[0])
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
+
+model_name = "bunyaminergen/Qwen2.5-Coder-1.5B-Instruct-SFT"
+
+quant_config = BitsAndBytesConfig(
+    load_in_4bit=True,
+    bnb_4bit_compute_dtype=torch.bfloat16,
+    bnb_4bit_use_double_quant=True,
+    bnb_4bit_quant_type="nf4"
+)
+
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+
+model = AutoModelForCausalLM.from_pretrained(
+    model_name,
+    quantization_config=quant_config,
+    device_map="auto"
+)
+
+model.eval()
+
+messages = [
+    {"role": "system", "content": "You are a senior Python developer."},
+    {"role": "user", "content": "Give me a quick example of bubble sort in Python."}
+]
+
+prompt = tokenizer.apply_chat_template(
+    messages,
+    tokenize=False,
+    add_generation_prompt=True
+)
+
+inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+
+max_new_tokens = 512
+temperature = 0.9
+
+with torch.no_grad():
+    outputs = model.generate(
+        **inputs,
+        max_new_tokens=max_new_tokens,
+        temperature=temperature,
+        do_sample=True,
+        top_p=0.95,
+        eos_token_id=tokenizer.eos_token_id,
+        pad_token_id=tokenizer.eos_token_id
+    )
+
+result = tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+print(result)
 ```

 ---