Improve model card: Add pipeline tag, library name, and paper link

#1
by nielsr HF Staff - opened
Files changed (1) hide show
  1. README.md +20 -8
README.md CHANGED
@@ -3,8 +3,12 @@ license: mit
3
  tags:
4
  - decompile
5
  - binary
 
 
6
  ---
7
 
 
 
8
  ### 1. Introduction of LLM4Decompile
9
 
10
  LLM4Decompile aims to decompile x86 assembly instructions into C. The newly released V2 series are trained with a larger dataset (2B tokens) and a maximum token length of 4,096, with remarkable performance (up to 100% improvement) compared to the previous model.
@@ -53,6 +57,7 @@ Note: **Replace** func0 with the function name you want to decompile.
53
  import os
54
  import subprocess
55
  from tqdm import tqdm,trange
 
56
 
57
  OPT = ["O0", "O1", "O2", "O3"]
58
  timeout_duration = 10
@@ -92,7 +97,8 @@ with tempfile.TemporaryDirectory() as temp_dir:
92
  c_decompile = f.read()
93
  c_func = []
94
  flag = 0
95
- for line in c_decompile.split('\n'):
 
96
  if "Function: func0" in line:#**Replace** func0 with the function name you want to decompile.
97
  flag = 1
98
  c_func.append(line)
@@ -108,10 +114,14 @@ with tempfile.TemporaryDirectory() as temp_dir:
108
  if 'func0' in c_func[idx_tmp]:
109
  break
110
  c_func = c_func[idx_tmp:]
111
- input_asm = '\n'.join(c_func).strip()
112
-
113
- before = f"# This is the assembly code:\n"#prompt
114
- after = "\n# What is the source code?\n"#prompt
 
 
 
 
115
  input_asm_prompt = before+input_asm.strip()+after
116
  with open(fileName +'_' + opt +'.pseudo','w',encoding='utf-8') as f:
117
  f.write(input_asm_prompt)
@@ -162,8 +172,10 @@ c_func_decompile = tokenizer.decode(outputs[0][len(inputs[0]):-1])
162
  with open(fileName +'_' + OPT[0] +'.pseudo','r') as f:#original file
163
  func = f.read()
164
 
165
- print(f'pseudo function:\n{func}')# Note we only decompile one function, where the original file may contain multiple functions
166
- print(f'refined function:\n{c_func_decompile}')
 
 
167
 
168
  ```
169
 
@@ -172,4 +184,4 @@ This code repository is licensed under the MIT License.
172
 
173
  ### 5. Contact
174
 
175
- If you have any questions, please raise an issue.
 
3
  tags:
4
  - decompile
5
  - binary
6
+ pipeline_tag: text-generation
7
+ library_name: transformers
8
  ---
9
 
10
+ This repository contains the `LLM4Binary/llm4decompile-6.7b-v2` model. This model is associated with the paper [Decompile-Bench: Million-Scale Binary-Source Function Pairs for Real-World Binary Decompilation](https://huggingface.co/papers/2505.12668).
11
+
12
  ### 1. Introduction of LLM4Decompile
13
 
14
  LLM4Decompile aims to decompile x86 assembly instructions into C. The newly released V2 series are trained with a larger dataset (2B tokens) and a maximum token length of 4,096, with remarkable performance (up to 100% improvement) compared to the previous model.
 
57
  import os
58
  import subprocess
59
  from tqdm import tqdm,trange
60
+ import tempfile
61
 
62
  OPT = ["O0", "O1", "O2", "O3"]
63
  timeout_duration = 10
 
97
  c_decompile = f.read()
98
  c_func = []
99
  flag = 0
100
+ for line in c_decompile.split('\n'):
102
  if "Function: func0" in line:#**Replace** func0 with the function name you want to decompile.
103
  flag = 1
104
  c_func.append(line)
 
114
  if 'func0' in c_func[idx_tmp]:
115
  break
116
  c_func = c_func[idx_tmp:]
117
+ input_asm = '\n'.join(c_func).strip()
119
+
120
+ before = f"# This is the assembly code:\n"#prompt
121
+ after = "\n# What is the source code?\n"#prompt
125
  input_asm_prompt = before+input_asm.strip()+after
126
  with open(fileName +'_' + opt +'.pseudo','w',encoding='utf-8') as f:
127
  f.write(input_asm_prompt)
 
172
  with open(fileName +'_' + OPT[0] +'.pseudo','r') as f:#original file
173
  func = f.read()
174
 
175
+ print(f'pseudo function:\n{func}')# Note we only decompile one function, where the original file may contain multiple functions
176
+ print(f'refined function:\n{c_func_decompile}')
179
 
180
  ```
181
 
 
184
 
185
  ### 5. Contact
186
 
187
+ If you have any questions, please raise an issue.