Add headers and todo
Browse files
README.md
CHANGED
|
@@ -11,11 +11,15 @@ widget:
|
|
| 11 |
- text: "pragma solidity ^0.5.7;\n// Context: ParentA | Functions: helloA helloB | Constants: constantA \ncontract HelloWorld is ParentA {"
|
| 12 |
---
|
| 13 |
|
| 14 |
-
# A code
|
| 15 |
-
|
|
|
|
|
|
|
| 16 |
- Header solidity version like `pragma solidity ^0.5.7`
|
| 17 |
- Ancestor class/library info, e.g. public functions and constants from `ParentA`
|
| 18 |
- Contract/Library/Interface declaration header, e.g. `HelloWorld` ended with `{`
|
|
|
|
|
|
|
| 19 |
|
| 20 |
```python
|
| 21 |
# !pip install transformers -q
|
|
@@ -38,35 +42,16 @@ print(tokenizer.decode(generated_ids[0], skip_special_tokens=True))
|
|
| 38 |
# Expected outcome
|
| 39 |
"""
|
| 40 |
string public constant name = "Hello World";
|
| 41 |
-
|
| 42 |
-
uint8 public constant decimals = 18;
|
| 43 |
-
uint256 public constant initialSupply = 0;
|
| 44 |
uint256 public constant override returns (uint256) {
|
| 45 |
return initialSupply;
|
| 46 |
}
|
| 47 |
function initialSupply() public view returns (uint256) {
|
| 48 |
-
|
| 49 |
-
}
|
| 50 |
-
function balanceOf(address _owner) public view returns (uint256) {
|
| 51 |
-
return balanceOf(_owner);
|
| 52 |
-
}
|
| 53 |
-
function transfer(address _to, uint256 _value) public returns (bool) {
|
| 54 |
-
balanceOf[msg.sender] -= _value;
|
| 55 |
-
balanceOf[_to] += _value;
|
| 56 |
-
emit Transfer(msg.sender, _to, _value);
|
| 57 |
-
return true;
|
| 58 |
-
}
|
| 59 |
-
function transferFrom(address _from, address _to, uint256 _value) public returns (bool) {
|
| 60 |
-
balanceOf[_from] -= _value;
|
| 61 |
-
balanceOf[_to] += _value;
|
| 62 |
-
emit Transfer(_from, _to, _value);
|
| 63 |
-
return true;
|
| 64 |
-
}
|
| 65 |
-
function approve(address _spender, uint256 _value) public returns (bool)
|
| 66 |
"""
|
| 67 |
```
|
| 68 |
|
| 69 |
-
|
| 70 |
- Base T5 code model: https://huggingface.co/Salesforce/codet5-large
|
| 71 |
- Source data: https://huggingface.co/datasets/mwritescode/slither-audited-smart-contracts
|
| 72 |
- Processing steps: Clean, contract-level segmentation separation, split into input and output
|
|
@@ -116,4 +101,9 @@ function approve(address _spender, uint256 _value) public returns (bool)
|
|
| 116 |
}
|
| 117 |
}
|
| 118 |
```
|
| 119 |
-
- Source training code: To be added
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
- text: "pragma solidity ^0.5.7;\n// Context: ParentA | Functions: helloA helloB | Constants: constantA \ncontract HelloWorld is ParentA {"
|
| 12 |
---
|
| 13 |
|
| 14 |
+
# A code generation T5 model for solidity (web3 smart contract)
|
| 15 |
+
|
| 16 |
+
## Hello World example
|
| 17 |
+
- A hello world example to use this model, notice the input `text` includes
|
| 18 |
- Header solidity version like `pragma solidity ^0.5.7`
|
| 19 |
- Ancestor class/library info, e.g. public functions and constants from `ParentA`
|
| 20 |
- Contract/Library/Interface declaration header, e.g. `HelloWorld` ended with `{`
|
| 21 |
+
- Or simply use the test widget on the right side of the window to test; however,
|
| 22 |
+
the quality is known to be worse without decoding params
|
| 23 |
|
| 24 |
```python
|
| 25 |
# !pip install transformers -q
|
|
|
|
| 42 |
# Expected outcome
|
| 43 |
"""
|
| 44 |
string public constant name = "Hello World";
|
| 45 |
+
...
|
|
|
|
|
|
|
| 46 |
uint256 public constant override returns (uint256) {
|
| 47 |
return initialSupply;
|
| 48 |
}
|
| 49 |
function initialSupply() public view returns (uint256) {
|
| 50 |
+
...
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
"""
|
| 52 |
```
|
| 53 |
|
| 54 |
+
## Background
|
| 55 |
- Base T5 code model: https://huggingface.co/Salesforce/codet5-large
|
| 56 |
- Source data: https://huggingface.co/datasets/mwritescode/slither-audited-smart-contracts
|
| 57 |
- Processing steps: Clean, contract-level segmentation separation, split into input and output
|
|
|
|
| 101 |
}
|
| 102 |
}
|
| 103 |
```
|
| 104 |
+
- Source training code: To be added
|
| 105 |
+
|
| 106 |
+
## Future TODO
|
| 107 |
+
- The model is significantly under-trained due to a lack of GPU budget; it needs roughly 10x the Colab resources (~$100 for a full training run)
|
| 108 |
+
- The way the model can be used is currently quite limited; we could potentially switch to a GPT-2 decoder-only architecture for comparison, but CodeT5 has strong code-specific optimizations
|
| 109 |
+
- Need more classifiers (T5- or BERT-like) to detect potential defects.
|