christopher/rosetta-code
Viewer • Updated • 79k • 521 • 39
How to use philomath-1209/programming-language-identification with Transformers:
# Use a pipeline as a high-level helper
from transformers import pipeline
pipe = pipeline("text-classification", model="philomath-1209/programming-language-identification") # Load model directly
from transformers import AutoTokenizer, AutoModelForSequenceClassification
tokenizer = AutoTokenizer.from_pretrained("philomath-1209/programming-language-identification")
model = AutoModelForSequenceClassification.from_pretrained("philomath-1209/programming-language-identification")This Model is a fine-tuned version of huggingface/CodeBERTa-small-v1 on cakiki/rosetta-code Dataset for 26 Programming Languages as mentioned below.
Model is trained for 25 epochs on Azure for nearly 26000 Datapoints for above Mentioned 26 Programming Languages
extracted from Dataset having 1006 of total Programming Language.
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TextClassificationPipeline
model_name = 'philomath-1209/programming-language-identification'
loaded_tokenizer = AutoTokenizer.from_pretrained(model_name)
loaded_model = AutoModelForSequenceClassification.from_pretrained(model_name)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
text = """
PROGRAM Triangle
IMPLICIT NONE
REAL :: a, b, c, Area
PRINT *, 'Welcome, please enter the&
&lengths of the 3 sides.'
READ *, a, b, c
PRINT *, 'Triangle''s area: ', Area(a,b,c)
END PROGRAM Triangle
FUNCTION Area(x,y,z)
IMPLICIT NONE
REAL :: Area ! function type
REAL, INTENT( IN ) :: x, y, z
REAL :: theta, height
theta = ACOS((x**2+y**2-z**2)/(2.0*x*y))
height = x*SIN(theta); Area = 0.5*y*height
END FUNCTION Area
"""
inputs = loaded_tokenizer(text, return_tensors="pt",truncation=True)
with torch.no_grad():
logits = loaded_model(**inputs).logits
predicted_class_id = logits.argmax().item()
loaded_model.config.id2label[predicted_class_id]
Loading the model requires the 🤗 Optimum library installed.
pip install transformers optimum[onnxruntime] optimum
model_path = "philomath-1209/programming-language-identification"
import torch
from transformers import pipeline, AutoTokenizer
from optimum.onnxruntime import ORTModelForSequenceClassification
tokenizer = AutoTokenizer.from_pretrained(model_path, subfolder="onnx")
model = ORTModelForSequenceClassification.from_pretrained(model_path, export=False, subfolder="onnx")
text = """
PROGRAM Triangle
IMPLICIT NONE
REAL :: a, b, c, Area
PRINT *, 'Welcome, please enter the&
&lengths of the 3 sides.'
READ *, a, b, c
PRINT *, 'Triangle''s area: ', Area(a,b,c)
END PROGRAM Triangle
FUNCTION Area(x,y,z)
IMPLICIT NONE
REAL :: Area ! function type
REAL, INTENT( IN ) :: x, y, z
REAL :: theta, height
theta = ACOS((x**2+y**2-z**2)/(2.0*x*y))
height = x*SIN(theta); Area = 0.5*y*height
END FUNCTION Area
"""
inputs = tokenizer(text, return_tensors="pt",truncation=True)
with torch.no_grad():
logits = model(**inputs).logits
predicted_class_id = logits.argmax().item()
model.config.id2label[predicted_class_id]
Base model
huggingface/CodeBERTa-small-v1