njwright92
/

ComicBot_v.2-gguf

@@ -1,25 +1,42 @@
 from typing import Dict, List, Any
 from llama_cpp import Llama
 import gemma_tools
 MAX_TOKENS = 1000
-class EndpointHandler():
-    def __init__(self, model_dir=None):
-        if model_dir:
-            # Initialize the Llama model directly
-            self.model = Llama(
-                # Adjust the path if necessary
-                model_path=f"{model_dir}/ComicBot_v.2-gguf",
-                n_ctx=MAX_TOKENS,
-            )
     def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
-        # Extract and validate arguments from the data
         args_check = gemma_tools.get_args_or_none(data)
         if not args_check[0]:  # If validation failed
@@ -29,26 +46,25 @@ class EndpointHandler():
                 "description": args_check.get("description", "Validation error in arguments")
             }]
-        args = args_check  # If validation passed, args are in args_check
-        # Define the formatting template
-        fmat = "<startofturn>system\n{system_prompt} <endofturn>\n<startofturn>user\n{inputs} <endofturn>\n<startofturn>model"
         try:
-            formatted_prompt = fmat.format(**args)
         except Exception as e:
             return [{
                 "status": "error",
                 "reason": "Invalid format",
                 "detail": str(e)
             }]
         max_length = data.get("max_length", 212)
         try:
             max_length = int(max_length)
         except ValueError:
             return [{
                 "status": "error",
@@ -56,16 +72,32 @@ class EndpointHandler():
                 "detail": "max_length was not a valid integer"
             }]
-        res = self.model(
-            formatted_prompt,
-            temperature=args["temperature"],
-            top_p=args["top_p"],
-            top_k=args["top_k"],
-            max_tokens=max_length
-        )
         return [{
             "status": "success",
-            # Assuming Llama's response format
-            "response": res['choices'][0]['text']
         }]

 from typing import Dict, List, Any
 from llama_cpp import Llama
 import gemma_tools
+import os
 MAX_TOKENS = 1000
+class EndpointHandler:
+    def __init__(self, model_dir: str = None):
+        """
+        Initialize the EndpointHandler with the path to the model directory.
+        :param model_dir: Path to the directory containing the model file.
+        """
+        if model_dir:
+            # Update the model filename to match the one in your repository
+            model_path = os.path.join(
+                model_dir, "comic_mistral-v5.2.q5_0.gguf")
+            if not os.path.exists(model_path):
+                raise FileNotFoundError(
+                    f"The model file was not found at {model_path}")
+            try:
+                self.model = Llama(
+                    model_path=model_path,
+                    n_ctx=MAX_TOKENS,  # Use n_ctx for context size in llama_cpp
+                )
+            except Exception as e:
+                raise RuntimeError(f"Failed to load the model: {e}")
     def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
+        """
+        Handle incoming requests for model inference.
+        :param data: Dictionary containing input data and parameters for the model.
+        :return: A list with a dictionary containing the status and response or error details.
+        """
+        # Extract and validate arguments from the data
         args_check = gemma_tools.get_args_or_none(data)
         if not args_check[0]:  # If validation failed
                 "description": args_check.get("description", "Validation error in arguments")
             }]
+        # If validation passed, args are in the second element of the tuple
+        args = args_check[1]
+        # Define the formatting template for the prompt
+        prompt_format = "<startofturn>system\n{system_prompt} <endofturn>\n<startofturn>user\n{inputs} <endofturn>\n<startofturn>model"
         try:
+            formatted_prompt = prompt_format.format(**args)
         except Exception as e:
             return [{
                 "status": "error",
                 "reason": "Invalid format",
                 "detail": str(e)
             }]
+        # Parse max_length: defaults to 212 when absent; a non-integer value yields an error response
         max_length = data.get("max_length", 212)
         try:
             max_length = int(max_length)
         except ValueError:
             return [{
                 "status": "error",
                 "detail": "max_length was not a valid integer"
             }]
+        # Perform inference
+        try:
+            res = self.model(
+                formatted_prompt,
+                temperature=args["temperature"],
+                top_p=args["top_p"],
+                top_k=args["top_k"],
+                max_tokens=max_length
+            )
+        except Exception as e:
+            return [{
+                "status": "error",
+                "reason": "Inference failed",
+                "detail": str(e)
+            }]
         return [{
             "status": "success",
+            # Extract the text from the response
+            "response": res['choices'][0]['text'].strip()
         }]
+# Usage in your script or where the handler is instantiated:
+try:
+    handler = EndpointHandler("/repository")
+except (FileNotFoundError, RuntimeError) as e:
+    print(f"Initialization error: {e}")
+    exit(1)  # Exit with an error code if the handler cannot be initialized