burtenshaw (HF Staff) committed
Commit 3951d28 · verified · 1 parent: 37a95d8

Add notebook examples for structured outputs and function calling


These notebooks show the community how to use `Kimi-K2-Instruct` with Hugging Face Inference Providers for structured outputs and function calling.
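
Both notebooks build on the same basic pattern: point an OpenAI-compatible client at the Inference Providers router and request the model by ID. The minimal sketch below mirrors the Groq route and model ID used in `function_calling.ipynb` and assumes an `HF_TOKEN` environment variable is set; it is an illustration, not part of the committed notebooks.

```python
import os
from openai import OpenAI

# Sketch of the shared setup: OpenAI client pointed at the HF Inference
# Providers router (Groq route, as in function_calling.ipynb).
client = OpenAI(
    api_key=os.environ["HF_TOKEN"],
    base_url="https://router.huggingface.co/groq/openai/v1",
)

response = client.chat.completions.create(
    model="moonshotai/kimi-k2-instruct",
    messages=[{"role": "user", "content": "Say hello in one short sentence."}],
)
print(response.choices[0].message.content)
```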

Files changed (2)
  1. function_calling.ipynb +325 -0
  2. structured_outputs.ipynb +198 -0
function_calling.ipynb ADDED
@@ -0,0 +1,325 @@
+ {
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "eec74b22",
+ "metadata": {
+ "vscode": {
+ "languageId": "raw"
+ }
+ },
+ "source": [
+ "# Function Calling with Hugging Face Inference Providers\n",
+ "\n",
+ "This notebook demonstrates how to use function calling with both OpenAI-compatible and Hugging Face native clients using Hugging Face Inference Providers.\n",
+ "\n",
+ "## Overview\n",
+ "- **OpenAI-Compatible**: Use familiar OpenAI API syntax with HF Inference Providers\n",
+ "- **Hugging Face Native**: Use HF's native InferenceClient with function calling\n",
+ "- **Shared Functions**: Reusable function definitions and schemas across both approaches\n",
+ "\n",
+ "## Installation\n",
+ "\n",
+ "First, install the required dependencies:\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "f23485bd",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%pip install openai huggingface-hub python-dotenv\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "e39a23ae",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/Users/ben/code/inference-providers-mcp/.venv/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
+ " from .autonotebook import tqdm as notebook_tqdm\n"
+ ]
+ }
+ ],
+ "source": [
+ "import json\n",
+ "import os\n",
+ "from typing import Dict, Any, Optional\n",
+ "from openai import OpenAI\n",
+ "from huggingface_hub import InferenceClient\n",
+ "from dotenv import load_dotenv\n",
+ "\n",
+ "# Load environment variables\n",
+ "load_dotenv()\n",
+ "\n",
+ "# Create a shared configuration\n",
+ "HF_TOKEN = os.getenv(\"HF_TOKEN\")\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "0b45612f",
+ "metadata": {},
+ "source": [
+ "# Define some functions"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "5cd13326",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Shared function definitions (mock weather API)\n",
+ "def get_current_weather(location: str) -> Dict[str, Any]:\n",
+ " \"\"\"Get current weather information for a location.\"\"\"\n",
+ " return {\n",
+ " \"location\": location,\n",
+ " \"temperature\": \"22°C\",\n",
+ " \"condition\": \"Sunny\",\n",
+ " \"humidity\": \"65%\",\n",
+ " \"wind_speed\": \"5 km/h\",\n",
+ " }\n",
+ "\n",
+ "\n",
+ "def get_weather_forecast(location: str, date: str) -> Dict[str, Any]:\n",
+ " \"\"\"Get weather forecast for a location on a specific date.\"\"\"\n",
+ " return {\n",
+ " \"location\": location,\n",
+ " \"date\": date,\n",
+ " \"forecast\": \"Sunny with a chance of rain\",\n",
+ " \"temperature\": \"20°C\",\n",
+ " \"humidity\": \"70%\",\n",
+ " }\n",
+ "\n",
+ "\n",
+ "# Available functions registry\n",
+ "AVAILABLE_FUNCTIONS = {\n",
+ " \"get_current_weather\": get_current_weather,\n",
+ " \"get_weather_forecast\": get_weather_forecast,\n",
+ "}\n",
+ "\n",
+ "# Shared tool schemas (compatible with both OpenAI and HF)\n",
+ "TOOL_SCHEMAS = [\n",
+ " {\n",
+ " \"type\": \"function\",\n",
+ " \"function\": {\n",
+ " \"name\": \"get_current_weather\",\n",
+ " \"description\": \"Get current weather information for a location\",\n",
+ " \"parameters\": {\n",
+ " \"type\": \"object\",\n",
+ " \"properties\": {\n",
+ " \"location\": {\n",
+ " \"type\": \"string\",\n",
+ " \"description\": \"City and country (e.g., 'Paris, France')\",\n",
+ " }\n",
+ " },\n",
+ " \"required\": [\"location\"],\n",
+ " },\n",
+ " },\n",
+ " },\n",
+ " {\n",
+ " \"type\": \"function\",\n",
+ " \"function\": {\n",
+ " \"name\": \"get_weather_forecast\",\n",
+ " \"description\": \"Get weather forecast for a location on a specific date\",\n",
+ " \"parameters\": {\n",
+ " \"type\": \"object\",\n",
+ " \"properties\": {\n",
+ " \"location\": {\n",
+ " \"type\": \"string\",\n",
+ " \"description\": \"City and country (e.g., 'London, UK')\",\n",
+ " },\n",
+ " \"date\": {\n",
+ " \"type\": \"string\",\n",
+ " \"description\": \"Date in YYYY-MM-DD format\",\n",
+ " },\n",
+ " },\n",
+ " \"required\": [\"location\", \"date\"],\n",
+ " },\n",
+ " },\n",
+ " },\n",
+ "]\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "f48298c3",
+ "metadata": {},
+ "source": [
+ "# Implement a Function Calling app"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "id": "7c4b21dc",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "SYSTEM_PROMPT = \"\"\"\n",
+ "You are a helpful assistant that can answer questions and help with tasks.\n",
+ "\"\"\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "775ae07e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def process_function_calls(response_message, messages):\n",
+ " \"\"\"Process function calls and return updated messages.\"\"\"\n",
+ " if not response_message.tool_calls:\n",
+ " return messages, False\n",
+ "\n",
+ " # Add assistant's response to messages\n",
+ " messages.append(response_message)\n",
+ "\n",
+ " # Process each tool call\n",
+ " for tool_call in response_message.tool_calls:\n",
+ " function_name = tool_call.function.name\n",
+ " function_args = json.loads(tool_call.function.arguments)\n",
+ "\n",
+ " print(f\"🔧 Calling: {function_name}\")\n",
+ " print(f\"📝 Args: {function_args}\")\n",
+ "\n",
+ " # Call the function\n",
+ " if function_name in AVAILABLE_FUNCTIONS:\n",
+ " func = AVAILABLE_FUNCTIONS[function_name]\n",
+ " result = func(**function_args)\n",
+ " print(f\"✅ Result: {result}\")\n",
+ "\n",
+ " # Add function result to messages\n",
+ " messages.append(\n",
+ " {\n",
+ " \"tool_call_id\": tool_call.id,\n",
+ " \"role\": \"tool\",\n",
+ " \"name\": function_name,\n",
+ " \"content\": json.dumps(result),\n",
+ " }\n",
+ " )\n",
+ " else:\n",
+ " print(f\"❌ Function {function_name} not found\")\n",
+ "\n",
+ " return messages, True\n",
+ "\n",
+ "\n",
+ "def chat_with_functions(user_message, client, model) -> str:\n",
+ " \"\"\"Unified function calling handler for both OpenAI and HF clients.\"\"\"\n",
+ " messages = [\n",
+ " {\"role\": \"system\", \"content\": SYSTEM_PROMPT},\n",
+ " {\"role\": \"user\", \"content\": user_message},\n",
+ " ]\n",
+ "\n",
+ " # Initial API call\n",
+ " response = client.chat.completions.create(\n",
+ " model=model,\n",
+ " messages=messages,\n",
+ " tools=TOOL_SCHEMAS,\n",
+ " tool_choice=\"auto\",\n",
+ " )\n",
+ "\n",
+ " response_message = response.choices[0].message\n",
+ "\n",
+ " # Process function calls if any\n",
+ " messages, had_tool_calls = process_function_calls(response_message, messages)\n",
+ "\n",
+ " if had_tool_calls:\n",
+ " # Get final response after function calls\n",
+ " final_response = client.chat.completions.create(\n",
+ " model=model,\n",
+ " messages=messages,\n",
+ " tools=TOOL_SCHEMAS,\n",
+ " tool_choice=\"auto\",\n",
+ " )\n",
+ " final_content = final_response.choices[0].message.content\n",
+ " else:\n",
+ " final_content = response_message.content\n",
+ "\n",
+ " return final_content\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "8b26419b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "client = OpenAI(\n",
+ " api_key=HF_TOKEN,\n",
+ " base_url=\"https://router.huggingface.co/groq/openai/v1\",\n",
+ ")\n",
+ "\n",
+ "if False:\n",
+ " # Initialize HF client with inference provider\n",
+ " client = InferenceClient(provider=\"groq\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "c410bafc",
+ "metadata": {},
+ "source": [
+ "# Demo!"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "id": "32ee9713",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "🔧 Calling: get_current_weather\n",
+ "📝 Args: {'location': 'Berlin, Germany'}\n",
+ "✅ Result: {'location': 'Berlin, Germany', 'temperature': '22°C', 'condition': 'Sunny', 'humidity': '65%', 'wind_speed': '5 km/h'}\n"
+ ]
+ }
+ ],
+ "source": [
+ "query = \"What's the current weather in Berlin?\"\n",
+ "\n",
+ "response = chat_with_functions(\n",
+ " user_message=query,\n",
+ " client=client,\n",
+ " model=\"moonshotai/kimi-k2-instruct\",\n",
+ ")"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": ".venv",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.10"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+ }
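
The overview in `function_calling.ipynb` also mentions a Hugging Face native path, but the code only stubs it behind a disabled `if False:` branch. A minimal sketch of that variant is shown below; it assumes a recent `huggingface_hub` release where `InferenceClient` exposes the OpenAI-compatible `chat.completions.create` interface (which is all the notebook's `chat_with_functions` helper relies on), and it reuses the helper and schemas defined in the notebook.

```python
import os
from huggingface_hub import InferenceClient

# Hedged sketch: swap the OpenAI client for the HF native client. Because
# chat_with_functions only calls client.chat.completions.create, the rest of
# the notebook should work unchanged (assumption: recent huggingface_hub with
# provider routing support).
hf_client = InferenceClient(provider="groq", token=os.getenv("HF_TOKEN"))

answer = chat_with_functions(
    user_message="What's the weather forecast for Paris, France on 2024-07-01?",
    client=hf_client,
    # Hub model ID; the OpenAI route in the notebook uses the provider-specific
    # lowercase ID instead.
    model="moonshotai/Kimi-K2-Instruct",
)
print(answer)
```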
structured_outputs.ipynb ADDED
@@ -0,0 +1,198 @@
+ {
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "43a342b3",
+ "metadata": {
+ "vscode": {
+ "languageId": "raw"
+ }
+ },
+ "source": [
+ "# Structured Outputs with Hugging Face Inference Providers\n",
+ "\n",
+ "This notebook demonstrates how to use structured outputs with both OpenAI-compatible and Hugging Face native clients using Hugging Face Inference Providers.\n",
+ "\n",
+ "## Overview\n",
+ "- **OpenAI-Compatible**: Use familiar OpenAI structured outputs with HF Inference Providers\n",
+ "- **Hugging Face Native**: Use HF's native InferenceClient with JSON schema validation\n",
+ "- **Shared Models**: Reusable Pydantic models and schemas across both approaches\n",
+ "- **Guaranteed Structure**: Ensure responses match your defined schemas\n",
+ "\n",
+ "## Installation\n",
+ "\n",
+ "First, install the required dependencies:\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "id": "7071d771",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# %pip install openai huggingface-hub pydantic python-dotenv"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "7323b5fb",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "import json\n",
+ "from typing import Dict, Any, List, Optional\n",
+ "from openai import OpenAI\n",
+ "from huggingface_hub import InferenceClient\n",
+ "from pydantic import BaseModel, Field\n",
+ "from dotenv import load_dotenv\n",
+ "\n",
+ "# Load environment variables\n",
+ "load_dotenv()\n",
+ "\n",
+ "# Create a shared configuration\n",
+ "HF_TOKEN = os.getenv(\"HF_TOKEN\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "abbe98f5",
+ "metadata": {},
+ "source": [
+ "# Structured Outputs Task\n",
+ "\n",
+ "Let's set up a structured output task, such as analysing a research paper and returning structured output."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "id": "2c1799a9",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Shared Pydantic Models and Sample Data\n",
+ "\n",
+ "# Define structured output models\n",
+ "class PaperAnalysis(BaseModel):\n",
+ " \"\"\"Analysis of a research paper.\"\"\"\n",
+ "\n",
+ " title: str = Field(description=\"The title of the paper\")\n",
+ " abstract_summary: str = Field(description=\"A concise summary of the abstract\")\n",
+ " main_contributions: List[str] = Field(description=\"Key contributions of the paper\")\n",
+ " methodology: str = Field(description=\"Brief description of the methodology used\")\n",
+ "\n",
+ "\n",
+ "# Sample data for testing\n",
+ "SAMPLE_PAPER = \"\"\"Title: Attention Is All You Need\n",
+ "\n",
+ "Abstract: The dominant sequence transduction models are based on complex recurrent \n",
+ "or convolutional neural networks that include an encoder and a decoder. The best \n",
+ "performing models also connect the encoder and decoder through an attention mechanism. \n",
+ "We propose a new simple network architecture, the Transformer, based solely on \n",
+ "attention mechanisms, dispensing with recurrence and convolutions entirely. \n",
+ "Experiments on two machine translation tasks show these models to be superior \n",
+ "in quality while being more parallelizable and requiring significantly less time to train.\n",
+ "\n",
+ "Introduction: Recurrent neural networks, long short-term memory and gated recurrent \n",
+ "neural networks in particular, have been firmly established as state of the art approaches \n",
+ "in sequence modeling and transduction problems such as language modeling and machine translation.\n",
+ "The Transformer architecture introduces multi-head attention mechanisms that allow the model\n",
+ "to jointly attend to information from different representation subspaces.\"\"\"\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "d4cd793c",
+ "metadata": {},
+ "source": [
+ "# Demo!"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "b82ca76b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Unified Structured Output Handler\n",
+ "system_prompt = \"Analyze the research paper and extract structured information about its title, abstract, contributions, and methodology.\"\n",
+ "\n",
+ "client = OpenAI(\n",
+ " api_key=HF_TOKEN,\n",
+ " base_url=\"https://router.huggingface.co/novita/v3/openai\",\n",
+ ")\n",
+ "\n",
+ "\n",
+ "def get_structured_output(content: str) -> Any:\n",
+ " \"\"\"Get structured output using OpenAI-compatible client.\"\"\"\n",
+ "\n",
+ " messages = [\n",
+ " {\"role\": \"system\", \"content\": system_prompt},\n",
+ " {\"role\": \"user\", \"content\": content},\n",
+ " ]\n",
+ "\n",
+ " # Use OpenAI's structured output parsing\n",
+ " completion = client.beta.chat.completions.parse(\n",
+ " model=\"moonshotai/kimi-k2-instruct\",\n",
+ " messages=messages,\n",
+ " response_format=PaperAnalysis,\n",
+ " )\n",
+ "\n",
+ " return completion.choices[0].message.parsed\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 26,
+ "id": "8519e939",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "📄 Title: Attention Is All You Need\n",
+ "📝 Summary: Proposes the Transformer architecture, a sequence-to-sequence model that replaces all recurrence and convolution with attention mechanisms. Demonstrates state-of-the-art results on machine-translation benchmarks while being more parallelizable and faster to train.\n",
+ "🎯 Contributions: ['Introduces the Transformer architecture, the first transduction model built entirely on attention, eliminating recurrence and convolution.', 'Presents multi-head self-attention to jointly attend to information from different representation subspaces.', 'Shows that attention-only models outperform RNN/CNN baselines in translation quality while offering better parallelization and shorter training times.']\n",
+ "🔬 Methodology: Designs an encoder-decoder architecture composed solely of stacked self-attention and feed-forward layers. Uses multi-head scaled dot-product attention, positional encodings, and residual connections. Evaluates on WMT 2014 English-to-German and English-to-French translation tasks, comparing against previous RNN/CNN-based systems.\n"
+ ]
+ }
+ ],
+ "source": [
+ "paper_analysis = get_structured_output(\n",
+ " content=SAMPLE_PAPER,\n",
+ ")\n",
+ "\n",
+ "print(f\"📄 Title: {paper_analysis.title}\")\n",
+ "print(f\"📝 Summary: {paper_analysis.abstract_summary}\")\n",
+ "print(f\"🎯 Contributions: {paper_analysis.main_contributions}\")\n",
+ "print(f\"🔬 Methodology: {paper_analysis.methodology}\")\n"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": ".venv",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.10"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+ }
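
`structured_outputs.ipynb` relies on the `client.beta.chat.completions.parse` helper. When that helper is unavailable, or a provider does not accept a Pydantic class directly, a more explicit variant is to pass the JSON schema yourself and validate the raw reply with Pydantic. The sketch below reuses the notebook's `client`, `system_prompt`, `SAMPLE_PAPER`, and `PaperAnalysis`, and assumes the provider honours the OpenAI-style `json_schema` response format (support varies by provider).

```python
# Hedged alternative sketch: build response_format from the Pydantic model's
# JSON schema and validate the returned JSON string manually.
completion = client.chat.completions.create(
    model="moonshotai/kimi-k2-instruct",
    messages=[
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": SAMPLE_PAPER},
    ],
    response_format={
        "type": "json_schema",
        "json_schema": {
            "name": "PaperAnalysis",
            "schema": PaperAnalysis.model_json_schema(),
        },
    },
)

# Pydantic v2: parse and validate the JSON string in one step.
analysis = PaperAnalysis.model_validate_json(completion.choices[0].message.content)
print(analysis.title)
```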