{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import torch\n", "import torch.nn.functional as F\n", "import matplotlib.pyplot as plt # for making figures\n", "%matplotlib inline" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['emma', 'olivia', 'ava', 'isabella', 'sophia', 'charlotte', 'mia', 'amelia']" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# read in all the words\n", "words = open('names.txt', 'r').read().splitlines()\n", "words[:8]" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "32033" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(words)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{1: 'a', 2: 'b', 3: 'c', 4: 'd', 5: 'e', 6: 'f', 7: 'g', 8: 'h', 9: 'i', 10: 'j', 11: 'k', 12: 'l', 13: 'm', 14: 'n', 15: 'o', 16: 'p', 17: 'q', 18: 'r', 19: 's', 20: 't', 21: 'u', 22: 'v', 23: 'w', 24: 'x', 25: 'y', 26: 'z', 0: '.'}\n" ] } ], "source": [ "# build the vocabulary of characters and mappings to/from integers\n", "chars = sorted(list(set(''.join(words))))\n", "stoi = {s:i+1 for i,s in enumerate(chars)}\n", "stoi['.'] = 0\n", "itos = {i:s for s,i in stoi.items()}\n", "print(itos)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "... ---> e\n", "..e ---> m\n", ".em ---> m\n", "emm ---> a\n", "mma ---> .\n", "... ---> o\n", "..o ---> l\n", ".ol ---> i\n", "oli ---> v\n", "liv ---> i\n", "ivi ---> a\n", "via ---> .\n", "... ---> a\n", "..a ---> v\n", ".av ---> a\n", "ava ---> .\n", "... ---> i\n", "..i ---> s\n", ".is ---> a\n", "isa ---> b\n", "sab ---> e\n", "abe ---> l\n", "bel ---> l\n", "ell ---> a\n", "lla ---> .\n", "... 
---> s\n", "..s ---> o\n", ".so ---> p\n", "sop ---> h\n", "oph ---> i\n", "phi ---> a\n", "hia ---> .\n" ] } ], "source": [ "# build the dataset\n", "\n", "block_size = 3 # context length: how many characters do we take to predict the next one?\n", "X, Y = [], []\n", "for w in words[:5]:\n", " \n", " #print(w)\n", " context = [0] * block_size\n", " for ch in w + '.':\n", " ix = stoi[ch]\n", " X.append(context)\n", " Y.append(ix)\n", " print(''.join(itos[i] for i in context), '--->', itos[ix])\n", " context = context[1:] + [ix] # crop and append\n", " \n", "X = torch.tensor(X)\n", "Y = torch.tensor(Y)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(torch.Size([32, 3]), torch.int64, torch.Size([32]), torch.int64)" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "X.shape, X.dtype, Y.shape, Y.dtype" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "So our dataset looks like this^ \\\n", "\\\n", "So, for each of those above 5 words, \\\n", "`torch.Size([32, 3])` we have created a dataset of 32 examples and each input of the neural net is 3 integers => X \\\n", "`torch.Size([32])` and these are the labels (single row, 32 values) => Y" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor([[ 0, 0, 0],\n", " [ 0, 0, 5],\n", " [ 0, 5, 13],\n", " [ 5, 13, 13],\n", " [13, 13, 1],\n", " [ 0, 0, 0],\n", " [ 0, 0, 15],\n", " [ 0, 15, 12],\n", " [15, 12, 9],\n", " [12, 9, 22],\n", " [ 9, 22, 9],\n", " [22, 9, 1],\n", " [ 0, 0, 0],\n", " [ 0, 0, 1],\n", " [ 0, 1, 22],\n", " [ 1, 22, 1],\n", " [ 0, 0, 0],\n", " [ 0, 0, 9],\n", " [ 0, 9, 19],\n", " [ 9, 19, 1],\n", " [19, 1, 2],\n", " [ 1, 2, 5],\n", " [ 2, 5, 12],\n", " [ 5, 12, 12],\n", " [12, 12, 1],\n", " [ 0, 0, 0],\n", " [ 0, 0, 19],\n", " [ 0, 19, 15],\n", " [19, 15, 16],\n", " [15, 16, 8],\n", " [16, 8, 9],\n", " [ 8, 9, 1]])" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "X" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor([ 5, 13, 13, 1, 0, 15, 12, 9, 22, 9, 1, 0, 1, 22, 1, 0, 9, 19,\n", " 1, 2, 5, 12, 12, 1, 0, 19, 15, 16, 8, 9, 1, 0])" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "Y" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "C = torch.rand((27, 2))" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "torch.Size([32, 3, 2])" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "emb = C[X]\n", "\n", "emb.shape" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "(PyTorch indexing is awesome) \\\n", "\\\n", "To index simultaneously all the elements of X, We simply do C[X]" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "W1 = torch.randn((6, 100))\n", "b1 = torch.rand(100)" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "h = torch.tanh(emb.view(-1, 6) @ W1 + b1)" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor([[ 0.9910, 0.8405, 0.4715, ..., 0.9999, 0.8814, 0.9998],\n", " [ 0.9763, 0.9163, 0.3350, ..., 0.9991, 0.8249, 0.9992],\n", " [ 0.9791, 0.8450, -0.0272, ..., 0.9997, 0.9230, 0.9997],\n", " 
...,\n", " [ 0.8995, 0.6590, 0.4667, ..., 0.9995, -0.4144, 0.9988],\n", " [ 0.9777, 0.7397, 0.2623, ..., 0.9999, 0.9593, 0.9999],\n", " [ 0.9402, 0.7154, 0.2493, ..., 0.9980, -0.6247, 0.9979]])" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "h" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "torch.Size([32, 100])" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "h.shape" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Hidden layer is now made^" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [], "source": [ "W2 = torch.randn((100, 27))\n", "b2 = torch.rand(27)" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "logits = h @ W2 + b2" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "torch.Size([32, 27])" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "logits.shape" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [], "source": [ "counts = logits.exp()" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [], "source": [ "prob = counts / counts.sum(1, keepdims=True)" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "torch.Size([32, 27])" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "prob.shape" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor(13.4043)" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "loss = -prob[torch.arange(32), Y].log().mean()\n", "loss" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We've made the final output layer^ \\\n", "Found the loss function value, which we have to reduce" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "---------------------" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### **Summarising what we've done so far to make this more respectable :)**" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(torch.Size([32, 3]), torch.Size([32]))" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#Run the first 5 cells and then start from here\n", "X.shape, Y.shape #dataset" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [], "source": [ "g = torch.Generator().manual_seed(2147483647) #For consistency ofcourse, to keep the same values as andrej\n", "C = torch.randn((27,2), generator=g)\n", "W1 = torch.rand((6, 100), generator=g)\n", "b1 = torch.rand(100, generator=g)\n", "W2 = torch.rand((100, 27), generator=g)\n", "b2 = torch.rand(27, generator=g)\n", "parameters = [C, W1, b1, W2, b2]" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "3481" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sum(p.nelement() for p in parameters) #to check number of parameters in total" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor(6.4365)" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ 
"emb = C[X]\n", "h = torch.tanh(emb.view(-1,6) @ W1 + b1)\n", "logits = h @ W2 + b2\n", "counts = logits.exp()\n", "prob = counts / counts.sum(1, keepdims=True)\n", "loss = - prob[torch.arange(32), Y].log().mean()\n", "loss" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "--------------" ] } ], "metadata": { "kernelspec": { "display_name": "venv", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.0" } }, "nbformat": 4, "nbformat_minor": 2 }