File size: 740 Bytes
			
			| 6123db4 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 | from transformers import PretrainedConfig, PreTrainedModel, AutoModelForCausalLM # Import AutoModelForCausalLM
import torch
import torch.nn as nn
import torch.nn.functional as F
import math
from transformers.modeling_outputs import CausalLMOutputWithPast # Import the necessary output class
# Define a single Expert within the Mesh
class MeshExpert(nn.Module):
    """A single feed-forward expert: Linear -> GELU -> Linear.

    The first projection maps ``config.hidden_size`` features to
    ``config.expert_intermediate_size``; the second maps them back to
    ``config.hidden_size``, so the output has the same trailing dimension
    as the input.

    Args:
        config: Object exposing the integer attributes ``hidden_size`` and
            ``expert_intermediate_size``.
    """

    # The annotation is quoted (a forward reference) because ``MeshConfig`` is
    # not defined or imported in this module; an unquoted name would be
    # evaluated when the ``def`` executes and raise ``NameError`` on import.
    def __init__(self, config: "MeshConfig"):
        super().__init__()
        # Attribute names are kept as-is so state-dict keys remain stable.
        self.fc1 = nn.Linear(config.hidden_size, config.expert_intermediate_size)
        self.gelu = nn.GELU()  # Using GELU as an example activation
        self.fc2 = nn.Linear(config.expert_intermediate_size, config.hidden_size)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Apply the expert MLP to ``x``.

        Args:
            x: Tensor whose last dimension equals ``config.hidden_size``.

        Returns:
            Tensor with the same trailing dimension (``config.hidden_size``).
        """
        hidden = self.gelu(self.fc1(x))
        return self.fc2(hidden)
 | 
