# Source: v0.1-2x2-stage001 / meshexpert.py
# Last change: "Update meshexpert.py" by aquiffoo (commit c11d27f, 740 bytes)
from transformers import PretrainedConfig, PreTrainedModel, AutoModelForCausalLM # Import AutoModelForCausalLM
import torch
import torch.nn as nn
import torch.nn.functional as F
import math
from transformers.modeling_outputs import CausalLMOutputWithPast # Import the necessary output class
# Define a single Expert within the Mesh
class MeshExpert(nn.Module):
    """A single feed-forward expert: Linear -> GELU -> Linear.

    The first projection maps ``config.hidden_size`` features to
    ``config.expert_intermediate_size``; the second maps back to
    ``config.hidden_size``, so the output has the same trailing dimension
    as the input.
    """

    # The annotation is a string (forward reference) on purpose: ``MeshConfig``
    # is not defined or imported in this module, and an unquoted annotation is
    # evaluated when the ``def`` statement runs, raising ``NameError`` on import.
    def __init__(self, config: "MeshConfig"):
        """Build the two projections from the sizes stored on ``config``.

        Args:
            config: Object exposing ``hidden_size`` and
                ``expert_intermediate_size`` attributes.
        """
        super().__init__()
        self.fc1 = nn.Linear(config.hidden_size, config.expert_intermediate_size)
        self.gelu = nn.GELU()  # Using GELU as an example activation
        self.fc2 = nn.Linear(config.expert_intermediate_size, config.hidden_size)

    def forward(self, x):
        """Apply ``fc2(gelu(fc1(x)))``.

        Args:
            x: Tensor whose last dimension equals ``config.hidden_size``.

        Returns:
            Tensor with the same last dimension as ``x``.
        """
        hidden = self.gelu(self.fc1(x))
        return self.fc2(hidden)