File size: 740 Bytes
c11d27f a4ef8af c11d27f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 |
from transformers import PretrainedConfig, PreTrainedModel, AutoModelForCausalLM # Import AutoModelForCausalLM
import torch
import torch.nn as nn
import torch.nn.functional as F
import math
from transformers.modeling_outputs import CausalLMOutputWithPast # Import the necessary output class
# Define a single Expert within the Mesh
class MeshExpert(nn.Module):
    """A single feed-forward expert: Linear -> GELU -> Linear.

    The first projection maps ``config.hidden_size`` features to
    ``config.expert_intermediate_size``; the second maps them back to
    ``config.hidden_size``, so the output has the same shape as the input.
    """

    # The annotation is a string (forward reference) because ``MeshConfig`` is
    # not defined or imported at this point in the module; a bare name would be
    # evaluated when the ``def`` executes and raise NameError on import.
    def __init__(self, config: "MeshConfig"):
        """Build the expert's layers.

        Args:
            config: Object providing integer attributes ``hidden_size`` and
                ``expert_intermediate_size``.
        """
        super().__init__()
        self.fc1 = nn.Linear(config.hidden_size, config.expert_intermediate_size)
        self.gelu = nn.GELU()  # Using GELU as an example activation
        self.fc2 = nn.Linear(config.expert_intermediate_size, config.hidden_size)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Apply the expert to ``x``.

        Args:
            x: Tensor whose last dimension is ``config.hidden_size`` (as
                required by ``nn.Linear``).

        Returns:
            Tensor of the same shape as ``x``.
        """
        hidden = self.gelu(self.fc1(x))
        return self.fc2(hidden)
|