import torch | |
import torch.nn as nn | |
class RMSNorm(nn.Module):
    """Root Mean Square Layer Normalization (RMSNorm).

    Normalizes the last dimension of the input by its root-mean-square
    (no mean subtraction, unlike LayerNorm) and applies a learned
    per-dimension scale.

    Args:
        dimensions: Size of the last input dimension; also the size of the
            learned scale vector.
        eps: Small constant added to the mean square for numerical stability.
        device: Device on which the scale parameter is allocated.
        dtype: Dtype of the scale parameter (default ``torch.bfloat16``).
        norm_in_fp32: If ``True``, compute the normalization in float32 and
            cast back to the input dtype afterwards — improves numerical
            stability for low-precision inputs (e.g. bf16/fp16).
    """

    def __init__(self, dimensions: int, eps: float, device: torch.device, dtype: torch.dtype = torch.bfloat16, norm_in_fp32: bool = False):
        super().__init__()
        self.eps = eps
        # Construct directly on the target device instead of allocating on
        # CPU and copying with .to(device).
        self.weight = torch.nn.Parameter(torch.ones(dimensions, dtype=dtype, device=device))
        self.norm_in_fp32 = norm_in_fp32

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Apply RMS normalization over the last dimension of ``x``.

        Returns a tensor of the same shape as ``x``; the result dtype
        follows PyTorch promotion between the input dtype and the weight
        dtype.
        """
        original_dtype = x.dtype
        if self.norm_in_fp32:
            # Upcast so the square/mean/rsqrt chain runs in fp32.
            x = x.float()
        # x / RMS(x), with eps inside the sqrt for stability.
        out = x * torch.rsqrt(x.pow(2).mean(-1, keepdim=True) + self.eps)
        if out.dtype != original_dtype:
            # Cast back BEFORE the weight multiply so the elementwise scale
            # is applied in the caller's precision, matching the
            # norm_in_fp32=False path.
            out = out.to(original_dtype)
        return out * self.weight