|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""BERT encoder, optionally loading pre-trained checkpoints."""
|
|
|
|
import dataclasses
|
|
from typing import Optional
|
|
|
|
from absl import logging
|
|
from big_vision import utils
|
|
from big_vision.models import common
|
|
import flax
|
|
import flax.linen as nn
|
|
import jax.numpy as jnp
|
|
from tensorflow.io import gfile
|
|
|
|
from flaxformer.architectures.bert import bert
|
|
from flaxformer.architectures.bert import bert_checkpoint_converter
|
|
from flaxformer.architectures.bert import configs
|
|
|
|
|
|
class Model(nn.Module):
  """BERT encoder with linear projection on last layer CLS token."""

  config: str  # Selects the flaxformer BERT config: "base" or "large".
  num_classes: Optional[int] = None  # If set, adds a linear "head" on CLS.
  head_zeroinit: bool = True  # Zero-initialize the head's kernel.

  @nn.compact
  def __call__(self, text, *, train=False):
    """Encodes integer token ids `text` of shape [batch, len].

    Returns:
      A tuple `(x, out)` where `x` is the final feature (logits when
      `num_classes` is set, otherwise the CLS embedding) and `out` maps
      intermediate names ("transformed", "pre_logits", "logits") to arrays.
    """
    out = {}

    n, seq_len = text.shape
    # Look the config class up first, then instantiate only the one we need.
    config_cls = {
        "base": configs.BertBaseConfig,
        "large": configs.BertLargeConfig,
    }[self.config]
    encoder = bert.BertEncoder(**dataclasses.asdict(config_cls()))

    positions = jnp.tile(jnp.arange(0, seq_len, dtype=jnp.int32), [n, 1])
    segments = jnp.zeros([n, seq_len], dtype=jnp.int32)
    # Non-zero token ids count as real tokens; zeros are treated as padding.
    mask = text.astype(jnp.bool_).astype(jnp.int32)

    x = out["transformed"] = encoder(
        token_ids=text,
        position_ids=positions,
        segment_ids=segments,
        input_mask=mask,
        enable_dropout=train,
    )

    # The last-layer CLS token embedding is the pooled representation.
    x = out["pre_logits"] = x[:, 0]

    if self.num_classes:
      kw = {"kernel_init": nn.initializers.zeros} if self.head_zeroinit else {}
      x = out["logits"] = nn.Dense(self.num_classes, name="head", **kw)(x)

    return x, out
|
|
|
|
|
|
def load(params, path, model_cfg=None, dont_load=()):
  """Returns `params` with BERT weights replaced from checkpoint at `path`.

  Args:
    params: PyTree of model parameters used as the merge target; must contain
      the "BertEncoder_0" subtree produced by `Model`.
    path: Directory holding an original TF BERT checkpoint
      (`bert_model.ckpt.*`), or otherwise a big_vision checkpoint path.
    model_cfg: Unused; kept for a uniform `load(params, path, ...)` interface.
    dont_load: Patterns of parameter names to keep from `params` instead of
      overwriting with checkpoint values (see `common.merge_params`).

  Returns:
    `params` with the checkpoint weights merged in.
  """
  del model_cfg

  checkpoint_path = f"{path}/bert_model.ckpt"
  if gfile.exists(f"{checkpoint_path}.index"):
    logging.info("Loading original BERT checkpoint from '%s'", checkpoint_path)
    params = flax.core.FrozenDict(params).unfreeze()
    # The model may use a shorter max sequence length than the checkpoint's
    # position-embedding table; remember the target length so we can truncate.
    max_len = (
        params["BertEncoder_0"]["embedder"]["embedders_position_ids"]
        ["embedding"].shape[0])
    bert_params, pooler_params = (
        bert_checkpoint_converter.load_params_from_tf_checkpoint(
            checkpoint_path=checkpoint_path))
    del pooler_params  # The pooler head is not part of this model.
    if isinstance(bert_params, flax.core.FrozenDict):
      bert_params = bert_params.unfreeze()
    bert_params["embedder"]["embedders_position_ids"]["embedding"] = (
        bert_params["embedder"]["embedders_position_ids"]["embedding"][:max_len]
    )
    return common.merge_params(
        {"BertEncoder_0": bert_params}, params, dont_load)

  logging.info(
      "Could not find original BERT checkpoint path '%s', "
      "loading big_vision checkpoint '%s'", checkpoint_path, path)
  restored_params = utils.load_params(path)
  return common.merge_params(restored_params, params, dont_load)
|
|
|