# stable-diffusion-implementation/main/myenv/lib/python3.10/site-packages/lightning_fabric/wrappers.py
# Copyright The Lightning AI team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import inspect
from collections.abc import Generator, Iterator, Mapping
from copy import deepcopy
from functools import partial, wraps
from types import MethodType
from typing import (
    Any,
    Callable,
    Optional,
    TypeVar,
    Union,
    overload,
)

import torch
from lightning_utilities import is_overridden
from lightning_utilities.core.apply_func import apply_to_collection
from torch import Tensor
from torch import nn as nn
from torch._dynamo import OptimizedModule
from torch.nn.modules.module import _IncompatibleKeys
from torch.optim import Optimizer
from torch.utils.data import DataLoader
from typing_extensions import override

from lightning_fabric.plugins import Precision
from lightning_fabric.strategies import Strategy
from lightning_fabric.utilities import move_data_to_device
from lightning_fabric.utilities.data import _set_sampler_epoch
from lightning_fabric.utilities.device_dtype_mixin import _DeviceDtypeModuleMixin
from lightning_fabric.utilities.types import Optimizable

T_destination = TypeVar("T_destination", bound=dict[str, Any])
_LIGHTNING_MODULE_STEP_METHODS = ("training_step", "validation_step", "test_step", "predict_step")

_in_fabric_backward: bool = False


class _FabricOptimizer:
    def __init__(self, optimizer: Optimizer, strategy: Strategy, callbacks: Optional[list[Callable]] = None) -> None:
        """FabricOptimizer is a thin wrapper around the :class:`~torch.optim.Optimizer` that delegates the optimizer
        step calls to the strategy.

        The underlying wrapped optimizer object can be accessed via the property :attr:`optimizer`.

        Args:
            optimizer: The optimizer to wrap
            strategy: Reference to the strategy for handling the optimizer step

        """
        self._optimizer = optimizer
        self._strategy = strategy
        self._callbacks = callbacks or []
        # imitate the class of the wrapped object to make isinstance checks work
        self.__class__ = type("Fabric" + optimizer.__class__.__name__, (self.__class__, optimizer.__class__), {})

    @property
    def optimizer(self) -> Optimizer:
        return self._optimizer

    def state_dict(self) -> dict[str, Tensor]:
        return self._strategy.get_optimizer_state(self.optimizer)

    def load_state_dict(self, state_dict: dict[str, Tensor]) -> None:
        self.optimizer.load_state_dict(state_dict)

    def step(self, closure: Optional[Callable] = None) -> Any:
        kwargs = {"closure": closure} if closure is not None else {}
        if hasattr(self._strategy, "model") and isinstance(self._strategy.model, Optimizable):
            # only DeepSpeed defines this
            optimizer = self._strategy.model
        else:
            optimizer = self.optimizer
        output = self._strategy.optimizer_step(
            optimizer,
            **kwargs,
        )
        for callback in self._callbacks:
            hook = getattr(callback, "on_after_optimizer_step", None)
            if callable(hook):
                hook(strategy=self._strategy, optimizer=optimizer)
        return output

    def __getattr__(self, item: Any) -> Any:
        return getattr(self._optimizer, item)
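

# --- Illustrative usage (added for exposition; not part of the original Lightning source) ---
# A minimal sketch, assuming a single CPU device: a ``_FabricOptimizer`` is normally obtained
# through ``Fabric.setup()`` rather than constructed directly, and ``step()`` then delegates to
# the strategy's ``optimizer_step``. The helper name ``_example_fabric_optimizer_usage`` is
# hypothetical.
def _example_fabric_optimizer_usage() -> None:
    from lightning_fabric import Fabric  # imported locally to avoid a circular import

    fabric = Fabric(accelerator="cpu", devices=1)
    model = nn.Linear(2, 2)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
    model, optimizer = fabric.setup(model, optimizer)  # returns a _FabricModule and a _FabricOptimizer
    loss = model(torch.randn(4, 2)).sum()
    fabric.backward(loss)
    optimizer.step()  # routed through the strategy's `optimizer_step`
    optimizer.zero_grad()  # unknown attributes are forwarded to the wrapped optimizer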


class _FabricModule(_DeviceDtypeModuleMixin):
    def __init__(
        self, forward_module: nn.Module, strategy: Strategy, original_module: Optional[nn.Module] = None
    ) -> None:
        """The FabricModule is a thin wrapper around the :class:`torch.nn.Module` and handles precision / autocast
        automatically for the forward pass.

        The underlying wrapped module can be accessed via the property :attr:`module`.

        Args:
            forward_module: The module to wrap the ``forward`` method on.
            strategy: Reference to the strategy for handling precision etc.
            original_module: The original, unmodified module as passed into the
                :meth:`lightning_fabric.fabric.Fabric.setup` method. This is needed when attribute lookup
                on this wrapper should pass through to the original module.

        """
        super().__init__()
        self._forward_module = forward_module
        self._original_module = original_module or forward_module
        self._strategy = strategy
        self._forward_methods = set(_LIGHTNING_MODULE_STEP_METHODS)
        self._fabric_module_initialized = True

    @property
    def module(self) -> nn.Module:
        return self._original_module or self._forward_module

    @override
    def forward(self, *args: Any, **kwargs: Any) -> Any:
        """Casts all inputs to the right precision and handles autocast for operations in the module forward
        method."""
        precision = self._strategy.precision
        args, kwargs = precision.convert_input((args, kwargs))

        with precision.forward_context():
            output = self._forward_module(*args, **kwargs)

        output = precision.convert_output(output)

        apply_to_collection(output, dtype=Tensor, function=self._register_backward_hook)
        return output

    @overload
    def state_dict(self, *, destination: T_destination, prefix: str = ..., keep_vars: bool = ...) -> T_destination: ...

    @overload
    def state_dict(self, *, prefix: str = ..., keep_vars: bool = ...) -> dict[str, Any]: ...

    @override
    def state_dict(
        self, destination: Optional[T_destination] = None, prefix: str = "", keep_vars: bool = False
    ) -> Optional[dict[str, Any]]:
        return self._original_module.state_dict(
            destination=destination,  # type: ignore[type-var]
            prefix=prefix,
            keep_vars=keep_vars,
        )

    @override
    def load_state_dict(  # type: ignore[override]
        self, state_dict: Mapping[str, Any], strict: bool = True, **kwargs: Any
    ) -> _IncompatibleKeys:
        return self._original_module.load_state_dict(state_dict=state_dict, strict=strict, **kwargs)

    def mark_forward_method(self, method: Union[MethodType, str]) -> None:
        """Mark a method as a 'forward' method to prevent it from bypassing the strategy wrapper (e.g., DDP)."""
        if not isinstance(method, (MethodType, str)):
            raise TypeError(f"Expected a method or a string, but got: {type(method).__name__}")
        name = method if isinstance(method, str) else method.__name__
        if name == "forward":
            raise ValueError("You cannot mark the forward method itself as a forward method.")
        if not isinstance(getattr(self._original_module, name, None), MethodType):
            raise AttributeError(
                f"You marked '{name}' as a forward method, but `{type(self._original_module).__name__}.{name}` does not"
                f" exist or is not a method."
            )
        self._forward_methods.add(name)

    def _redirection_through_forward(self, method_name: str) -> Callable:
        assert method_name != "forward"
        original_forward = self._original_module.forward

        def wrapped_forward(*args: Any, **kwargs: Any) -> Any:
            # Unpatch ourselves immediately before calling the method `method_name`,
            # because the method itself may want to call the real `forward`
            self._original_module.forward = original_forward
            # Call the actual method e.g. `.training_step(...)`
            method = getattr(self._original_module, method_name)
            return method(*args, **kwargs)

        # We make the caller "unknowingly" send their arguments through the forward_module's `__call__`.
        # We expect that the `forward_module` will eventually call `original_module.forward`, which we
        # have patched to redirect back to `original_module.method_name()`.
        def call_forward_module(*args: Any, **kwargs: Any) -> Any:
            # Patch the original_module's forward, so we can redirect the arguments back to the real method
            self._original_module.forward = wrapped_forward
            return self.forward(*args, **kwargs)

        return call_forward_module

    def _wrap_method_with_module_call_tracker(self, method: Callable, name: str) -> Callable:
        """Tracks whether any submodule in ``self._original_module`` was called during the execution of ``method`` by
        registering forward hooks on all submodules."""
        module_called = False

        def hook(*_: Any, **__: Any) -> None:
            nonlocal module_called
            module_called = True

        def _wrapped_method(*args: Any, **kwargs: Any) -> Any:
            handles = []
            for module in self._original_module.modules():
                handles.append(module.register_forward_hook(hook))

            output = method(*args, **kwargs)

            if module_called:
                raise RuntimeError(
                    f"You are calling the method `{type(self._original_module).__name__}.{name}()` from outside the"
                    " model. To avoid issues with the currently selected strategy, explicitly mark it as a"
                    f" forward method with `fabric_model.mark_forward_method({name!r})` after `fabric.setup()`."
                )
            for handle in handles:
                handle.remove()
            return output

        return _wrapped_method

    def _register_backward_hook(self, tensor: Tensor) -> Tensor:
        if not tensor.requires_grad:
            return tensor

        strategy_requires = is_overridden("backward", self._strategy, parent=Strategy)
        precision_requires = any(
            is_overridden(method, self._strategy.precision, parent=Precision)
            for method in ("pre_backward", "backward", "post_backward")
        )
        hook = partial(_backward_hook, (strategy_requires or precision_requires))
        tensor.register_hook(hook)
        return tensor

    @override
    def __getattr__(self, item: Any) -> Any:
        if (
            item != "_forward_methods"
            and item in self._forward_methods
            and self._forward_module != self._original_module
        ):
            # Special support for methods marked by `mark_forward_method` to prevent bypassing DDP's forward
            return self._redirection_through_forward(item)

        try:
            # __getattr__ gets called as a last resort if the attribute does not exist
            # call nn.Module's implementation first
            return super().__getattr__(item)
        except AttributeError:
            # If the attribute is not available on the _FabricModule wrapper, redirect to the wrapped nn.Module
            original_module = super().__getattr__("_original_module")
            attr = getattr(original_module, item)

            if inspect.ismethod(attr) and self._forward_module != self._original_module:
                attr = self._wrap_method_with_module_call_tracker(attr, item)
            return attr

    @override
    def __setattr__(self, name: str, value: Any) -> None:
        if not getattr(self, "_fabric_module_initialized", False):
            super().__setattr__(name, value)
            return

        # Get the _original_module attribute
        original_module = self._original_module
        original_has_attr = hasattr(original_module, name)
        # Can't use `super().__getattr__` because nn.Module only checks `_parameters`, `_buffers`, and `_modules`
        # Can't use `self.__getattr__` because it would pass through to the original module
        fabric_has_attr = name in dir(self)

        if not (original_has_attr or fabric_has_attr):
            setattr(original_module, name, value)
            return

        # The original module can also inherit from `_DeviceDtypeModuleMixin`;
        # in that case, both the Fabric module and the original module have attributes like `_dtype`,
        # so set the attribute on both
        if original_has_attr:
            setattr(original_module, name, value)

        if fabric_has_attr:
            super().__setattr__(name, value)
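

# --- Illustrative usage (added for exposition; not part of the original Lightning source) ---
# A minimal sketch, assuming a single CPU device and "bf16-true" precision: calling the wrapper
# converts inputs/outputs through the precision plugin and runs the forward under its autocast
# context. Methods other than ``forward`` that call submodules should be registered with
# ``mark_forward_method`` so strategies such as DDP see them go through ``forward``. The names
# ``_ToyModel`` and ``_example_fabric_module_usage`` are hypothetical.
def _example_fabric_module_usage() -> None:
    from lightning_fabric import Fabric  # imported locally to avoid a circular import

    class _ToyModel(nn.Module):
        def __init__(self) -> None:
            super().__init__()
            self.layer = nn.Linear(2, 2)

        def forward(self, x: Tensor) -> Tensor:
            return self.layer(x)

        def generate(self, x: Tensor) -> Tensor:
            # calls submodules, so it should be marked as a forward method after `fabric.setup()`
            return self.forward(x)

    fabric = Fabric(accelerator="cpu", devices=1, precision="bf16-true")
    model = fabric.setup(_ToyModel())  # returns a _FabricModule
    model.mark_forward_method("generate")
    out = model(torch.randn(1, 2))  # inputs are cast to bfloat16 before the wrapped forward runs
    sample = model.generate(torch.randn(1, 2))  # redirected through `forward` when a strategy wrapper exists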


class _FabricDataLoader:
    def __init__(self, dataloader: DataLoader, device: Optional[torch.device] = None) -> None:
        """The FabricDataLoader is a wrapper for the :class:`~torch.utils.data.DataLoader`. It moves the data to the
        device automatically if the device is specified.

        Args:
            dataloader: The dataloader to wrap
            device: The device to which the data should be moved. By default the device is `None` and no data
                transfers will be made (identical behavior to :class:`~torch.utils.data.DataLoader`).

        """
        self.__dict__.update(dataloader.__dict__)
        self._dataloader = dataloader
        self._device = device
        self._num_iter_calls = 0

    @property
    def device(self) -> Optional[torch.device]:
        return self._device

    def __len__(self) -> int:
        return len(self._dataloader)

    def __iter__(self) -> Union[Iterator[Any], Generator[Any, None, None]]:
        # Without setting the epoch, the distributed sampler would return the same indices every time, even when
        # shuffling is enabled. In PyTorch, the user would normally have to call `.set_epoch()` on the sampler.
        # In Fabric, we take care of this boilerplate code.
        _set_sampler_epoch(self._dataloader, self._num_iter_calls)
        self._num_iter_calls += 1

        if self._device is None:
            yield from iter(self._dataloader)
        else:
            for item in self._dataloader:
                yield move_data_to_device(item, self._device)
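

# --- Illustrative usage (added for exposition; not part of the original Lightning source) ---
# A minimal sketch, assuming a single CPU device: ``Fabric.setup_dataloaders()`` returns a
# ``_FabricDataLoader`` that moves every batch to ``fabric.device`` and bumps the distributed
# sampler's epoch each time a new iteration starts. The helper name
# ``_example_fabric_dataloader_usage`` is hypothetical.
def _example_fabric_dataloader_usage() -> None:
    from torch.utils.data import TensorDataset

    from lightning_fabric import Fabric  # imported locally to avoid a circular import

    fabric = Fabric(accelerator="cpu", devices=1)
    dataset = TensorDataset(torch.randn(8, 2))
    dataloader = fabric.setup_dataloaders(DataLoader(dataset, batch_size=4))  # returns a _FabricDataLoader
    for (batch,) in dataloader:
        _ = batch  # each batch has already been moved to `fabric.device`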


def _unwrap_objects(collection: Any) -> Any:
    def _unwrap(
        obj: Union[_FabricModule, _FabricOptimizer, _FabricDataLoader],
    ) -> Union[nn.Module, Optimizer, DataLoader]:
        if isinstance(unwrapped := _unwrap_compiled(obj)[0], _FabricModule):
            return _unwrap_compiled(unwrapped._forward_module)[0]
        if isinstance(obj, _FabricOptimizer):
            return obj.optimizer
        if isinstance(obj, _FabricDataLoader):
            return obj._dataloader
        return obj

    types = [_FabricModule, _FabricOptimizer, _FabricDataLoader]
    types.append(OptimizedModule)
    return apply_to_collection(collection, dtype=tuple(types), function=_unwrap)
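

# --- Illustrative usage (added for exposition; not part of the original Lightning source) ---
# A minimal sketch, assuming a single CPU device: ``_unwrap_objects`` walks an arbitrary
# collection and replaces Fabric wrappers with the underlying objects, leaving everything else
# untouched. The helper name ``_example_unwrap_objects_usage`` is hypothetical.
def _example_unwrap_objects_usage() -> None:
    from lightning_fabric import Fabric  # imported locally to avoid a circular import

    fabric = Fabric(accelerator="cpu", devices=1)
    model = nn.Linear(2, 2)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
    fabric_model, fabric_optimizer = fabric.setup(model, optimizer)

    state = {"model": fabric_model, "optimizer": fabric_optimizer, "step": 3}
    unwrapped = _unwrap_objects(state)
    # `unwrapped["model"]` is the plain nn.Module, `unwrapped["optimizer"]` the plain Optimizer,
    # and non-wrapped entries such as `unwrapped["step"]` pass through unchanged.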


def _unwrap_compiled(obj: Union[Any, OptimizedModule]) -> tuple[Union[Any, nn.Module], Optional[dict[str, Any]]]:
    """Removes the :class:`torch._dynamo.OptimizedModule` around the object if it is wrapped.

    Use this function before instance checks against e.g. :class:`_FabricModule`.

    """
    if isinstance(obj, OptimizedModule):
        if (compile_kwargs := getattr(obj, "_compile_kwargs", None)) is None:
            raise RuntimeError(
                "Failed to determine the arguments that were used to compile the module. Make sure to import"
                " lightning before `torch.compile` is used."
            )
        return obj._orig_mod, compile_kwargs
    return obj, None


def _to_compiled(module: nn.Module, compile_kwargs: dict[str, Any]) -> OptimizedModule:
    return torch.compile(module, **compile_kwargs)  # type: ignore[return-value]


def _backward_hook(requires_backward: bool, *_: Any) -> None:
    if requires_backward and not _in_fabric_backward:
        raise RuntimeError(
            "The current strategy and precision selection requires you to call `fabric.backward(loss)`"
            " instead of `loss.backward()`."
        )


def is_wrapped(obj: object) -> bool:
    """Checks if an object was set up by Fabric.

    A :class:`~torch.nn.Module` may be wrapped by a :class:`_FabricModule`, a :class:`~torch.optim.Optimizer`
    may be wrapped by a :class:`_FabricOptimizer`, or a :class:`~torch.utils.data.DataLoader` may be wrapped by
    :class:`_FabricDataLoader`.

    Args:
        obj: The object to test.

    """
    obj, _ = _unwrap_compiled(obj)
    return isinstance(obj, (_FabricModule, _FabricOptimizer, _FabricDataLoader))
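

# --- Illustrative usage (added for exposition; not part of the original Lightning source) ---
# A minimal sketch, assuming a single CPU device: ``is_wrapped`` reports whether an object has
# already been set up by Fabric, even if it was `torch.compile`d afterwards. The helper name
# ``_example_is_wrapped_usage`` is hypothetical.
def _example_is_wrapped_usage() -> None:
    from lightning_fabric import Fabric  # imported locally to avoid a circular import

    fabric = Fabric(accelerator="cpu", devices=1)
    model = nn.Linear(2, 2)
    assert not is_wrapped(model)  # a plain nn.Module is not wrapped
    model = fabric.setup(model)
    assert is_wrapped(model)  # after setup, the module is a _FabricModule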


def _capture_compile_kwargs(compile_fn: Callable) -> Callable:
    """Wraps the ``torch.compile`` function and captures the compile arguments.

    We extract the compile arguments so that we can reapply ``torch.compile`` in ``Fabric.setup()`` with the
    same arguments as the user passed to the original call. The arguments get stored in a dictionary
    ``_compile_kwargs`` on the returned compiled module.

    """

    # Limitation: Currently, the global compile config does not get captured on a per-model basis.
    # PyTorch will resolve this in the future: https://github.com/pytorch/pytorch/issues/116575
    @wraps(compile_fn)
    def _capture(*args: Any, **kwargs: Any) -> Any:
        if not args or not isinstance(args[0], nn.Module):
            # either torch.compile is being applied as a decorator or we're compiling something else
            return compile_fn(*args, **kwargs)

        model = args[0]
        compiled_model = compile_fn(model, **kwargs)
        compiled_model._compile_kwargs = deepcopy(kwargs)
        return compiled_model

    return _capture


torch.compile = _capture_compile_kwargs(torch.compile)
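

# --- Illustrative usage (added for exposition; not part of the original Lightning source) ---
# A minimal sketch, assuming PyTorch 2.x with dynamo support on the current platform: because
# importing this module patches ``torch.compile`` above, the keyword arguments used for
# compilation are recorded on the returned ``OptimizedModule`` and can be recovered with
# ``_unwrap_compiled``. The helper name ``_example_compile_kwargs_capture`` is hypothetical.
def _example_compile_kwargs_capture() -> None:
    model = nn.Linear(2, 2)
    compiled = torch.compile(model, mode="reduce-overhead")
    original, compile_kwargs = _unwrap_compiled(compiled)
    # `original` is the plain nn.Module and `compile_kwargs` contains {"mode": "reduce-overhead"},
    # which Fabric.setup() reuses to re-apply `torch.compile` after the strategy wraps the module.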