|
import torch |
|
from PIL import Image |
|
import torchvision.transforms as T |
|
import os |
|
from typing import Union, List, Tuple |
|
import numpy as np |
|
|
|
from .utils.benchmark import benchmark |
|
|
|
|
|
|
|
# Pick the compute device once at import time; everything below runs on it.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(f"Using device: {device}")
if device.type == 'cuda':
    current = torch.cuda.current_device()
    print(f"CUDA Device Name: {torch.cuda.get_device_name(current)}")

# Fix RNG seeds so benchmark runs are reproducible.
torch.manual_seed(42)
torch.cuda.manual_seed_all(42)
|
|
|
def cityscape_benchmark(
    model: torch.nn.Module,
    image_path: str,
    batch_size: Union[int, List[int]] = 1,
    image_size: Union[Tuple[int, int, int], List[Tuple[int, int, int]]] = (3, 1024, 1024),
    save_output: bool = True,
):
    """
    Calculate the FPS of a given model using an actual Cityscapes image.

    Args:
        model: instance of a model (e.g. SegFormer)
        image_path: the path to the Cityscapes image
        batch_size: the batch size(s) at which to calculate the FPS (e.g. 1 or [1, 2, 4])
        image_size: the (C, H, W) size(s) of the images to use (e.g. (3, 1024, 1024))
        save_output: whether to save the output prediction (default True)

    Returns:
        the FPS values calculated for all image sizes and batch sizes in the
        form of a dictionary mapping each image size to a list of
        {batch_size: mean_fps} entries

    Raises:
        FileNotFoundError: if image_path does not point to an existing file
    """
    # Normalize scalar arguments to lists so the loops below handle both forms.
    if isinstance(batch_size, int):
        batch_size = [batch_size]
    if isinstance(image_size, tuple):
        image_size = [image_size]

    values = {}

    model = model.to(device)
    model.eval()

    # Raise (not assert) so the check survives `python -O`.
    if not os.path.exists(image_path):
        raise FileNotFoundError(f"Image not found: {image_path}")
    image = Image.open(image_path).convert("RGB")

    # Derive per-channel normalization statistics from the image itself.
    img_tensor = T.ToTensor()(image)
    mean = img_tensor.mean(dim=(1, 2))
    std = img_tensor.std(dim=(1, 2))
    print(f"Calculated Mean: {mean}")
    print(f"Calculated Std: {std}")

    # NOTE: only the FIRST requested image size is used to build the tensor
    # for the saved prediction; the benchmark loop below covers all sizes.
    transform = T.Compose([
        T.Resize((image_size[0][1], image_size[0][2])),
        T.ToTensor(),
        T.Normalize(mean=mean.tolist(),
                    std=std.tolist())
    ])
    img_tensor = transform(image).unsqueeze(0).to(device)

    for size in image_size:
        fps = []
        for b in batch_size:
            # Larger inputs get fewer timed runs to keep total runtime bounded.
            runs = 16 if size[1] >= 1024 else 32
            # Repeat the benchmark 4 times to estimate run-to-run variance.
            throughput_values = []
            for _ in range(4):
                baseline_throughput = benchmark(
                    model,
                    device=device,
                    verbose=True,
                    runs=runs,
                    batch_size=b,
                    input_size=size
                )
                throughput_values.append(baseline_throughput)
            throughput_values = np.asarray(throughput_values)
            throughput = np.around(np.mean(throughput_values), decimals=2)
            print('Im_size:', size, 'Batch_size:', b, 'Mean:', throughput, 'Std:',
                  np.around(np.std(throughput_values), decimals=2))
            fps.append({b: throughput})
        values[size] = fps

    if save_output:
        with torch.no_grad():
            output = model(img_tensor)
            # assumes the model returns raw logits of shape (N, classes, H, W)
            # — TODO confirm for wrapped model outputs (e.g. dict returns)
            pred = torch.argmax(output, dim=1).squeeze(0).cpu().numpy()

        # Ensure the target directory exists before writing the prediction.
        output_dir = os.path.join(os.getcwd(), 'segformer_plusplus')
        os.makedirs(output_dir, exist_ok=True)
        output_path = os.path.join(output_dir, 'cityscapes_prediction_output.txt')
        np.savetxt(output_path, pred, fmt="%d")

        print("Prediction saved as cityscapes_prediction_output.txt")

    return values
|
|