File size: 3,660 Bytes
e98bd8c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85ebba9
 
e98bd8c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85ebba9
e98bd8c
 
 
10e4799
 
 
e98bd8c
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
import torch
from PIL import Image
import torchvision.transforms as T
import os
from typing import Union, List, Tuple
import numpy as np

from .utils.benchmark import benchmark


# Select the compute device: prefer CUDA when available, otherwise use the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f"Using device: {device}")
if device.type == 'cuda':
    print(f"CUDA Device Name: {torch.cuda.get_device_name(torch.cuda.current_device())}")

# Fix RNG seeds so benchmark runs are reproducible.
# (manual_seed_all is a no-op when CUDA is unavailable.)
torch.manual_seed(42)
torch.cuda.manual_seed_all(42)

def cityscape_benchmark(
        model: torch.nn.Module,
        image_path: str,
        batch_size: Union[int, List[int]] = 1,
        image_size: Union[Tuple[int], List[Tuple[int]]] = (3, 1024, 1024),
        save_output: bool = True,
):
    """
    Calculate the FPS of a given model using an actual Cityscapes image.

    Args:
        model: instance of a model (e.g. SegFormer)
        image_path: the path to the Cityscapes image
        batch_size: the batch size(s) at which to calculate the FPS (e.g. 1 or [1, 2, 4])
        image_size: the size(s) of the images to use (e.g. (3, 1024, 1024))
        save_output: whether to save the output prediction (default True)

    Returns:
        dict mapping each image size (CHW tuple) to a list of
        {batch_size: mean_fps} entries, one per requested batch size

    Raises:
        FileNotFoundError: if image_path does not exist
    """
    # Normalize scalar arguments to lists so one loop handles both forms.
    if isinstance(batch_size, int):
        batch_size = [batch_size]
    if isinstance(image_size, tuple):
        image_size = [image_size]

    values = {}

    model = model.to(device)
    model.eval()

    # Raise (not assert) so validation survives `python -O`.
    if not os.path.exists(image_path):
        raise FileNotFoundError(f"Image not found: {image_path}")
    image = Image.open(image_path).convert("RGB")

    # Per-image normalization statistics, computed from this image itself
    # (not the usual fixed ImageNet/Cityscapes dataset statistics).
    img_tensor = T.ToTensor()(image)
    mean = img_tensor.mean(dim=(1, 2))
    std = img_tensor.std(dim=(1, 2))
    print(f"Calculated Mean: {mean}")
    print(f"Calculated Std: {std}")

    # NOTE(review): the saved prediction always uses the FIRST image size,
    # even when several sizes are benchmarked — confirm this is intended.
    transform = T.Compose([
        T.Resize((image_size[0][1], image_size[0][2])),
        T.ToTensor(),
        T.Normalize(mean=mean.tolist(),
                    std=std.tolist())
    ])

    img_tensor = transform(image).unsqueeze(0).to(device)

    for size in image_size:
        # Larger inputs get fewer benchmark runs to keep wall time bounded.
        # Hoisted out of the repeat loop: it only depends on the image size.
        runs = 16 if size[1] >= 1024 else 32
        # One {batch_size: mean_fps} entry per batch size.
        fps = []
        for b in batch_size:
            # Repeat the benchmark 4 times and report mean/std across repeats.
            throughput_values = []
            for _ in range(4):
                throughput_values.append(benchmark(
                    model,  # already on `device`; no need to move it again
                    device=device,
                    verbose=True,
                    runs=runs,
                    batch_size=b,
                    input_size=size
                ))
            samples = np.asarray(throughput_values)
            throughput = np.around(np.mean(samples), decimals=2)
            print('Im_size:', size, 'Batch_size:', b, 'Mean:', throughput, 'Std:',
                np.around(np.std(samples), decimals=2))
            fps.append({b: throughput})
        values[size] = fps

    if save_output:
        with torch.no_grad():
            output = model(img_tensor)
            pred = torch.argmax(output, dim=1).squeeze(0).cpu().numpy()

            # Save the prediction as a plain-text label map.
            cwd = os.getcwd()
            output_path = os.path.join(cwd, 'segformer_plusplus', 'cityscapes_prediction_output.txt')
            # Ensure the target directory exists; np.savetxt does not create it.
            os.makedirs(os.path.dirname(output_path), exist_ok=True)
            np.savetxt(output_path, pred, fmt="%d")

            print("Prediction saved as cityscapes_prediction_output.txt")

    return values