|
|
|
""" |
|
Batch processing example for the Custom OCR Model. |
|
""" |
|
|
|
from transformers import AutoModel |
|
from PIL import Image |
|
import os |
|
from pathlib import Path |
|
|
|
def batch_ocr_example(image_directory: str) -> list:
    """Run OCR over every JPG/PNG image found directly inside a directory.

    Args:
        image_directory: Path of the directory to scan. Only its immediate
            children are considered (no recursion).

    Returns:
        A list with one dict per processed image, in file-name order. Each
        dict has the keys ``"filename"`` (the image's base name), ``"text"``
        and ``"confidence"``, the latter two copied from the model's result.
        The list is empty when the directory holds no matching images.
    """
    image_dir = Path(image_directory)

    # Discover the inputs first. Suffixes are compared case-insensitively so
    # that "scan.JPG" is picked up on case-sensitive filesystems too, entries
    # that are not regular files are skipped (a folder named "x.png" cannot be
    # opened as an image), and the result is sorted so the output order does
    # not depend on the filesystem's enumeration order.
    image_files = sorted(
        candidate
        for candidate in image_dir.glob("*")
        if candidate.suffix.lower() in (".jpg", ".png") and candidate.is_file()
    )

    print(f"Processing {len(image_files)} images...")

    # Nothing to do: skip the (potentially slow) model load altogether.
    if not image_files:
        return []

    # NOTE(review): trust_remote_code=True lets transformers execute Python
    # code shipped in the model repository; only point this at a repository
    # you trust. `generate_ocr_text` is presumably provided by that remote
    # code rather than by transformers itself -- confirm against the model.
    model = AutoModel.from_pretrained("your-username/your-model-name", trust_remote_code=True)

    results = []
    for image_file in image_files:
        print(f"Processing: {image_file.name}")

        # The context manager closes the underlying file as soon as the model
        # is done with the image, so long batches do not leak file handles.
        with Image.open(image_file) as image:
            result = model.generate_ocr_text(image, use_native=True)

        results.append({
            "filename": image_file.name,
            "text": result["text"],
            "confidence": result["confidence"],
        })

        # Show only the first 50 characters of the recognised text.
        print(f" Text: {result['text'][:50]}...")
        print(f" Confidence: {result['confidence']:.3f}")

    return results
|
|
|
if __name__ == "__main__":
    import sys

    # Everything after the script name; the first entry is the image directory.
    cli_args = sys.argv[1:]

    if not cli_args:
        # No directory supplied: show how the script is meant to be invoked.
        print("Usage: python batch_processing.py <image_directory>")
    else:
        results = batch_ocr_example(cli_args[0])
        print(f"\nProcessed {len(results)} images successfully!")
|
|