# textract-ai/examples/batch_processing.py
# BabaK07's picture
# Upload custom OCR model based on Qwen2.5-VL
# b127e5d verified
"""
Batch processing example for the Custom OCR Model.
"""
from transformers import AutoModel
from PIL import Image
import os
from pathlib import Path
def batch_ocr_example(
    image_directory: str,
    *,
    model_name: str = "your-username/your-model-name",
    extensions: tuple = (".jpg", ".jpeg", ".png"),
) -> list:
    """Run OCR over every matching image file in a directory.

    Args:
        image_directory: Directory to scan for images (not recursive).
        model_name: Identifier passed to ``AutoModel.from_pretrained``. The
            default is the placeholder used by this example and must be
            replaced with a real model id before any image can be processed.
        extensions: File suffixes (including the leading dot) to treat as
            images. Matching is case-insensitive, so ``photo.JPG`` is picked
            up as well as ``photo.jpg``.

    Returns:
        A list with one dict per processed image, ordered by path, holding
        the keys ``"filename"``, ``"text"`` and ``"confidence"``. The list is
        empty (and the model is never loaded) when no image files are found.
    """
    image_dir = Path(image_directory)
    wanted_suffixes = {ext.lower() for ext in extensions}

    # Sort so the processing order (and the order of the returned list) does
    # not depend on filesystem enumeration order. Skip directories that merely
    # happen to be named like an image.
    image_files = sorted(
        path
        for path in image_dir.glob("*")
        if path.suffix.lower() in wanted_suffixes and path.is_file()
    )
    print(f"Processing {len(image_files)} images...")

    # Loading the model is the expensive step; skip it when there is no work.
    if not image_files:
        return []

    # NOTE(review): trust_remote_code=True lets the model repository run its
    # own Python code on this machine — only point model_name at a repo you trust.
    model = AutoModel.from_pretrained(model_name, trust_remote_code=True)

    results = []
    for image_file in image_files:
        print(f"Processing: {image_file.name}")

        # Use a context manager so the underlying file handle is released
        # after each image instead of accumulating across the whole batch.
        with Image.open(image_file) as image:
            result = model.generate_ocr_text(image, use_native=True)

        results.append({
            "filename": image_file.name,
            "text": result["text"],
            "confidence": result["confidence"],
        })
        print(f" Text: {result['text'][:50]}...")
        print(f" Confidence: {result['confidence']:.3f}")

    return results
if __name__ == "__main__":
    import sys

    # A missing directory argument is a usage error: sys.exit with a string
    # prints it to stderr and exits with status 1, so shell scripts and CI
    # can detect the failure instead of seeing a successful exit.
    if len(sys.argv) < 2:
        sys.exit("Usage: python batch_processing.py <image_directory>")

    # Only the first argument is used; any extra arguments are ignored.
    results = batch_ocr_example(sys.argv[1])
    print(f"\nProcessed {len(results)} images successfully!")