# Source: textract-ai/examples/batch_processing.py
# Uploaded by BabaK07: "Upload custom OCR model based on Qwen2.5-VL"
# Commit b127e5d (verified), file size 1.41 kB
"""
Batch processing example for the Custom OCR Model.
"""
from transformers import AutoModel
from PIL import Image
import os
from pathlib import Path
def batch_ocr_example(image_directory: str) -> list:
    """Run OCR over every ``*.jpg`` and ``*.png`` file in a directory.

    Args:
        image_directory: Path of the directory to scan. Only files directly
            inside it are considered (the glob patterns are not recursive).

    Returns:
        One dict per processed image, in sorted path order, with the keys
        ``"filename"``, ``"text"`` and ``"confidence"``. An empty list is
        returned when the directory contains no matching files.
    """
    # Discover the inputs first so that an empty directory does not pay the
    # cost of loading the model. Sorting makes the processing order (and the
    # order of the returned list) deterministic instead of filesystem-dependent.
    image_dir = Path(image_directory)
    image_files = sorted([*image_dir.glob("*.jpg"), *image_dir.glob("*.png")])
    print(f"Processing {len(image_files)} images...")
    if not image_files:
        return []

    # "your-username/your-model-name" is a placeholder repo id to be replaced.
    # NOTE(review): trust_remote_code=True lets the model repository supply
    # Python code that is run locally -- only use with a repository you trust.
    model = AutoModel.from_pretrained(
        "your-username/your-model-name", trust_remote_code=True
    )

    results = []
    for image_file in image_files:
        print(f"Processing: {image_file.name}")

        # Use the image as a context manager so its underlying file handle is
        # released once OCR for this file is done, rather than accumulating
        # one open handle per image over a large batch.
        with Image.open(image_file) as image:
            # generate_ocr_text is provided by the model's own code.
            # NOTE(review): presumably returns a mapping with a string "text"
            # and a numeric "confidence" -- confirm against the model repo.
            result = model.generate_ocr_text(image, use_native=True)

        results.append({
            "filename": image_file.name,
            "text": result["text"],
            "confidence": result["confidence"],
        })
        # Show only the first 50 characters as a preview of the output.
        print(f" Text: {result['text'][:50]}...")
        print(f" Confidence: {result['confidence']:.3f}")
    return results
if __name__ == "__main__":
    import sys

    # Command-line entry point: the first positional argument is the
    # directory whose images should be processed.
    if len(sys.argv) < 2:
        # No directory supplied -- show how the script is meant to be called.
        print("Usage: python batch_processing.py <image_directory>")
    else:
        results = batch_ocr_example(sys.argv[1])
        print(f"\nProcessed {len(results)} images successfully!")