BabaK07
/

textract-ai

feature-extraction

vision-language

text-extraction

Model card Files Files and versions

textract-ai / examples /batch_processing.py

BabaK07's picture

Upload custom OCR model based on Qwen2.5-VL

b127e5d verified 27 days ago

1.41 kB


	"""
	Batch processing example for the Custom OCR Model.
	"""

	from transformers import AutoModel
	from PIL import Image
	import os
	from pathlib import Path

	def batch_ocr_example(
	    image_directory: str,
	    model_name: str = "your-username/your-model-name",
	) -> list:
	    """Run OCR over every JPG/PNG image directly inside a directory.

	    Args:
	        image_directory: Path of the directory to scan (not recursive).
	        model_name: Identifier passed to ``AutoModel.from_pretrained``.
	            The default is a placeholder and must be replaced with a
	            real model id for the example to run.

	    Returns:
	        A list with one dict per processed image, holding the keys
	        ``"filename"``, ``"text"`` and ``"confidence"`` copied from the
	        model's result.
	    """
	    # The model ships its own code, hence trust_remote_code=True.
	    model = AutoModel.from_pretrained(model_name, trust_remote_code=True)

	    # glob() needs an explicit wildcard: a bare ".jpg" pattern only matches
	    # a file literally named ".jpg". Sorting gives a stable processing order
	    # regardless of how the filesystem enumerates entries.
	    image_dir = Path(image_directory)
	    image_files = sorted(
	        [*image_dir.glob("*.jpg"), *image_dir.glob("*.png")]
	    )

	    print(f"Processing {len(image_files)} images...")

	    results = []
	    for image_file in image_files:
	        print(f"Processing: {image_file.name}")

	        # Open inside a context manager so the underlying file handle is
	        # released after each image instead of piling up across the batch.
	        with Image.open(image_file) as image:
	            result = model.generate_ocr_text(image, use_native=True)

	        results.append({
	            "filename": image_file.name,
	            "text": result["text"],
	            "confidence": result["confidence"],
	        })

	        print(f" Text: {result['text'][:50]}...")
	        print(f" Confidence: {result['confidence']:.3f}")

	    return results

	if __name__ == "__main__":
	    # Script entry point: expects the image directory as the first argument.
	    import sys

	    if len(sys.argv) <= 1:
	        # No directory supplied: show how to invoke the script.
	        print("Usage: python batch_processing.py <image_directory>")
	    else:
	        results = batch_ocr_example(sys.argv[1])
	        print(f"\nProcessed {len(results)} images successfully!")