"""ScriptSense: a Gradio demo that transcribes handwriting with TrOCR.

The app accepts an image URL, a sketchpad drawing, or an uploaded image and
returns the text produced by the ``microsoft/trocr-large-handwritten`` model.
"""

import gradio as gr
import requests
from PIL import Image
from transformers import TrOCRProcessor, VisionEncoderDecoderModel

# App title and page text.
title = "ScriptSense"
# NOTE(review): these two literals were split across physical lines in the
# original file; the line breaks are preserved here as "\n" escapes. Any HTML
# markup that may once have wrapped the text is not visible -- confirm.
description = "\ndesign and crafted by aryan verma\n"
article = "aryan verma | 241306064\n"
css = "footer {display: none !important;}"

# Sample inputs, one value per interface input: (url, sketchpad, upload).
examples = [
    ["", "images/1.jpg", "images/1.jpg"],
    ["", "images/sample-handwritten-2.PNG", "images/sample-handwritten-2.PNG"],
]

# Seconds to wait on a remote image before giving up, so that a slow or
# unresponsive host cannot block a request forever.
URL_TIMEOUT_SECONDS = 10

# Any compatible checkpoint from the Hugging Face Hub can be loaded here.
processor = TrOCRProcessor.from_pretrained('microsoft/trocr-large-handwritten')
model = VisionEncoderDecoderModel.from_pretrained('microsoft/trocr-large-handwritten')


def _sketch_to_image(sketch):
    """Return the sketchpad value as an RGB image, or None if it has none.

    A dict value is read through its "composite" entry (the shape this code
    expects from newer Gradio sketchpads -- TODO confirm against the installed
    Gradio version); any other value is converted directly.
    """
    if isinstance(sketch, dict):
        composite = sketch.get("composite")
        return None if composite is None else composite.convert("RGB")
    return sketch.convert("RGB")


def predict(ImageUrl, imgDraw, imgUplod):
    """Recognise handwritten text from one of three image sources.

    Sources are tried in this order: URL, then uploaded image, then sketchpad
    drawing. Only the first one that is provided is used.

    Args:
        ImageUrl: URL of an image to download; empty or whitespace-only
            values are treated as "not provided".
        imgDraw: Sketchpad value (an image object or a dict with a
            "composite" entry), or None.
        imgUplod: Uploaded image object, or None.

    Returns:
        The recognised text, or a human-readable error message when the URL
        cannot be loaded or no image was supplied.
    """
    image = None
    url = ImageUrl.strip() if isinstance(ImageUrl, str) else ImageUrl

    if url:
        try:
            # The context manager releases the streamed connection once the
            # pixel data has been read by convert().
            with requests.get(
                url, stream=True, timeout=URL_TIMEOUT_SECONDS
            ) as response:
                response.raise_for_status()
                image = Image.open(response.raw).convert("RGB")
        except Exception:
            # UI boundary: any network or decoding failure becomes a message
            # for the user, while KeyboardInterrupt/SystemExit still propagate.
            return "Error: Invalid Image URL"
    elif imgUplod is not None:
        # An uploaded image takes precedence over the drawing canvas.
        image = imgUplod.convert("RGB")
    elif imgDraw is not None:
        # Last resort: whatever was drawn on the sketchpad.
        image = _sketch_to_image(imgDraw)

    if image is None:
        return "Please provide an image via URL, Sketchpad, or Upload."

    # Run the TrOCR model loaded at module import time.
    pixel_values = processor(images=image, return_tensors="pt").pixel_values
    generated_ids = model.generate(pixel_values)
    generated_text = processor.batch_decode(
        generated_ids, skip_special_tokens=True
    )[0]
    return generated_text


# Gradio interface: inputs map positionally onto predict's parameters.
interface = gr.Interface(
    fn=predict,
    inputs=["text", gr.Sketchpad(type="pil"), gr.Image(type="pil")],
    outputs="text",
    title=title,
    description=description,
    article=article,
    examples=examples,
    css=css,
)
interface.launch()