def ocr_demo(image, task, ocr_type, ocr_box, ocr_color):
    """Run an OCR task and shape the result for the demo UI.

    Delegates to ``process_image`` and wraps the recognized text in
    ``$$ ... $$`` delimiters.

    Args:
        image: Input image, forwarded unchanged to ``process_image``.
        task: Name of the OCR task to run (see ``process_image``).
        ocr_type: OCR type forwarded for the fine-grained tasks.
        ocr_box: Box argument forwarded for "Fine-grained OCR (Box)".
        ocr_color: Color argument forwarded for "Fine-grained OCR (Color)".

    Returns:
        A ``(text, html)`` tuple. ``html`` is ``None`` when no rendered HTML
        was produced, otherwise the HTML string to display.
    """
    res, html_content = process_image(image, task, ocr_type, ocr_box, ocr_color)
    res = f"$$ {res} $$"
    if html_content:
        # NOTE(review): the markup built here is an empty string, so the
        # rendered HTML read by process_image is never shown. This looks like
        # lost template text (e.g. an embedding element) -- confirm and restore.
        html_string = ""
        return res, html_string
    return res, None


@spaces.GPU
def process_image(image, task, ocr_type=None, ocr_box=None, ocr_color=None):
    """Run the requested OCR task and collect any rendered HTML output.

    Args:
        image: Input image passed to the model.
        task: One of "Plain Text OCR", "Format Text OCR",
            "Fine-grained OCR (Box)", "Fine-grained OCR (Color)",
            "Multi-crop OCR" or "Render Formatted OCR".
        ocr_type: OCR type used by the two fine-grained tasks.
        ocr_box: Box argument used by "Fine-grained OCR (Box)".
        ocr_color: Color argument used by "Fine-grained OCR (Color)".

    Returns:
        A ``(res, html_content)`` tuple. ``res`` is whatever the model call
        returns; ``html_content`` is the text of the first existing result
        file, or ``None`` (always ``None`` for "Plain Text OCR").

    Raises:
        ValueError: If ``task`` is not one of the supported task names.
    """
    if task == "Plain Text OCR":
        # Plain OCR requests no render, so there is no HTML to return.
        return model.chat(tokenizer, image, ocr_type='ocr'), None

    demo_html = os.path.join(results_folder, "demo.html")
    # Every rendering task asks the model to save its render to demo.html.
    render_kwargs = {"render": True, "save_render_file": demo_html}

    if task in ("Format Text OCR", "Render Formatted OCR"):
        res = model.chat(tokenizer, image, ocr_type='format', **render_kwargs)
    elif task == "Fine-grained OCR (Box)":
        res = model.chat(tokenizer, image, ocr_type=ocr_type, ocr_box=ocr_box,
                         **render_kwargs)
    elif task == "Fine-grained OCR (Color)":
        res = model.chat(tokenizer, image, ocr_type=ocr_type,
                         ocr_color=ocr_color, **render_kwargs)
    elif task == "Multi-crop OCR":
        res = model.chat_crop(tokenizer, image, ocr_type='format',
                              **render_kwargs)
    else:
        # Fail loudly instead of hitting an unbound ``res`` further down.
        raise ValueError(f"Unsupported OCR task: {task!r}")

    # Candidate result files, checked in priority order.
    # NOTE(review): these are fixed, shared paths; a file left over from an
    # earlier call would be returned if this call wrote nothing -- confirm
    # whether stale output needs to be cleared before rendering.
    candidates = (
        demo_html,
        os.path.join(results_folder, f"{task.replace(' ', '_').lower()}.html"),
        os.path.join(results_folder, "tikz.html"),
    )
    for path in candidates:
        if os.path.exists(path):
            with open(path, 'r') as f:
                return res, f.read()
    return res, None