GOT-OCR / notes.py
Tonic's picture
improve file handling , display html
405302e unverified
raw
history blame
4.33 kB
def ocr_demo(image, task, ocr_type, ocr_box, ocr_color):
    """Run OCR on ``image`` and prepare both outputs for display.

    Delegates the actual work to ``process_image`` and post-processes the
    two values it returns:

    * the recognised text is wrapped in ``$$ ... $$`` delimiters
      (presumably so the front end typesets it as display math -- confirm
      against the UI component that receives it);
    * the rendered HTML, when there is any, is embedded in an ``<iframe>``
      through its ``srcdoc`` attribute.

    Args:
        image: Input image, forwarded unchanged to ``process_image``.
        task: Name of the OCR task, forwarded unchanged.
        ocr_type: Forwarded unchanged to ``process_image``.
        ocr_box: Forwarded unchanged to ``process_image``.
        ocr_color: Forwarded unchanged to ``process_image``.

    Returns:
        A ``(text, iframe_html)`` tuple. ``iframe_html`` is ``None`` when
        ``process_image`` returned no HTML (``None`` or an empty string).
    """
    # Function-scope import: the file's top-level import block is not
    # visible from here, so bring the one extra stdlib name in locally.
    from html import escape

    res, html_content = process_image(image, task, ocr_type, ocr_box, ocr_color)
    res = f"$$ {res} $$"

    if not html_content:
        return res, None

    # ``srcdoc`` is a double-quoted attribute, so the document must be
    # attribute-escaped. Without this, the first '"' inside the rendered
    # HTML would terminate the attribute and truncate the embedded page.
    # The browser decodes the entities again before parsing srcdoc.
    escaped_html = escape(html_content, quote=True)
    html_string = f'<iframe srcdoc="{escaped_html}" width="100%" height="600px"></iframe>'
    return res, html_string
@spaces.GPU
def process_image(image, task, ocr_type=None, ocr_box=None, ocr_color=None):
    """Run the OCR model for ``task`` and collect any rendered HTML.

    For every task except "Plain Text OCR" the model is called with
    ``render=True`` and ``save_render_file`` pointing at a freshly created
    temporary ``.html`` file inside ``results_folder`` (presumably the model
    writes its rendered page there -- confirm against the model's API).
    The content of that file is returned when it is non-empty; otherwise the
    function falls back to the first existing file among ``demo.html``,
    ``<task>.html`` and ``tikz.html`` in ``results_folder``.

    NOTE(review): a later definition with the same name in this file rebinds
    ``process_image`` and returns a 2-tuple; this version returns a 3-tuple.

    Args:
        image: Input image, passed straight to the model.
        task: One of "Plain Text OCR", "Format Text OCR",
            "Fine-grained OCR (Box)", "Fine-grained OCR (Color)",
            "Multi-crop OCR" or "Render Formatted OCR".
        ocr_type: Forwarded to the model for the fine-grained tasks.
        ocr_box: Forwarded to the model for "Fine-grained OCR (Box)".
        ocr_color: Forwarded to the model for "Fine-grained OCR (Color)".

    Returns:
        ``(res, html_content, unique_id)`` where ``res`` is the model's
        result, ``html_content`` is the rendered HTML text or ``None``, and
        ``unique_id`` is a fresh UUID4 string.

    Raises:
        ValueError: If ``task`` is not one of the supported task names.
    """
    unique_id = str(uuid.uuid4())

    if task == "Plain Text OCR":
        # Nothing is rendered on this path, so no temp file is created.
        res = model.chat(tokenizer, image, ocr_type='ocr')
        return res, None, unique_id

    # Reserve a unique path for the render output. delete=False keeps the
    # file on disk after the handle closes so the model can write to it;
    # it is removed explicitly in the ``finally`` below.
    with tempfile.NamedTemporaryFile(mode='w+', suffix='.html', delete=False, dir=results_folder) as temp_file:
        temp_html_path = temp_file.name

    try:
        if task in ("Format Text OCR", "Render Formatted OCR"):
            res = model.chat(tokenizer, image, ocr_type='format', render=True, save_render_file=temp_html_path)
        elif task == "Fine-grained OCR (Box)":
            res = model.chat(tokenizer, image, ocr_type=ocr_type, ocr_box=ocr_box, render=True, save_render_file=temp_html_path)
        elif task == "Fine-grained OCR (Color)":
            res = model.chat(tokenizer, image, ocr_type=ocr_type, ocr_color=ocr_color, render=True, save_render_file=temp_html_path)
        elif task == "Multi-crop OCR":
            res = model.chat_crop(tokenizer, image, ocr_type='format', render=True, save_render_file=temp_html_path)
        else:
            # Previously an unknown task left ``res`` unbound and surfaced
            # as an UnboundLocalError at the return statement.
            raise ValueError(f"Unsupported OCR task: {task!r}")

        try:
            with open(temp_html_path, 'r') as f:
                html_content = f.read()
        except FileNotFoundError:
            # The file may have been moved or removed during rendering.
            html_content = None
    finally:
        # Best-effort cleanup so temp files do not pile up in results_folder.
        try:
            os.remove(temp_html_path)
        except FileNotFoundError:
            pass

    if html_content:
        return res, html_content, unique_id

    # The temp file was empty or missing: fall back to the fixed-name files,
    # in the same priority order the original lookup used.
    fallback_paths = (
        os.path.join(results_folder, "demo.html"),
        os.path.join(results_folder, f"{task.replace(' ', '_').lower()}.html"),
        os.path.join(results_folder, "tikz.html"),
    )
    for candidate in fallback_paths:
        try:
            with open(candidate, 'r') as f:
                return res, f.read(), unique_id
        except FileNotFoundError:
            continue
    return res, None, unique_id
@spaces.GPU
def process_image(image, task, ocr_type=None, ocr_box=None, ocr_color=None):
    """Run the OCR model for ``task`` and collect any rendered HTML.

    "Plain Text OCR" returns the model's result with no HTML. Every other
    supported task calls the model with ``render=True`` and
    ``save_render_file`` set to ``demo.html`` inside ``results_folder``, then
    returns the text of the first existing file among ``demo.html``,
    ``<task>.html`` (task name lower-cased, spaces replaced by underscores)
    and ``tikz.html``.

    NOTE(review): ``demo.html`` is a fixed path shared by every call, so if a
    render writes nothing, a file left over from an earlier call would be
    returned -- confirm whether that is acceptable for this deployment.

    Args:
        image: Input image, passed straight to the model.
        task: One of "Plain Text OCR", "Format Text OCR",
            "Fine-grained OCR (Box)", "Fine-grained OCR (Color)",
            "Multi-crop OCR" or "Render Formatted OCR".
        ocr_type: Forwarded to the model for the fine-grained tasks.
        ocr_box: Forwarded to the model for "Fine-grained OCR (Box)".
        ocr_color: Forwarded to the model for "Fine-grained OCR (Color)".

    Returns:
        ``(res, html_content)`` where ``res`` is the model's result and
        ``html_content`` is the rendered HTML text, or ``None`` when none of
        the candidate files exists.

    Raises:
        ValueError: If ``task`` is not one of the supported task names.
    """
    if task == "Plain Text OCR":
        res = model.chat(tokenizer, image, ocr_type='ocr')
        return res, None

    demo_html = os.path.join(results_folder, "demo.html")
    html_file = os.path.join(results_folder, f"{task.replace(' ', '_').lower()}.html")
    tikz_file = os.path.join(results_folder, "tikz.html")

    if task in ("Format Text OCR", "Render Formatted OCR"):
        res = model.chat(tokenizer, image, ocr_type='format', render=True, save_render_file=demo_html)
    elif task == "Fine-grained OCR (Box)":
        res = model.chat(tokenizer, image, ocr_type=ocr_type, ocr_box=ocr_box, render=True, save_render_file=demo_html)
    elif task == "Fine-grained OCR (Color)":
        res = model.chat(tokenizer, image, ocr_type=ocr_type, ocr_color=ocr_color, render=True, save_render_file=demo_html)
    elif task == "Multi-crop OCR":
        res = model.chat_crop(tokenizer, image, ocr_type='format', render=True, save_render_file=demo_html)
    else:
        # Previously an unknown task left ``res`` unbound and surfaced as an
        # UnboundLocalError at the return statement.
        raise ValueError(f"Unsupported OCR task: {task!r}")

    # Try each candidate in priority order. Opening directly (instead of
    # checking os.path.exists first) avoids a race if a file disappears
    # between the check and the read.
    for candidate in (demo_html, html_file, tikz_file):
        try:
            with open(candidate, 'r') as f:
                return res, f.read()
        except FileNotFoundError:
            continue
    return res, None