Tonic committed on
Commit
7dcbad8
•
1 Parent(s): ee4b3d0

improve file handling, display html

Browse files
Files changed (2) hide show
  1. .gitignore +2 -1
  2. app.py +40 -23
.gitignore CHANGED
@@ -1,2 +1,3 @@
1
  .DS_Store
2
- .venv/*
 
 
1
  .DS_Store
2
+ .venv/*
3
+ notes.py
app.py CHANGED
@@ -10,6 +10,9 @@ import numpy as np
10
  import yaml
11
  from pathlib import Path
12
  from globe import title, description, modelinfor, joinus
 
 
 
13
 
14
  model_name = 'ucaslcl/GOT-OCR2_0'
15
 
@@ -24,28 +27,36 @@ def image_to_base64(image):
24
  image.save(buffered, format="PNG")
25
  return base64.b64encode(buffered.getvalue()).decode()
26
 
27
- html_file = './demo.html'
 
28
 
29
  @spaces.GPU
30
  def process_image(image, task, ocr_type=None, ocr_box=None, ocr_color=None):
 
 
 
31
  if task == "Plain Text OCR":
32
  res = model.chat(tokenizer, image, ocr_type='ocr')
33
- return res, None
34
- elif task == "Format Text OCR":
35
- res = model.chat(tokenizer, image, ocr_type='format', render=True, save_render_file=html_file)
36
- elif task == "Fine-grained OCR (Box)":
37
- res = model.chat(tokenizer, image, ocr_type=ocr_type, ocr_box=ocr_box, render=True, save_render_file=html_file)
38
- elif task == "Fine-grained OCR (Color)":
39
- res = model.chat(tokenizer, image, ocr_type=ocr_type, ocr_color=ocr_color, render=True, save_render_file=html_file)
40
- elif task == "Multi-crop OCR":
41
- res = model.chat_crop(tokenizer, image, ocr_type='format', render=True, save_render_file=html_file)
42
- elif task == "Render Formatted OCR":
43
- res = model.chat(tokenizer, image, ocr_type='format', render=True, save_render_file=html_file)
 
 
 
 
 
 
 
 
44
 
45
- with open(html_file, 'r') as f:
46
- html_content = f.read()
47
- return res, html_content
48
-
49
  def update_inputs(task):
50
  if task in ["Plain Text OCR", "Format Text OCR", "Multi-crop OCR", "Render Formatted OCR"]:
51
  return [gr.update(visible=False)] * 3
@@ -64,17 +75,22 @@ def update_inputs(task):
64
  def ocr_demo(image, task, ocr_type, ocr_box, ocr_color):
65
  res, html_content = process_image(image, task, ocr_type, ocr_box, ocr_color)
66
 
67
- res = f"${res}$"
68
- res = res.replace("$\\begin{tabular}", "\\begin{tabular}")
69
- res = res.replace("\\end{tabular}$", "\\end{tabular}")
70
- res = res.replace("\\(", "")
71
- res = res.replace("\\)", "")
72
 
73
  if html_content:
74
  html_string = f'<iframe srcdoc="{html_content}" width="100%" height="600px"></iframe>'
75
  return res, html_string
76
  return res, None
77
- import gradio as gr
 
 
 
 
 
78
 
79
  with gr.Blocks() as demo:
80
  gr.Markdown(title)
@@ -102,7 +118,7 @@ with gr.Blocks() as demo:
102
  )
103
  ocr_box_input = gr.Textbox(
104
  label="OCR Box (x1,y1,x2,y2)",
105
- placeholder="e.g., 100,100,200,200",
106
  visible=False
107
  )
108
  ocr_color_dropdown = gr.Dropdown(
@@ -130,4 +146,5 @@ with gr.Blocks() as demo:
130
  )
131
 
132
  if __name__ == "__main__":
 
133
  demo.launch()
 
10
  import yaml
11
  from pathlib import Path
12
  from globe import title, description, modelinfor, joinus
13
+ import uuid
14
+ import tempfile
15
+ import time
16
 
17
  model_name = 'ucaslcl/GOT-OCR2_0'
18
 
 
27
  image.save(buffered, format="PNG")
28
  return base64.b64encode(buffered.getvalue()).decode()
29
 
30
+ results_folder = Path('./results')
31
+ results_folder.mkdir(parents=True, exist_ok=True)
32
 
33
  @spaces.GPU
34
  def process_image(image, task, ocr_type=None, ocr_box=None, ocr_color=None):
35
+ unique_id = str(uuid.uuid4())
36
+ temp_html_path = results_folder / f"{unique_id}.html"
37
+
38
  if task == "Plain Text OCR":
39
  res = model.chat(tokenizer, image, ocr_type='ocr')
40
+ return res, None, unique_id
41
+ else:
42
+ if task == "Format Text OCR":
43
+ res = model.chat(tokenizer, image, ocr_type='format', render=True, save_render_file=str(temp_html_path))
44
+ elif task == "Fine-grained OCR (Box)":
45
+ res = model.chat(tokenizer, image, ocr_type=ocr_type, ocr_box=ocr_box, render=True, save_render_file=str(temp_html_path))
46
+ elif task == "Fine-grained OCR (Color)":
47
+ res = model.chat(tokenizer, image, ocr_type=ocr_type, ocr_color=ocr_color, render=True, save_render_file=str(temp_html_path))
48
+ elif task == "Multi-crop OCR":
49
+ res = model.chat_crop(tokenizer, image, ocr_type='format', render=True, save_render_file=str(temp_html_path))
50
+ elif task == "Render Formatted OCR":
51
+ res = model.chat(tokenizer, image, ocr_type='format', render=True, save_render_file=str(temp_html_path))
52
+
53
+ if temp_html_path.exists():
54
+ with open(temp_html_path, 'r') as f:
55
+ html_content = f.read()
56
+ return res, html_content, unique_id
57
+ else:
58
+ return res, None, unique_id
59
 
 
 
 
 
60
  def update_inputs(task):
61
  if task in ["Plain Text OCR", "Format Text OCR", "Multi-crop OCR", "Render Formatted OCR"]:
62
  return [gr.update(visible=False)] * 3
 
75
  def ocr_demo(image, task, ocr_type, ocr_box, ocr_color):
76
  res, html_content = process_image(image, task, ocr_type, ocr_box, ocr_color)
77
 
78
+ res = f"$$ {res} $$"
79
+ # res = res.replace("$$ \\begin{tabular}", "\\begin{tabular}")
80
+ # res = res.replace("\\end{tabular} $$", "\\end{tabular}")
81
+ # res = res.replace("\\(", "")
82
+ # res = res.replace("\\)", "")
83
 
84
  if html_content:
85
  html_string = f'<iframe srcdoc="{html_content}" width="100%" height="600px"></iframe>'
86
  return res, html_string
87
  return res, None
88
+
89
+ def cleanup_old_files():
90
+ current_time = time.time()
91
+ for file_path in results_folder.glob('*.html'):
92
+ if current_time - file_path.stat().st_mtime > 3600: # 1 hour
93
+ file_path.unlink()
94
 
95
  with gr.Blocks() as demo:
96
  gr.Markdown(title)
 
118
  )
119
  ocr_box_input = gr.Textbox(
120
  label="OCR Box (x1,y1,x2,y2)",
121
+ placeholder="[100,100,200,200]",
122
  visible=False
123
  )
124
  ocr_color_dropdown = gr.Dropdown(
 
146
  )
147
 
148
  if __name__ == "__main__":
149
+ cleanup_old_files()
150
  demo.launch()