Tonic committed on
Commit
ee4b3d0
1 Parent(s): 02ff46f

add html and markdown outputs, refactor the interface, add outputs

Browse files
Files changed (2) hide show
  1. app.py +65 -100
  2. globe.py +55 -0
app.py CHANGED
@@ -8,40 +8,9 @@ import io
8
  from PIL import Image
9
  import numpy as np
10
  import yaml
11
- import markdown
12
  from pathlib import Path
 
13
 
14
- # Function to extract title and description from the markdown file
15
- def extract_title_description(md_file_path):
16
- with open(md_file_path, 'r') as f:
17
- lines = f.readlines()
18
-
19
- # Extract frontmatter (YAML) for title
20
- frontmatter = []
21
- content_start = 0
22
- if lines[0].strip() == '---':
23
- for idx, line in enumerate(lines[1:], 1):
24
- if line.strip() == '---':
25
- content_start = idx + 1
26
- break
27
- frontmatter.append(line)
28
-
29
- frontmatter_yaml = yaml.safe_load(''.join(frontmatter))
30
- title = frontmatter_yaml.get('title', 'Title Not Found')
31
-
32
- # Extract content (description)
33
- description_md = ''.join(lines[content_start:])
34
- description = markdown.markdown(description_md)
35
-
36
- return title, description
37
-
38
- # Path to the markdown file
39
- md_file_path = 'content/index.md'
40
-
41
- # Extract title and description from the markdown file
42
- title, description = extract_title_description(md_file_path)
43
-
44
- # Rest of the script continues as before
45
  model_name = 'ucaslcl/GOT-OCR2_0'
46
 
47
  tokenizer = AutoTokenizer.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True)
@@ -55,114 +24,110 @@ def image_to_base64(image):
55
  image.save(buffered, format="PNG")
56
  return base64.b64encode(buffered.getvalue()).decode()
57
 
 
 
58
  @spaces.GPU
59
- def process_image(image, task, ocr_type=None, ocr_box=None, ocr_color=None, render=False):
60
  if task == "Plain Text OCR":
61
  res = model.chat(tokenizer, image, ocr_type='ocr')
 
62
  elif task == "Format Text OCR":
63
- res = model.chat(tokenizer, image, ocr_type='format')
64
  elif task == "Fine-grained OCR (Box)":
65
- res = model.chat(tokenizer, image, ocr_type=ocr_type, ocr_box=ocr_box)
66
  elif task == "Fine-grained OCR (Color)":
67
- res = model.chat(tokenizer, image, ocr_type=ocr_type, ocr_color=ocr_color)
68
  elif task == "Multi-crop OCR":
69
- res = model.chat_crop(tokenizer, image_file=image)
70
  elif task == "Render Formatted OCR":
71
- res = model.chat(tokenizer, image, ocr_type='format', render=True, save_render_file='./demo.html')
72
- with open('./demo.html', 'r') as f:
73
- html_content = f.read()
74
- return res, html_content
75
 
76
- return res, None
 
 
77
 
78
  def update_inputs(task):
79
- if task == "Plain Text OCR" or task == "Format Text OCR" or task == "Multi-crop OCR":
80
- return [gr.update(visible=False)] * 4
81
  elif task == "Fine-grained OCR (Box)":
82
  return [
83
  gr.update(visible=True, choices=["ocr", "format"]),
84
  gr.update(visible=True),
85
  gr.update(visible=False),
86
- gr.update(visible=False)
87
  ]
88
  elif task == "Fine-grained OCR (Color)":
89
  return [
90
  gr.update(visible=True, choices=["ocr", "format"]),
91
  gr.update(visible=False),
92
  gr.update(visible=True, choices=["red", "green", "blue"]),
93
- gr.update(visible=False)
94
  ]
95
- elif task == "Render Formatted OCR":
96
- return [gr.update(visible=False)] * 3 + [gr.update(visible=True)]
97
-
98
  def ocr_demo(image, task, ocr_type, ocr_box, ocr_color):
99
  res, html_content = process_image(image, task, ocr_type, ocr_box, ocr_color)
 
 
 
 
 
 
 
100
  if html_content:
101
- return res, html_content
 
102
  return res, None
103
-
104
  import gradio as gr
105
 
106
  with gr.Blocks() as demo:
107
- with gr.Row():
108
- # Left Column: Description
109
- with gr.Column(scale=1):
110
- gr.Markdown(f"# {title}")
111
- gr.Markdown(description)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
112
 
113
- # Right Column: App Inputs and Outputs
114
- with gr.Column(scale=3):
115
- image_input = gr.Image(type="filepath", label="Input Image")
116
- task_dropdown = gr.Dropdown(
117
- choices=[
118
- "Plain Text OCR",
119
- "Format Text OCR",
120
- "Fine-grained OCR (Box)",
121
- "Fine-grained OCR (Color)",
122
- "Multi-crop OCR",
123
- "Render Formatted OCR"
124
- ],
125
- label="Select Task",
126
- value="Plain Text OCR"
127
- )
128
- ocr_type_dropdown = gr.Dropdown(
129
- choices=["ocr", "format"],
130
- label="OCR Type",
131
- visible=False
132
- )
133
- ocr_box_input = gr.Textbox(
134
- label="OCR Box (x1,y1,x2,y2)",
135
- placeholder="e.g., 100,100,200,200",
136
- visible=False
137
- )
138
- ocr_color_dropdown = gr.Dropdown(
139
- choices=["red", "green", "blue"],
140
- label="OCR Color",
141
- visible=False
142
- )
143
- render_checkbox = gr.Checkbox(
144
- label="Render Result",
145
- visible=False
146
- )
147
- submit_button = gr.Button("Process")
148
 
149
- # OCR Result below the Submit button
150
- output_text = gr.Textbox(label="OCR Result")
151
- output_html = gr.HTML(label="Rendered HTML Output")
152
-
153
- # Update inputs dynamically based on task selection
154
  task_dropdown.change(
155
  update_inputs,
156
  inputs=[task_dropdown],
157
- outputs=[ocr_type_dropdown, ocr_box_input, ocr_color_dropdown, render_checkbox]
158
  )
159
 
160
- # Process OCR on button click
161
  submit_button.click(
162
  ocr_demo,
163
  inputs=[image_input, task_dropdown, ocr_type_dropdown, ocr_box_input, ocr_color_dropdown],
164
- outputs=[output_text, output_html]
165
  )
166
 
167
  if __name__ == "__main__":
168
- demo.launch()
 
8
  from PIL import Image
9
  import numpy as np
10
  import yaml
 
11
  from pathlib import Path
12
+ from globe import title, description, modelinfor, joinus
13
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  model_name = 'ucaslcl/GOT-OCR2_0'
15
 
16
  tokenizer = AutoTokenizer.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True)
 
24
  image.save(buffered, format="PNG")
25
  return base64.b64encode(buffered.getvalue()).decode()
26
 
27
+ html_file = './demo.html'
28
+
29
  @spaces.GPU
30
+ def process_image(image, task, ocr_type=None, ocr_box=None, ocr_color=None):
31
  if task == "Plain Text OCR":
32
  res = model.chat(tokenizer, image, ocr_type='ocr')
33
+ return res, None
34
  elif task == "Format Text OCR":
35
+ res = model.chat(tokenizer, image, ocr_type='format', render=True, save_render_file=html_file)
36
  elif task == "Fine-grained OCR (Box)":
37
+ res = model.chat(tokenizer, image, ocr_type=ocr_type, ocr_box=ocr_box, render=True, save_render_file=html_file)
38
  elif task == "Fine-grained OCR (Color)":
39
+ res = model.chat(tokenizer, image, ocr_type=ocr_type, ocr_color=ocr_color, render=True, save_render_file=html_file)
40
  elif task == "Multi-crop OCR":
41
+ res = model.chat_crop(tokenizer, image, ocr_type='format', render=True, save_render_file=html_file)
42
  elif task == "Render Formatted OCR":
43
+ res = model.chat(tokenizer, image, ocr_type='format', render=True, save_render_file=html_file)
 
 
 
44
 
45
+ with open(html_file, 'r') as f:
46
+ html_content = f.read()
47
+ return res, html_content
48
 
49
  def update_inputs(task):
50
+ if task in ["Plain Text OCR", "Format Text OCR", "Multi-crop OCR", "Render Formatted OCR"]:
51
+ return [gr.update(visible=False)] * 3
52
  elif task == "Fine-grained OCR (Box)":
53
  return [
54
  gr.update(visible=True, choices=["ocr", "format"]),
55
  gr.update(visible=True),
56
  gr.update(visible=False),
 
57
  ]
58
  elif task == "Fine-grained OCR (Color)":
59
  return [
60
  gr.update(visible=True, choices=["ocr", "format"]),
61
  gr.update(visible=False),
62
  gr.update(visible=True, choices=["red", "green", "blue"]),
 
63
  ]
 
 
 
64
  def ocr_demo(image, task, ocr_type, ocr_box, ocr_color):
65
  res, html_content = process_image(image, task, ocr_type, ocr_box, ocr_color)
66
+
67
+ res = f"${res}$"
68
+ res = res.replace("$\\begin{tabular}", "\\begin{tabular}")
69
+ res = res.replace("\\end{tabular}$", "\\end{tabular}")
70
+ res = res.replace("\\(", "")
71
+ res = res.replace("\\)", "")
72
+
73
  if html_content:
74
+ html_string = f'<iframe srcdoc="{html_content}" width="100%" height="600px"></iframe>'
75
+ return res, html_string
76
  return res, None
 
77
  import gradio as gr
78
 
79
  with gr.Blocks() as demo:
80
+ gr.Markdown(title)
81
+ gr.Markdown(description)
82
+ gr.Markdown(joinus)
83
+
84
+ with gr.Column():
85
+ image_input = gr.Image(type="filepath", label="Input Image")
86
+ task_dropdown = gr.Dropdown(
87
+ choices=[
88
+ "Plain Text OCR",
89
+ "Format Text OCR",
90
+ "Fine-grained OCR (Box)",
91
+ "Fine-grained OCR (Color)",
92
+ "Multi-crop OCR",
93
+ "Render Formatted OCR"
94
+ ],
95
+ label="Select Task",
96
+ value="Plain Text OCR"
97
+ )
98
+ ocr_type_dropdown = gr.Dropdown(
99
+ choices=["ocr", "format"],
100
+ label="OCR Type",
101
+ visible=False
102
+ )
103
+ ocr_box_input = gr.Textbox(
104
+ label="OCR Box (x1,y1,x2,y2)",
105
+ placeholder="e.g., 100,100,200,200",
106
+ visible=False
107
+ )
108
+ ocr_color_dropdown = gr.Dropdown(
109
+ choices=["red", "green", "blue"],
110
+ label="OCR Color",
111
+ visible=False
112
+ )
113
+ submit_button = gr.Button("Process")
114
 
115
+ output_markdown = gr.Markdown(label="🫴🏻📸GOT-OCR")
116
+ output_html = gr.HTML(label="🫴🏻📸GOT-OCR")
117
+
118
+ gr.Markdown(modelinfor)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
119
 
 
 
 
 
 
120
  task_dropdown.change(
121
  update_inputs,
122
  inputs=[task_dropdown],
123
+ outputs=[ocr_type_dropdown, ocr_box_input, ocr_color_dropdown]
124
  )
125
 
 
126
  submit_button.click(
127
  ocr_demo,
128
  inputs=[image_input, task_dropdown, ocr_type_dropdown, ocr_box_input, ocr_color_dropdown],
129
+ outputs=[output_markdown, output_html]
130
  )
131
 
132
  if __name__ == "__main__":
133
+ demo.launch()
globe.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
# Static page copy (Markdown) and shared choice lists for the GOT-OCR demo UI.

# Page heading, followed by a horizontal rule.
title = """# 🙋🏻‍♂️Welcome to Tonic's🫴🏻📸GOT-OCR
---
"""

# Introductory text: what the model is, what it supports, how to use the demo.
description = """
The **GOT-OCR model** is a cutting-edge OCR system with **580M parameters**, designed to process a wide range of "characters." Equipped with a **high-compression encoder** and a **long-context decoder**, it excels in both scene and document-style images. The model supports **multi-page** and **dynamic resolution OCR**, enhancing its versatility.

### Key Features

- **Plain Text OCR**: Extracts text from images.
- **Formatted Text OCR**: Retains the original formatting, including tables and formulas.
- **Fine-grained OCR**: Offers box-based and color-based OCR for precision in specific regions.
- **Multi-crop OCR**: Handles multiple cropped sections within an image.

## Supported Content Types

- Plain text
- Math/molecular formulas
- Tables and charts
- Sheet music
- Geometric shapes

## How to Use

1. Select a task from the dropdown menu.
2. Upload an image.
3. (Optional) Adjust parameters based on the selected task.
4. Click **Process** to view the results.
"""
# Community links.
joinus = """---
### Join us :

🌟TeamTonic🌟 is always making cool demos! Join our active builder's 🛠️community 👻 [![Join us on Discord](https://img.shields.io/discord/1109943800132010065?label=Discord&logo=discord&style=flat-square)](https://discord.gg/qdfnvSPcqP) On 🤗Huggingface:[MultiTransformer](https://huggingface.co/MultiTransformer) On 🌐Github: [Tonic-AI](https://github.com/tonic-ai) & contribute to🌟 [Build Tonic](https://git.tonic-ai.com/contribute)🤗Big thanks to Yuvi Sharma and all the folks at huggingface for the community grant 🤗
"""
# Model details.
modelinfor = """---

### Model Information

- **Model Name**: GOT-OCR 2.0
- **Hugging Face Repository**: [ucaslcl/GOT-OCR2_0](https://huggingface.co/ucaslcl/GOT-OCR2_0)
- **Environment**: CUDA 11.8 + PyTorch 2.0.1
"""

# Task labels.
tasks = [
    "Plain Text OCR",
    "Format Text OCR",
    "Fine-grained OCR (Box)",
    "Fine-grained OCR (Color)",
    "Multi-crop OCR",
    "Render Formatted OCR"
]

# OCR modes and colors.
ocr_types = ["ocr", "format"]
ocr_colors = ["red", "green", "blue"]