ysharma HF staff committed on
Commit
61e3677
•
1 Parent(s): 632f752

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -5
app.py CHANGED
@@ -3,12 +3,13 @@ import subprocess
3
 
4
 
5
  def nougat_ocr(file_name):
 
6
  # CLI Command to run
7
  cli_command = [
8
  'nougat',
9
- '--out', '/output',
10
  'pdf', f'{file_name}',
11
- '--checkpoint', '/nougat'
12
  ]
13
 
14
  # Run the command and get .mmd file in an output folder
@@ -17,23 +18,25 @@ def nougat_ocr(file_name):
17
 
18
 
19
  def predict(pdf_file):
 
20
  print(f"temporary file - {pdf_file.name}")
21
  pdf_name = pdf_file.name.split('/')[-1].split('.')[0]
22
  print(f"pdf file name - {pdf_name}")
23
 
24
  #! Get prediction for a PDF using nougat
25
  nougat_ocr(pdf_file.name)
 
26
 
27
  # Open the multimarkdown (.mmd) file for reading
28
- with open(f'/output/{pdf_name}.mmd', 'r') as file:
29
  content = file.read()
30
 
31
  return content
32
 
33
 
34
  with gr.Blocks() as demo:
35
- gr.HTML("<h1>Nougat: Neural Optical Understanding for Academic Documents<h1>")
36
- gr.HTML("<h3>Lukas Blecher et al. <a href='https://arxiv.org/pdf/2308.13418.pdf' target='_blank'>Paper</a>, <a href='https://facebookresearch.github.io/nougat/'>Project</a></h3>")
37
 
38
  with gr.Row():
39
  pdf_file = gr.File(label='Upload a PDF', scale=1)
 
3
 
4
 
5
  def nougat_ocr(file_name):
6
+ print('******* inside nougat_ocr *******')
7
  # CLI Command to run
8
  cli_command = [
9
  'nougat',
10
+ '--out', 'output',
11
  'pdf', f'{file_name}',
12
+ '--checkpoint', 'nougat'
13
  ]
14
 
15
  # Run the command and get .mmd file in an output folder
 
18
 
19
 
20
  def predict(pdf_file):
21
+ print('******* inside predict *******')
22
  print(f"temporary file - {pdf_file.name}")
23
  pdf_name = pdf_file.name.split('/')[-1].split('.')[0]
24
  print(f"pdf file name - {pdf_name}")
25
 
26
  #! Get prediction for a PDF using nougat
27
  nougat_ocr(pdf_file.name)
28
+ print("BAACCKKK")
29
 
30
  # Open the multimarkdown (.mmd) file for reading
31
+ with open(f'output/{pdf_name}.mmd', 'r') as file:
32
  content = file.read()
33
 
34
  return content
35
 
36
 
37
  with gr.Blocks() as demo:
38
+ gr.HTML("<h1><center>Nougat: Neural Optical Understanding for Academic Documents<center><h1>")
39
+ gr.HTML("<h3><center>Lukas Blecher et al. <a href='https://arxiv.org/pdf/2308.13418.pdf' target='_blank'>Paper</a>, <a href='https://facebookresearch.github.io/nougat/'>Project</a><center></h3>")
40
 
41
  with gr.Row():
42
  pdf_file = gr.File(label='Upload a PDF', scale=1)