kasper-boy committed on
Commit
c1c79ae
·
verified ·
1 Parent(s): 1330c91

Create save_data.py

Browse files
Files changed (1) hide show
  1. save_data.py +144 -0
save_data.py ADDED
@@ -0,0 +1,144 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import numpy as np
3
+ import json
4
+ import shutil
5
+ import requests
6
+ import re as r
7
+ from urllib.request import urlopen
8
+ from datetime import datetime
9
+ from datasets import Image
10
+ from PIL import Image
11
+ from huggingface_hub import Repository, upload_file
12
+
13
# --- Configuration ----------------------------------------------------------
# Hugging Face access token; None when the HF_TOKEN env var is not set.
HF_TOKEN = os.environ.get("HF_TOKEN")
DATASET_NAME = "OCR-img-to-text"
DATASET_REPO_URL = "https://huggingface.co/datasets/pragnakalp/OCR-img-to-text"
DATA_FILENAME = "ocr_data.csv"
DATA_FILE = os.path.join("ocr_data", DATA_FILENAME)
DATASET_REPO_ID = "pragnakalp/OCR-img-to-text"
print("is none?", HF_TOKEN is None)
# Folder inside the dataset repo that receives the uploaded records.
REPOSITORY_DIR = "data"
# Local scratch folder where each record is written before it is uploaded.
LOCAL_DIR = 'data_local'
os.makedirs(LOCAL_DIR, exist_ok=True)

# --- Best-effort download of the CSV file -----------------------------------
# NOTE(review): neither `hf_hub_download` nor `DATA_DIRNAME` is defined or
# imported anywhere in this file, so this call currently always raises a
# NameError and ends up in the handler below. The real error is now printed
# instead of being hidden behind a bare `except:`. Before repairing the call,
# confirm which directory it should write to and that it does not interfere
# with the clone into "ocr_data" below.
try:
    hf_hub_download(
        repo_id=DATASET_REPO_ID,
        filename=DATA_FILENAME,
        cache_dir=DATA_DIRNAME,
        force_filename=DATA_FILENAME
    )
except Exception as err:
    # Startup must not abort because of this step, but Ctrl-C / SystemExit
    # are no longer swallowed.
    print("file not found:", err)

# --- Clone / refresh the dataset repository ---------------------------------
# `repo` is bound up front so that code using it later can test for None
# instead of hitting an unbound name when the clone fails.
repo = None
try:
    repo = Repository(local_dir="ocr_data", clone_from=DATASET_REPO_URL, use_auth_token=HF_TOKEN)
    repo.git_pull()
except Exception as e:
    print("Error occurred during git pull:", e)
43
+
44
def getIP(timeout=10):
    """Return the public IPv4 address reported by checkip.dyndns.com.

    Args:
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The dotted-quad address as a string, or ``''`` when the lookup
        fails for any reason (the error is printed, never raised).
    """
    ip_address = ''
    try:
        # The context manager closes the connection; the timeout keeps a
        # stalled service from blocking the caller indefinitely.
        with urlopen('http://checkip.dyndns.com/', timeout=timeout) as response:
            page = response.read().decode('utf-8', errors='replace')

        match = r.search(r'Address: (\d+\.\d+\.\d+\.\d+)', page)
        if match is None:
            raise ValueError("no IP address found in response")
        return match.group(1)
    except Exception as e:
        print("Error while getting IP address -->", e)
    return ip_address
54
+
55
def get_location(ip_addr, timeout=10):
    """Look up location details for an IP address via the demo service.

    Sends a JSON POST containing the IP and an access token to
    ``https://demos.pragnakalp.com/get-ip-location``.

    Args:
        ip_addr: IP address to look up.
        timeout: Seconds to wait for the service before giving up.

    Returns:
        The decoded JSON response, or an empty dict when the request or the
        JSON decoding fails (the error is printed, never raised).
    """
    location = {}
    try:
        # NOTE(review): the token is hard-coded in source; consider loading
        # it from configuration instead.
        req_data = {
            "ip": ip_addr,
            "token": "pkml123"
        }
        url = "https://demos.pragnakalp.com/get-ip-location"
        headers = {'Content-Type': 'application/json'}

        # Without a timeout a stalled service would block the caller forever.
        response = requests.post(
            url,
            headers=headers,
            data=json.dumps(req_data),
            timeout=timeout,
        )
        response = response.json()
        print("response======>>", response)
        return response
    except Exception as e:
        print("Error while getting location -->", e)
    return location
77
+
78
+ """
79
+ Save generated details
80
+ """
81
def dump_json(thing, file):
    """Serialize ``thing`` as JSON into the file at path ``file``.

    The file is created if missing, truncated otherwise, and written as UTF-8.
    """
    with open(file, mode='w+', encoding="utf8") as handle:
        json.dump(obj=thing, fp=handle)
84
+
85
def _upload_to_dataset(local_path, path_in_repo):
    """Upload one local file to the dataset repo at ``path_in_repo``."""
    return upload_file(
        path_or_fileobj=local_path,
        path_in_repo=path_in_repo,
        repo_id=DATASET_REPO_ID,
        repo_type='dataset',
        token=HF_TOKEN,
    )


def flag(Method, text_output, input_image):
    """Persist one OCR result locally, upload it, and notify the mail endpoint.

    Steps: look up the caller's IP and location, write the image and a
    metadata JSON file into a timestamped folder under ``LOCAL_DIR``, upload
    both files to the dataset repo under ``REPOSITORY_DIR``, refresh the
    local clone when one exists, and POST the record to the notification URL.

    Args:
        Method: Value stored under ``'method'`` in the metadata.
        text_output: Value stored under ``'generated_text'`` in the metadata.
        input_image: Array accepted by ``Image.fromarray`` that also provides
            ``.tolist()``.

    Returns:
        A fixed confirmation string.

    Raises:
        Exception: If the image cannot be written to disk. Errors from the
            uploads, the git pull and the notification request propagate.
    """
    print("saving data------------------------")

    ip_address = getIP()
    print("ip_address :", ip_address)
    location = get_location(ip_address)
    print("location :", location)

    # The timestamp (second resolution) doubles as record id and folder name.
    metadata_name = datetime.now().strftime('%Y-%m-%d %H-%M-%S')
    SAVE_FILE_DIR = os.path.join(LOCAL_DIR, metadata_name)
    os.makedirs(SAVE_FILE_DIR, exist_ok=True)
    image_output_filename = os.path.join(SAVE_FILE_DIR, 'image.png')
    print("image_output_filename :", image_output_filename)
    print(input_image)
    try:
        Image.fromarray(input_image).save(image_output_filename)
    except Exception as err:
        # Chain the cause so the underlying failure stays visible.
        raise Exception("Had issues saving np array image to file") from err

    # Write the metadata next to the image.
    json_file_path = os.path.join(SAVE_FILE_DIR, 'metadata.jsonl')
    metadata = {
        'id': metadata_name,
        'method': Method,
        'file_name': 'image.png',
        'generated_text': text_output,
        'ip': ip_address,
        'location': location,
    }
    dump_json(metadata, json_file_path)

    # Repo paths are joined with "/" explicitly so they do not depend on the
    # local OS path separator.
    repo_folder = f"{REPOSITORY_DIR}/{metadata_name}"
    _upload_to_dataset(image_output_filename, f"{repo_folder}/image.png")
    _upload_to_dataset(json_file_path, f"{repo_folder}/metadata.jsonl")

    # The module-level clone may have failed at startup; only pull when a
    # repository object actually exists.
    local_repo = globals().get("repo")
    if local_repo is not None:
        local_repo.git_pull()

    url = 'http://pragnakalpdev35.pythonanywhere.com/HF_space_image_to_text'
    myobj = {
        'Method': Method,
        'text_output': text_output,
        'img': input_image.tolist(),
        'ip_address': ip_address,
        'loc': location,
    }
    # Bounded wait so a stalled endpoint cannot hang the caller forever.
    x = requests.post(url, json=myobj, timeout=30)
    print("mail status code", x.status_code)

    return "*****Logs save successfully!!!!"