Spaces:
Sleeping
Sleeping
Upload 30 files
Browse files- .gitattributes +1 -0
- ReadMe.md +1 -0
- app.py +89 -0
- config.ini +5 -0
- data/Input/gadm41_DEU_0.json +0 -0
- data/Input/gadm41_DEU_1.json +0 -0
- data/Input/gadm41_DEU_2.json +0 -0
- data/Input/gadm41_DEU_3.json +0 -0
- data/Input/gadm41_DEU_4.json +3 -0
- data/Output/1.md +0 -0
- data/Temp/gadm41_DEU_1_geom +0 -0
- data/Temp/gadm41_DEU_1_main +17 -0
- doc/AddditionalInfo +11 -0
- doc/Product_Requirements_Document.md +107 -0
- doc/QuestionsTasks.md +30 -0
- doc/requirements.txt +120 -0
- main.py +77 -0
- main/GeoJSON_Bundesländer.py +75 -0
- modules/__init__.py +0 -0
- modules/__pycache__/__init__.cpython-311.pyc +0 -0
- modules/__pycache__/geojson_github_loader.cpython-311.pyc +0 -0
- modules/__pycache__/geojson_processor.cpython-311.pyc +0 -0
- modules/__pycache__/language_model.cpython-311.pyc +0 -0
- modules/find_neighbors.py +42 -0
- modules/geojson_github_loader.py +40 -0
- modules/geojson_processor.py +56 -0
- modules/getCountrycode.py +30 -0
- modules/gradio_testing.py +74 -0
- modules/language_model.py +34 -0
- modules/neighbors.py +47 -0
- requirements.txt +0 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
data/Input/gadm41_DEU_4.json filter=lfs diff=lfs merge=lfs -text
|
ReadMe.md
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
|
app.py
ADDED
@@ -0,0 +1,89 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
# App.py to launch the app via hugging face
|
3 |
+
#######################################################################################################
|
4 |
+
# IMPORT
|
5 |
+
#######################################################################################################
|
6 |
+
|
7 |
+
import pandas as pd
|
8 |
+
import geopandas as gpd
|
9 |
+
import os
|
10 |
+
from configparser import ConfigParser
|
11 |
+
import gradio as gr
|
12 |
+
|
13 |
+
# modules
|
14 |
+
from modules.geojson_github_loader import download_github_geojson
|
15 |
+
from modules.geojson_processor import geojson_processor_to_csv
|
16 |
+
from modules.language_model import TAPAS
|
17 |
+
|
18 |
+
|
19 |
+
#######################################################################################################
|
20 |
+
# CONFIG
|
21 |
+
#######################################################################################################
|
22 |
+
# Prints disabled!!
|
23 |
+
#print('\nCurrent Working Directory (CWD):\n' + os.getcwd())
|
24 |
+
|
25 |
+
# Load the settings from 'config.ini', which must sit in the current working
# directory at start-up.
config_object = ConfigParser()
if os.path.isfile('config.ini'):
    config_object.read('config.ini')
else:
    # quit() is a helper injected by the `site` module for interactive
    # sessions and exits with status 0; raising SystemExit with a message
    # reports the problem on stderr and exits with a non-zero status.
    raise SystemExit('No config file in the CWD')

# changing CWD and input output folders
os.chdir(config_object['CONFIG']['CWD'])

# Build the folder paths with os.path.join so the separator matches the host
# OS (a hard-coded '\\' only works on Windows).
DATA = os.path.join(os.getcwd(), config_object['CONFIG']['Input'])
OUT = os.path.join(os.getcwd(), config_object['CONFIG']['Output'])
TEMP = os.path.join(os.getcwd(), config_object['CONFIG']['Temp'])
|
39 |
+
|
40 |
+
|
41 |
+
#######################################################################################################
|
42 |
+
# Load and prepare Data
|
43 |
+
#######################################################################################################
|
44 |
+
|
45 |
+
# load github data
|
46 |
+
# attributes
|
47 |
+
github_user = "Giedeon25"
github_repo = "GID-Project"
file_path_github = "main/data/Input/gadm41_DEU_1.json"
# SECURITY: never commit a personal access token to source control. The token
# is read from the GITHUB_TOKEN environment variable instead; any token that
# was previously committed here must be treated as leaked and revoked.
token = os.environ.get("GITHUB_TOKEN", "")

# os.path.join picks the separator of the host OS instead of a fixed '\\'.
local_file_path = os.path.join(DATA, 'gadm41_DEU_1.json')

output_file = os.path.join(TEMP, 'gadm41_DEU_1')

# load locally
geojson_data = gpd.read_file(local_file_path)
|
58 |
+
|
59 |
+
|
60 |
+
#######################################################################################################
|
61 |
+
# LLM
|
62 |
+
#######################################################################################################
|
63 |
+
|
64 |
+
# attributes
|
65 |
+
question = 'what is the geometry of Saxony?'
# Read the pre-processed tables from the Temp folder; os.path.join keeps the
# paths valid on non-Windows hosts (the original '\\' separator did not).
# The main table is cast to str before being handed to TAPAS.
table_main = pd.read_csv(os.path.join(TEMP, 'gadm41_DEU_1_main')).astype(str)
table_geom = pd.read_csv(os.path.join(TEMP, 'gadm41_DEU_1_geom'))

# function
# Runs one query at import time; the return value is discarded.
TAPAS(question, table_main)
|
71 |
+
##################################################################################
|
72 |
+
# Function that enables testing
|
73 |
+
##################################################################################
|
74 |
+
def AskAI(ques, lv, table_main=table_main):
    """Answer a question against a table by delegating to TAPAS.

    Args:
        ques: The question; converted to ``str`` before being passed on.
        lv: Level indicator. It is converted with ``int()`` (so a
            non-numeric value raises) but is otherwise unused for now.
        table_main: Table handed to TAPAS. Defaults to the module-level
            ``table_main`` as it was bound when this function was defined.

    Returns:
        Whatever ``TAPAS`` returns for the question and table.
    """
    _level = int(lv)  # placeholder: validated, not used yet
    return TAPAS(question=str(ques), table_main=table_main)
|
79 |
+
|
80 |
+
def AskAI_easy(ques):
    """Answer a question using the main table re-read from the Temp folder.

    The table is loaded from disk on every call, cast to ``str`` and passed
    to ``AskAI`` with a fixed level of 1.

    Args:
        ques: The question text.

    Returns:
        The result of ``AskAI`` converted to ``str``.
    """
    # os.path.join instead of a hard-coded '\\' so the path also resolves on
    # non-Windows hosts.
    main_table = pd.read_csv(os.path.join(TEMP, 'gadm41_DEU_1_main')).astype(str)
    return str(AskAI(ques, 1, main_table))
|
84 |
+
#######################################################################################
|
85 |
+
# Gradio Interface
|
86 |
+
###############################################################################
|
87 |
+
# Wrap AskAI_easy in a text-in / text-out Gradio interface and launch it.
desc = 'Example: What is the geometry of Saxony?'
iface = gr.Interface(
    fn=AskAI_easy,
    inputs=['text'],
    outputs='text',
    description=desc,
)
iface.launch()
|
config.ini
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[CONFIG]
|
2 |
+
CWD = B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project
|
3 |
+
Input = data/Input
|
4 |
+
Output = data/Output
|
5 |
+
Temp = data/Temp
|
data/Input/gadm41_DEU_0.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
data/Input/gadm41_DEU_1.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
data/Input/gadm41_DEU_2.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
data/Input/gadm41_DEU_3.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
data/Input/gadm41_DEU_4.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:29d963239303a0a2513352e073cb3ea3cf3d4064aae9d0848265fce8b641b906
|
3 |
+
size 15240303
|
data/Output/1.md
ADDED
File without changes
|
data/Temp/gadm41_DEU_1_geom
ADDED
The diff for this file is too large to render.
See raw diff
|
|
data/Temp/gadm41_DEU_1_main
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
GID_1,GID_0,COUNTRY,NAME_1,VARNAME_1,NL_NAME_1,TYPE_1,ENGTYPE_1,CC_1,HASC_1,ISO_1,geometry,bbox,centroid,neighbors
|
2 |
+
DEU.1_1,DEU,Germany,Baden-Württemberg,NA,NA,Land,State,08,DE.BW,NA,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;0;0,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;0;1,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;0;2,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;0;3
|
3 |
+
DEU.2_1,DEU,Germany,Bayern,Bavaria,NA,Freistaat,FreeState,09,DE.BY,DE-BY,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;1;0,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;1;1,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;1;2,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;1;3
|
4 |
+
DEU.3_1,DEU,Germany,Berlin,NA,NA,Land,State,11,DE.BE,DE-BE,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;2;0,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;2;1,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;2;2,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;2;3
|
5 |
+
DEU.4_1,DEU,Germany,Brandenburg,NA,NA,Land,State,12,DE.BR,DE-BB,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;3;0,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;3;1,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;3;2,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;3;3
|
6 |
+
DEU.5_1,DEU,Germany,Bremen,NA,NA,FreieHansestadt,State,04,DE.HB,DE-HB,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;4;0,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;4;1,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;4;2,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;4;3
|
7 |
+
DEU.6_1,DEU,Germany,Hamburg,NA,NA,FreieundHansestadt,State,02,DE.HH,DE-HH,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;5;0,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;5;1,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;5;2,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;5;3
|
8 |
+
DEU.7_1,DEU,Germany,Hessen,Hesse,NA,Land,State,06,DE.HE,DE-HE,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;6;0,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;6;1,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;6;2,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;6;3
|
9 |
+
DEU.8_1,DEU,Germany,Mecklenburg-Vorpommern,Mecklenburg-WestPomerania,NA,Land,State,13,DE.MV,DE-MV,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;7;0,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;7;1,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;7;2,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;7;3
|
10 |
+
DEU.9_1,DEU,Germany,Niedersachsen,LowerSaxony,NA,Land,State,03,DE.NI,DE-NI,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;8;0,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;8;1,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;8;2,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;8;3
|
11 |
+
DEU.10_1,DEU,Germany,Nordrhein-Westfalen,NorthRhine-Westphalia,NA,Land,State,05,DE.NW,DE-NW,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;9;0,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;9;1,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;9;2,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;9;3
|
12 |
+
DEU.11_1,DEU,Germany,Rheinland-Pfalz,Rhineland-Palatinate,NA,Land,State,07,DE.RP,DE-RP,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;10;0,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;10;1,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;10;2,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;10;3
|
13 |
+
DEU.12_1,DEU,Germany,Saarland,NA,NA,Land,State,10,DE.SL,DE-SL,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;11;0,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;11;1,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;11;2,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;11;3
|
14 |
+
DEU.14_1,DEU,Germany,Sachsen,Saxony,NA,Freistaat,State,14,DE.SN,DE-SN,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;12;0,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;12;1,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;12;2,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;12;3
|
15 |
+
DEU.13_1,DEU,Germany,Sachsen-Anhalt,Saxony-Anhalt,NA,Land,State,15,DE.ST,DE-ST,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;13;0,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;13;1,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;13;2,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;13;3
|
16 |
+
DEU.15_1,DEU,Germany,Schleswig-Holstein,NA,NA,Land,State,01,DE.SH,DE-SH,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;14;0,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;14;1,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;14;2,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;14;3
|
17 |
+
DEU.16_1,DEU,Germany,Thüringen,Thuringia,NA,Freistaat,State,16,DE.TH,NA,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;15;0,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;15;1,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;15;2,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;15;3
|
doc/AddditionalInfo
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# GID-Project
|
2 |
+
|
3 |
+
## Useful Links
|
4 |
+
Link for the Google Docs Draft https://docs.google.com/document/d/1OVhDRL7Myb2vIdw5bqCs1LjLkiYP2ykXYvt_nIBa8oI/edit
|
5 |
+
|
6 |
+
- Maybe use spaCy --> detect different locations in the question --> create sub-questions
|
7 |
+
- Agenten für das suchen in mehreren Tabellen
|
8 |
+
|
9 |
+
Beispiel-Notebook für gradio: https://colab.research.google.com/drive/1uViopfIDRpAI5G28y0guzFgb6SWjjsT0?usp=sharing
|
10 |
+
|
11 |
+
Short Youtube intro about the deployment of an app on Hugging Face: https://www.youtube.com/watch?v=3bSVKNKb_PY
|
doc/Product_Requirements_Document.md
ADDED
@@ -0,0 +1,107 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# GID Project
|
2 |
+
|
3 |
+
## Product Requirement Document
|
4 |
+
|
5 |
+
### 1. Introduction
|
6 |
+
|
7 |
+
The purpose of this document is to outline the requirements for a project aimed at using a Language Model (LLM) and tabular data to answer questions regarding the geometry of Germany, its states, and cities. The project's goal is to create a user-friendly, educational tool that provides information about the geographical aspects of Germany.
|
8 |
+
|
9 |
+
### 2. Project Overview
|
10 |
+
|
11 |
+
#### 2.1. Project Name
|
12 |
+
|
13 |
+
The project is titled "we dont know yet"
|
14 |
+
|
15 |
+
#### 2.2. Project Description
|
16 |
+
|
17 |
+
wedontknowyet is an educational software application that leverages a Language Model (LLM) to provide detailed information about the geography of Germany, its states, and cities. Users can ask questions related to Germany's geographical features, such as the size, location, and topography of states and cities, and receive informative responses.
|
18 |
+
|
19 |
+
### 3. Key Features
|
20 |
+
|
21 |
+
The software project should include the following key features:
|
22 |
+
|
23 |
+
#### 3.1. User Interface
|
24 |
+
|
25 |
+
- A user-friendly interface that allows users to input questions or queries regarding Germany's geography.
|
26 |
+
- The option to input questions in natural language, ensuring a seamless user experience.
|
27 |
+
|
28 |
+
#### 3.2. Data Integration
|
29 |
+
|
30 |
+
- Integration with a comprehensive dataset containing information on Germany's states, cities, and geographical features.
|
31 |
+
- The ability to update and maintain the dataset to ensure accuracy.
|
32 |
+
|
33 |
+
#### 3.3. Question-Answer Functionality
|
34 |
+
|
35 |
+
- The software should be able to process and interpret user queries and questions.
|
36 |
+
- The LLM should provide accurate and informative responses based on the data from the integrated dataset.
|
37 |
+
|
38 |
+
#### 3.4. Geographical Data
|
39 |
+
|
40 |
+
- Detailed information on the states and cities of Germany, including size, population, topography, and any other relevant geographical details.
|
41 |
+
- Interactive maps that can display the locations of cities and states within Germany.
|
42 |
+
|
43 |
+
#### 3.5. User Assistance
|
44 |
+
|
45 |
+
- An option to provide explanations and context when a user requests further details about a specific topic.
|
46 |
+
- The ability to offer links or references to external sources for more in-depth information.
|
47 |
+
|
48 |
+
### 4. Functional Requirements
|
49 |
+
|
50 |
+
#### 4.1. Language Model Integration
|
51 |
+
|
52 |
+
- Integrate a language model to understand and process natural language queries.
|
53 |
+
|
54 |
+
#### 4.2. Data Integration
|
55 |
+
|
56 |
+
- Develop a data integration system that retrieves and maintains data related to Germany's states and cities.
|
57 |
+
- Ensure regular updates to the dataset to keep the information current.
|
58 |
+
|
59 |
+
#### 4.3. User Interaction
|
60 |
+
|
61 |
+
- Design a user interface that accepts natural language input from users.
|
62 |
+
- Implement a user-friendly system for submitting questions and queries.
|
63 |
+
|
64 |
+
#### 4.4. Question-Answer Functionality
|
65 |
+
|
66 |
+
- Develop an algorithm for processing and understanding user queries.
|
67 |
+
- Implement a system for generating informative responses using the integrated data and the language model.
|
68 |
+
|
69 |
+
#### 4.5. Data Presentation
|
70 |
+
|
71 |
+
- Create interactive and informative visual representations of Germany's geography using maps and other visual aids.
|
72 |
+
|
73 |
+
### 5. Non-Functional Requirements
|
74 |
+
|
75 |
+
#### 5.1. Performance
|
76 |
+
|
77 |
+
- The system should respond to user queries promptly, with minimal latency.
|
78 |
+
- It should be able to handle multiple user requests simultaneously.
|
79 |
+
|
80 |
+
#### 5.2. Security
|
81 |
+
|
82 |
+
- Implement security measures to protect user data and the integrity of the integrated dataset.
|
83 |
+
|
84 |
+
#### 5.3. Accessibility
|
85 |
+
|
86 |
+
- Ensure that the application is accessible to individuals with disabilities.
|
87 |
+
|
88 |
+
#### 5.4. Scalability
|
89 |
+
|
90 |
+
- Design the system in a way that allows for scalability to accommodate potential future enhancements.
|
91 |
+
|
92 |
+
### 6. Milestones
|
93 |
+
|
94 |
+
- **Milestone 1 (Month 1):** Data integration and initial UI design.
|
95 |
+
- **Milestone 2 (Month 2):** Language model integration and basic question-answering functionality.
|
96 |
+
- **Milestone 3 (Month 3):** User interface refinement and data presentation.
|
97 |
+
- **Milestone 4 (Month 4):** Performance optimization and security implementation.
|
98 |
+
- **Milestone 5 (Month 5):** Testing, user feedback, and final refinements.
|
99 |
+
- **Milestone 6 (Month 6):** Launch and ongoing maintenance.
|
100 |
+
|
101 |
+
### 7. Budget and Resources
|
102 |
+
|
103 |
+
The project will require access to the necessary hardware, software, and cloud services for hosting and running the application. Funding should be allocated for data acquisition and maintenance. Human resources will include software developers, data scientists, and UX/UI designers.
|
104 |
+
|
105 |
+
### 8. Conclusion
|
106 |
+
|
107 |
+
The "wedontknowyet" software project aims to provide a valuable educational resource for users interested in the geography of Germany, its states, and cities. By integrating a Language Model with tabular data, the software will enable users to obtain accurate and informative answers to their questions in a user-friendly manner. This project will enhance the understanding of Germany's geography and serve as a useful tool for students and enthusiasts.
|
doc/QuestionsTasks.md
ADDED
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Level 0: Basic requirements
|
2 |
+
|
3 |
+
- What is the geometry of X? --> (Simple) extract from 1-2 interconnected tables ==> most basic requirement!! (output: link to 1_NUTS)
|
4 |
+
- What is the country code of X? ... --> Questions which could be answered by extracting values from the data table
|
5 |
+
- What is the bounding box of X? --> BB simplifies many subsequent tasks; recommendation to add as a value to all objects!
|
6 |
+
|
7 |
+
|
8 |
+
# Level 1: Table Interconnection, Multi-Output & Searching
|
9 |
+
|
10 |
+
- In what NUTS+1 lies X? --> X is given but the level containing X is wanted as an output (Example: In which federal state is the city Dresden => Saxony)
|
11 |
+
- Which NUTSX regions start with [letter], ...? --> Query more like searching, requiring multiple outputs (Example, which NUTS0 region starts with I => Italy)
|
12 |
+
- Call all NUTS-1 from NUTS where PROPERTY matches. --> Combines the prior 2 questions to query based on parent NUTS (Example: all federal states from Germany that start with the letter B)
|
13 |
+
|
14 |
+
|
15 |
+
# Level 2: Simple Spatial Connection & Manipulation
|
16 |
+
|
17 |
+
- What borders X? --> Estimation based on the BB, then check for correct borders using predetermined NUTS (Example: What borders Sachsen => Brandenburg (Berlin), Thuringia, Bavaria, Sachsen-Anhalt)
|
18 |
+
- What NUTS is between X and Y? --> Use BB for simplification/preselection (What federal state is between Hessen and Saxony => Thuringia, Bavaria, Lower-Saxony)
|
19 |
+
- Reproject X into PROJECTION (Reproject the Geometry of Saxony in UTM)
|
20 |
+
|
21 |
+
|
22 |
+
|
23 |
+
# Level 3: Spatial calculation // User specification
|
24 |
+
***(This demands a higher level of user specification)***
|
25 |
+
- What is CARDINAL from X? --> Need to extract the BB and check a multitude of NUTS for relation to X (Example: What is north of Saxony? => Brandenburg, Berlin, Mecklenburg)
|
26 |
+
-> Define what distance counts as "CARDINAL"; Combination of different NUTS-levels?
|
27 |
+
- Simplify geometry of X --> Load geometry of X,
|
28 |
+
- Degree of simplification? (Example question: simplify the geometry of Saxony, similar to [GeoJSON Utilities](http://opendatalab.de/projects/geojson-utilities/)?)
|
29 |
+
|
30 |
+
---
|
doc/requirements.txt
ADDED
@@ -0,0 +1,120 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
aiofiles @ file:///home/conda/feedstock_root/build_artifacts/aiofiles_1698945915105/work
|
2 |
+
altair @ file:///home/conda/feedstock_root/build_artifacts/altair_1696364485230/work
|
3 |
+
annotated-types @ file:///home/conda/feedstock_root/build_artifacts/annotated-types_1696634205638/work
|
4 |
+
anyio @ file:///home/conda/feedstock_root/build_artifacts/anyio_1688651106312/work/dist
|
5 |
+
appnope==0.1.3
|
6 |
+
archspec @ file:///croot/archspec_1697725767277/work
|
7 |
+
asttokens==2.4.1
|
8 |
+
attrs @ file:///home/conda/feedstock_root/build_artifacts/attrs_1683424013410/work
|
9 |
+
boltons @ file:///Users/cbousseau/work/recipes/ci_py311/boltons_1677965141748/work
|
10 |
+
Brotli @ file:///Users/cbousseau/work/recipes/ci_py311/brotli-split_1677936346777/work
|
11 |
+
certifi==2023.7.22
|
12 |
+
cffi @ file:///Users/cbousseau/work/recipes/ci_py311/cffi_1677903595907/work
|
13 |
+
charset-normalizer @ file:///tmp/build/80754af9/charset-normalizer_1630003229654/work
|
14 |
+
click @ file:///home/conda/feedstock_root/build_artifacts/click_1692311806742/work
|
15 |
+
colorama @ file:///home/conda/feedstock_root/build_artifacts/colorama_1666700638685/work
|
16 |
+
comm==0.2.0
|
17 |
+
conda @ file:///Users/runner/miniforge3/conda-bld/conda_1699392642856/work
|
18 |
+
conda-content-trust @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_5324skqvu9/croot/conda-content-trust_1693490622873/work
|
19 |
+
conda-libmamba-solver @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_4egpn3sd7i/croot/conda-libmamba-solver_1698961807382/work/src
|
20 |
+
conda-package-handling @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_fc4cx8vjhj/croot/conda-package-handling_1690999937094/work
|
21 |
+
conda_package_streaming @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_aecpaup22q/croot/conda-package-streaming_1690987978274/work
|
22 |
+
contourpy==1.2.0
|
23 |
+
cryptography @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_905z2r5rpq/croot/cryptography_1694211573866/work
|
24 |
+
cycler @ file:///home/conda/feedstock_root/build_artifacts/cycler_1696677705766/work
|
25 |
+
decorator==5.1.1
|
26 |
+
exceptiongroup @ file:///home/conda/feedstock_root/build_artifacts/exceptiongroup_1692026125334/work
|
27 |
+
executing==2.0.1
|
28 |
+
fastapi @ file:///home/conda/feedstock_root/build_artifacts/fastapi_1698674661010/work
|
29 |
+
ffmpy @ file:///home/conda/feedstock_root/build_artifacts/ffmpy_1659474992694/work
|
30 |
+
filelock @ file:///home/conda/feedstock_root/build_artifacts/filelock_1698714947081/work
|
31 |
+
fonttools @ file:///Users/runner/miniforge3/conda-bld/fonttools_1700143150339/work
|
32 |
+
fsspec @ file:///home/conda/feedstock_root/build_artifacts/fsspec_1697919321618/work
|
33 |
+
gradio @ file:///home/conda/feedstock_root/build_artifacts/gradio_1699955356441/work
|
34 |
+
gradio_client @ file:///home/conda/feedstock_root/build_artifacts/gradio-client_1698767845356/work
|
35 |
+
h11 @ file:///home/conda/feedstock_root/build_artifacts/h11_1664132893548/work
|
36 |
+
h2 @ file:///home/conda/feedstock_root/build_artifacts/h2_1634280454336/work
|
37 |
+
hpack==4.0.0
|
38 |
+
httpcore @ file:///home/conda/feedstock_root/build_artifacts/httpcore_1699629103338/work
|
39 |
+
httpx @ file:///home/conda/feedstock_root/build_artifacts/httpx_1699030327261/work
|
40 |
+
huggingface-hub @ file:///home/conda/feedstock_root/build_artifacts/huggingface_hub_1700152335477/work
|
41 |
+
hyperframe @ file:///home/conda/feedstock_root/build_artifacts/hyperframe_1619110129307/work
|
42 |
+
idna @ file:///Users/cbousseau/work/recipes/ci_py311/idna_1677906072337/work
|
43 |
+
importlib-metadata @ file:///home/conda/feedstock_root/build_artifacts/importlib-metadata_1688754491823/work
|
44 |
+
importlib-resources @ file:///home/conda/feedstock_root/build_artifacts/importlib_resources_1699364556997/work
|
45 |
+
ipympl==0.9.3
|
46 |
+
ipython==8.17.2
|
47 |
+
ipython-genutils==0.2.0
|
48 |
+
ipywidgets==8.1.1
|
49 |
+
jedi==0.19.1
|
50 |
+
Jinja2 @ file:///home/conda/feedstock_root/build_artifacts/jinja2_1654302431367/work
|
51 |
+
joblib==1.3.2
|
52 |
+
jsonpatch @ file:///tmp/build/80754af9/jsonpatch_1615747632069/work
|
53 |
+
jsonpointer==2.1
|
54 |
+
jsonschema @ file:///home/conda/feedstock_root/build_artifacts/jsonschema-meta_1700159890288/work
|
55 |
+
jsonschema-specifications @ file:///home/conda/feedstock_root/build_artifacts/jsonschema-specifications_1700059145511/work
|
56 |
+
jupyterlab-widgets==3.0.9
|
57 |
+
kiwisolver==1.4.5
|
58 |
+
laspy @ file:///Users/runner/miniforge3/conda-bld/laspy_1699607310769/work
|
59 |
+
lazrs @ file:///Users/runner/miniforge3/conda-bld/lazrs-python_1698372679597/work
|
60 |
+
libmambapy @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_59l2npsw_8/croot/mamba-split_1698782625405/work/libmambapy
|
61 |
+
markdown-it-py @ file:///home/conda/feedstock_root/build_artifacts/markdown-it-py_1686175045316/work
|
62 |
+
MarkupSafe @ file:///Users/runner/miniforge3/conda-bld/markupsafe_1695367660391/work
|
63 |
+
matplotlib @ file:///Users/runner/miniforge3/conda-bld/matplotlib-suite_1678135673869/work
|
64 |
+
matplotlib-inline==0.1.6
|
65 |
+
mdurl @ file:///home/conda/feedstock_root/build_artifacts/mdurl_1639515908913/work
|
66 |
+
munkres==1.1.4
|
67 |
+
numpy @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_362zs5g963/croot/numpy_and_numpy_base_1695830450707/work/dist/numpy-1.26.0-cp311-cp311-macosx_11_0_arm64.whl#sha256=35f9bcbdc8071f8981937b450fba07496cbb1e0d2a724e1d0619e6e714b42590
|
68 |
+
orjson @ file:///Users/runner/miniforge3/conda-bld/orjson_1698619146121/work/target/wheels/orjson-3.9.10-cp311-cp311-macosx_11_0_arm64.whl#sha256=655d5a2e944e3ebde9e30989514201f288ed152b82cc50d789ec03998bac369f
|
69 |
+
packaging @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_6dm6d4jd_t/croot/packaging_1693575176524/work
|
70 |
+
pandas==2.0.0
|
71 |
+
parso==0.8.3
|
72 |
+
pexpect==4.8.0
|
73 |
+
Pillow @ file:///Users/runner/miniforge3/conda-bld/pillow_1684654235906/work
|
74 |
+
pkgutil_resolve_name @ file:///home/conda/feedstock_root/build_artifacts/pkgutil-resolve-name_1694617248815/work
|
75 |
+
pluggy @ file:///Users/cbousseau/work/recipes/ci_py311/pluggy_1677906980825/work
|
76 |
+
prompt-toolkit==3.0.41
|
77 |
+
ptyprocess==0.7.0
|
78 |
+
pure-eval==0.2.2
|
79 |
+
pycosat @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_3eg8vdcs6z/croot/pycosat_1696536519213/work
|
80 |
+
pycparser @ file:///tmp/build/80754af9/pycparser_1636541352034/work
|
81 |
+
pydantic @ file:///home/conda/feedstock_root/build_artifacts/pydantic_1700171233545/work
|
82 |
+
pydantic_core @ file:///Users/runner/miniforge3/conda-bld/pydantic-core_1700010143722/work
|
83 |
+
pydub @ file:///home/conda/feedstock_root/build_artifacts/pydub_1615612442567/work
|
84 |
+
Pygments @ file:///home/conda/feedstock_root/build_artifacts/pygments_1691408637400/work
|
85 |
+
pyOpenSSL @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_b8whqav6qm/croot/pyopenssl_1690223428943/work
|
86 |
+
pyparsing @ file:///home/conda/feedstock_root/build_artifacts/pyparsing_1690737849915/work
|
87 |
+
PySocks @ file:///Users/cbousseau/work/recipes/ci_py311/pysocks_1677906386870/work
|
88 |
+
python-dateutil @ file:///home/conda/feedstock_root/build_artifacts/python-dateutil_1626286286081/work
|
89 |
+
python-multipart @ file:///home/conda/feedstock_root/build_artifacts/python-multipart_1679167423335/work
|
90 |
+
pytz @ file:///home/conda/feedstock_root/build_artifacts/pytz_1693930252784/work
|
91 |
+
PyYAML @ file:///Users/runner/miniforge3/conda-bld/pyyaml_1695373486380/work
|
92 |
+
referencing @ file:///home/conda/feedstock_root/build_artifacts/referencing_1700053204647/work
|
93 |
+
requests @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_54zi68h2nb/croot/requests_1690400233316/work
|
94 |
+
rich @ file:///home/conda/feedstock_root/build_artifacts/rich-split_1700160075651/work/dist
|
95 |
+
rpds-py @ file:///Users/runner/miniforge3/conda-bld/rpds-py_1700156534986/work
|
96 |
+
ruamel.yaml @ file:///Users/cbousseau/work/recipes/ci_py311/ruamel.yaml_1677934845850/work
|
97 |
+
scikit-learn==1.3.2
|
98 |
+
scipy==1.11.3
|
99 |
+
semantic-version @ file:///home/conda/feedstock_root/build_artifacts/semantic_version_1653579368137/work
|
100 |
+
shellingham @ file:///home/conda/feedstock_root/build_artifacts/shellingham_1698144360966/work
|
101 |
+
six @ file:///home/conda/feedstock_root/build_artifacts/six_1620240208055/work
|
102 |
+
sniffio @ file:///home/conda/feedstock_root/build_artifacts/sniffio_1662051266223/work
|
103 |
+
stack-data==0.6.3
|
104 |
+
starlette @ file:///home/conda/feedstock_root/build_artifacts/starlette-recipe_1684245096404/work
|
105 |
+
threadpoolctl==3.2.0
|
106 |
+
tomlkit @ file:///home/conda/feedstock_root/build_artifacts/tomlkit_1690458286251/work
|
107 |
+
toolz @ file:///home/conda/feedstock_root/build_artifacts/toolz_1657485559105/work
|
108 |
+
tqdm @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_ac7zic_tin/croot/tqdm_1679561870178/work
|
109 |
+
traitlets==5.13.0
|
110 |
+
truststore @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_42mm7e6j06/croot/truststore_1695244298716/work
|
111 |
+
typer @ file:///home/conda/feedstock_root/build_artifacts/typer_1683029246636/work
|
112 |
+
typing_extensions @ file:///home/conda/feedstock_root/build_artifacts/typing_extensions_1695040754690/work
|
113 |
+
tzdata @ file:///home/conda/feedstock_root/build_artifacts/python-tzdata_1680081134351/work
|
114 |
+
urllib3==1.26.6
|
115 |
+
uvicorn @ file:///Users/runner/miniforge3/conda-bld/uvicorn-split_1699219080682/work
|
116 |
+
wcwidth==0.2.10
|
117 |
+
websockets @ file:///Users/runner/miniforge3/conda-bld/websockets_1695410063212/work
|
118 |
+
widgetsnbextension==4.0.9
|
119 |
+
zipp @ file:///home/conda/feedstock_root/build_artifacts/zipp_1695255097490/work
|
120 |
+
zstandard @ file:///Users/cbousseau/work/recipes/ci_py311_2/zstandard_1678996192313/work
|
main.py
ADDED
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#######################################################################################################
|
2 |
+
# IMPORT
|
3 |
+
#######################################################################################################
|
4 |
+
|
5 |
+
import pandas as pd
|
6 |
+
import geopandas as gpd
|
7 |
+
import os
|
8 |
+
from configparser import ConfigParser
|
9 |
+
|
10 |
+
# modules
|
11 |
+
from modules.geojson_github_loader import download_github_geojson
|
12 |
+
from modules.geojson_processor import geojson_processor_to_csv
|
13 |
+
from modules.language_model import TAPAS
|
14 |
+
|
15 |
+
#######################################################################################################
|
16 |
+
# CONFIG
|
17 |
+
#######################################################################################################
|
18 |
+
|
19 |
+
print('\nCurrent Working Directory (CWD):\n' + os.getcwd())

config_object = ConfigParser()
# ConfigParser.read() silently ignores missing files, so test for the file explicitly.
if os.path.isfile('config.ini'):
    config_object.read('config.ini')
    print('Setting have been imported from the config file.')
else:
    print('No config file in the CWD')
    # quit() is only injected by the `site` module; SystemExit always works
    # and a non-zero status tells the caller that the run failed.
    raise SystemExit(1)

# changing CWD and input output folders
os.chdir(config_object['CONFIG']['CWD'])

# os.path.join keeps the paths valid on every platform
# (joining with a literal backslash only works on Windows).
DATA = os.path.join(os.getcwd(), config_object['CONFIG']['Input'])
OUT = os.path.join(os.getcwd(), config_object['CONFIG']['Output'])
TEMP = os.path.join(os.getcwd(), config_object['CONFIG']['Temp'])


#######################################################################################################
# Load and prepare Data
#######################################################################################################

# load github data
# attributes
github_user = "Giedeon25"
github_repo = "GID-Project"
file_path_github = "main/data/Input/gadm41_DEU_1.json"
# SECURITY: credentials must never be committed. The personal access token is
# read from the environment; the token that used to be hard-coded here has been
# published and has to be revoked on GitHub.
token = os.environ.get('GITHUB_TOKEN')

local_file_path = os.path.join(DATA, 'gadm41_DEU_1.json')

output_file = os.path.join(TEMP, 'gadm41_DEU_1')

# load from GitHub
if token:
    geojson_data = download_github_geojson(github_user, github_repo, file_path_github, token)
else:
    print('GITHUB_TOKEN is not set; skipping the download from GitHub.')

# load locally
# NOTE(review): the local copy replaces whatever was downloaded above, exactly
# as before - confirm which of the two sources is meant to win.
geojson_data = gpd.read_file(local_file_path)
print(geojson_data.head())

# convert and save data
geojson_processor_to_csv(geojson_data, output_file)


#######################################################################################################
# LLM
#######################################################################################################

# attributes
question = 'what is the geometry of Saxony?'
# TAPAS is fed a table of strings, hence astype(str).
table_main = pd.read_csv(os.path.join(TEMP, 'gadm41_DEU_1_main')).astype(str)

# function
answer = TAPAS(question, table_main)
print(answer)
|
main/GeoJSON_Bundesländer.py
ADDED
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Imports --------------------------------------------------------------------------------------------------
|
2 |
+
# https://huggingface.co/tasks/table-question-answering
|
3 |
+
|
4 |
+
from transformers import pipeline
|
5 |
+
import pandas as pd
|
6 |
+
import time
|
7 |
+
import math
|
8 |
+
import os
|
9 |
+
|
10 |
+
# Small snippet to retrieve coordinates from a geojson file
|
11 |
+
import requests
|
12 |
+
import json
|
13 |
+
|
14 |
+
|
15 |
+
# Set the working directory.
# The project root can be supplied through the GID_PROJECT_DIR environment
# variable; the original developer path is kept as the fallback.
os.chdir(os.environ.get(
    'GID_PROJECT_DIR',
    'C:/Users/Jens_/Documents/Unterlagen/Studium Dresden/2. Semester/GIT06/GID-Project',
))


# GADM Germany example polygons ------------------------------------------------------------------------
# Preparation
# Load the data. The `with` block closes the file on exit, so no explicit close
# is needed (os.close() expects a file descriptor and raised TypeError for a path).
with open('Daten/GeoJSON/gadm41_DEU_1.json', 'r', encoding='utf-8') as json_datei:
    daten = json.load(json_datei)
table = pd.DataFrame.from_dict(daten["features"])


# Bring the data into a suitable format (pandas DataFrame):
# expand the "properties" dicts into real columns of the table.
prop = pd.DataFrame(list(table['properties']))
del table['properties']
table = pd.concat([table, prop], axis=1)

# Keep the coordinates aside and replace them by their row index in the table
# handed to the model (keeps the computation time down).
geom = table["geometry"]
table["geometry"] = [str(i) for i in range(table.shape[0])]
print(table)


# Model
tqa = pipeline(task="table-question-answering", model="google/tapas-large-finetuned-wtq")


# Question
question = 'give me the geometry of england'


# Computation
t = round(time.time())  # measure the computation time
cell = tqa(table=table, query=question)['cells'][0]

try:
    # A geometry cell holds the row index written above.
    i = int(cell)
    answer = geom[i]
except (ValueError, KeyError):
    # Wrong column: the model did not return a usable row index.
    i = None
    answer = 'answer: ' + str(cell)

t = round(time.time()) - t
hours, remainder = divmod(t, 3600)
minutes, seconds = divmod(remainder, 60)
t = f"{hours}::{minutes}::{seconds}"


# Verification
print(answer)
if i is not None:
    print(table.iloc[i])
print(t)  # ~2 sec
|
67 |
+
|
68 |
+
|
69 |
+
|
70 |
+
|
71 |
+
|
72 |
+
# Target of the question, places mentioned in the question
|
73 |
+
|
74 |
+
# How can several federal states (named in the question) be returned?
|
75 |
+
# How can different "operations" (between, all, etc.) be recognised in the question?
|
modules/__init__.py
ADDED
File without changes
|
modules/__pycache__/__init__.cpython-311.pyc
ADDED
Binary file (217 Bytes). View file
|
|
modules/__pycache__/geojson_github_loader.cpython-311.pyc
ADDED
Binary file (1.58 kB). View file
|
|
modules/__pycache__/geojson_processor.cpython-311.pyc
ADDED
Binary file (3.65 kB). View file
|
|
modules/__pycache__/language_model.cpython-311.pyc
ADDED
Binary file (1.43 kB). View file
|
|
modules/find_neighbors.py
ADDED
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
import shapely
|
3 |
+
|
4 |
+
def get_neighbors(geometries):
    """
    Find neighbors for all geometries.

    Two geometries count as neighbors when they intersect.

    Args:
        geometries (list | pandas.Series): shapely Polygons/MultiPolygons or
            WKT strings describing them; both kinds may be mixed.
    Returns:
        list[list[int]]: one list per input geometry holding the positions
            (0-based) of all other geometries that intersect it.
    Raises:
        ValueError: if geometries is not a list/Series or contains any other
            kind of element.
    """

    # Check the container first so that a wrong container type is reported
    # before its elements are inspected.
    if not isinstance(geometries, (list, pd.Series)) or not all(
        isinstance(geom, (shapely.MultiPolygon, shapely.Polygon, str)) for geom in geometries
    ):
        raise ValueError("geometries must be a list with shapely.MultiPolygons/shapely.Polygon or strings of MultiPolygons/Polygons")

    # Normalise to a plain list of shapely objects: this works for lists (which
    # have no .shape), for Series with any index and for empty input, and it
    # parses WKT strings one by one.
    shapes = [shapely.from_wkt(geom) if isinstance(geom, str) else geom for geom in geometries]

    return [
        [j for j, other in enumerate(shapes) if j != i and shape.intersects(other)]
        for i, shape in enumerate(shapes)
    ]
|
modules/geojson_github_loader.py
ADDED
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
import requests
|
3 |
+
import geopandas as gpd
|
4 |
+
|
5 |
+
def download_github_geojson(github_user, repository, file_path, token=None, timeout=30):
    """
    Load GeoJSON data from a GitHub repository.

    Args:
        github_user (str): GitHub username.
        repository (str): GitHub repository name.
        file_path (str): Path of the GeoJSON file in the repository, appended
            to the raw URL as given.
        token (str, optional): Personal access token sent in the Authorization
            header. No header is sent when it is empty or None.
        timeout (float, optional): Seconds to wait for the server.

    Returns:
        geopandas.GeoDataFrame | None: The loaded GeoJSON data, or None when
            the request failed or did not answer with status 200.
    """
    import io

    # headers with personal access token (only when one was supplied)
    headers = {"Authorization": f"token {token}"} if token else {}

    # Create a URL to the raw GeoJSON file in the repository
    raw_url = f"https://raw.githubusercontent.com/{github_user}/{repository}/{file_path}"

    print(f"Debug: raw_url = {raw_url}") # Debugging line

    # Make a GET request to the URL. Without a timeout the call can block
    # forever; network errors are reported like any other failed download.
    try:
        response = requests.get(raw_url, headers=headers, timeout=timeout)
    except requests.RequestException as error:
        print(f"Failed to retrieve GeoJSON data. Request error: {error}")
        return None

    if response.status_code != 200:
        print(f"Failed to retrieve GeoJSON data. Status code: {response.status_code}")
        return None

    # Parse the GeoJSON data from an in-memory file object, so the payload is
    # never mistaken for a file name.
    geojson_data = gpd.read_file(io.BytesIO(response.content))
    print("File loaded succesfully.")
    print(geojson_data.head())
    return geojson_data
|
modules/geojson_processor.py
ADDED
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
import geopandas as gpd
|
3 |
+
|
4 |
+
def geojson_processor_to_csv(geojson_data, output_file):
    """
    Process GeoJSON data and save it as two CSV files.

    `<output_file>_geom` holds one row per feature with the columns geometry,
    bbox, centroid and neighbors. `<output_file>_main` holds the attribute
    table, in which these four columns contain references of the form
    "<output_file>_geom;<row>;<column>" into the geometry file.

    Args:
        geojson_data (geopandas.GeoDataFrame): Loaded GeoJSON features with a
            "geometry" column. The frame itself is left unchanged.
        output_file (str): Path prefix of the two output files.
    Returns:
        str: The status message 'Processing done and saved'.
    """

    # split geometries from the DataFrame to reduce processing time in later steps
    geom_df = gpd.GeoDataFrame(geojson_data["geometry"], crs="EPSG:4326")

    # get geometry bounds
    df_bounds = geom_df['geometry'].bounds

    # create bbox column from bounds
    geom_df['bbox'] = list(zip(df_bounds['minx'], df_bounds['miny'], df_bounds['maxx'], df_bounds['maxy']))
    print('bboxes added to df.')

    # calculate geometry centroids
    geom_df['centroid'] = geom_df['geometry'].centroid
    print('centroids added to df.')

    # find neighbors: for every feature, the index labels of all other features
    # whose geometry touches it. geom_df only carries geometry columns, so the
    # index is the one identifier available here.
    geometries = geom_df['geometry']
    geom_df['neighbors'] = [
        [
            other_index
            for other_index, other_geometry in geometries.items()
            if other_index != index and geometry.touches(other_geometry)
        ]
        for index, geometry in geometries.items()
    ]

    # save df as csv
    geom_file = output_file + '_geom'
    geom_df.to_csv(geom_file, index=False)
    print('geometry file saved')

    # Replace each geometry-related column by "<file>;<row>;<column>" references.
    # Work on a copy so the caller's frame is not modified, and take the column
    # positions from geom_df so the references always match the saved file.
    main_df = geojson_data.copy()
    row_count = main_df.shape[0]
    for column_position, column in enumerate(geom_df.columns):
        main_df[column] = [f"{geom_file};{i};{column_position}" for i in range(row_count)]

    # save df as csv
    main_df.to_csv(output_file + '_main', index=False)
    print('main file saved.')

    return 'Processing done and saved'
|
55 |
+
|
56 |
+
|
modules/getCountrycode.py
ADDED
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import geopandas as gpd
|
2 |
+
|
3 |
+
# Sample DataFrame according to the actual structure (use your own data)
data = {
    'GID_1': ['DEU.1_1', 'DEU.2_1'],
    'GID_0': ['DEU', 'DEU'],
    'COUNTRY': ['Germany', 'Germany'],
    'NAME_1': ['Baden-Württemberg', 'Bayern'],
    'VARNAME_1': ['NA', 'Bavaria'],
    'NL_NAME_1': ['NA', 'NA'],
    'TYPE_1': ['Land', 'Freistaat'],
    'ENGTYPE_1': ['State', 'Freestate'],
    'CC_1': ['08', '09'],
    # Extra for subnational counties (https://de.wikipedia.org/wiki/Hierarchical_administrative_subdivision_codes)
    'HASC_1': ['DE.BW', 'DE.BY'],
    # International order --> check first (https://de.wikipedia.org/wiki/ISO_3166)
    'ISO_1': ['NA', 'DE-BY'],
    # Placeholder values standing in for the real geometries.
    'geometry': [0, 1],
}
gdf = gpd.GeoDataFrame(data)
|
18 |
+
|
19 |
+
# function to generate output
|
20 |
+
# Land should be a line from the geojson-table
|
21 |
+
# Currently only works for NUTS-1 areas!!!
|
22 |
+
|
23 |
+
def getCountrycode(land):
    """
    Return the code of a first-level administrative area.

    The ISO code ('ISO_1') is preferred; the HASC code ('HASC_1') is the
    fallback. A value counts as missing when it is not a string, is empty or
    is the placeholder 'NA' - for both columns alike.

    Args:
        land: One row of the table (anything indexable by column name).
    Returns:
        str | bool: The code, or False when neither column holds one.
    """
    for column in ('ISO_1', 'HASC_1'):
        code = land[column]
        if isinstance(code, str) and code and code != 'NA':
            return code
    return False
|
30 |
+
|
modules/gradio_testing.py
ADDED
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Import
|
2 |
+
import geopandas as gpd
|
3 |
+
|
4 |
+
# Sample DataFrame according to the actual structure (use your own data)
data = {
    'GID_1': ['DEU.1_1', 'DEU.2_1'],
    'GID_0': ['DEU', 'DEU'],
    'COUNTRY': ['Germany', 'Germany'],
    'NAME_1': ['Baden-Württemberg', 'Bayern'],
    'VARNAME_1': ['NA', 'Bavaria'],
    'NL_NAME_1': ['NA', 'NA'],
    'TYPE_1': ['Land', 'Freistaat'],
    'ENGTYPE_1': ['State', 'Freestate'],
    'CC_1': ['08', '09'],
    # Extra for subnational counties (https://de.wikipedia.org/wiki/Hierarchical_administrative_subdivision_codes)
    'HASC_1': ['DE.BW', 'DE.BY'],
    # International order --> check first (https://de.wikipedia.org/wiki/ISO_3166)
    'ISO_1': ['NA', 'DE-BY'],
    # Placeholder values standing in for the real geometries.
    'geometry': [0, 1],
}
gdf = gpd.GeoDataFrame(data)
|
19 |
+
|
20 |
+
|
21 |
+
def getLand(landnr):
    """
    Return one row of the sample table.

    Args:
        landnr: Position of the row; converted with int(), so the float
            delivered by a numeric input field is accepted as well.
    Returns:
        pandas.Series: The row at that position of the module-level `gdf`.
    Raises:
        IndexError: if the position is outside the table.
    """
    # Test data: use the module-level sample table instead of rebuilding an
    # identical copy of it on every call.
    return gdf.iloc[int(landnr)]
|
40 |
+
|
41 |
+
# function to generate output
|
42 |
+
# Land should be a line from the geojson-table
|
43 |
+
# Currently only works for NUTS-1 areas!!!
|
44 |
+
|
45 |
+
def getCountrycode(land, level = 1):
    """
    Return the code of an administrative area.

    The ISO code ('ISO_<level>') is preferred; the HASC code ('HASC_<level>')
    is the fallback. A value counts as missing when it is not a string, is
    empty or is the placeholder 'NA' - for both columns alike.

    Args:
        land: One row of the table (anything indexable by column name).
        level: Administrative level used to build the column names.
    Returns:
        str | bool: The code, or False when neither column holds one.
    """
    for prefix in ('ISO_', 'HASC_'):
        code = land[prefix + str(level)]
        if isinstance(code, str) and code and code != 'NA':
            return str(code)
    return False
|
55 |
+
|
56 |
+
|
57 |
+
def grad_Country(landnr):
    """
    Gradio callback: return the country code of the row at position landnr.

    Args:
        landnr: Row position as delivered by the numeric input field.
    Returns:
        str: The code found by getCountrycode (as text), or a short message
            when landnr is empty, not a number or outside the table.
    """
    try:
        land = getLand(landnr)
    except (TypeError, ValueError, IndexError):
        # An empty or out-of-range input must not crash the UI callback.
        return 'Unknown land number: ' + str(landnr)
    kuerzel = getCountrycode(land)
    return str(kuerzel)
|
61 |
+
|
62 |
+
|
63 |
+
#'''
|
64 |
+
import gradio as gr
|
65 |
+
|
66 |
+
def greet(name):
    """Return a greeting for name (a string)."""
    return "Hello " + name + "!!"
|
68 |
+
|
69 |
+
# Web interface: a number input (row position) mapped to the country code as text.
iface = gr.Interface(fn=grad_Country, inputs="number", outputs="text")

# Only start the server when the file is run as a script, not on import.
if __name__ == '__main__':
    iface.launch()
|
74 |
+
#'''
|
modules/language_model.py
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from transformers import pipeline
|
2 |
+
import pandas as pd
|
3 |
+
|
4 |
+
# Pipelines that were already created, keyed by model name. Building one loads
# the whole model, so it is done once and reused by later calls.
_TQA_PIPELINES = {}


def _get_tqa_pipeline(model="google/tapas-large-finetuned-wtq"):
    """Return the table-question-answering pipeline for model, creating it on first use."""
    if model not in _TQA_PIPELINES:
        _TQA_PIPELINES[model] = pipeline(task="table-question-answering", model=model)
    return _TQA_PIPELINES[model]


def TAPAS(question, table_main):
    """
    Answer a question about the main table with a TAPAS pipeline.

    Cells of the form "<path>;<row>;<column>" are references into a CSV file.
    When the model selects such a cell, the referenced value is read from that
    file and returned instead of the reference itself.

    Args:
        question (str): the question.
        table_main (df): main table
    Returns:
        answer: the referenced value for reference cells, otherwise the first
            selected cell as a string ('' when no cell was selected).
    """

    # set up a TAPAS pipeline for table-based question answering
    tqa = _get_tqa_pipeline()

    # use the tqa pipeline to perform table-based question answering.
    cells = tqa(table=table_main, query=question)['cells']
    if not cells:
        return ''
    cell = cells[0]

    # Check if the output is the link to the TEMP DB: only a cell that ends in
    # two numeric fields is a reference; any other text that merely contains a
    # ';' is an ordinary answer. Splitting from the right keeps a ';' inside
    # the path intact.
    parts = cell.rsplit(';', 2)
    if len(parts) == 3 and parts[1].isdigit() and parts[2].isdigit():
        path, row, column = parts[0], int(parts[1]), int(parts[2])
        answer_table = pd.read_csv(path)
        return answer_table.iloc[row, column]

    return str(cell)
|
modules/neighbors.py
ADDED
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import geopandas as gpd
|
2 |
+
from shapely.geometry import Polygon
|
3 |
+
|
4 |
+
# Sample DataFrame with geometries (use your own data): three squares in a row
data = {
    'ID': [1, 2, 3],
    'geometry': [
        Polygon([(0, 0), (0, 2), (2, 2), (2, 0)]),
        Polygon([(2, 0), (2, 2), (4, 2), (4, 0)]),
        Polygon([(4, 0), (4, 2), (6, 2), (6, 0)]),
    ],
}
gdf = gpd.GeoDataFrame(data, crs="EPSG:4326")

# Create a new column to store the neighboring IDs: for every row, the IDs of
# all other rows whose geometry touches it.
gdf['neighbors'] = [
    [
        other_row['ID']
        for other_index, other_row in gdf.iterrows()
        if index != other_index and row['geometry'].touches(other_row['geometry'])
    ]
    for index, row in gdf.iterrows()
]

# Display the DataFrame with neighbors
print(gdf[['ID', 'neighbors']])
|
24 |
+
|
25 |
+
|
26 |
+
|
27 |
+
def find_neighbors(geom_df):
    """
    Add a 'neighbors' column to geom_df and return the frame.

    Args:
        geom_df: Frame with an 'ID' and a 'geometry' column. It is modified
            in place.
    Returns:
        The same frame; 'neighbors' holds, per row, the list built by
        find_single_neighbors.
    """
    # A plain list of lists always becomes one object column, also for an empty
    # frame - DataFrame.apply gives no such guarantee for list results.
    geom_df['neighbors'] = [find_single_neighbors(row, geom_df) for _, row in geom_df.iterrows()]
    return geom_df
|
30 |
+
|
31 |
+
def find_single_neighbors(row, geom_df):
    """
    Collect the IDs of all rows whose geometry touches the geometry of row.

    Args:
        row: One row of geom_df; row.name is its index label and
            row['geometry'] its geometry.
        geom_df: Frame with an 'ID' and a 'geometry' column.
    Returns:
        list: 'ID' values of the touching rows, in frame order. The row itself
            (same index label) is never included.
    """
    geometry = row['geometry']
    # Walk the three needed columns directly instead of building a Series per
    # row with iterrows().
    candidates = zip(geom_df.index, geom_df['ID'], geom_df['geometry'])
    return [
        other_id
        for other_index, other_id, other_geometry in candidates
        if row.name != other_index and geometry.touches(other_geometry)
    ]
|
37 |
+
|
38 |
+
# Example usage:
# Replace 'your_data.geojson' with the path to your GeoJSON file or any other supported format
# Make sure the GeoDataFrame has a 'geometry' column
# (find_single_neighbors additionally reads an 'ID' column).
# Guarded so that importing this module does not try to read the placeholder file.
if __name__ == '__main__':
    your_gdf = gpd.read_file('your_data.geojson')

    # Call the function to find neighbors
    result_gdf = find_neighbors(your_gdf)

    # Print the resulting GeoDataFrame
    print(result_gdf)
|
requirements.txt
ADDED
Binary file (9.62 kB). View file
|
|