import re
from difflib import SequenceMatcher
import requests
import xml.etree.ElementTree as ET
import gradio as gr
from concurrent.futures import ThreadPoolExecutor
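# Static region -> district -> sub-district mapping for Hong Kong. It is only
# consumed by clean_area_data() further down; the fuzzy address matching itself
# works off the address pool loaded from the text files listed in file_paths.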
areaData = {
    "Hong Kong": {
        "Central and Western": [
            "Sai Ying Pun", "Kennedy Town", "Shek Tong Tsui", "Sai Wan", "Sheung Wan",
            "Central", "Admiralty", "Mid-Levels West", "Mid-Levels", "The Peak"
        ],
        "Wan Chai": [
            "Wan Chai", "Causeway Bay", "Happy Valley", "Tai Hang", "Stubbs Road",
            "Jardine's Lookout"
        ],
        "Eastern": [
            "Tin Hau", "Braemar Hill", "North Point", "Quarry Bay", "Sai Wan Ho",
            "Shau Kei Wan", "Chai Wan", "Siu Sai Wan"
        ],
        "Southern": [
            "Pok Fu Lam", "Aberdeen", "Ap Lei Chau", "Wong Chuk Hang", "Shouson Hill",
            "Repulse Bay", "Chung Hom Kok", "Stanley", "Tai Tam", "Shek O", "Telegraph Bay"
        ]
    },
    "Kowloon": {
        "Yau Tsim Mong": [
            "Tsim Sha Tsui", "Yau Ma Tei", "West Kowloon", "Kowloon Tong", "Mong Kok",
            "Tai Kok Tsui", "Jordan", "Prince Edward"
        ],
        "Sham Shui Po": [
            "Mei Foo", "Lai Chi Kok", "Cheung Sha Wan", "Sham Shui Po", "Shek Kip Mei",
            "Tai Wo Ping", "Stonecutters Island"
        ],
        "Kowloon City": [
            "Hung Hom", "To Kwa Wan", "Ma Tau Kok", "Ma Tau Wai", "Kai Tak", "Kowloon City",
            "Ho Man Tin", "Kowloon Tong", "Beacon Hill"
        ],
        "Wong Tai Sin": [
            "San Po Kong", "Wong Tai Sin", "Tung Tau", "Wang Tau Hom", "Lok Fu", "Diamond Hill",
            "Tsz Wan Shan", "Ngau Chi Wan"
        ],
        "Kwun Tong": [
            "Ping Shek", "Kowloon Bay", "Ngau Tau Kok", "Tsz Wan Shan", "Kwun Tong",
            "Sau Mau Ping", "Lam Tin", "Yau Tong", "Lei Yue Mun"
        ]
    },
    "New Territories": {
        "Kwai Tsing": [
            "Kwai Chung", "Tsing Yi", "Kwai Fong"
        ],
        "Tsuen Wan": [
            "Tsuen Wan", "Tsing Lung Bridge", "Tsing Hung Bridge", "Shen Tsuen", "Tsing Chung Koon",
            "Ma Wan"
        ],
        "Tuen Mun": [
            "Tai Lam Chung", "Siu Lam", "Tuen Mun", "Lam Tei"
        ],
        "Yuen Long": [
            "Hung Shui Kiu", "Ha Tsuen", "Lau Fau Shan", "Tin Shui Wai", "Yuen Long", "San Tin",
            "Lok Ma Chau", "Kam Tin", "Shek Kong", "Pat Heung"
        ],
        "North": [
            "Fanling", "Luen Wo Hui", "Sheung Shui", "Shek Wu Hui", "Sha Tau Kok", "Lok Keng",
            "Wu Kau Tang"
        ],
        "Tai Po": [
            "Tai Po Market", "Tai Po", "Tai Po Kau", "Tai Mei Tuk", "Plover Cove", "Cheung Uk Tau",
            "Tai Wo"
        ],
        "Sha Tin": [
            "Tai Wai", "Sha Tin", "Fo Tan", "Ma On Shan", "Shui Chuen O"
        ],
        "Sai Kung": [
            "Clear Water Bay", "Sai Kung", "Tai Mong Tsai", "Tseung Kwan O", "Hang Hau",
            "Tiu Keng Leng", "Ma Yau Tong"
        ],
        "Islands": [
            "Cheung Chau", "Peng Chau", "Lantau Island", "Tung Chung", "Lamma Island"
        ]
    }
}
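# Collapse runs of whitespace and lowercase free text (used when cleaning area names).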
def normalize_text(text):
    return re.sub(r'\s+', ' ', text.lower().strip())
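# Strip punctuation, collapse whitespace and uppercase an address so candidates compare
# consistently, e.g. "12 Nathan Rd., Tsim Sha Tsui" -> "12 NATHAN RD TSIM SHA TSUI".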
def normalize_address(address):
    return re.sub(r'[^\w\s]', '', re.sub(r'\s+', ' ', address)).strip().upper()
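# Read the address files (one entry per line) and keep each entry alongside its
# normalized form, so matching runs on the normalized text while the original
# spelling is what gets reported back to the user.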
def load_and_normalize_address_pool(file_paths):
    address_pool = []
    for file_path in file_paths:
        try:
            with open(file_path, 'r') as f:
                for line in f:
                    address = line.strip()
                    if address:
                        normalized = normalize_address(address)
                        address_pool.append((address, normalized))
        except FileNotFoundError:
            print(f"File not found: {file_path}")
        except Exception as e:
            print(f"Error reading file {file_path}: {e}")
    return address_pool
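# Character-level similarity of two strings (spaces ignored) via difflib's
# SequenceMatcher; returns a ratio between 0.0 and 1.0.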
def similarity(a, b):
    a, b = a.replace(' ', ''), b.replace(' ', '')
    return SequenceMatcher(None, a, b).ratio()
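# Pull the first run of digits out of the input (treated as the building number) and
# strip a leading number from the remainder, e.g. "123 Nathan Road" -> ("123", "Nathan Road").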
def extract_relevant_part(user_input):
    number_part = re.findall(r'\d+', user_input)
    number_part = number_part[0] if number_part else ''
    address_part = re.sub(r'^\d+', '', user_input).strip()
    return number_part, address_part
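# Fuzzy-match the (normalized) input against every entry in the address pool and return
# the best original spelling together with its similarity score; the building number,
# if any, is re-attached to the front of the match.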
def match_address(user_input, address_pool):
    number_part, address_part = extract_relevant_part(user_input)
    normalized_input = normalize_address(address_part)
    best_match = None
    highest_similarity = 0
    for original_address, normalized_address in address_pool:
        sim = similarity(normalized_input, normalized_address)
        if sim > highest_similarity:
            highest_similarity = sim
            best_match = original_address
    if best_match:
        best_match = f"{number_part} {best_match}".strip() if number_part else best_match
    return best_match, highest_similarity
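# Query the Hong Kong Address Lookup Service (ALS) for the matched address and pull the
# English/Chinese premises details plus geospatial fields out of its XML response.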
def fetch_address_from_als_api(user_input):
    api_url = f"https://www.als.gov.hk/lookup?q={requests.utils.quote(user_input)}"
    try:
        # Modest timeout so a slow ALS response does not hang the worker thread.
        response = requests.get(api_url, timeout=10)
        response.raise_for_status()
        root = ET.fromstring(response.content)
        result = {}
        eng_premises = root.find(".//EngPremisesAddress")
        if eng_premises is not None:
            result['English Address'] = {
                'Estate': eng_premises.findtext(".//EstateName", ''),
                'Street': eng_premises.findtext(".//StreetName", ''),
                'Building No': eng_premises.findtext(".//BuildingNoFrom", ''),
                'District': eng_premises.findtext(".//DcDistrict", ''),
                'Region': eng_premises.findtext(".//Region", '')
            }
        chi_premises = root.find(".//ChiPremisesAddress")
        if chi_premises is not None:
            result['Chinese Address'] = {
                'Estate': chi_premises.findtext(".//EstateName", ''),
                'Street': chi_premises.findtext(".//StreetName", ''),
                'Building No': chi_premises.findtext(".//BuildingNoFrom", ''),
                'District': chi_premises.findtext(".//DcDistrict", ''),
                'Region': chi_premises.findtext(".//Region", '')
            }
        geo_info = root.find(".//GeospatialInformation")
        if geo_info is not None:
            result['Geospatial Information'] = {
                'Latitude': geo_info.findtext(".//Latitude", ''),
                'Longitude': geo_info.findtext(".//Longitude", ''),
                'Northing': geo_info.findtext(".//Northing", ''),
                'Easting': geo_info.findtext(".//Easting", '')
            }
        return result
    except (requests.RequestException, ET.ParseError) as e:
        return f"Error fetching data from ALS API: {e}"
def extract_building_from_address(user_input):
    normalized_input = normalize_text(user_input)
    match = re.match(r'([^,]+)', normalized_input)
    return match.group(1).strip() if match else normalized_input
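# Gradio callback: take one address per line, match each against the pool, look the best
# match up in the ALS API (inputs are processed in parallel via a thread pool), and
# format everything into a single text blob for the output box.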
def address_search(user_inputs):
    user_inputs_list = user_inputs.splitlines()

    def process_input(user_input):
        building_part = extract_building_from_address(user_input)
        normalized_input = normalize_address(building_part)
        best_match, similarity_score = match_address(normalized_input, address_pool)
        als_result = fetch_address_from_als_api(best_match) if best_match else "No match found."
        result_str = f"Best match: {best_match} (Similarity: {similarity_score:.2f})\n"
        if isinstance(als_result, dict):
            for address_type, details in als_result.items():
                result_str += f"\n{address_type}:\n"
                for key, value in details.items():
                    result_str += f"{key}: {value}\n"
        else:
            result_str += als_result
        return result_str

    with ThreadPoolExecutor() as executor:
        results = list(executor.map(process_input, user_inputs_list))
    return "\n\n".join(results)
def clean_area_data(area_data):
    cleaned_area_data = {}
    for region, districts in area_data.items():
        cleaned_districts = {}
        for district, subdistricts in districts.items():
            valid_subdistricts = [normalize_text(name) for name in subdistricts
                                  if not re.search(r'Non-Building|Invalid|Other', name, re.I)]
            cleaned_districts[normalize_text(district)] = valid_subdistricts
        cleaned_area_data[normalize_text(region)] = cleaned_districts
    return cleaned_area_data
cleaned_area_data = clean_area_data(areaData)
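# Source files for the address pool; each is expected to hold one English building,
# estate, street or village name per line.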
file_paths = [
    'EngBuilding.txt',
    'EngEstate.txt',
    'EngStreet.txt',
    'EngVillage.txt'
]
address_pool = load_and_normalize_address_pool(file_paths)
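# Gradio UI: a multi-line textbox in, formatted match + ALS details out.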
interface = gr.Interface(
    fn=address_search,
    inputs=gr.Textbox(label="Enter Addresses (one per line; batch processing supported)", lines=10),
    outputs=gr.Textbox(label="ALS API Results"),
    title="Address Lookup and Matching (English)",
    description="Enter addresses to find the closest matches and fetch details from the ALS API."
)
interface.launch()