import json
from functools import lru_cache
import gradio as gr
from difflib import SequenceMatcher

@lru_cache(maxsize=1)
def load_json_file(json_file):
    """Parse the UTF-8 JSON document at *json_file* and return its content.

    The result is memoised with a single-slot LRU cache: calling again with
    the same path returns the very same object without touching the disk,
    while calling with a different path replaces the cached entry. Because
    the cached object is handed out by reference, callers should treat it as
    read-only.
    """
    with open(json_file, 'r', encoding='utf-8') as handle:
        parsed = json.load(handle)
    return parsed

def preprocess_jyutping_data(jyutping_data):
    """Invert a syllable-keyed table into a character -> syllable lookup.

    *jyutping_data* maps each syllable to an iterable of entries; every entry
    is indexed with the key "漢字", whose value is iterated character by
    character. A character that appears under several syllables ends up
    mapped to the last syllable encountered.
    """
    char_to_syllable = {}
    for syllable, entries in jyutping_data.items():
        for entry in entries:
            for hanzi in entry["漢字"]:
                # Later occurrences overwrite earlier ones.
                char_to_syllable[hanzi] = syllable
    return char_to_syllable


def chinese_to_jyutping(text, char_to_jyutping):
    """Return a list with one item per character of *text*.

    Each character is replaced by its entry in *char_to_jyutping*; characters
    without an entry are kept as-is.
    """
    lookup = char_to_jyutping.get
    return [lookup(symbol, symbol) for symbol in text]


def get_similar_initials():
    """Return a fresh mapping from an initial to the initials treated as similar to it.

    A new dict (with new lists) is built on every call, so callers may mutate
    the result freely.
    """
    confusable = (
        ('b', ('d', 'p')),
        ('c', ('s',)),
        ('d', ('b', 't')),
        ('f', ('h',)),
        ('g', ('gw',)),
        ('gw', ('g',)),
        ('h', ('f',)),
        ('j', ('z',)),
        ('jw', ('w',)),
        ('l', ('n',)),
        ('n', ('l',)),
        ('ng', ('n',)),
        ('p', ('b',)),
        ('s', ('c',)),
        ('t', ('d',)),
        ('w', ('jw',)),
        ('z', ('j',)),
    )
    return {initial: list(others) for initial, others in confusable}

def get_lazy_pronunciations():
    """Return a fresh mapping from an initial to its "lazy pronunciation" variants.

    Each key is an initial and each value lists the initials it may be
    swapped with. A new dict is built on every call.
    """
    return {
        # 'n' pairs with both 'l' and 'ng'. Writing these as two separate
        # 'n' entries in one dict literal silently keeps only the last one,
        # so they are merged into a single list here.
        'n': ['l', 'ng'],
        'l': ['n'],
        'gw': ['g'],
        'g': ['gw'],
        'k': ['t'],
        't': ['k'],
        'ng': ['n'],
    }


def are_jyutping_similar(jyutping1, jyutping2, similar_initials, lazy_pronunciations):
    """Tell whether two syllables start with the same or an interchangeable initial.

    Only the leading one or two characters of each syllable are compared; the
    rest of the syllable is ignored.

    Args:
        jyutping1: First syllable string.
        jyutping2: Second syllable string.
        similar_initials: Mapping from an initial to the initials considered
            similar to it.
        lazy_pronunciations: Mapping from an initial to its lazy-pronunciation
            variants.

    Returns:
        True when the initials are equal, or when the second initial is listed
        for the first one in either table; False otherwise. An empty string
        has an empty initial, so it is only similar to another empty string.
    """
    def leading_initial(syllable):
        # Prefer a two-character initial when either table knows it, so that
        # digraph keys present only in lazy_pronunciations are reachable too.
        digraph = syllable[:2]
        if digraph in similar_initials or digraph in lazy_pronunciations:
            return digraph
        # Slicing (rather than indexing) keeps empty strings from raising.
        return syllable[:1]

    initial1 = leading_initial(jyutping1)
    initial2 = leading_initial(jyutping2)

    return (initial1 == initial2 or
            initial2 in similar_initials.get(initial1, []) or
            initial2 in lazy_pronunciations.get(initial1, []))


@lru_cache(maxsize=1)
def get_char_to_jyutping():
    """Build the character -> Jyutping lookup from 'lexi-can_key.json'.

    The file is loaded with load_json_file and reshaped with
    preprocess_jyutping_data. The function takes no arguments and is cached,
    so the lookup is built once and the same dict object is returned on every
    later call.
    """
    return preprocess_jyutping_data(load_json_file('lexi-can_key.json'))


def calculate_phonetic_similarity(user_jyutping, result_jyutping, similar_initials, lazy_pronunciations):
    """Score how phonetically close two syllable sequences are.

    Every (user syllable, result syllable) pair is tested with
    are_jyutping_similar, and the number of similar pairs is divided by the
    length of the longer sequence.

    Args:
        user_jyutping: Sequence of syllables for the user's input.
        result_jyutping: Sequence of syllables for a candidate result.
        similar_initials: Table forwarded to are_jyutping_similar.
        lazy_pronunciations: Table forwarded to are_jyutping_similar.

    Returns:
        A non-negative float. It is 0.0 when both sequences are empty.
        NOTE: because all pairs are counted (not just aligned positions), the
        value is not capped at 1.0 -- e.g. two sequences of two mutually
        similar syllables each yield 4 / 2 = 2.0.
    """
    longest = max(len(user_jyutping), len(result_jyutping))
    if longest == 0:
        # Nothing to compare; avoid dividing by zero.
        return 0.0

    similar_count = sum(
        1 for uj in user_jyutping for rj in result_jyutping
        if are_jyutping_similar(uj, rj, similar_initials, lazy_pronunciations)
    )
    return similar_count / longest


def match_user_input(user_input):
    """Find the stored entry (or the three closest ones) for *user_input*.

    The input is converted character by character to Jyutping and compared
    with the records loaded from 'jyutping_results_largec.json'; each record
    is read through its "jyutping" and "text" keys.

    Returns:
        A dict that always contains "input_text" and "input_jyutping", plus
        either

        * "match" and "match_type" == "exact" -- the first record whose
          "jyutping" contains every distinct syllable of the input. This is a
          set-inclusion test, so order and repetition are not considered; or
        * "matches" -- up to three records with the highest combined score,
          each as {"match", "score", "match_type": "phonetic_similarity"}.
    """
    char_to_jyutping = get_char_to_jyutping()
    similar_initials = get_similar_initials()
    lazy_pronunciations = get_lazy_pronunciations()
    saved_results = load_json_file('jyutping_results_largec.json')

    user_jyutping = chinese_to_jyutping(user_input, char_to_jyutping)

    # First pass: stop at the first record covering all input syllables.
    wanted_syllables = set(user_jyutping)
    for candidate in saved_results:
        if wanted_syllables.issubset(candidate["jyutping"]):
            return {
                "input_text": user_input,
                "input_jyutping": user_jyutping,
                "match": candidate,
                "match_type": "exact"
            }

    def combined_score(candidate):
        # Weighted blend: 60% phonetic, 30% character similarity, 10% length.
        phonetic = calculate_phonetic_similarity(
            user_jyutping, candidate["jyutping"], similar_initials, lazy_pronunciations
        )
        textual = SequenceMatcher(None, user_input, candidate["text"]).ratio()
        length_gap = abs(len(user_input) - len(candidate["text"]))
        closeness = 1 / (1 + length_gap)
        return (phonetic * 0.6) + (textual * 0.3) + (closeness * 0.1)

    # Second pass: rank every record; the stable sort keeps file order on ties.
    ranked = sorted(
        ((candidate, combined_score(candidate)) for candidate in saved_results),
        key=lambda pair: pair[1],
        reverse=True,
    )

    return {
        "input_text": user_input,
        "input_jyutping": user_jyutping,
        "matches": [
            {
                "match": candidate,
                "score": total,
                "match_type": "phonetic_similarity"
            } for candidate, total in ranked[:3]
        ]
    }


# Example inputs offered in the UI's sample-case dropdown.
sample_cases = [
    "龍民大廈",
    "得輔導西",
    "賀民天街",
    "荔枝支道",
    "黎知覺道",
    "元周街",
    "謝非道",
    "金中道",
    "得立街",
    "地梨根得里",
]


def gradio_app(custom_input, sample_case):
    """Run the matcher for the UI and return the outcome as a JSON string.

    Args:
        custom_input: Free text typed by the user (may be empty or None).
        sample_case: Selected sample case (may be empty or None). When set,
            it takes precedence over *custom_input*.

    Returns:
        A JSON-formatted string (non-ASCII kept as-is, 4-space indent):

        * {"error": ...} when no usable input was given;
        * the exact-match result unchanged; or
        * the closest matches, flattened to "text", "jyutping", "score"
          (rounded to 4 decimals) and "match_type".
    """
    # Surrounding whitespace is not part of a place name; whitespace-only
    # input is treated the same as no input.
    user_input = (sample_case or custom_input or "").strip()
    if not user_input:
        # The interface feeds this return value to a gr.JSON output, which
        # expects a string to be valid JSON, so the prompt is wrapped in an
        # object instead of being returned as bare text.
        return json.dumps(
            {"error": "Please enter text or select a sample case."},
            ensure_ascii=False,
            indent=4,
        )

    result = match_user_input(user_input)

    if "match" in result:
        payload = result
    else:
        payload = {
            "input_text": result["input_text"],
            "input_jyutping": result["input_jyutping"],
            "matches": [
                {
                    "text": entry["match"]["text"],
                    "jyutping": entry["match"]["jyutping"],
                    "score": round(entry["score"], 4),
                    "match_type": entry["match_type"],
                }
                for entry in result["matches"]
            ],
        }
    return json.dumps(payload, ensure_ascii=False, indent=4)


# Gradio UI definition: a free-text box and a sample-case dropdown are passed,
# in that order, to gradio_app; its return value is shown in a JSON component.
interface = gr.Interface(
    fn=gradio_app,
    inputs=[
        gr.Textbox(placeholder="Enter text", label="Placename/Street/Building name"),
        # The extra None choice presumably lets the user leave the sample
        # unselected so the text box is used instead -- TODO confirm how the
        # installed Gradio version renders a None choice.
        gr.Dropdown(choices=[None] + sample_cases, label="Choose a Sample Case")
    ],
    outputs=gr.JSON(label="Matching Result"),
    title="Cantonese Homophone and Phonetic Matching 粵語同音異字處理",
    description="Enter Cantonese text or select a sample case, and the app will return a match or the closest matches based on phonetic similarity. 輸入粵語文本或選擇一個範例案例，應用程式將傳回粵拼匹配或基於語音相似的最接近匹配。"
)

# NOTE: launch() is called at module level with no __main__ guard, so it runs
# whenever this file is executed or imported.
interface.launch()