File size: 4,141 Bytes
99d2415
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
import gradio as gr
import numpy as np
from openai import OpenAI
import voyageai
from typing import List, Tuple


def initialize_clients(openai_key: str, voyage_key: str):
    """Build the OpenAI and Voyage AI clients from the supplied key strings.

    Surrounding whitespace is stripped from each key. A key that is empty
    after stripping is handed to its client as ``None`` (presumably so the
    SDK can fall back to its environment-variable configuration — confirm
    against each SDK's documentation).

    Returns:
        A ``(openai_client, voyage_client)`` tuple.
    """
    cleaned_openai = openai_key.strip()
    cleaned_voyage = voyage_key.strip()

    openai_client = OpenAI(api_key=cleaned_openai if cleaned_openai else None)
    voyage_client = voyageai.Client(
        api_key=cleaned_voyage if cleaned_voyage else None
    )
    return openai_client, voyage_client


def get_openai_embedding(client: OpenAI, text: str) -> List[float]:
    """Embed ``text`` with OpenAI's ``text-embedding-3-large`` model.

    Args:
        client: OpenAI client used to issue the embeddings request.
        text: The text to embed.

    Returns:
        The embedding of the first (and only requested) item in the response.
    """
    first_item = client.embeddings.create(
        model="text-embedding-3-large",
        input=text,
    ).data[0]
    return first_item.embedding


def get_voyage_embedding(client: voyageai.Client, text: str) -> List[float]:
    """Embed ``text`` with Voyage AI's ``voyage-3`` model.

    Args:
        client: Voyage AI client used to issue the embed request.
        text: The text to embed.

    Returns:
        The first embedding in the result.
    """
    # The text is submitted as a one-element batch, so index 0 is its embedding.
    batch = [text]
    return client.embed(batch, model="voyage-3").embeddings[0]


def cosine_similarity(a: List[float], b: List[float]) -> float:
    """Return the cosine similarity between two vectors.

    Args:
        a: First vector.
        b: Second vector; must have the same length as ``a``.

    Returns:
        The dot product of the vectors divided by the product of their
        Euclidean norms, as a built-in ``float``. When either vector has
        zero norm (including empty vectors) the similarity is undefined and
        ``0.0`` is returned instead of ``nan``.

    Raises:
        ValueError: If the vectors have different lengths (from ``np.dot``).
    """
    vec_a = np.asarray(a, dtype=float)
    vec_b = np.asarray(b, dtype=float)

    # Compute the dot product first so mismatched lengths always raise,
    # even when one of the vectors happens to be all zeros.
    dot_product = np.dot(vec_a, vec_b)
    norm_product = np.linalg.norm(vec_a) * np.linalg.norm(vec_b)

    # Guard the 0/0 case, which would otherwise yield nan plus a RuntimeWarning.
    if norm_product == 0.0:
        return 0.0

    # Convert from np.float64 so the result matches the annotated return type.
    return float(dot_product / norm_product)


def process_texts(
    openai_key: str, voyage_key: str, text1: str, text2: str
) -> Tuple[float, float, float]:
    """Compute the similarity of two texts under both embedding models.

    Args:
        openai_key: OpenAI API key, forwarded to ``initialize_clients``.
        voyage_key: Voyage AI API key, forwarded to ``initialize_clients``.
        text1: First text to embed.
        text2: Second text to embed.

    Returns:
        A tuple ``(openai_similarity, voyage_similarity, similarity_diff)``
        where ``similarity_diff`` is the absolute difference between the
        two cosine similarities.

    Raises:
        ValueError: If either text is empty.
    """
    # Fail fast: an empty text gives nothing to compare, so reject it before
    # any client is built or any embedding request is made.
    if not text1 or not text2:
        raise ValueError("Both texts must be non-empty.")

    openai_client, voyage_client = initialize_clients(openai_key, voyage_key)

    # Embed both texts with each model.
    openai_emb1 = get_openai_embedding(openai_client, text1)
    openai_emb2 = get_openai_embedding(openai_client, text2)
    voyage_emb1 = get_voyage_embedding(voyage_client, text1)
    voyage_emb2 = get_voyage_embedding(voyage_client, text2)

    # Similarity is computed per model; vectors from different models are
    # never compared with each other.
    openai_similarity = cosine_similarity(openai_emb1, openai_emb2)
    voyage_similarity = cosine_similarity(voyage_emb1, voyage_emb2)

    # How far apart the two models' similarity scores are.
    similarity_diff = abs(openai_similarity - voyage_similarity)

    return openai_similarity, voyage_similarity, similarity_diff


def compare_embeddings(
    openai_key: str, voyage_key: str, text1: str, text2: str
) -> Tuple[str, str, str]:
    """Run the comparison and render each score as a four-decimal string.

    Returns:
        ``(openai_similarity, voyage_similarity, difference)`` formatted with
        four decimal places. If anything raises, the first element is
        ``"Error: <message>"`` and the other two are empty strings.
    """
    try:
        first, second, gap = process_texts(openai_key, voyage_key, text1, text2)
        formatted = tuple(f"{score:.4f}" for score in (first, second, gap))
    except Exception as exc:
        # Report the failure through the first output slot instead of raising.
        return f"Error: {exc!s}", "", ""
    return formatted


# Build the Gradio UI. Components are declared in display order inside the
# Blocks context: intro text, API-key row, input-text row, button, result row.
with gr.Blocks() as demo:
    # Intro text (Japanese, user-facing): names the two models being compared,
    # says the similarity per model and their difference are computed, and
    # links to where each API key can be obtained.
    gr.Markdown("""
    # 埋め込みモデルの比較デモ
    
    対象モデルは OpenAI の text-embedding-3-large と Voyage AI の voyage-3 のふたつ。入力テキストに対して、それぞれのモデルでの類似度とその差分を計算する。
    
    ## API Key
    
    OpenAI と Voyage AI の API キーは下記より。
    
    - OpenAI API Key: [https://platform.openai.com/account/api-keys](https://platform.openai.com/account/api-keys)
    - Voyage AI API Key: [https://dash.voyageai.com](https://dash.voyageai.com)
    """)

    # API keys are collected per session through password-type textboxes.
    with gr.Row():
        openai_key = gr.Textbox(
            label="OpenAI API Key", placeholder="sk-...", type="password", scale=2
        )
        voyage_key = gr.Textbox(
            label="Voyage AI API Key", placeholder="pa-...", type="password", scale=2
        )

    # The two texts whose similarity is compared.
    with gr.Row():
        text1 = gr.Textbox(label="Text 1", lines=3)
        text2 = gr.Textbox(label="Text 2", lines=3)

    compare_btn = gr.Button("Compare")

    # Result fields: one similarity per model plus their absolute difference.
    with gr.Row():
        openai_output = gr.Textbox(label="OpenAI text-embedding-3-large Similarity")
        voyage_output = gr.Textbox(label="Voyage AI voyage-3 Similarity")
        diff_output = gr.Textbox(label="Absolute Difference")

    # Input order matches compare_embeddings' parameters and output order
    # matches its returned tuple; on failure the error text lands in
    # openai_output because it is the first output.
    compare_btn.click(
        compare_embeddings,
        inputs=[openai_key, voyage_key, text1, text2],
        outputs=[openai_output, voyage_output, diff_output],
    )

# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()