File size: 4,492 Bytes
e8eca0b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
import os
import functools
import numpy as np

import fastapi
import voyageai
vo = voyageai.Client(os.environ['VOYAGE_API_KEY'])

theorems = [
    theorem.split('--ast--\n')
    for theorem in open('ast.txt').read().split('----ast----\n')[1:]
]
embed = np.load('embed_arr.npy')

app = fastapi.FastAPI()

top_n = 50

@functools.lru_cache(maxsize=2*1024*1024*1024//(80 + top_n*8))
def lru_search(query):
    query_embed = vo.embed([query], model="voyage-large-2-instruct", input_type="query").embeddings[0]
    similarities = np.dot(embed, query_embed)
    
    top = np.argpartition(similarities, -top_n)[-top_n:]
    top = top[np.argsort(similarities[top])][::-1]
    sim = similarities[top]
    return top, sim

@app.get("/search")
def search(query: str):
    # if query is too long, throw an error
    if len(query) > 80:
        raise fastapi.HTTPException(status_code=400, detail="Query is too long")
    top, sim = lru_search(query)
    
    results = []
    for i, similarity in zip(top, sim):
        module, name, typ, comment = theorems[i]
        results.append({
            "module": module,
            "name": name,
            "type": typ,
            "comment": comment,
            "similarity": similarity
        })
    return results

@app.get("/")
def read_root():
    # HTML
    return fastapi.responses.HTMLResponse(
    content="""
<html>
<head>
    <title>Search</title>
    <!-- tailwind css -->
    <script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
    <link href="https://cdn.jsdelivr.net/npm/tailwindcss@2.2.19/dist/tailwind.min.css" rel="stylesheet">
</head>
<body class="mx-24 my-8 text-lg flex flex-col">
    <h1 class="text-3xl">Search Mathlib</h1>
    <form onsubmit="search(); return false;" class="flex gap-2 mt-2 mb-3">
        <input autofocus type="text" name="query" class="grow border p-2 w-full rounded-lg focus:outline-none border-gray-300 focus:ring-1 focus:ring-gray-100">
        <button type="submit" class="bg-green-700 text-white p-2 w-24 rounded-lg">Search</button>
    </form>
    <ul id="results" class="list-disc">
    </ul>
    <script> 
        function removeFirstWord(str) {
            return str.split(' ').slice(1).join(' ')
        }
        // https://leanprover-community.github.io/mathlib4_docs/Mathlib/Tactic.Widget.SelectPanelUtils.html#def%20mkSelectionPanelRPC.match_1

        async function search() {
            const query = document.querySelector('input[name="query"]').value
            const response = await fetch(`/search?query=${query}`)
            if (!response.ok) {
                alert('Error: ' + (await response.json())['detail'])
                return
            }
            const results = await response.json()

            const resultsDiv = document.querySelector('#results')
            resultsDiv.innerHTML = ''

            for (const result of results) {
                const resultDiv = document.createElement('li')
                const link = "https://leanprover-community.github.io/mathlib4_docs/" + removeFirstWord(result.module).replaceAll('.', '/') + ".html#" + removeFirstWord(result.name)
                resultDiv.innerHTML = `
                    <div class="flex gap-1">
                        <a href="${link}"
                        class="text-xl mr-4 text-green-800 hover:text-green-600 
                          ">${result.name}</a>
                        <div class="flex-grow"></div>
                        <p class="text-gray-800">Similarity: ${result.similarity.toFixed(3)}</p>
                    </div>
                    <div class="flex flex-col">
                        <p class="font-mono">: ${result.type}</p>
                        <p class="text-gray-800 mt-1">${result.module}</p>
                        <p id="comment"></p>
                    </div>
                    
                    <div class="border my-1 border-gray-200"></div>
                `
                if (result.comment.trim() !== '') {
                    const comment = resultDiv.querySelector('#comment')
                    comment.innerHTML = marked.marked(`/-- ${result.comment.trim()} -/`)
                    comment.classList.add('bg-gray-50', 'text-gray-900', 'px-2', 'py-1', 'mt-0.5', 'rounded-lg')
                }
                resultsDiv.appendChild(resultDiv)
            }
        }
    </script>
</body>
</html>
    """
    )

'''
to run this with auto-reload, use the following command:
cd data
uvicorn test_embed:app --reload
'''