File size: 5,059 Bytes
b65779c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
883c5b4
b65779c
 
 
 
 
 
 
 
 
 
 
 
883c5b4
b65779c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e524d59
 
b65779c
 
 
12f3633
e524d59
b65779c
 
 
 
 
44d078f
 
 
b65779c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e524d59
 
b65779c
 
 
 
 
 
 
 
 
 
 
883c5b4
b65779c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
import os
import functools
import numpy as np

import fastapi
import voyageai
vo = voyageai.Client(os.environ['VOYAGE_API_KEY'])

theorems = [
    theorem.split('--thm--\n')
    for theorem in open('theorems.txt').read().split('----thm----\n')[1:]
]
embed = np.load('embed_arr.npy')

app = fastapi.FastAPI()

top_n = 50

@functools.lru_cache(maxsize=2*1024*1024*1024//(400 + top_n*8))
def lru_search(query):
    query_embed = vo.embed([query], model="voyage-large-2-instruct", input_type="query").embeddings[0]
    similarities = np.dot(embed, query_embed)
    
    top = np.argpartition(similarities, -top_n)[-top_n:]
    top = top[np.argsort(similarities[top])][::-1]
    sim = similarities[top]
    return top, sim

@app.get("/search")
def search(query: str):
    # if query is too long, throw an error
    if len(query) > 400:
        raise fastapi.HTTPException(status_code=400, detail="Query is too long")
    top, sim = lru_search(query)
    
    results = []
    for i, similarity in zip(top, sim):
        module, name, typ, comment = theorems[i]
        results.append({
            "module": module,
            "name": name,
            "type": typ,
            "comment": comment,
            "similarity": similarity
        })
    return results

@app.get("/")
def read_root():
    # HTML
    return fastapi.responses.HTMLResponse(
    content="""
<html>
<head>
    <title>Search Mathlib</title>
    <!-- tailwind css -->
    <script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
    <link href="https://cdn.jsdelivr.net/npm/tailwindcss@2.2.19/dist/tailwind.min.css" rel="stylesheet">
</head>
<body class="overflow-scroll mx-auto w-10/12 my-8 text-lg flex flex-col">
    <h1 class="text-3xl mx-0.5">Search Mathlib</h1>
    <form onsubmit="search(); return false;" class="flex gap-2 mt-2 mb-3">
        <input autofocus type="text" name="query" class="grow border p-2 w-full rounded-lg focus:outline-none border-gray-300 focus:ring-1 focus:ring-gray-100">
        <button type="submit" class="bg-green-700 text-white p-2 w-24 rounded-lg">Search</button>
    </form>
    <ul id="results" class="list-disc">
        <div>You can search for any object declared in Mathlib and Lean, including theorems, functions, types, syntax, tactics, and metaprogramming functions.</div>
        <div>The neural network sees the name, type, file location, and comment of a declaration. Your search terms should match these as closely as possible. </div>
        <div>You can find README.md and the code I used in the Files tab. Thank you!</div>
    </ul>
    <script> 
        function removeFirstWord(str) {
            return str.split(' ').slice(1).join(' ')
        }
        // https://leanprover-community.github.io/mathlib4_docs/Mathlib/Tactic.Widget.SelectPanelUtils.html#def%20mkSelectionPanelRPC.match_1

        async function search() {
            const query = document.querySelector('input[name="query"]').value
            const response = await fetch(`/search?query=${query}`)
            if (!response.ok) {
                alert('Error: ' + (await response.json())['detail'])
                return
            }
            const results = await response.json()

            const resultsDiv = document.querySelector('#results')
            resultsDiv.innerHTML = ''

            for (const result of results) {
                const resultDiv = document.createElement('li')
                const link = "https://leanprover-community.github.io/mathlib4_docs/" + removeFirstWord(result.module).replaceAll('.', '/') + ".html#" + removeFirstWord(result.name)
                resultDiv.innerHTML = `
                    <div class="flex gap-1">
                        <a href="${link}"
                        class="text-xl mr-4 text-green-800 hover:text-green-600 
                          ">${result.name}</a>
                        <div class="flex-grow"></div>
                        <!-- The similarity info is only shown on larger screens -->
                        <p class="text-gray-800 hidden md:block">Similarity: ${result.similarity.toFixed(3)}</p>
                    </div>
                    <div class="flex flex-col">
                        <p class="font-mono">: ${result.type}</p>
                        <p class="text-gray-800 mt-1">${result.module}</p>
                        <p id="comment"></p>
                    </div>
                    
                    <div class="border my-1 border-gray-200"></div>
                `
                if (result.comment.trim() !== '') {
                    const comment = resultDiv.querySelector('#comment')
                    comment.innerHTML = `/-- ${marked.marked(result.comment.trim())} -/`
                    comment.classList.add('bg-gray-50', 'text-gray-900', 'px-2', 'py-1', 'mt-0.5', 'rounded-lg')
                }
                resultsDiv.appendChild(resultDiv)
            }
        }
    </script>
</body>
</html>
    """
    )

'''
to run this with auto-reload, use the following command:
cd data
uvicorn server:app --reload
'''