diff --git a/.gitattributes b/.gitattributes index c7d9f3332a950355d5a77d85000f05e6f45435ea..69de2ecd648a2ec58752f0f161585c815189a9c5 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,34 +1 @@ -*.7z filter=lfs diff=lfs merge=lfs -text -*.arrow filter=lfs diff=lfs merge=lfs -text -*.bin filter=lfs diff=lfs merge=lfs -text -*.bz2 filter=lfs diff=lfs merge=lfs -text -*.ckpt filter=lfs diff=lfs merge=lfs -text -*.ftz filter=lfs diff=lfs merge=lfs -text -*.gz filter=lfs diff=lfs merge=lfs -text -*.h5 filter=lfs diff=lfs merge=lfs -text -*.joblib filter=lfs diff=lfs merge=lfs -text -*.lfs.* filter=lfs diff=lfs merge=lfs -text -*.mlmodel filter=lfs diff=lfs merge=lfs -text -*.model filter=lfs diff=lfs merge=lfs -text -*.msgpack filter=lfs diff=lfs merge=lfs -text -*.npy filter=lfs diff=lfs merge=lfs -text -*.npz filter=lfs diff=lfs merge=lfs -text -*.onnx filter=lfs diff=lfs merge=lfs -text -*.ot filter=lfs diff=lfs merge=lfs -text -*.parquet filter=lfs diff=lfs merge=lfs -text -*.pb filter=lfs diff=lfs merge=lfs -text -*.pickle filter=lfs diff=lfs merge=lfs -text -*.pkl filter=lfs diff=lfs merge=lfs -text -*.pt filter=lfs diff=lfs merge=lfs -text -*.pth filter=lfs diff=lfs merge=lfs -text -*.rar filter=lfs diff=lfs merge=lfs -text -*.safetensors filter=lfs diff=lfs merge=lfs -text -saved_model/**/* filter=lfs diff=lfs merge=lfs -text -*.tar.* filter=lfs diff=lfs merge=lfs -text -*.tflite filter=lfs diff=lfs merge=lfs -text -*.tgz filter=lfs diff=lfs merge=lfs -text -*.wasm filter=lfs diff=lfs merge=lfs -text -*.xz filter=lfs diff=lfs merge=lfs -text -*.zip filter=lfs diff=lfs merge=lfs -text -*.zst filter=lfs diff=lfs merge=lfs -text -*tfevents* filter=lfs diff=lfs merge=lfs -text +index/**/* filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md index 3af01a93035e94b1985334619aedc343985b587c..2db223743939f9d3bcbafc3b6cefaace2afac41c 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,13 @@ --- -title: Imdb Search -emoji: ⚡ -colorFrom: indigo -colorTo: purple +title: IMDB search +emoji: 🐠 +colorFrom: blue +colorTo: blue sdk: gradio -sdk_version: 3.18.0 +sdk_version: 3.12.0 app_file: app.py pinned: false +license: apache-2.0 --- -Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference +Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference \ No newline at end of file diff --git a/app.py b/app.py new file mode 100644 index 0000000000000000000000000000000000000000..7ec6282b75b755035a255aafc7651d505f11af38 --- /dev/null +++ b/app.py @@ -0,0 +1,38 @@ +import gradio as gr +from datasets import load_dataset +from pyserini.search.lucene import LuceneSearcher + +searcher = LuceneSearcher("index") +ds = load_dataset("imdb", split="train") + +def search(query): + hits = searcher.search(query, k=10) + results = ds.select([int(hit.docid) for hit in hits]) + return results['text'] + + +if __name__ == "__main__": + demo = gr.Blocks() + + with demo: + with gr.Row(): + gr.Markdown(value="""#
IMDB search
""") + with gr.Row(): + query = gr.Textbox(lines=1, max_lines=1, placeholder="Search…", label="Query") + with gr.Row(): + submit_btn = gr.Button("🔍") + with gr.Column(): + c1 = gr.HTML(label="Results") + c2 = gr.HTML(label="Results") + c3 = gr.HTML(label="Results") + c4 = gr.HTML(label="Results") + c5 = gr.HTML(label="Results") + c6 = gr.HTML(label="Results") + c7 = gr.HTML(label="Results") + c8 = gr.HTML(label="Results") + c9 = gr.HTML(label="Results") + c10 = gr.HTML(label="Results") + query.submit(fn=search, inputs=[query], outputs=[c1, c2, c3, c4, c5, c6, c7, c8, c9, c10]) + submit_btn.click(search, inputs=[query], outputs=[c1, c2, c3, c4, c5, c6, c7, c8, c9, c10]) + + demo.launch(enable_queue=True, debug=True) \ No newline at end of file diff --git a/index/.gitkeep b/index/.gitkeep new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/index/_c.fdm b/index/_c.fdm new file mode 100644 index 0000000000000000000000000000000000000000..10b8ac6adbb0b3e4985c39826ef97cd0e54ed4ea Binary files /dev/null and b/index/_c.fdm differ diff --git a/index/_c.fdt b/index/_c.fdt new file mode 100644 index 0000000000000000000000000000000000000000..e49bfc54ed689018520b8a95488144cc37c809d1 Binary files /dev/null and b/index/_c.fdt differ diff --git a/index/_c.fdx b/index/_c.fdx new file mode 100644 index 0000000000000000000000000000000000000000..6a5117cf644f557f657af78c40ac895bf372a1d5 Binary files /dev/null and b/index/_c.fdx differ diff --git a/index/_c.fnm b/index/_c.fnm new file mode 100644 index 0000000000000000000000000000000000000000..56d4d988faa2d929462e8d90bdf22efe14726d96 Binary files /dev/null and b/index/_c.fnm differ diff --git a/index/_c.nvd b/index/_c.nvd new file mode 100644 index 0000000000000000000000000000000000000000..3e65f3f2687952a41dea0f7f82dc9afdfb93bb7d Binary files /dev/null and b/index/_c.nvd differ diff --git a/index/_c.nvm b/index/_c.nvm new file mode 100644 index 0000000000000000000000000000000000000000..978fae64be3faaf75727138fd7af3f8eacfc4533 Binary files /dev/null and b/index/_c.nvm differ diff --git a/index/_c.si b/index/_c.si new file mode 100644 index 0000000000000000000000000000000000000000..3b0036eb79312f07d58280d5099f477ff86071c3 Binary files /dev/null and b/index/_c.si differ diff --git a/index/_c_Lucene90_0.doc b/index/_c_Lucene90_0.doc new file mode 100644 index 0000000000000000000000000000000000000000..5415752e4ac1b1f430fb128662810693f903830d Binary files /dev/null and b/index/_c_Lucene90_0.doc differ diff --git a/index/_c_Lucene90_0.dvd b/index/_c_Lucene90_0.dvd new file mode 100644 index 0000000000000000000000000000000000000000..93ac0aad32dfb61863a7ba8bc42ced4b3a135000 Binary files /dev/null and b/index/_c_Lucene90_0.dvd differ diff --git a/index/_c_Lucene90_0.dvm b/index/_c_Lucene90_0.dvm new file mode 100644 index 0000000000000000000000000000000000000000..2acc288ba50759bcb48e219b5d66cb810d75fbb1 Binary files /dev/null and b/index/_c_Lucene90_0.dvm differ diff --git a/index/_c_Lucene90_0.pos b/index/_c_Lucene90_0.pos new file mode 100644 index 0000000000000000000000000000000000000000..40685acedc3745d6cc7d726c773abc557da5a69a --- /dev/null +++ b/index/_c_Lucene90_0.pos @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4ac8c1c910a978617aa54300a6c2421ab1295ad8fd3a28da9eca9ffec36948d +size 1240149 diff --git a/index/_c_Lucene90_0.tim b/index/_c_Lucene90_0.tim new file mode 100644 index 0000000000000000000000000000000000000000..891d5adfd52c2ba8a9b8a9876f2074135b3f9620 Binary files /dev/null and b/index/_c_Lucene90_0.tim differ diff --git a/index/_c_Lucene90_0.tip b/index/_c_Lucene90_0.tip new file mode 100644 index 0000000000000000000000000000000000000000..f1dbd1530a175d89e5fb4c0b67116bdc1817b7a4 Binary files /dev/null and b/index/_c_Lucene90_0.tip differ diff --git a/index/_c_Lucene90_0.tmd b/index/_c_Lucene90_0.tmd new file mode 100644 index 0000000000000000000000000000000000000000..fee1dfc6376c303e5864f8aca905970e8bc66d41 Binary files /dev/null and b/index/_c_Lucene90_0.tmd differ diff --git a/index/_d.fdm b/index/_d.fdm new file mode 100644 index 0000000000000000000000000000000000000000..9c575771c7a6404658315019e45ced9e2514bff8 Binary files /dev/null and b/index/_d.fdm differ diff --git a/index/_d.fdt b/index/_d.fdt new file mode 100644 index 0000000000000000000000000000000000000000..5206c2a8662eb51c91787a12397a33c12b00e906 Binary files /dev/null and b/index/_d.fdt differ diff --git a/index/_d.fdx b/index/_d.fdx new file mode 100644 index 0000000000000000000000000000000000000000..1d8f2cf1ab35be1234ac27a2fcb770cc4a333dd9 Binary files /dev/null and b/index/_d.fdx differ diff --git a/index/_d.fnm b/index/_d.fnm new file mode 100644 index 0000000000000000000000000000000000000000..351b0a6eeddccf1b8acd8b740ce525fad8ceb83e Binary files /dev/null and b/index/_d.fnm differ diff --git a/index/_d.nvd b/index/_d.nvd new file mode 100644 index 0000000000000000000000000000000000000000..661b3295974d300f895c3ad74d1162813e4b023b Binary files /dev/null and b/index/_d.nvd differ diff --git a/index/_d.nvm b/index/_d.nvm new file mode 100644 index 0000000000000000000000000000000000000000..b115c745767bcbe106596aa124f99c63a52646a1 Binary files /dev/null and b/index/_d.nvm differ diff --git a/index/_d.si b/index/_d.si new file mode 100644 index 0000000000000000000000000000000000000000..f29513b61c310bd51edc94f2413758f6af6d8ff2 Binary files /dev/null and b/index/_d.si differ diff --git a/index/_d_Lucene90_0.doc b/index/_d_Lucene90_0.doc new file mode 100644 index 0000000000000000000000000000000000000000..7d396ea61e3ff79dd7e755af8be8355ec8230e36 Binary files /dev/null and b/index/_d_Lucene90_0.doc differ diff --git a/index/_d_Lucene90_0.dvd b/index/_d_Lucene90_0.dvd new file mode 100644 index 0000000000000000000000000000000000000000..d4a7cbbf7e5cba08fa75d0df43f5fadf5d7654d8 Binary files /dev/null and b/index/_d_Lucene90_0.dvd differ diff --git a/index/_d_Lucene90_0.dvm b/index/_d_Lucene90_0.dvm new file mode 100644 index 0000000000000000000000000000000000000000..15d150d265cf77c2a500d90fa19932ba8d1340cb Binary files /dev/null and b/index/_d_Lucene90_0.dvm differ diff --git a/index/_d_Lucene90_0.pos b/index/_d_Lucene90_0.pos new file mode 100644 index 0000000000000000000000000000000000000000..64f55aac98e7346ce1845fb3d5d26ad92837ffbc --- /dev/null +++ b/index/_d_Lucene90_0.pos @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf1185b23dfe7d554487554e5b731375c22821508fcf8781aec15d0899984efa +size 1262499 diff --git a/index/_d_Lucene90_0.tim b/index/_d_Lucene90_0.tim new file mode 100644 index 0000000000000000000000000000000000000000..6433aaf0b1f1fc2e83e3be404d8d814921d18d79 Binary files /dev/null and b/index/_d_Lucene90_0.tim differ diff --git a/index/_d_Lucene90_0.tip b/index/_d_Lucene90_0.tip new file mode 100644 index 0000000000000000000000000000000000000000..3e30a5ec74b6e0f1af1019750cf9669938d144fe Binary files /dev/null and b/index/_d_Lucene90_0.tip differ diff --git a/index/_d_Lucene90_0.tmd b/index/_d_Lucene90_0.tmd new file mode 100644 index 0000000000000000000000000000000000000000..bfb5eee9660fb3c8da2b99034b0f17511cbfc251 Binary files /dev/null and b/index/_d_Lucene90_0.tmd differ diff --git a/index/_e.fdm b/index/_e.fdm new file mode 100644 index 0000000000000000000000000000000000000000..454082aa384cfb8086ce2995dc0b15c4ad2ce4bb Binary files /dev/null and b/index/_e.fdm differ diff --git a/index/_e.fdt b/index/_e.fdt new file mode 100644 index 0000000000000000000000000000000000000000..7ca4d2995f1475584639fed152d0f3b05b3f0cfe Binary files /dev/null and b/index/_e.fdt differ diff --git a/index/_e.fdx b/index/_e.fdx new file mode 100644 index 0000000000000000000000000000000000000000..1a8f1e31889530040936442a1374bc34fef044ce Binary files /dev/null and b/index/_e.fdx differ diff --git a/index/_e.fnm b/index/_e.fnm new file mode 100644 index 0000000000000000000000000000000000000000..0ae8e27d7b8442884ed7a9aa8f1314d0b76c4580 Binary files /dev/null and b/index/_e.fnm differ diff --git a/index/_e.nvd b/index/_e.nvd new file mode 100644 index 0000000000000000000000000000000000000000..46d107aee53a07b000a3b6465c659bbbd52e8784 Binary files /dev/null and b/index/_e.nvd differ diff --git a/index/_e.nvm b/index/_e.nvm new file mode 100644 index 0000000000000000000000000000000000000000..d87516ca46d5757777ff0e898deb51f538432fb2 Binary files /dev/null and b/index/_e.nvm differ diff --git a/index/_e.si b/index/_e.si new file mode 100644 index 0000000000000000000000000000000000000000..6a0e026ce203bf654493183bf067a569299dcd06 Binary files /dev/null and b/index/_e.si differ diff --git a/index/_e_Lucene90_0.doc b/index/_e_Lucene90_0.doc new file mode 100644 index 0000000000000000000000000000000000000000..be62c65b8cd41158b36e002b180845376063fd01 Binary files /dev/null and b/index/_e_Lucene90_0.doc differ diff --git a/index/_e_Lucene90_0.dvd b/index/_e_Lucene90_0.dvd new file mode 100644 index 0000000000000000000000000000000000000000..f5c5094cd6b4e72ecbcb5994beb8603fcd1c4cf9 Binary files /dev/null and b/index/_e_Lucene90_0.dvd differ diff --git a/index/_e_Lucene90_0.dvm b/index/_e_Lucene90_0.dvm new file mode 100644 index 0000000000000000000000000000000000000000..d4616fe3b49677c175d5ea4e8c090b986451d261 Binary files /dev/null and b/index/_e_Lucene90_0.dvm differ diff --git a/index/_e_Lucene90_0.pos b/index/_e_Lucene90_0.pos new file mode 100644 index 0000000000000000000000000000000000000000..ba777a257c6b66fb680290ffb4d8ca8eada22531 --- /dev/null +++ b/index/_e_Lucene90_0.pos @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ccaee5e5633b5c35ba7045c5e12e692587380d32c3020b81bac24a28da69ca2 +size 1238216 diff --git a/index/_e_Lucene90_0.tim b/index/_e_Lucene90_0.tim new file mode 100644 index 0000000000000000000000000000000000000000..fd06a9963205ee25f363ea89841f5f946510a06b Binary files /dev/null and b/index/_e_Lucene90_0.tim differ diff --git a/index/_e_Lucene90_0.tip b/index/_e_Lucene90_0.tip new file mode 100644 index 0000000000000000000000000000000000000000..3ee5b6de12031e8106e3d84edd2d8ad3f7ee0916 Binary files /dev/null and b/index/_e_Lucene90_0.tip differ diff --git a/index/_e_Lucene90_0.tmd b/index/_e_Lucene90_0.tmd new file mode 100644 index 0000000000000000000000000000000000000000..41f93e8dff6770fd61e7e136d0e4d2995f7d609a Binary files /dev/null and b/index/_e_Lucene90_0.tmd differ diff --git a/index/_f.fdm b/index/_f.fdm new file mode 100644 index 0000000000000000000000000000000000000000..61b9beb4afe5313305a2310a5e1f4c27e29551ac Binary files /dev/null and b/index/_f.fdm differ diff --git a/index/_f.fdt b/index/_f.fdt new file mode 100644 index 0000000000000000000000000000000000000000..ac01d8eabd06489e5d77169dd76ffa44bfc3baa3 Binary files /dev/null and b/index/_f.fdt differ diff --git a/index/_f.fdx b/index/_f.fdx new file mode 100644 index 0000000000000000000000000000000000000000..8862acba6cf02065a84921ecb552aeb75bb529a1 Binary files /dev/null and b/index/_f.fdx differ diff --git a/index/_f.fnm b/index/_f.fnm new file mode 100644 index 0000000000000000000000000000000000000000..3145f0ef9470db6ad05b0f4bafb528bf7f936cdf Binary files /dev/null and b/index/_f.fnm differ diff --git a/index/_f.nvd b/index/_f.nvd new file mode 100644 index 0000000000000000000000000000000000000000..3661480f2f798b9fc7390230c99a6dc0c1718781 Binary files /dev/null and b/index/_f.nvd differ diff --git a/index/_f.nvm b/index/_f.nvm new file mode 100644 index 0000000000000000000000000000000000000000..f532b7f80e4115225c1cd3c4b1e5ab8edecc5b28 Binary files /dev/null and b/index/_f.nvm differ diff --git a/index/_f.si b/index/_f.si new file mode 100644 index 0000000000000000000000000000000000000000..afc11e8ee19889f601d88489a67f808c022303ff Binary files /dev/null and b/index/_f.si differ diff --git a/index/_f_Lucene90_0.doc b/index/_f_Lucene90_0.doc new file mode 100644 index 0000000000000000000000000000000000000000..7ae37621d4fc93fa85bf5a6b1e6c1f63d833e35c Binary files /dev/null and b/index/_f_Lucene90_0.doc differ diff --git a/index/_f_Lucene90_0.dvd b/index/_f_Lucene90_0.dvd new file mode 100644 index 0000000000000000000000000000000000000000..5a947e5ac8ad16994d49ba38e4016abaa29ff611 Binary files /dev/null and b/index/_f_Lucene90_0.dvd differ diff --git a/index/_f_Lucene90_0.dvm b/index/_f_Lucene90_0.dvm new file mode 100644 index 0000000000000000000000000000000000000000..d80bee5604226dbab2ade0c7ac5e2e6011f495f3 Binary files /dev/null and b/index/_f_Lucene90_0.dvm differ diff --git a/index/_f_Lucene90_0.pos b/index/_f_Lucene90_0.pos new file mode 100644 index 0000000000000000000000000000000000000000..4c264eaac83ef60c64ca947d54515f2b36041f9e --- /dev/null +++ b/index/_f_Lucene90_0.pos @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e48a050c215696f91bd8560cdd2ff18a40980ae17d782d160ec7ac18852d4258 +size 1277898 diff --git a/index/_f_Lucene90_0.tim b/index/_f_Lucene90_0.tim new file mode 100644 index 0000000000000000000000000000000000000000..837aa1d6a84d635012ac75302bc6887c7b6ca284 Binary files /dev/null and b/index/_f_Lucene90_0.tim differ diff --git a/index/_f_Lucene90_0.tip b/index/_f_Lucene90_0.tip new file mode 100644 index 0000000000000000000000000000000000000000..e507d49f4e6b51c1265aa6f9b84ff901ec02960e Binary files /dev/null and b/index/_f_Lucene90_0.tip differ diff --git a/index/_f_Lucene90_0.tmd b/index/_f_Lucene90_0.tmd new file mode 100644 index 0000000000000000000000000000000000000000..e41cc7a2aa0a8fc14838ddcd1090808115d94d1a Binary files /dev/null and b/index/_f_Lucene90_0.tmd differ diff --git a/index/segments_4 b/index/segments_4 new file mode 100644 index 0000000000000000000000000000000000000000..38729e3ae6f7b3a294aa545b615275ffa8f697d1 Binary files /dev/null and b/index/segments_4 differ diff --git a/index/write.lock b/index/write.lock new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/packages.txt b/packages.txt new file mode 100644 index 0000000000000000000000000000000000000000..7083f85c3741aaa661aabe2d5048ef5ebdb13b71 --- /dev/null +++ b/packages.txt @@ -0,0 +1 @@ +openjdk-11-jdk diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..29ed591f65d9215d9a4078df34c760758143bd82 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,4 @@ +pyserini +datasets +faiss-cpu +torch \ No newline at end of file