-
Notifications
You must be signed in to change notification settings - Fork 804
Expand file tree
/
Copy pathsimilarity.py
More file actions
73 lines (52 loc) · 1.87 KB
/
similarity.py
File metadata and controls
73 lines (52 loc) · 1.87 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
"""
Basic similarity search example. Used in the original txtai demo.
Requires streamlit to be installed.
pip install streamlit
"""
import os
import streamlit as st
from txtai.embeddings import Embeddings
class Application:
    """
    Main application.

    Holds a txtai Embeddings instance and renders a Streamlit page that shows the
    row of user-supplied text that best matches a query.
    """

    def __init__(self):
        """
        Creates a new application.
        """

        # Create embeddings model, backed by sentence-transformers & transformers
        self.embeddings = Embeddings({"path": "sentence-transformers/nli-mpnet-base-v2"})

    @staticmethod
    def _rows(text):
        """
        Splits text area content into candidate rows.

        Blank and whitespace-only lines are dropped so they are never scored or
        returned as a match. Line content is otherwise left untouched.

        Args:
            text: raw text, one candidate row per line

        Returns:
            list of non-blank lines
        """

        # splitlines also copes with "\r\n" line endings pasted into the text area
        return [line for line in text.splitlines() if line.strip()]

    def run(self):
        """
        Runs a Streamlit application.
        """

        st.title("Similarity Search")
        st.markdown("This application runs a basic similarity search that identifies the best matching row for a query.")

        data = [
            "US tops 5 million confirmed virus cases",
            "Canada's last fully intact ice shelf has suddenly collapsed, forming a Manhattan-sized iceberg",
            "Beijing mobilises invasion craft along coast as Taiwan tensions escalate",
            "The National Park Service warns against sacrificing slower friends in a bear attack",
            "Maine man wins $1M from $25 lottery ticket",
            "Make huge profits without work, earn up to $100,000 a day",
        ]

        # The defaults above only seed the text area, the user can edit them freely
        data = st.text_area("Data", value="\n".join(data))
        query = st.text_input("Query")

        data = self._rows(data)

        # Nothing to search until there is both a query and at least one row
        if query and data:
            # Get index of best section that best matches query
            # First element of the first result is used as an index into data
            uid = self.embeddings.similarity(query, data)[0][0]
            st.write(data[uid])
def _cache_resource(function):
    """
    Wraps a function with Streamlit's caching for shared, uncopied objects.

    Uses st.cache_resource when the installed Streamlit provides it. Otherwise falls
    back to the legacy st.cache(allow_output_mutation=True), which st.cache_resource
    replaces.

    Args:
        function: function to cache

    Returns:
        cached function
    """

    if hasattr(st, "cache_resource"):
        return st.cache_resource(function)

    # Older Streamlit releases only ship the legacy caching API
    return st.cache(allow_output_mutation=True)(function)


@_cache_resource
def create():
    """
    Creates and caches a Streamlit application.

    Returns:
        Application
    """

    return Application()
if __name__ == "__main__":
    # Set the tokenizers parallelism flag to "false" before the application is built
    os.environ.update(TOKENIZERS_PARALLELISM="false")

    # Build the application through create() and render the page
    create().run()