Spaces:
Runtime error
Runtime error
preloading models brute force
Browse files
app.py
CHANGED
|
@@ -2,48 +2,51 @@ import gradio as gr
|
|
| 2 |
from transformers import AutoModel, AutoTokenizer
|
| 3 |
from sklearn.neighbors import NearestNeighbors
|
| 4 |
|
| 5 |
-
available_models = ['
|
| 6 |
-
'
|
| 7 |
|
| 8 |
-
|
| 9 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
|
| 11 |
-
for MODEL in available_models:
|
| 12 |
-
models[MODEL] = AutoModel.from_pretrained(MODEL)
|
| 13 |
-
tokenizers[MODEL] = AutoTokenizer.from_pretrained(MODEL)
|
| 14 |
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
algorithm='auto',
|
| 26 |
-
n_jobs=3)
|
| 27 |
-
|
| 28 |
-
nbrs = knn_model.fit(embedding_matrix)
|
| 29 |
-
|
| 30 |
-
distances, indices = nbrs.kneighbors(embedding_matrix)
|
| 31 |
-
|
| 32 |
-
return distances,indices,tokenizers[MODEL]
|
| 33 |
|
| 34 |
|
| 35 |
title = "How does a word's meaning change with time?"
|
| 36 |
|
| 37 |
def topk(word,model):
|
| 38 |
outs = []
|
| 39 |
-
distances, indices, tokenizer = topk_model(model)
|
| 40 |
-
|
| 41 |
-
index = tokenizer.encode(f'{word}')
|
| 42 |
-
for i in indices[index[1]]:
|
| 43 |
-
outs.append(tokenizer.decode(i))
|
| 44 |
-
print(tokenizer.decode(i))
|
| 45 |
|
| 46 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
|
| 48 |
# with gr.Blocks() as demo:
|
| 49 |
# gr.Markdown(f" # {title}")
|
|
|
|
| 2 |
from transformers import AutoModel, AutoTokenizer
|
| 3 |
from sklearn.neighbors import NearestNeighbors
|
| 4 |
|
| 5 |
+
available_models = ['2019',
                    '2020']


def _preload(checkpoint):
    """Load *checkpoint* and precompute, for every vocabulary token, its
    500 nearest neighbours (cosine distance) in the input-embedding space.

    Returns a 7-tuple (model, tokenizer, embedding_matrix, knn_model,
    nbrs, distances, indices) so that every module-level name bound by
    the previous copy-pasted version stays available to the rest of the
    app.
    """
    model = AutoModel.from_pretrained(checkpoint)
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    # Static (non-contextual) input embeddings, one row per vocabulary token.
    embedding_matrix = model.embeddings.word_embeddings.weight
    embedding_matrix = embedding_matrix.detach().numpy()
    knn_model = NearestNeighbors(n_neighbors=500,
                                 metric='cosine',
                                 algorithm='auto',
                                 n_jobs=3)
    nbrs = knn_model.fit(embedding_matrix)
    distances, indices = nbrs.kneighbors(embedding_matrix)
    return model, tokenizer, embedding_matrix, knn_model, nbrs, distances, indices


# Brute-force preload of both year checkpoints at import time.  The tuple
# unpacking preserves every global name the duplicated original bound, so
# topk() and any interactive use keep working unchanged.
(model_2019, tokenizers_2019, embedding_matrix_2019, knn_model_2019,
 nbrs_2019, distances_2019, indices_2019) = _preload(
    'cardiffnlp/twitter-roberta-base-2019-90m')

(model_2020, tokenizers_2020, embedding_matrix_2020, knn_model_2020,
 nbrs_2020, distances_2020, indices_2020) = _preload(
    'cardiffnlp/twitter-roberta-base-jun2020')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
|
| 31 |
|
| 32 |
# Heading for the demo UI; referenced by the (currently commented-out)
# gr.Markdown(f" # {title}") in the Blocks layout below.
title = "How does a word's meaning change with time?"
|
| 33 |
|
| 34 |
def topk(word,model):
    """Return the 500 vocabulary tokens nearest to *word* in the selected
    year's embedding space.

    Parameters
    ----------
    word : str
        Query word; only its first sub-token is looked up.
    model : str
        Which preloaded checkpoint to use: '2019' or '2020'.

    Returns
    -------
    list[str]
        Decoded nearest-neighbour tokens, also echoed to stdout.  Empty
        for an unrecognized *model* (the original fell through and
        returned None here).
    """
    # Dispatch over the resources preloaded at import time instead of
    # duplicating the whole lookup per year.
    resources = {
        '2019': (tokenizers_2019, indices_2019),
        '2020': (tokenizers_2020, indices_2020),
    }
    outs = []
    if model not in resources:
        return outs
    tokenizer, indices = resources[model]
    # encode() wraps the input in special tokens, so position 1 holds the
    # first sub-token of *word*.  NOTE(review): a word that splits into
    # several sub-tokens is represented by its first piece only — confirm
    # this is intended.
    token_ids = tokenizer.encode(f'{word}')
    for neighbour_id in indices[token_ids[1]]:
        decoded = tokenizer.decode(neighbour_id)  # decode once, use twice
        outs.append(decoded)
        print(decoded)
    return outs
|
| 50 |
|
| 51 |
# with gr.Blocks() as demo:
|
| 52 |
# gr.Markdown(f" # {title}")
|