Spaces:
Runtime error
Runtime error
Commit
·
63fd32a
1
Parent(s):
5354eb3
Update app.py
Browse files
app.py
CHANGED
|
@@ -54,20 +54,24 @@ sp = en_core_sci_lg.load()
|
|
| 54 |
all_stopwords = sp.Defaults.stop_words
|
| 55 |
|
| 56 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
def remove_stopwords(sen):
|
| 58 |
sen_new = " ".join([i for i in sen if i not in stop_words])
|
| 59 |
return sen_new
|
| 60 |
|
| 61 |
|
| 62 |
|
| 63 |
-
def keyphrase_generator(article_link, model_1, model_2, max_num_keywords
|
| 64 |
-
word_embedding_model = models.Transformer(model_3)
|
| 65 |
-
pooling_model = models.Pooling(word_embedding_model.get_word_embedding_dimension(),
|
| 66 |
-
pooling_mode_mean_tokens=True,
|
| 67 |
-
pooling_mode_cls_token=False,
|
| 68 |
-
pooling_mode_max_tokens=False)
|
| 69 |
|
| 70 |
-
embedder = SentenceTransformer(modules=[word_embedding_model, pooling_model])
|
| 71 |
element=[]
|
| 72 |
cluster_list_final=[]
|
| 73 |
comb_list=[]
|
|
@@ -265,18 +269,13 @@ igen_pubmed = gr.Interface(keyphrase_generator,
|
|
| 265 |
type="value",
|
| 266 |
default='sentence-transformers/all-mpnet-base-v1',
|
| 267 |
label="Select any SBERT model for keyphrases from the list below"),
|
| 268 |
-
gr.inputs.Slider(minimum=5, maximum=30, step=1, default=10, label="Max Keywords"),
|
| 269 |
-
gr.inputs.Dropdown(choices=['cambridgeltl/SapBERT-from-PubMedBERT-fulltext',
|
| 270 |
-
'cambridgeltl/SapBERT-from-PubMedBERT-fulltext-mean-token'],
|
| 271 |
-
type="value",
|
| 272 |
-
default='cambridgeltl/SapBERT-from-PubMedBERT-fulltext',
|
| 273 |
-
label="Select any SapBERT model for clustering from the list below")],
|
| 274 |
outputs=gr.outputs.Dataframe(type="auto", label="dataframe",max_cols=None, overflow_row_behaviour="paginate"),
|
| 275 |
theme="peach",
|
| 276 |
title="Scientific Article Keyphrase Generator", description="Generates the keyphrases from an article which best describes the article.",
|
| 277 |
article= "The work is based the paper <a href=https://dl.acm.org/doi/10.1145/3487664.3487701>provided here</a>."
|
| 278 |
"\t It uses the TextRank algorithm with SBERT to first find the top sentences and then extracts the keyphrases from those sentences using scispaCy and SBERT."
|
| 279 |
-
"\t The application then uses a <a href=https://arxiv.org/abs/2010.11784>UMLS based Bert model</a> to cluster the keyphrases using K-means clustering method and finally create a boolean query. After that the top 10 titles and abstracts are retrieved from PubMed database and displayed according to relevancy.
|
| 280 |
"\t The list of SBERT models required in the textboxes can be found in <a href=www.sbert.net/docs/pretrained_models.html>SBERT Pre-trained models hub</a>."
|
| 281 |
"\t The default model names are provided which can be changed from the list of pretrained models. "
|
| 282 |
"\t The value of keyphrases can be changed. The default value is 10, minimum is 5 and a maximum value of 30.")
|
|
|
|
| 54 |
all_stopwords = sp.Defaults.stop_words
|
| 55 |
|
| 56 |
|
| 57 |
+
|
| 58 |
+
word_embedding_model = models.Transformer('cambridgeltl/SapBERT-from-PubMedBERT-fulltext')
|
| 59 |
+
pooling_model = models.Pooling(word_embedding_model.get_word_embedding_dimension(),
|
| 60 |
+
pooling_mode_mean_tokens=True,
|
| 61 |
+
pooling_mode_cls_token=False,
|
| 62 |
+
pooling_mode_max_tokens=False)
|
| 63 |
+
|
| 64 |
+
embedder = SentenceTransformer(modules=[word_embedding_model, pooling_model])
|
| 65 |
+
|
| 66 |
+
|
| 67 |
def remove_stopwords(sen):
|
| 68 |
sen_new = " ".join([i for i in sen if i not in stop_words])
|
| 69 |
return sen_new
|
| 70 |
|
| 71 |
|
| 72 |
|
| 73 |
+
def keyphrase_generator(article_link, model_1, model_2, max_num_keywords):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 74 |
|
|
|
|
| 75 |
element=[]
|
| 76 |
cluster_list_final=[]
|
| 77 |
comb_list=[]
|
|
|
|
| 269 |
type="value",
|
| 270 |
default='sentence-transformers/all-mpnet-base-v1',
|
| 271 |
label="Select any SBERT model for keyphrases from the list below"),
|
| 272 |
+
gr.inputs.Slider(minimum=5, maximum=30, step=1, default=10, label="Max Keywords")],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 273 |
outputs=gr.outputs.Dataframe(type="auto", label="dataframe",max_cols=None, overflow_row_behaviour="paginate"),
|
| 274 |
theme="peach",
|
| 275 |
title="Scientific Article Keyphrase Generator", description="Generates the keyphrases from an article which best describes the article.",
|
| 276 |
article= "The work is based the paper <a href=https://dl.acm.org/doi/10.1145/3487664.3487701>provided here</a>."
|
| 277 |
"\t It uses the TextRank algorithm with SBERT to first find the top sentences and then extracts the keyphrases from those sentences using scispaCy and SBERT."
|
| 278 |
+
"\t The application then uses a <a href=https://arxiv.org/abs/2010.11784>UMLS based Bert model</a> to cluster the keyphrases using K-means clustering method and finally create a boolean query. After that the top 10 titles and abstracts are retrieved from PubMed database and displayed according to relevancy. "
|
| 279 |
"\t The list of SBERT models required in the textboxes can be found in <a href=www.sbert.net/docs/pretrained_models.html>SBERT Pre-trained models hub</a>."
|
| 280 |
"\t The default model names are provided which can be changed from the list of pretrained models. "
|
| 281 |
"\t The value of keyphrases can be changed. The default value is 10, minimum is 5 and a maximum value of 30.")
|