Commit ·
d8780ff
1
Parent(s): 5b794a2
Update README.md
Browse files
README.md
CHANGED
|
@@ -58,5 +58,66 @@ Results:
|
|
| 58 |
The model was fine-tuned on the [Ukrainian (UD) corpus](https://universaldependencies.org/treebanks/uk_iu/index.html), released by the [non-profit organization Institute for Ukrainian](https://mova.institute).
|
| 59 |
Training code is also available [here](https://github.com/lang-uk/flair-pos).
|
| 60 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
|
| 62 |
Copyright: [Dmytro Chaplynskyi](https://twitter.com/dchaplinsky), [lang-uk project](https://lang.org.ua), 2022
|
|
|
|
| 58 |
The model was fine-tuned on the [Ukrainian (UD) corpus](https://universaldependencies.org/treebanks/uk_iu/index.html), released by the [non-profit organization Institute for Ukrainian](https://mova.institute).
|
| 59 |
Training code is also available [here](https://github.com/lang-uk/flair-pos).
|
| 60 |
|
| 61 |
+
## [Usage demo](https://github.com/egorsmkv/flair-nlp-uk/blob/main/part_of_speech.py)
|
| 62 |
+
```python
|
| 63 |
+
from flair.data import Sentence
|
| 64 |
+
from flair.models import SequenceTagger
|
| 65 |
+
|
| 66 |
+
from pprint import pprint
|
| 67 |
+
|
| 68 |
+
tagger = SequenceTagger.load("dchaplinsky/flair-uk-pos")
|
| 69 |
+
|
| 70 |
+
sentence = Sentence("Я люблю Україну. Моє імʼя Марія Шевченко, я навчаюся в Київській політехніці.")
|
| 71 |
+
|
| 72 |
+
tagger.predict(sentence)
|
| 73 |
+
|
| 74 |
+
print(sentence)
|
| 75 |
+
print('---')
|
| 76 |
+
|
| 77 |
+
print('The following POS tags are found:')
|
| 78 |
+
|
| 79 |
+
pos_items = []
|
| 80 |
+
for label in sentence.get_labels():
|
| 81 |
+
all_labels = []
|
| 82 |
+
keys = label.data_point.annotation_layers.keys()
|
| 83 |
+
|
| 84 |
+
for key in keys:
|
| 85 |
+
all_labels.extend(
|
| 86 |
+
[
|
| 87 |
+
{'label': label.value, 'score': round(label.score, 4)}
|
| 88 |
+
for label in label.data_point.get_labels(key)
|
| 89 |
+
if label.data_point == label
|
| 90 |
+
]
|
| 91 |
+
)
|
| 92 |
+
|
| 93 |
+
pos_items.append({
|
| 94 |
+
'text': label.data_point.text,
|
| 95 |
+
'all_labels': all_labels,
|
| 96 |
+
})
|
| 97 |
+
|
| 98 |
+
pprint(pos_items)
|
| 99 |
+
|
| 100 |
+
# Result:
|
| 101 |
+
"""
|
| 102 |
+
Sentence: "Я люблю Україну . Моє імʼя Марія Шевченко , я навчаюся в Київській політехніці ." → ["Я"/PRON, "люблю"/VERB, "Україну"/PROPN, "."/PUNCT, "Моє"/DET, "імʼя"/NOUN, "Марія"/PROPN, "Шевченко"/PROPN, ","/PUNCT, "я"/PRON, "навчаюся"/VERB, "в"/ADP, "Київській"/ADJ, "політехніці"/NOUN, "."/PUNCT]
|
| 103 |
+
---
|
| 104 |
+
The following POS tags are found:
|
| 105 |
+
[{'all_labels': [{'label': 'PRON', 'score': 1.0}], 'text': 'Я'},
|
| 106 |
+
{'all_labels': [{'label': 'VERB', 'score': 1.0}], 'text': 'люблю'},
|
| 107 |
+
{'all_labels': [{'label': 'PROPN', 'score': 1.0}], 'text': 'Україну'},
|
| 108 |
+
{'all_labels': [{'label': 'PUNCT', 'score': 1.0}], 'text': '.'},
|
| 109 |
+
{'all_labels': [{'label': 'DET', 'score': 0.9999}], 'text': 'Моє'},
|
| 110 |
+
{'all_labels': [{'label': 'NOUN', 'score': 1.0}], 'text': 'імʼя'},
|
| 111 |
+
{'all_labels': [{'label': 'PROPN', 'score': 1.0}], 'text': 'Марія'},
|
| 112 |
+
{'all_labels': [{'label': 'PROPN', 'score': 1.0}], 'text': 'Шевченко'},
|
| 113 |
+
{'all_labels': [{'label': 'PUNCT', 'score': 1.0}], 'text': ','},
|
| 114 |
+
{'all_labels': [{'label': 'PRON', 'score': 1.0}], 'text': 'я'},
|
| 115 |
+
{'all_labels': [{'label': 'VERB', 'score': 1.0}], 'text': 'навчаюся'},
|
| 116 |
+
{'all_labels': [{'label': 'ADP', 'score': 1.0}], 'text': 'в'},
|
| 117 |
+
{'all_labels': [{'label': 'ADJ', 'score': 1.0}], 'text': 'Київській'},
|
| 118 |
+
{'all_labels': [{'label': 'NOUN', 'score': 1.0}], 'text': 'політехніці'},
|
| 119 |
+
{'all_labels': [{'label': 'PUNCT', 'score': 1.0}], 'text': '.'}]
|
| 120 |
+
"""
|
| 121 |
+
```
|
| 122 |
|
| 123 |
Copyright: [Dmytro Chaplynskyi](https://twitter.com/dchaplinsky), [lang-uk project](https://lang.org.ua), 2022
|