app.py CHANGED

@@ -533,14 +533,21 @@ class ModeloDataset:
     special_tokens = self.tokenizer.all_special_tokens
     filtered_tokens = []
     filtered_labels = []
-
-    for token, label in zip(new_tokens, new_identificadores):
-        if token not in special_tokens:
-            filtered_tokens.append(token)
-            filtered_labels.append(label)
-
 
-
+
+    for token_linea, label_linea in zip(new_tokens, new_identificadores):
+        for token, label in zip(token_linea, label_linea):
+            if token not in special_tokens:
+                print('token:*********************************', token)
+                filtered_tokens.append(token)
+                filtered_labels.append(label)
+    print('filtered_tokens')
+    print(filtered_tokens)
+    print('filtered_labels')
+    print(filtered_labels)
+
+
+    return filtered_labels, filtered_tokens  # new_identificadores, new_tokens
 
 ###
 ### Processes the tokens generated from the input text together with the predicted tokens, to produce the per-word tokens
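For context, here is a minimal standalone sketch of the filtering step this commit introduces. It assumes, as the new nested loop implies, that new_tokens and new_identificadores are parallel lists of per-line token and label lists; under that shape, the old single zip compared each whole line-list (not individual tokens) against special_tokens, so special tokens were never actually filtered out, which is what the added inner loop fixes. The print calls in the diff look like debug traces and are omitted here. The function name filter_special_tokens and the checkpoint bert-base-multilingual-cased are illustrative assumptions, not names taken from the Space.

    # Sketch only: reproduces the commit's nested filtering loop as a
    # standalone function. Shapes and names are assumptions (see above).
    from transformers import AutoTokenizer

    def filter_special_tokens(tokenizer, new_tokens, new_identificadores):
        """Flatten per-line tokens/labels, dropping the tokenizer's special tokens."""
        special_tokens = set(tokenizer.all_special_tokens)  # e.g. [CLS], [SEP], [PAD]
        filtered_tokens, filtered_labels = [], []
        for token_linea, label_linea in zip(new_tokens, new_identificadores):
            for token, label in zip(token_linea, label_linea):
                if token not in special_tokens:
                    filtered_tokens.append(token)
                    filtered_labels.append(label)
        # Labels first, mirroring the diff's `return filtered_labels, filtered_tokens`
        return filtered_labels, filtered_tokens

    # Usage with a hypothetical batch of two tokenized lines:
    tokenizer = AutoTokenizer.from_pretrained("bert-base-multilingual-cased")
    tokens = [["[CLS]", "Hola", "mundo", "[SEP]"], ["[CLS]", "adiós", "[SEP]"]]
    labels = [["O", "O", "O", "O"], ["O", "B-MISC", "O"]]
    print(filter_special_tokens(tokenizer, tokens, labels))
    # -> (['O', 'O', 'B-MISC'], ['Hola', 'mundo', 'adiós'])

The trailing comment heads the next method, whose body falls outside this hunk: regrouping subword tokens into per-word tokens. The hunk does not show how the Space implements that step; purely as an assumption, a common approach with WordPiece-style tokenizers merges '##' continuation pieces into the preceding word and keeps the label of the word's first piece:

    # Sketch only: one common subword-to-word regrouping convention,
    # not necessarily the one app.py uses.
    def group_subwords(tokens, labels):
        words, word_labels = [], []
        for token, label in zip(tokens, labels):
            if token.startswith('##') and words:
                words[-1] += token[2:]   # merge continuation into previous word
            else:
                words.append(token)      # start a new word
                word_labels.append(label)  # keep the first piece's label
        return words, word_labels

    print(group_subwords(['Hol', '##a', 'mundo'], ['O', 'O', 'B-LOC']))
    # -> (['Hola', 'mundo'], ['O', 'B-LOC'])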