Fix Spanish/English labels
app.py CHANGED

@@ -261,12 +261,45 @@ class Model:
 
 
         if (self.idioma=='es'):
+            inputs = self.tokenizer(self.texto, return_tensors="pt",max_length=512, truncation=True)
+            with torch.no_grad():
+                outputs = self.model(**inputs)
+            logits = outputs.logits
+            predictions = torch.argmax(logits, dim=2)
 
-            …
+            predicted_token_class_ids = predictions[0].tolist()
+
+
+            predicted_tokens_classes = [self.model.config.id2label[label_id] for label_id in predicted_token_class_ids]
+            tokens = self.tokenizer.convert_ids_to_tokens(inputs.input_ids[0])
+
+            predicted_tokens_classes.pop(0)
+            predicted_tokens_classes.pop(len(predicted_tokens_classes)-1)
+
+            tokens.pop(0)
+            tokens.pop(len(tokens)-1)
+            new_tokens,ig_tokens=self.reordenacion_tokens_es(tokens,'Ġ')
 
 
         else:
-            …
+            inputs = self.tokenizer(self.texto, return_tensors="pt")
+            with torch.no_grad():
+                outputs = self.model(**inputs)
+            logits = outputs.logits
+            predictions = torch.argmax(logits, dim=2)
+
+            predicted_token_class_ids = predictions[0].tolist()
+
+
+            predicted_tokens_classes = [self.model.config.id2label[label_id] for label_id in predicted_token_class_ids]
+            tokens = self.tokenizer.convert_ids_to_tokens(inputs.input_ids[0])
+
+            predicted_tokens_classes.pop(0)
+            predicted_tokens_classes.pop(len(predicted_tokens_classes)-1)
+
+            tokens.pop(0)
+            tokens.pop(len(tokens)-1)
+            new_tokens,ig_tokens=self.reordenacion_tokens(tokens,'#')
 
         new_identificadores = self.reordenacion_identificadores(ig_tokens,predicted_tokens_classes)
         out1 = self.salida_json(new_tokens,new_identificadores)
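For context, both branches now inline the standard Hugging Face token-classification inference loop: tokenize the text, run a forward pass under torch.no_grad(), take the argmax over the label dimension, map label ids to names through model.config.id2label, and strip the special tokens at both ends before regrouping sub-words (judging by the markers passed to the helpers, the Spanish tokenizer is byte-level BPE, where 'Ġ' prefixes a new word, while the English one is WordPiece-style, where '#' marks a continuation). The sketch below shows the same flow in a self-contained form; the checkpoint name is illustrative, not the Space's actual model, and plain slicing stands in for the repo's reordenacion_* helpers.

import torch
from transformers import AutoTokenizer, AutoModelForTokenClassification

# Illustrative checkpoint only; the Space's real models are not named in this hunk.
checkpoint = "dslim/bert-base-NER"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForTokenClassification.from_pretrained(checkpoint)

texto = "My name is Clara and I live in Berkeley."
inputs = tokenizer(texto, return_tensors="pt", max_length=512, truncation=True)

with torch.no_grad():
    logits = model(**inputs).logits  # shape: (1, seq_len, num_labels)

predicted_ids = torch.argmax(logits, dim=2)[0].tolist()
labels = [model.config.id2label[i] for i in predicted_ids]
tokens = tokenizer.convert_ids_to_tokens(inputs.input_ids[0])

# Drop the leading/trailing special tokens ([CLS]/[SEP] here); this has
# the same effect as the paired pop(0)/pop(len(...)-1) calls in the diff.
tokens, labels = tokens[1:-1], labels[1:-1]

for token, label in zip(tokens, labels):
    print(token, label)

One asymmetry worth noting: only the Spanish branch passes max_length=512 with truncation=True, so a very long input on the English path can exceed the model's position limit and raise an error.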