Section2 Tf

zh-CNhf-notebookschapter7course

标记(token)分类 (TensorFlow)

Install the Transformers, Datasets, and Evaluate libraries to run this notebook.

[ ]

You will need to set up git: adapt your email and name in the following cell.

[ ]

You will also need to be logged in to the Hugging Face Hub. Execute the following and enter your credentials.

[ ]
[ ]
[ ]
DatasetDict({
    train: Dataset({
        features: ['chunk_tags', 'id', 'ner_tags', 'pos_tags', 'tokens'],
        num_rows: 14041
    })
    validation: Dataset({
        features: ['chunk_tags', 'id', 'ner_tags', 'pos_tags', 'tokens'],
        num_rows: 3250
    })
    test: Dataset({
        features: ['chunk_tags', 'id', 'ner_tags', 'pos_tags', 'tokens'],
        num_rows: 3453
    })
})
[ ]
['EU', 'rejects', 'German', 'call', 'to', 'boycott', 'British', 'lamb', '.']
[ ]
[3, 0, 7, 0, 0, 0, 7, 0, 0]
[ ]
Sequence(feature=ClassLabel(num_classes=9, names=['O', 'B-PER', 'I-PER', 'B-ORG', 'I-ORG', 'B-LOC', 'I-LOC', 'B-MISC', 'I-MISC'], names_file=None, id=None), length=-1, id=None)
[ ]
['O', 'B-PER', 'I-PER', 'B-ORG', 'I-ORG', 'B-LOC', 'I-LOC', 'B-MISC', 'I-MISC']
[ ]
'EU    rejects German call to boycott British lamb .'
'B-ORG O       B-MISC O    O  O       B-MISC  O    O'
[ ]
[ ]
True
[ ]
['[CLS]', 'EU', 'rejects', 'German', 'call', 'to', 'boycott', 'British', 'la', '##mb', '.', '[SEP]']
[ ]
[None, 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, None]
[ ]
[ ]
[3, 0, 7, 0, 0, 0, 7, 0, 0]
[-100, 3, 0, 7, 0, 0, 0, 7, 0, 0, 0, -100]
[ ]
[ ]
[ ]
[ ]
tensor([[-100,    3,    0,    7,    0,    0,    0,    7,    0,    0,    0, -100],
        [-100,    1,    2, -100, -100, -100, -100, -100, -100, -100, -100, -100]])
[ ]
[-100, 3, 0, 7, 0, 0, 0, 7, 0, 0, 0, -100]
[-100, 1, 2, -100]
[ ]
[ ]
[ ]
[ ]
9
[ ]
[ ]
[ ]
[ ]
[ ]
[ ]
['B-ORG', 'O', 'B-MISC', 'O', 'O', 'O', 'B-MISC', 'O', 'O']
[ ]
{'MISC': {'precision': 1.0, 'recall': 0.5, 'f1': 0.67, 'number': 2},
 'ORG': {'precision': 1.0, 'recall': 1.0, 'f1': 1.0, 'number': 1},
 'overall_precision': 1.0,
 'overall_recall': 0.67,
 'overall_f1': 0.8,
 'overall_accuracy': 0.89}
[ ]
{'LOC': {'precision': 0.91, 'recall': 0.92, 'f1': 0.91, 'number': 1668},
 'MISC': {'precision': 0.70, 'recall': 0.79, 'f1': 0.74, 'number': 702},
 'ORG': {'precision': 0.85, 'recall': 0.90, 'f1': 0.88, 'number': 1661},
 'PER': {'precision': 0.95, 'recall': 0.95, 'f1': 0.95, 'number': 1617},
 'overall_precision': 0.87,
 'overall_recall': 0.91,
 'overall_f1': 0.89,
 'overall_accuracy': 0.97}
[ ]
[{'entity_group': 'PER', 'score': 0.9988506, 'word': 'Sylvain', 'start': 11, 'end': 18},
 {'entity_group': 'ORG', 'score': 0.9647625, 'word': 'Hugging Face', 'start': 33, 'end': 45},
 {'entity_group': 'LOC', 'score': 0.9986118, 'word': 'Brooklyn', 'start': 49, 'end': 57}]