Section3 Pt

zh-CN / chapter6 / hf-notebooks / course

快速标记器的特殊能力 (PyTorch)

Install the Transformers, Datasets, and Evaluate libraries to run this notebook.

[ ]
[ ]
<class 'transformers.tokenization_utils_base.BatchEncoding'>
[ ]
True
[ ]
True
[ ]
['[CLS]', 'My', 'name', 'is', 'S', '##yl', '##va', '##in', 'and', 'I', 'work', 'at', 'Hu', '##gging', 'Face', 'in',
 'Brooklyn', '.', '[SEP]']
[ ]
[None, 0, 1, 2, 3, 3, 3, 3, 4, 5, 6, 7, 8, 8, 9, 10, 11, 12, None]
[ ]
Sylvain
[ ]
[{'entity': 'I-PER', 'score': 0.9993828, 'index': 4, 'word': 'S', 'start': 11, 'end': 12},
 {'entity': 'I-PER', 'score': 0.99815476, 'index': 5, 'word': '##yl', 'start': 12, 'end': 14},
 {'entity': 'I-PER', 'score': 0.99590725, 'index': 6, 'word': '##va', 'start': 14, 'end': 16},
 {'entity': 'I-PER', 'score': 0.9992327, 'index': 7, 'word': '##in', 'start': 16, 'end': 18},
 {'entity': 'I-ORG', 'score': 0.97389334, 'index': 12, 'word': 'Hu', 'start': 33, 'end': 35},
 {'entity': 'I-ORG', 'score': 0.976115, 'index': 13, 'word': '##gging', 'start': 35, 'end': 40},
 {'entity': 'I-ORG', 'score': 0.98879766, 'index': 14, 'word': 'Face', 'start': 41, 'end': 45},
 {'entity': 'I-LOC', 'score': 0.99321055, 'index': 16, 'word': 'Brooklyn', 'start': 49, 'end': 57}]
[ ]
[{'entity_group': 'PER', 'score': 0.9981694, 'word': 'Sylvain', 'start': 11, 'end': 18},
 {'entity_group': 'ORG', 'score': 0.97960204, 'word': 'Hugging Face', 'start': 33, 'end': 45},
 {'entity_group': 'LOC', 'score': 0.99321055, 'word': 'Brooklyn', 'start': 49, 'end': 57}]
[ ]
[ ]
torch.Size([1, 19])
torch.Size([1, 19, 9])
[ ]
[0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 6, 6, 6, 0, 8, 0, 0]
[ ]
{0: 'O',
 1: 'B-MISC',
 2: 'I-MISC',
 3: 'B-PER',
 4: 'I-PER',
 5: 'B-ORG',
 6: 'I-ORG',
 7: 'B-LOC',
 8: 'I-LOC'}
[ ]
[{'entity': 'I-PER', 'score': 0.9993828, 'index': 4, 'word': 'S'},
 {'entity': 'I-PER', 'score': 0.99815476, 'index': 5, 'word': '##yl'},
 {'entity': 'I-PER', 'score': 0.99590725, 'index': 6, 'word': '##va'},
 {'entity': 'I-PER', 'score': 0.9992327, 'index': 7, 'word': '##in'},
 {'entity': 'I-ORG', 'score': 0.97389334, 'index': 12, 'word': 'Hu'},
 {'entity': 'I-ORG', 'score': 0.976115, 'index': 13, 'word': '##gging'},
 {'entity': 'I-ORG', 'score': 0.98879766, 'index': 14, 'word': 'Face'},
 {'entity': 'I-LOC', 'score': 0.99321055, 'index': 16, 'word': 'Brooklyn'}]
[ ]
[(0, 0), (0, 2), (3, 7), (8, 10), (11, 12), (12, 14), (14, 16), (16, 18), (19, 22), (23, 24), (25, 29), (30, 32),
 (33, 35), (35, 40), (41, 45), (46, 48), (49, 57), (57, 58), (0, 0)]
[ ]
yl
[ ]
[{'entity': 'I-PER', 'score': 0.9993828, 'index': 4, 'word': 'S', 'start': 11, 'end': 12},
 {'entity': 'I-PER', 'score': 0.99815476, 'index': 5, 'word': '##yl', 'start': 12, 'end': 14},
 {'entity': 'I-PER', 'score': 0.99590725, 'index': 6, 'word': '##va', 'start': 14, 'end': 16},
 {'entity': 'I-PER', 'score': 0.9992327, 'index': 7, 'word': '##in', 'start': 16, 'end': 18},
 {'entity': 'I-ORG', 'score': 0.97389334, 'index': 12, 'word': 'Hu', 'start': 33, 'end': 35},
 {'entity': 'I-ORG', 'score': 0.976115, 'index': 13, 'word': '##gging', 'start': 35, 'end': 40},
 {'entity': 'I-ORG', 'score': 0.98879766, 'index': 14, 'word': 'Face', 'start': 41, 'end': 45},
 {'entity': 'I-LOC', 'score': 0.99321055, 'index': 16, 'word': 'Brooklyn', 'start': 49, 'end': 57}]
[ ]
Hugging Face
[ ]
[{'entity_group': 'PER', 'score': 0.9981694, 'word': 'Sylvain', 'start': 11, 'end': 18},
 {'entity_group': 'ORG', 'score': 0.97960204, 'word': 'Hugging Face', 'start': 33, 'end': 45},
 {'entity_group': 'LOC', 'score': 0.99321055, 'word': 'Brooklyn', 'start': 49, 'end': 57}]