Export
预处理数据 (TensorFlow)
Install the Transformers, Datasets, and Evaluate libraries to run this notebook.
[ ]
[ ]
[ ]
DatasetDict({
    train: Dataset({
        features: ['sentence1', 'sentence2', 'label', 'idx'],
        num_rows: 3668
    })
    validation: Dataset({
        features: ['sentence1', 'sentence2', 'label', 'idx'],
        num_rows: 408
    })
    test: Dataset({
        features: ['sentence1', 'sentence2', 'label', 'idx'],
        num_rows: 1725
    })
})
[ ]
{'idx': 0,
 'label': 1,
 'sentence1': 'Amrozi accused his brother , whom he called " the witness " , of deliberately distorting his evidence .',
 'sentence2': 'Referring to him as only " the witness " , Amrozi accused his brother of deliberately distorting his evidence .'}
[ ]
{'sentence1': Value(dtype='string', id=None),
 'sentence2': Value(dtype='string', id=None),
 'label': ClassLabel(num_classes=2, names=['not_equivalent', 'equivalent'], names_file=None, id=None),
 'idx': Value(dtype='int32', id=None)}
[ ]
[ ]
{
  'input_ids': [101, 2023, 2003, 1996, 2034, 6251, 1012, 102, 2023, 2003, 1996, 2117, 2028, 1012, 102],
  'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1],
  'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
}
[ ]
['[CLS]', 'this', 'is', 'the', 'first', 'sentence', '.', '[SEP]', 'this', 'is', 'the', 'second', 'one', '.', '[SEP]']
[ ]
[ ]
[ ]
DatasetDict({
    train: Dataset({
        features: ['attention_mask', 'idx', 'input_ids', 'label', 'sentence1', 'sentence2', 'token_type_ids'],
        num_rows: 3668
    })
    validation: Dataset({
        features: ['attention_mask', 'idx', 'input_ids', 'label', 'sentence1', 'sentence2', 'token_type_ids'],
        num_rows: 408
    })
    test: Dataset({
        features: ['attention_mask', 'idx', 'input_ids', 'label', 'sentence1', 'sentence2', 'token_type_ids'],
        num_rows: 1725
    })
})
[ ]
[ ]
[50, 59, 47, 67, 59, 50, 62, 32]
[ ]
{'attention_mask': TensorShape([8, 67]),
 'input_ids': TensorShape([8, 67]),
 'token_type_ids': TensorShape([8, 67]),
 'labels': TensorShape([8])}
[ ]