Section2 Tf

ithf-notebookschapter3course

Processare i dati (TensorFlow)

Install the Transformers, Datasets, and Evaluate libraries to run this notebook.

[ ]
[ ]
[ ]
DatasetDict({
    train: Dataset({
        features: ['sentence1', 'sentence2', 'label', 'idx'],
        num_rows: 3668
    })
    validation: Dataset({
        features: ['sentence1', 'sentence2', 'label', 'idx'],
        num_rows: 408
    })
    test: Dataset({
        features: ['sentence1', 'sentence2', 'label', 'idx'],
        num_rows: 1725
    })
})
[ ]
{'idx': 0,
 'label': 1,
 'sentence1': 'Amrozi accused his brother , whom he called " the witness " , of deliberately distorting his evidence .',
 'sentence2': 'Referring to him as only " the witness " , Amrozi accused his brother of deliberately distorting his evidence .'}
[ ]
{'sentence1': Value(dtype='string', id=None),
 'sentence2': Value(dtype='string', id=None),
 'label': ClassLabel(num_classes=2, names=['not_equivalent', 'equivalent'], names_file=None, id=None),
 'idx': Value(dtype='int32', id=None)}
[ ]
[ ]
{
  'input_ids': [101, 2023, 2003, 1996, 2034, 6251, 1012, 102, 2023, 2003, 1996, 2117, 2028, 1012, 102],
  'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1],
  'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
}
[ ]
['[CLS]', 'this', 'is', 'the', 'first', 'sentence', '.', '[SEP]', 'this', 'is', 'the', 'second', 'one', '.', '[SEP]']
[ ]
[ ]
[ ]
DatasetDict({
    train: Dataset({
        features: ['attention_mask', 'idx', 'input_ids', 'label', 'sentence1', 'sentence2', 'token_type_ids'],
        num_rows: 3668
    })
    validation: Dataset({
        features: ['attention_mask', 'idx', 'input_ids', 'label', 'sentence1', 'sentence2', 'token_type_ids'],
        num_rows: 408
    })
    test: Dataset({
        features: ['attention_mask', 'idx', 'input_ids', 'label', 'sentence1', 'sentence2', 'token_type_ids'],
        num_rows: 1725
    })
})
[ ]
[ ]
[50, 59, 47, 67, 59, 50, 62, 32]
[ ]
{'attention_mask': TensorShape([8, 67]),
 'input_ids': TensorShape([8, 67]),
 'token_type_ids': TensorShape([8, 67]),
 'labels': TensorShape([8])}
[ ]