import datasets
from transformers import AutoTokenizer
dataset = datasets.load_dataset(  # <1>
    "rotten_tomatoes",  # <1>
    split="train",  # <1>
)  # <1>
tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
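# Illustrative aside (an assumption, not part of the original listing):
# calling the tokenizer on a string returns the encoder inputs BERT expects,
# e.g. tokenizer("a great movie")
# -> {'input_ids': [...], 'token_type_ids': [...], 'attention_mask': [...]}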
dataset = dataset.map(  # <2>
    lambda examples: tokenizer(examples["text"]),  # <2>
    batched=True,  # <2>
)  # <2>
# <3>
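# A quick sanity check (an illustrative sketch, not part of the original
# listing; assumes the standard rotten_tomatoes schema with "text" and
# "label" columns):
print(dataset)                       # features now include input_ids, token_type_ids, attention_mask
print(dataset[0]["input_ids"][:10])  # first ten token IDs of the first review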