model = gpt2, total batch size = 40, train num epochs = 10, fp16 = True, max seq length = 40, eval_acc = 0.836, eval_loss = 1.0890175614092086