{
  "config": {
    "audio_model": "facebook/wav2vec2-base",
    "text_model": "roberta-base",
    "fusion": "cross_attention",
    "learning_rate": 1e-05,
    "batch_size": 8,
    "num_epochs": 7,
    "max_audio_length": 80000,
    "max_text_length": 128,
    "fusion_dim": 256,
    "dropout": 0.3,
    "freeze_encoders": false,
    "output_dir": "./multimodal-emotion-model",
    "hub_model_id": "hugoaslm/multimodal-emotion-recognition",
    "push_to_hub": true,
    "gradient_accumulation_steps": 4,
    "warmup_ratio": 0.1,
    "weight_decay": 0.01,
    "seed": 42,
    "mode": "multimodal",
    "run_name": "final-cross-attn-ls01",
    "label_smoothing": 0.1
  },
  "val_metrics": {
    "eval_loss": 0.42387160658836365,
    "eval_accuracy": 0.8144072036018009,
    "eval_f1_weighted": 0.814478521287098,
    "eval_f1_macro": 0.7991105813540713,
    "eval_f1_angry": 0.8958677685950414,
    "eval_f1_calm": 0.59375,
    "eval_f1_disgust": 0.8183306055646481,
    "eval_f1_fear": 0.7622377622377622,
    "eval_f1_happy": 0.8248772504091653,
    "eval_f1_neutral": 0.8297213622291022,
    "eval_f1_sad": 0.7382753403933434,
    "eval_f1_surprise": 0.9298245614035088,
    "eval_runtime": 11.0916,
    "eval_samples_per_second": 180.226,
    "eval_steps_per_second": 22.54,
    "epoch": 7.0
  },
  "test_metrics": {
    "accuracy": 0.852760736196319,
    "f1_weighted": 0.8518738111646259,
    "f1_macro": 0.8508017687676436,
    "f1_angry": 0.8666666666666667,
    "f1_disgust": 0.9230769230769231,
    "f1_fear": 0.8163265306122449,
    "f1_happy": 0.819672131147541,
    "f1_neutral": 0.8727272727272727,
    "f1_sad": 0.8571428571428571,
    "f1_surprise": 0.8
  },
  "classification_report": " precision recall f1-score support\n\n angry 0.8667 0.8667 0.8667 30\n disgust 0.9474 0.9000 0.9231 20\n fear 0.9091 0.7407 0.8163 27\n happy 0.8929 0.7576 0.8197 33\n neutral 0.8000 0.9600 0.8727 25\n sad 0.7826 0.9474 0.8571 19\n surprise 0.7273 0.8889 0.8000 9\n\n accuracy 0.8528 163\n macro avg 0.8466 0.8659 0.8508 163\nweighted avg 0.8612 0.8528 0.8519 163\n"
}