kash06 committed
Commit 9b3e480 · verified · 1 parent: 6d33035

Upload enghintrans.ipynb

Files changed (1)
  enghintrans.ipynb +1 -0
enghintrans.ipynb ADDED
+ {"metadata":{"kernelspec":{"language":"python","display_name":"Python 3","name":"python3"},"language_info":{"name":"python","version":"3.10.12","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"},"kaggle":{"accelerator":"nvidiaTeslaT4","dataSources":[{"sourceId":10780446,"sourceType":"datasetVersion","datasetId":6689186},{"sourceId":10784459,"sourceType":"datasetVersion","datasetId":6692220}],"dockerImageVersionId":30887,"isInternetEnabled":true,"language":"python","sourceType":"notebook","isGpuEnabled":true}},"nbformat_minor":4,"nbformat":4,"cells":[{"cell_type":"code","source":"#Checking if GPU is running or not\n!nvidia-smi","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-02-19T16:02:07.638891Z","iopub.execute_input":"2025-02-19T16:02:07.639246Z","iopub.status.idle":"2025-02-19T16:02:07.859575Z","shell.execute_reply.started":"2025-02-19T16:02:07.639213Z","shell.execute_reply":"2025-02-19T16:02:07.858562Z"}},"outputs":[{"name":"stdout","text":"Wed Feb 19 16:02:07 2025 \n+-----------------------------------------------------------------------------------------+\n| NVIDIA-SMI 560.35.03 Driver Version: 560.35.03 CUDA Version: 12.6 |\n|-----------------------------------------+------------------------+----------------------+\n| GPU Name Persistence-M | Bus-Id Disp.A | Volatile Uncorr. ECC |\n| Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. |\n| | | MIG M. |\n|=========================================+========================+======================|\n| 0 Tesla T4 Off | 00000000:00:04.0 Off | 0 |\n| N/A 45C P8 9W / 70W | 1MiB / 15360MiB | 0% Default |\n| | | N/A |\n+-----------------------------------------+------------------------+----------------------+\n| 1 Tesla T4 Off | 00000000:00:05.0 Off | 0 |\n| N/A 46C P8 10W / 70W | 1MiB / 15360MiB | 0% Default |\n| | | N/A |\n+-----------------------------------------+------------------------+----------------------+\n \n+-----------------------------------------------------------------------------------------+\n| Processes: |\n| GPU GI CI PID Type Process name GPU Memory |\n| ID ID Usage |\n|=========================================================================================|\n| No running processes found |\n+-----------------------------------------------------------------------------------------+\n","output_type":"stream"}],"execution_count":1},{"cell_type":"code","source":"#Transformer for Language Evaluation\n!pip install datasets transformers[sentencepiece] sacrebleu -q","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-02-19T16:02:23.794792Z","iopub.execute_input":"2025-02-19T16:02:23.795104Z","iopub.status.idle":"2025-02-19T16:02:28.447357Z","shell.execute_reply.started":"2025-02-19T16:02:23.795078Z","shell.execute_reply":"2025-02-19T16:02:28.446565Z"}},"outputs":[{"name":"stdout","text":"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m51.8/51.8 kB\u001b[0m \u001b[31m1.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m104.1/104.1 kB\u001b[0m \u001b[31m4.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n\u001b[?25h","output_type":"stream"}],"execution_count":2},{"cell_type":"code","source":"# Install required packages\n!pip install torch==2.5.1\n!pip install transformers datasets accelerate bitsandbytes\n!pip install unsloth\n!pip install peft\n\n#Import required 
Libraries\nimport os\nimport sys\nimport transformers\nimport torch\nfrom datasets import load_dataset, Dataset\nfrom datetime import datetime\nfrom transformers import TrainingArguments, Trainer\nfrom transformers import AutoTokenizer\nfrom transformers import MarianMTModel\n#from transformers import TFAutoModelForSeq2SeqLM\n#from transformers import DataCollatorForSeq2Seq\n#from transformers import AdamWeightDecay\n\n# Verify GPU\nprint(f\"CUDA Available: {torch.cuda.is_available()}\")\nif torch.cuda.is_available():\n print(f\"GPU Device: {torch.cuda.get_device_name(0)}\")","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-02-19T16:03:08.279375Z","iopub.execute_input":"2025-02-19T16:03:08.279725Z","iopub.status.idle":"2025-02-19T16:06:34.114816Z","shell.execute_reply.started":"2025-02-19T16:03:08.279683Z","shell.execute_reply":"2025-02-19T16:06:34.113855Z"}},"outputs":[{"name":"stdout","text":"Requirement already satisfied: torch==2.5.1 in /usr/local/lib/python3.10/dist-packages (2.5.1+cu121)\nRequirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch==2.5.1) (3.17.0)\nRequirement already satisfied: typing-extensions>=4.8.0 in /usr/local/lib/python3.10/dist-packages (from torch==2.5.1) (4.12.2)\nRequirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch==2.5.1) (3.4.2)\nRequirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch==2.5.1) (3.1.4)\nRequirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from torch==2.5.1) (2024.9.0)\nRequirement already satisfied: sympy==1.13.1 in /usr/local/lib/python3.10/dist-packages (from torch==2.5.1) (1.13.1)\nRequirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from sympy==1.13.1->torch==2.5.1) (1.3.0)\nRequirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch==2.5.1) (3.0.2)\nRequirement already satisfied: transformers in /usr/local/lib/python3.10/dist-packages (4.47.0)\nRequirement already satisfied: datasets in /usr/local/lib/python3.10/dist-packages (3.2.0)\nRequirement already satisfied: accelerate in /usr/local/lib/python3.10/dist-packages (1.2.1)\nCollecting bitsandbytes\n Downloading bitsandbytes-0.45.2-py3-none-manylinux_2_24_x86_64.whl.metadata (5.8 kB)\nRequirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from transformers) (3.17.0)\nRequirement already satisfied: huggingface-hub<1.0,>=0.24.0 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.28.1)\nRequirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (1.26.4)\nRequirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from transformers) (24.2)\nRequirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (6.0.2)\nRequirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (2024.11.6)\nRequirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from transformers) (2.32.3)\nRequirement already satisfied: tokenizers<0.22,>=0.21 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.21.0)\nRequirement already satisfied: safetensors>=0.4.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.4.5)\nRequirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.10/dist-packages (from 
transformers) (4.67.1)\nRequirement already satisfied: pyarrow>=15.0.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (19.0.0)\nRequirement already satisfied: dill<0.3.9,>=0.3.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (0.3.8)\nRequirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from datasets) (2.2.3)\nRequirement already satisfied: xxhash in /usr/local/lib/python3.10/dist-packages (from datasets) (3.5.0)\nRequirement already satisfied: multiprocess<0.70.17 in /usr/local/lib/python3.10/dist-packages (from datasets) (0.70.16)\nRequirement already satisfied: fsspec<=2024.9.0,>=2023.1.0 in /usr/local/lib/python3.10/dist-packages (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets) (2024.9.0)\nRequirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from datasets) (3.11.11)\nRequirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from accelerate) (5.9.5)\nRequirement already satisfied: torch>=1.10.0 in /usr/local/lib/python3.10/dist-packages (from accelerate) (2.5.1+cu121)\nRequirement already satisfied: aiohappyeyeballs>=2.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (2.4.4)\nRequirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.3.2)\nRequirement already satisfied: async-timeout<6.0,>=4.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (5.0.1)\nRequirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (25.1.0)\nRequirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.5.0)\nRequirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (6.1.0)\nRequirement already satisfied: propcache>=0.2.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (0.2.1)\nRequirement already satisfied: yarl<2.0,>=1.17.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.18.3)\nRequirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.24.0->transformers) (4.12.2)\nRequirement already satisfied: mkl_fft in /usr/local/lib/python3.10/dist-packages (from numpy>=1.17->transformers) (1.3.8)\nRequirement already satisfied: mkl_random in /usr/local/lib/python3.10/dist-packages (from numpy>=1.17->transformers) (1.2.4)\nRequirement already satisfied: mkl_umath in /usr/local/lib/python3.10/dist-packages (from numpy>=1.17->transformers) (0.1.1)\nRequirement already satisfied: mkl in /usr/local/lib/python3.10/dist-packages (from numpy>=1.17->transformers) (2025.0.1)\nRequirement already satisfied: tbb4py in /usr/local/lib/python3.10/dist-packages (from numpy>=1.17->transformers) (2022.0.0)\nRequirement already satisfied: mkl-service in /usr/local/lib/python3.10/dist-packages (from numpy>=1.17->transformers) (2.4.1)\nRequirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (3.4.1)\nRequirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (3.10)\nRequirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2.3.0)\nRequirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) 
(2025.1.31)\nRequirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate) (3.4.2)\nRequirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate) (3.1.4)\nRequirement already satisfied: sympy==1.13.1 in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate) (1.13.1)\nRequirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from sympy==1.13.1->torch>=1.10.0->accelerate) (1.3.0)\nRequirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2.9.0.post0)\nRequirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2025.1)\nRequirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2025.1)\nRequirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.2->pandas->datasets) (1.17.0)\nRequirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch>=1.10.0->accelerate) (3.0.2)\nRequirement already satisfied: intel-openmp>=2024 in /usr/local/lib/python3.10/dist-packages (from mkl->numpy>=1.17->transformers) (2024.2.0)\nRequirement already satisfied: tbb==2022.* in /usr/local/lib/python3.10/dist-packages (from mkl->numpy>=1.17->transformers) (2022.0.0)\nRequirement already satisfied: tcmlib==1.* in /usr/local/lib/python3.10/dist-packages (from tbb==2022.*->mkl->numpy>=1.17->transformers) (1.2.0)\nRequirement already satisfied: intel-cmplr-lib-rt in /usr/local/lib/python3.10/dist-packages (from mkl_umath->numpy>=1.17->transformers) (2024.2.0)\nRequirement already satisfied: intel-cmplr-lib-ur==2024.2.0 in /usr/local/lib/python3.10/dist-packages (from intel-openmp>=2024->mkl->numpy>=1.17->transformers) (2024.2.0)\nDownloading bitsandbytes-0.45.2-py3-none-manylinux_2_24_x86_64.whl (69.7 MB)\n\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m69.7/69.7 MB\u001b[0m \u001b[31m25.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n\u001b[?25hInstalling collected packages: bitsandbytes\nSuccessfully installed bitsandbytes-0.45.2\nCollecting unsloth\n Downloading unsloth-2025.2.12-py3-none-any.whl.metadata (57 kB)\n\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m57.4/57.4 kB\u001b[0m \u001b[31m2.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n\u001b[?25hCollecting unsloth_zoo>=2025.2.5 (from unsloth)\n Downloading unsloth_zoo-2025.2.5-py3-none-any.whl.metadata (16 kB)\nRequirement already satisfied: torch>=2.4.0 in /usr/local/lib/python3.10/dist-packages (from unsloth) (2.5.1+cu121)\nCollecting xformers>=0.0.27.post2 (from unsloth)\n Downloading xformers-0.0.29.post3-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (1.0 kB)\nRequirement already satisfied: bitsandbytes in /usr/local/lib/python3.10/dist-packages (from unsloth) (0.45.2)\nCollecting triton>=3.0.0 (from unsloth)\n Downloading triton-3.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.4 kB)\nRequirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from unsloth) (24.2)\nCollecting tyro (from unsloth)\n Downloading tyro-0.9.14-py3-none-any.whl.metadata (9.4 kB)\nCollecting transformers!=4.47.0,>=4.46.1 (from unsloth)\n Downloading transformers-4.49.0-py3-none-any.whl.metadata (44 kB)\n\u001b[2K 
\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m44.0/44.0 kB\u001b[0m \u001b[31m2.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n\u001b[?25hRequirement already satisfied: datasets>=2.16.0 in /usr/local/lib/python3.10/dist-packages (from unsloth) (3.2.0)\nRequirement already satisfied: sentencepiece>=0.2.0 in /usr/local/lib/python3.10/dist-packages (from unsloth) (0.2.0)\nRequirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from unsloth) (4.67.1)\nRequirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from unsloth) (5.9.5)\nRequirement already satisfied: wheel>=0.42.0 in /usr/local/lib/python3.10/dist-packages (from unsloth) (0.45.1)\nRequirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from unsloth) (1.26.4)\nRequirement already satisfied: accelerate>=0.34.1 in /usr/local/lib/python3.10/dist-packages (from unsloth) (1.2.1)\nCollecting trl!=0.9.0,!=0.9.1,!=0.9.2,!=0.9.3,>=0.7.9 (from unsloth)\n Downloading trl-0.15.1-py3-none-any.whl.metadata (11 kB)\nRequirement already satisfied: peft!=0.11.0,>=0.7.1 in /usr/local/lib/python3.10/dist-packages (from unsloth) (0.14.0)\nRequirement already satisfied: protobuf<4.0.0 in /usr/local/lib/python3.10/dist-packages (from unsloth) (3.20.3)\nRequirement already satisfied: huggingface_hub in /usr/local/lib/python3.10/dist-packages (from unsloth) (0.28.1)\nRequirement already satisfied: hf_transfer in /usr/local/lib/python3.10/dist-packages (from unsloth) (0.1.9)\nRequirement already satisfied: diffusers in /usr/local/lib/python3.10/dist-packages (from unsloth) (0.31.0)\nRequirement already satisfied: torchvision in /usr/local/lib/python3.10/dist-packages (from unsloth) (0.20.1+cu121)\nRequirement already satisfied: pyyaml in /usr/local/lib/python3.10/dist-packages (from accelerate>=0.34.1->unsloth) (6.0.2)\nRequirement already satisfied: safetensors>=0.4.3 in /usr/local/lib/python3.10/dist-packages (from accelerate>=0.34.1->unsloth) (0.4.5)\nRequirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from datasets>=2.16.0->unsloth) (3.17.0)\nRequirement already satisfied: pyarrow>=15.0.0 in /usr/local/lib/python3.10/dist-packages (from datasets>=2.16.0->unsloth) (19.0.0)\nRequirement already satisfied: dill<0.3.9,>=0.3.0 in /usr/local/lib/python3.10/dist-packages (from datasets>=2.16.0->unsloth) (0.3.8)\nRequirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from datasets>=2.16.0->unsloth) (2.2.3)\nRequirement already satisfied: requests>=2.32.2 in /usr/local/lib/python3.10/dist-packages (from datasets>=2.16.0->unsloth) (2.32.3)\nRequirement already satisfied: xxhash in /usr/local/lib/python3.10/dist-packages (from datasets>=2.16.0->unsloth) (3.5.0)\nRequirement already satisfied: multiprocess<0.70.17 in /usr/local/lib/python3.10/dist-packages (from datasets>=2.16.0->unsloth) (0.70.16)\nRequirement already satisfied: fsspec<=2024.9.0,>=2023.1.0 in /usr/local/lib/python3.10/dist-packages (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets>=2.16.0->unsloth) (2024.9.0)\nRequirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from datasets>=2.16.0->unsloth) (3.11.11)\nRequirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface_hub->unsloth) (4.12.2)\nRequirement already satisfied: mkl_fft in /usr/local/lib/python3.10/dist-packages (from numpy->unsloth) (1.3.8)\nRequirement already satisfied: mkl_random in 
/usr/local/lib/python3.10/dist-packages (from numpy->unsloth) (1.2.4)\nRequirement already satisfied: mkl_umath in /usr/local/lib/python3.10/dist-packages (from numpy->unsloth) (0.1.1)\nRequirement already satisfied: mkl in /usr/local/lib/python3.10/dist-packages (from numpy->unsloth) (2025.0.1)\nRequirement already satisfied: tbb4py in /usr/local/lib/python3.10/dist-packages (from numpy->unsloth) (2022.0.0)\nRequirement already satisfied: mkl-service in /usr/local/lib/python3.10/dist-packages (from numpy->unsloth) (2.4.1)\nRequirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch>=2.4.0->unsloth) (3.4.2)\nRequirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch>=2.4.0->unsloth) (3.1.4)\nRequirement already satisfied: sympy==1.13.1 in /usr/local/lib/python3.10/dist-packages (from torch>=2.4.0->unsloth) (1.13.1)\nRequirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from sympy==1.13.1->torch>=2.4.0->unsloth) (1.3.0)\nRequirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers!=4.47.0,>=4.46.1->unsloth) (2024.11.6)\nRequirement already satisfied: tokenizers<0.22,>=0.21 in /usr/local/lib/python3.10/dist-packages (from transformers!=4.47.0,>=4.46.1->unsloth) (0.21.0)\nRequirement already satisfied: rich in /usr/local/lib/python3.10/dist-packages (from trl!=0.9.0,!=0.9.1,!=0.9.2,!=0.9.3,>=0.7.9->unsloth) (13.9.4)\nCollecting cut_cross_entropy (from unsloth_zoo>=2025.2.5->unsloth)\n Downloading cut_cross_entropy-25.1.1-py3-none-any.whl.metadata (9.3 kB)\nRequirement already satisfied: pillow in /usr/local/lib/python3.10/dist-packages (from unsloth_zoo>=2025.2.5->unsloth) (11.0.0)\nCollecting torch>=2.4.0 (from unsloth)\n Downloading torch-2.6.0-cp310-cp310-manylinux1_x86_64.whl.metadata (28 kB)\nCollecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=2.4.0->unsloth)\n Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\nCollecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=2.4.0->unsloth)\n Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\nCollecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=2.4.0->unsloth)\n Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)\nCollecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=2.4.0->unsloth)\n Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)\nCollecting nvidia-cublas-cu12==12.4.5.8 (from torch>=2.4.0->unsloth)\n Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\nCollecting nvidia-cufft-cu12==11.2.1.3 (from torch>=2.4.0->unsloth)\n Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\nCollecting nvidia-curand-cu12==10.3.5.147 (from torch>=2.4.0->unsloth)\n Downloading nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\nCollecting nvidia-cusolver-cu12==11.6.1.9 (from torch>=2.4.0->unsloth)\n Downloading nvidia_cusolver_cu12-11.6.1.9-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)\nCollecting nvidia-cusparse-cu12==12.3.1.170 (from torch>=2.4.0->unsloth)\n Downloading nvidia_cusparse_cu12-12.3.1.170-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)\nCollecting nvidia-cusparselt-cu12==0.6.2 (from torch>=2.4.0->unsloth)\n Downloading nvidia_cusparselt_cu12-0.6.2-py3-none-manylinux2014_x86_64.whl.metadata 
(6.8 kB)\nCollecting nvidia-nccl-cu12==2.21.5 (from torch>=2.4.0->unsloth)\n Downloading nvidia_nccl_cu12-2.21.5-py3-none-manylinux2014_x86_64.whl.metadata (1.8 kB)\nCollecting nvidia-nvtx-cu12==12.4.127 (from torch>=2.4.0->unsloth)\n Downloading nvidia_nvtx_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.7 kB)\nCollecting nvidia-nvjitlink-cu12==12.4.127 (from torch>=2.4.0->unsloth)\n Downloading nvidia_nvjitlink_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\nRequirement already satisfied: importlib-metadata in /usr/local/lib/python3.10/dist-packages (from diffusers->unsloth) (8.5.0)\nINFO: pip is looking at multiple versions of torchvision to determine which version is compatible with other requirements. This could take a while.\nCollecting torchvision (from unsloth)\n Downloading torchvision-0.21.0-cp310-cp310-manylinux1_x86_64.whl.metadata (6.1 kB)\nRequirement already satisfied: docstring-parser>=0.15 in /usr/local/lib/python3.10/dist-packages (from tyro->unsloth) (0.16)\nCollecting shtab>=1.5.6 (from tyro->unsloth)\n Downloading shtab-1.7.1-py3-none-any.whl.metadata (7.3 kB)\nRequirement already satisfied: typeguard>=4.0.0 in /usr/local/lib/python3.10/dist-packages (from tyro->unsloth) (4.4.1)\nRequirement already satisfied: aiohappyeyeballs>=2.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.16.0->unsloth) (2.4.4)\nRequirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.16.0->unsloth) (1.3.2)\nRequirement already satisfied: async-timeout<6.0,>=4.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.16.0->unsloth) (5.0.1)\nRequirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.16.0->unsloth) (25.1.0)\nRequirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.16.0->unsloth) (1.5.0)\nRequirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.16.0->unsloth) (6.1.0)\nRequirement already satisfied: propcache>=0.2.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.16.0->unsloth) (0.2.1)\nRequirement already satisfied: yarl<2.0,>=1.17.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.16.0->unsloth) (1.18.3)\nRequirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests>=2.32.2->datasets>=2.16.0->unsloth) (3.4.1)\nRequirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.32.2->datasets>=2.16.0->unsloth) (3.10)\nRequirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.32.2->datasets>=2.16.0->unsloth) (2.3.0)\nRequirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.32.2->datasets>=2.16.0->unsloth) (2025.1.31)\nRequirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.10/dist-packages (from rich->trl!=0.9.0,!=0.9.1,!=0.9.2,!=0.9.3,>=0.7.9->unsloth) (3.0.0)\nRequirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.10/dist-packages (from rich->trl!=0.9.0,!=0.9.1,!=0.9.2,!=0.9.3,>=0.7.9->unsloth) (2.19.1)\nRequirement already satisfied: zipp>=3.20 in /usr/local/lib/python3.10/dist-packages (from importlib-metadata->diffusers->unsloth) (3.21.0)\nRequirement already satisfied: MarkupSafe>=2.0 in 
/usr/local/lib/python3.10/dist-packages (from jinja2->torch>=2.4.0->unsloth) (3.0.2)\nRequirement already satisfied: intel-openmp>=2024 in /usr/local/lib/python3.10/dist-packages (from mkl->numpy->unsloth) (2024.2.0)\nRequirement already satisfied: tbb==2022.* in /usr/local/lib/python3.10/dist-packages (from mkl->numpy->unsloth) (2022.0.0)\nRequirement already satisfied: tcmlib==1.* in /usr/local/lib/python3.10/dist-packages (from tbb==2022.*->mkl->numpy->unsloth) (1.2.0)\nRequirement already satisfied: intel-cmplr-lib-rt in /usr/local/lib/python3.10/dist-packages (from mkl_umath->numpy->unsloth) (2024.2.0)\nRequirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets>=2.16.0->unsloth) (2.9.0.post0)\nRequirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets>=2.16.0->unsloth) (2025.1)\nRequirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets>=2.16.0->unsloth) (2025.1)\nRequirement already satisfied: intel-cmplr-lib-ur==2024.2.0 in /usr/local/lib/python3.10/dist-packages (from intel-openmp>=2024->mkl->numpy->unsloth) (2024.2.0)\nRequirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.10/dist-packages (from markdown-it-py>=2.2.0->rich->trl!=0.9.0,!=0.9.1,!=0.9.2,!=0.9.3,>=0.7.9->unsloth) (0.1.2)\nRequirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.2->pandas->datasets>=2.16.0->unsloth) (1.17.0)\nDownloading unsloth-2025.2.12-py3-none-any.whl (187 kB)\n\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m187.6/187.6 kB\u001b[0m \u001b[31m7.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n\u001b[?25hDownloading transformers-4.49.0-py3-none-any.whl (10.0 MB)\n\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m10.0/10.0 MB\u001b[0m \u001b[31m73.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m0:01\u001b[0m\n\u001b[?25hDownloading triton-3.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (253.1 MB)\n\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m253.1/253.1 MB\u001b[0m \u001b[31m6.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m0:00:01\u001b[0m00:01\u001b[0m\n\u001b[?25hDownloading trl-0.15.1-py3-none-any.whl (318 kB)\n\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m318.9/318.9 kB\u001b[0m \u001b[31m22.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n\u001b[?25hDownloading unsloth_zoo-2025.2.5-py3-none-any.whl (105 kB)\n\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m105.0/105.0 kB\u001b[0m \u001b[31m8.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n\u001b[?25hDownloading xformers-0.0.29.post3-cp310-cp310-manylinux_2_28_x86_64.whl (43.3 MB)\n\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m43.3/43.3 MB\u001b[0m \u001b[31m41.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n\u001b[?25hDownloading torch-2.6.0-cp310-cp310-manylinux1_x86_64.whl (766.7 MB)\n\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m766.7/766.7 MB\u001b[0m \u001b[31m1.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m0:00:01\u001b[0m00:01\u001b[0m\n\u001b[?25hDownloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl (363.4 MB)\n\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m363.4/363.4 
MB\u001b[0m \u001b[31m1.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m0:00:01\u001b[0m00:01\u001b[0m\n\u001b[?25hDownloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (13.8 MB)\n\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.8/13.8 MB\u001b[0m \u001b[31m91.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m0:01\u001b[0m\n\u001b[?25hDownloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (24.6 MB)\n\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m24.6/24.6 MB\u001b[0m \u001b[31m67.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n\u001b[?25hDownloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (883 kB)\n\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m883.7/883.7 kB\u001b[0m \u001b[31m45.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n\u001b[?25hDownloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl (664.8 MB)\n\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m664.8/664.8 MB\u001b[0m \u001b[31m2.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m0:00:01\u001b[0m00:01\u001b[0m\n\u001b[?25hDownloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl (211.5 MB)\n\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m211.5/211.5 MB\u001b[0m \u001b[31m6.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m0:00:01\u001b[0m00:01\u001b[0m\n\u001b[?25hDownloading nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_x86_64.whl (56.3 MB)\n\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m56.3/56.3 MB\u001b[0m \u001b[31m31.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n\u001b[?25hDownloading nvidia_cusolver_cu12-11.6.1.9-py3-none-manylinux2014_x86_64.whl (127.9 MB)\n\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m127.9/127.9 MB\u001b[0m \u001b[31m13.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n\u001b[?25hDownloading nvidia_cusparse_cu12-12.3.1.170-py3-none-manylinux2014_x86_64.whl (207.5 MB)\n\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m207.5/207.5 MB\u001b[0m \u001b[31m2.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m0:00:01\u001b[0m00:01\u001b[0m\n\u001b[?25hDownloading nvidia_cusparselt_cu12-0.6.2-py3-none-manylinux2014_x86_64.whl (150.1 MB)\n\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m150.1/150.1 MB\u001b[0m \u001b[31m11.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n\u001b[?25hDownloading nvidia_nccl_cu12-2.21.5-py3-none-manylinux2014_x86_64.whl (188.7 MB)\n\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m188.7/188.7 MB\u001b[0m \u001b[31m9.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m0:00:01\u001b[0m00:01\u001b[0m\n\u001b[?25hDownloading nvidia_nvjitlink_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (21.1 MB)\n\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m21.1/21.1 MB\u001b[0m \u001b[31m73.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n\u001b[?25hDownloading nvidia_nvtx_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (99 kB)\n\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m99.1/99.1 kB\u001b[0m \u001b[31m7.3 MB/s\u001b[0m eta 
\u001b[36m0:00:00\u001b[0m\n\u001b[?25hDownloading torchvision-0.21.0-cp310-cp310-manylinux1_x86_64.whl (7.2 MB)\n\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.2/7.2 MB\u001b[0m \u001b[31m87.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n\u001b[?25hDownloading tyro-0.9.14-py3-none-any.whl (116 kB)\n\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m116.4/116.4 kB\u001b[0m \u001b[31m9.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n\u001b[?25hDownloading shtab-1.7.1-py3-none-any.whl (14 kB)\nDownloading cut_cross_entropy-25.1.1-py3-none-any.whl (22 kB)\nInstalling collected packages: triton, nvidia-cusparselt-cu12, shtab, nvidia-nvtx-cu12, nvidia-nvjitlink-cu12, nvidia-nccl-cu12, nvidia-curand-cu12, nvidia-cufft-cu12, nvidia-cuda-runtime-cu12, nvidia-cuda-nvrtc-cu12, nvidia-cuda-cupti-cu12, nvidia-cublas-cu12, nvidia-cusparse-cu12, nvidia-cudnn-cu12, tyro, nvidia-cusolver-cu12, torch, cut_cross_entropy, transformers, trl, xformers, unsloth_zoo, torchvision, unsloth\n Attempting uninstall: nvidia-nvjitlink-cu12\n Found existing installation: nvidia-nvjitlink-cu12 12.6.85\n Uninstalling nvidia-nvjitlink-cu12-12.6.85:\n Successfully uninstalled nvidia-nvjitlink-cu12-12.6.85\n Attempting uninstall: nvidia-nccl-cu12\n Found existing installation: nvidia-nccl-cu12 2.23.4\n Uninstalling nvidia-nccl-cu12-2.23.4:\n Successfully uninstalled nvidia-nccl-cu12-2.23.4\n Attempting uninstall: nvidia-curand-cu12\n Found existing installation: nvidia-curand-cu12 10.3.7.77\n Uninstalling nvidia-curand-cu12-10.3.7.77:\n Successfully uninstalled nvidia-curand-cu12-10.3.7.77\n Attempting uninstall: nvidia-cufft-cu12\n Found existing installation: nvidia-cufft-cu12 11.3.0.4\n Uninstalling nvidia-cufft-cu12-11.3.0.4:\n Successfully uninstalled nvidia-cufft-cu12-11.3.0.4\n Attempting uninstall: nvidia-cuda-runtime-cu12\n Found existing installation: nvidia-cuda-runtime-cu12 12.6.77\n Uninstalling nvidia-cuda-runtime-cu12-12.6.77:\n Successfully uninstalled nvidia-cuda-runtime-cu12-12.6.77\n Attempting uninstall: nvidia-cuda-cupti-cu12\n Found existing installation: nvidia-cuda-cupti-cu12 12.6.80\n Uninstalling nvidia-cuda-cupti-cu12-12.6.80:\n Successfully uninstalled nvidia-cuda-cupti-cu12-12.6.80\n Attempting uninstall: nvidia-cublas-cu12\n Found existing installation: nvidia-cublas-cu12 12.6.4.1\n Uninstalling nvidia-cublas-cu12-12.6.4.1:\n Successfully uninstalled nvidia-cublas-cu12-12.6.4.1\n Attempting uninstall: nvidia-cusparse-cu12\n Found existing installation: nvidia-cusparse-cu12 12.5.4.2\n Uninstalling nvidia-cusparse-cu12-12.5.4.2:\n Successfully uninstalled nvidia-cusparse-cu12-12.5.4.2\n Attempting uninstall: nvidia-cudnn-cu12\n Found existing installation: nvidia-cudnn-cu12 9.6.0.74\n Uninstalling nvidia-cudnn-cu12-9.6.0.74:\n Successfully uninstalled nvidia-cudnn-cu12-9.6.0.74\n Attempting uninstall: nvidia-cusolver-cu12\n Found existing installation: nvidia-cusolver-cu12 11.7.1.2\n Uninstalling nvidia-cusolver-cu12-11.7.1.2:\n Successfully uninstalled nvidia-cusolver-cu12-11.7.1.2\n Attempting uninstall: torch\n Found existing installation: torch 2.5.1+cu121\n Uninstalling torch-2.5.1+cu121:\n Successfully uninstalled torch-2.5.1+cu121\n Attempting uninstall: transformers\n Found existing installation: transformers 4.47.0\n Uninstalling transformers-4.47.0:\n Successfully uninstalled transformers-4.47.0\n Attempting uninstall: torchvision\n Found existing installation: torchvision 0.20.1+cu121\n 
Uninstalling torchvision-0.20.1+cu121:\n Successfully uninstalled torchvision-0.20.1+cu121\n\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\nfastai 2.7.18 requires torch<2.6,>=1.10, but you have torch 2.6.0 which is incompatible.\npylibcugraph-cu12 24.10.0 requires pylibraft-cu12==24.10.*, but you have pylibraft-cu12 24.12.0 which is incompatible.\npylibcugraph-cu12 24.10.0 requires rmm-cu12==24.10.*, but you have rmm-cu12 24.12.1 which is incompatible.\ntorchaudio 2.5.1+cu121 requires torch==2.5.1, but you have torch 2.6.0 which is incompatible.\u001b[0m\u001b[31m\n\u001b[0mSuccessfully installed cut_cross_entropy-25.1.1 nvidia-cublas-cu12-12.4.5.8 nvidia-cuda-cupti-cu12-12.4.127 nvidia-cuda-nvrtc-cu12-12.4.127 nvidia-cuda-runtime-cu12-12.4.127 nvidia-cudnn-cu12-9.1.0.70 nvidia-cufft-cu12-11.2.1.3 nvidia-curand-cu12-10.3.5.147 nvidia-cusolver-cu12-11.6.1.9 nvidia-cusparse-cu12-12.3.1.170 nvidia-cusparselt-cu12-0.6.2 nvidia-nccl-cu12-2.21.5 nvidia-nvjitlink-cu12-12.4.127 nvidia-nvtx-cu12-12.4.127 shtab-1.7.1 torch-2.6.0 torchvision-0.21.0 transformers-4.49.0 triton-3.2.0 trl-0.15.1 tyro-0.9.14 unsloth-2025.2.12 unsloth_zoo-2025.2.5 xformers-0.0.29.post3\nRequirement already satisfied: peft in /usr/local/lib/python3.10/dist-packages (0.14.0)\nRequirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from peft) (1.26.4)\nRequirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from peft) (24.2)\nRequirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from peft) (5.9.5)\nRequirement already satisfied: pyyaml in /usr/local/lib/python3.10/dist-packages (from peft) (6.0.2)\nRequirement already satisfied: torch>=1.13.0 in /usr/local/lib/python3.10/dist-packages (from peft) (2.6.0)\nRequirement already satisfied: transformers in /usr/local/lib/python3.10/dist-packages (from peft) (4.49.0)\nRequirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from peft) (4.67.1)\nRequirement already satisfied: accelerate>=0.21.0 in /usr/local/lib/python3.10/dist-packages (from peft) (1.2.1)\nRequirement already satisfied: safetensors in /usr/local/lib/python3.10/dist-packages (from peft) (0.4.5)\nRequirement already satisfied: huggingface-hub>=0.25.0 in /usr/local/lib/python3.10/dist-packages (from peft) (0.28.1)\nRequirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.25.0->peft) (3.17.0)\nRequirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.25.0->peft) (2024.9.0)\nRequirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.25.0->peft) (2.32.3)\nRequirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.25.0->peft) (4.12.2)\nRequirement already satisfied: mkl_fft in /usr/local/lib/python3.10/dist-packages (from numpy>=1.17->peft) (1.3.8)\nRequirement already satisfied: mkl_random in /usr/local/lib/python3.10/dist-packages (from numpy>=1.17->peft) (1.2.4)\nRequirement already satisfied: mkl_umath in /usr/local/lib/python3.10/dist-packages (from numpy>=1.17->peft) (0.1.1)\nRequirement already satisfied: mkl in /usr/local/lib/python3.10/dist-packages (from numpy>=1.17->peft) (2025.0.1)\nRequirement already satisfied: tbb4py in 
/usr/local/lib/python3.10/dist-packages (from numpy>=1.17->peft) (2022.0.0)\nRequirement already satisfied: mkl-service in /usr/local/lib/python3.10/dist-packages (from numpy>=1.17->peft) (2.4.1)\nRequirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch>=1.13.0->peft) (3.4.2)\nRequirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch>=1.13.0->peft) (3.1.4)\nRequirement already satisfied: nvidia-cuda-nvrtc-cu12==12.4.127 in /usr/local/lib/python3.10/dist-packages (from torch>=1.13.0->peft) (12.4.127)\nRequirement already satisfied: nvidia-cuda-runtime-cu12==12.4.127 in /usr/local/lib/python3.10/dist-packages (from torch>=1.13.0->peft) (12.4.127)\nRequirement already satisfied: nvidia-cuda-cupti-cu12==12.4.127 in /usr/local/lib/python3.10/dist-packages (from torch>=1.13.0->peft) (12.4.127)\nRequirement already satisfied: nvidia-cudnn-cu12==9.1.0.70 in /usr/local/lib/python3.10/dist-packages (from torch>=1.13.0->peft) (9.1.0.70)\nRequirement already satisfied: nvidia-cublas-cu12==12.4.5.8 in /usr/local/lib/python3.10/dist-packages (from torch>=1.13.0->peft) (12.4.5.8)\nRequirement already satisfied: nvidia-cufft-cu12==11.2.1.3 in /usr/local/lib/python3.10/dist-packages (from torch>=1.13.0->peft) (11.2.1.3)\nRequirement already satisfied: nvidia-curand-cu12==10.3.5.147 in /usr/local/lib/python3.10/dist-packages (from torch>=1.13.0->peft) (10.3.5.147)\nRequirement already satisfied: nvidia-cusolver-cu12==11.6.1.9 in /usr/local/lib/python3.10/dist-packages (from torch>=1.13.0->peft) (11.6.1.9)\nRequirement already satisfied: nvidia-cusparse-cu12==12.3.1.170 in /usr/local/lib/python3.10/dist-packages (from torch>=1.13.0->peft) (12.3.1.170)\nRequirement already satisfied: nvidia-cusparselt-cu12==0.6.2 in /usr/local/lib/python3.10/dist-packages (from torch>=1.13.0->peft) (0.6.2)\nRequirement already satisfied: nvidia-nccl-cu12==2.21.5 in /usr/local/lib/python3.10/dist-packages (from torch>=1.13.0->peft) (2.21.5)\nRequirement already satisfied: nvidia-nvtx-cu12==12.4.127 in /usr/local/lib/python3.10/dist-packages (from torch>=1.13.0->peft) (12.4.127)\nRequirement already satisfied: nvidia-nvjitlink-cu12==12.4.127 in /usr/local/lib/python3.10/dist-packages (from torch>=1.13.0->peft) (12.4.127)\nRequirement already satisfied: triton==3.2.0 in /usr/local/lib/python3.10/dist-packages (from torch>=1.13.0->peft) (3.2.0)\nRequirement already satisfied: sympy==1.13.1 in /usr/local/lib/python3.10/dist-packages (from torch>=1.13.0->peft) (1.13.1)\nRequirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from sympy==1.13.1->torch>=1.13.0->peft) (1.3.0)\nRequirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers->peft) (2024.11.6)\nRequirement already satisfied: tokenizers<0.22,>=0.21 in /usr/local/lib/python3.10/dist-packages (from transformers->peft) (0.21.0)\nRequirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch>=1.13.0->peft) (3.0.2)\nRequirement already satisfied: intel-openmp>=2024 in /usr/local/lib/python3.10/dist-packages (from mkl->numpy>=1.17->peft) (2024.2.0)\nRequirement already satisfied: tbb==2022.* in /usr/local/lib/python3.10/dist-packages (from mkl->numpy>=1.17->peft) (2022.0.0)\nRequirement already satisfied: tcmlib==1.* in /usr/local/lib/python3.10/dist-packages (from tbb==2022.*->mkl->numpy>=1.17->peft) (1.2.0)\nRequirement already satisfied: intel-cmplr-lib-rt in 
In:
#Load and Initialize the Model from Hugging Face
#https://huggingface.co/Helsinki-NLP/opus-mt-en-hi
#Since the model is not supported by FastLanguageModel we are not using Unsloth for Fine tuning our model
model_checkpoint = "Helsinki-NLP/opus-mt-en-hi"
max_length = 128

In:
#Load and Assign the Dataset from Hugging Face
#source: https://huggingface.co/datasets/cfilt/iitb-english-hindi
raw_datasets = load_dataset('cfilt/iitb-english-hindi')
raw_datasets

Out:
DatasetDict({
    train: Dataset({
        features: ['translation'],
        num_rows: 1659083
    })
    validation: Dataset({
        features: ['translation'],
        num_rows: 520
    })
    test: Dataset({
        features: ['translation'],
        num_rows: 2507
    })
})

In:
#Assign the already split datasets to the variables
train_dataset = raw_datasets['train']
validation_dataset = raw_datasets['validation']
test_dataset = raw_datasets['test']
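The train split above holds about 1.66 million sentence pairs; a later cell skips it because of its size and fine-tunes on the smaller splits instead. If only part of the training data were wanted, a shuffled subset could be drawn with the datasets API, for example (illustrative sketch; the 50,000-pair size is an arbitrary choice, not from the notebook):

# Hypothetical: sample a manageable random subset of the large train split
small_train = raw_datasets['train'].shuffle(seed=42).select(range(50_000))
print(small_train)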
In:
raw_datasets['train'][0]

Out:
{'translation': {'en': 'Give your application an accessibility workout',
  'hi': 'अपने अनुप्रयोग को पहुंचनीयता व्यायाम का लाभ दें'}}

In:
train_dataset[0]

Out:
{'translation': {'en': 'Give your application an accessibility workout',
  'hi': 'अपने अनुप्रयोग को पहुंचनीयता व्यायाम का लाभ दें'}}

In:
raw_datasets['test'][0]

Out:
{'translation': {'en': 'A black box in your car?',
  'hi': 'आपकी कार में ब्लैक बॉक्स?'}}

In:
raw_datasets['validation'][0]

Out:
{'translation': {'en': 'Students of the Dattatreya city Municipal corporation secondary school demonstrated their imagination power by creating the fictitious fort "Duttgarh".',
 'hi': "महानगर पालिका अंतर्गत दत्तात्रय नगर माध्यमिक स्कूल के विद्यार्थियों ने काल्पनिक किला 'दत्तगढ़' बनाकर अपनी कल्पनाशक्ति का परिचय दिया।"}}

In:
#Load and Tokenize the Model
#source: https://huggingface.co/docs/transformers/en/internal/tokenization_utils
model_checkpoint = "Helsinki-NLP/opus-mt-en-hi"
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)

Out (stderr):
tokenization_marian.py:175: UserWarning: Recommended: pip install sacremoses.
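The warning above only concerns the optional sacremoses dependency of the Marian tokenizer; installing it as the message suggests would silence it (one-liner, not run in the cells shown):

!pip install sacremoses -q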
1]}"},"metadata":{}}],"execution_count":12},{"cell_type":"code","source":"tokenizer([\"Hello, this is a sentence\", \"This is second sentence\"])","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-02-19T16:09:23.122141Z","iopub.execute_input":"2025-02-19T16:09:23.122424Z","iopub.status.idle":"2025-02-19T16:09:23.128106Z","shell.execute_reply.started":"2025-02-19T16:09:23.122401Z","shell.execute_reply":"2025-02-19T16:09:23.127277Z"}},"outputs":[{"execution_count":13,"output_type":"execute_result","data":{"text/plain":"{'input_ids': [[12110, 2, 90, 23, 19, 8800, 0], [239, 23, 1639, 8800, 0]], 'attention_mask': [[1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1]]}"},"metadata":{}}],"execution_count":13},{"cell_type":"code","source":"#Target Tokenizer\nwith tokenizer.as_target_tokenizer():\n print(tokenizer(['अपने अनुप्रयोग को पहुंचनीयता व्यायाम का लाभ दें']))","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-02-19T16:09:25.021770Z","iopub.execute_input":"2025-02-19T16:09:25.022083Z","iopub.status.idle":"2025-02-19T16:09:25.028074Z","shell.execute_reply.started":"2025-02-19T16:09:25.022058Z","shell.execute_reply":"2025-02-19T16:09:25.027288Z"}},"outputs":[{"name":"stdout","text":"{'input_ids': [[63, 2025, 18, 16155, 346, 20311, 24, 2279, 679, 0]], 'attention_mask': [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1]]}\n","output_type":"stream"},{"name":"stderr","text":"/usr/local/lib/python3.10/dist-packages/transformers/tokenization_utils_base.py:3970: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n warnings.warn(\n","output_type":"stream"}],"execution_count":14},{"cell_type":"code","source":"#Preprocessing Function\nsource_lang = 'en'\ntarget_lang = 'hi'\n\ndef preprocess_function(examples):\n input = [ex[source_lang] for ex in examples['translation']]\n targets = [ex[target_lang] for ex in examples['translation']]\n model_inputs = tokenizer(input, max_length=128, truncation=True)\n\n #Setup the tokenizer for targets\n with tokenizer.as_target_tokenizer():\n labels = tokenizer(targets, max_length=128, truncation=True)\n\n model_inputs['labels'] = labels['input_ids']\n return model_inputs","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-02-19T16:09:27.184738Z","iopub.execute_input":"2025-02-19T16:09:27.185031Z","iopub.status.idle":"2025-02-19T16:09:27.189671Z","shell.execute_reply.started":"2025-02-19T16:09:27.185010Z","shell.execute_reply":"2025-02-19T16:09:27.188909Z"}},"outputs":[],"execution_count":15},{"cell_type":"code","source":"#Verifying Preprocessing Function\npreprocess_function(raw_datasets['train'][:2])","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-02-19T16:09:28.982305Z","iopub.execute_input":"2025-02-19T16:09:28.982585Z","iopub.status.idle":"2025-02-19T16:09:28.988418Z","shell.execute_reply.started":"2025-02-19T16:09:28.982563Z","shell.execute_reply":"2025-02-19T16:09:28.987797Z"}},"outputs":[{"execution_count":16,"output_type":"execute_result","data":{"text/plain":"{'input_ids': [[3872, 85, 2501, 132, 15441, 36398, 0], [32643, 28541, 36253, 0]], 'attention_mask': [[1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1]], 'labels': [[63, 2025, 18, 16155, 346, 20311, 24, 2279, 679, 0], [26618, 16155, 346, 33383, 0]]}"},"metadata":{}}],"execution_count":16},{"cell_type":"code","source":"#Tokenizing the Datasets 
In:
#Tokenizing the Datasets and Mapping them to the Preprocessing Function
#Since the Train_Dataset is huge we will be working with Validation and Test Datasets
#The fine tuned model will be overfitted

#tokenized_train = train_dataset.map(
#    preprocess_function,
#    batched=True
#)

tokenized_val = validation_dataset.map(
    preprocess_function,
    batched=True
)

tokenized_test = test_dataset.map(
    preprocess_function,
    batched=True
)

Out:
Map:   0%|          | 0/520 [00:00<?, ? examples/s]
Map:   0%|          | 0/2507 [00:00<?, ? examples/s]

In:
#Load the PyTorch version of the model
model = MarianMTModel.from_pretrained(model_checkpoint)

Out:
pytorch_model.bin:       0%|          | 0.00/306M [00:00<?, ?B/s]
model.safetensors:       0%|          | 0.00/306M [00:00<?, ?B/s]
generation_config.json:  0%|          | 0.00/293 [00:00<?, ?B/s]
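A quick way to sanity-check the freshly downloaded checkpoint before any fine-tuning is to translate one of the test sentences shown earlier; a minimal sketch using the tokenizer and model defined above (the sample string is the first test example; the output is not captured in the notebook):

# Hypothetical smoke test: translate one English sentence with the pretrained checkpoint
sample = "A black box in your car?"
inputs = tokenizer([sample], return_tensors="pt")
generated_ids = model.generate(**inputs, max_length=128)
print(tokenizer.batch_decode(generated_ids, skip_special_tokens=True))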
out_features=512, bias=True)\n (out_proj): Linear(in_features=512, out_features=512, bias=True)\n )\n (self_attn_layer_norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)\n (activation_fn): SiLU()\n (fc1): Linear(in_features=512, out_features=2048, bias=True)\n (fc2): Linear(in_features=2048, out_features=512, bias=True)\n (final_layer_norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)\n )\n )\n )\n (decoder): MarianDecoder(\n (embed_tokens): Embedding(61950, 512, padding_idx=61949)\n (embed_positions): MarianSinusoidalPositionalEmbedding(512, 512)\n (layers): ModuleList(\n (0-5): 6 x MarianDecoderLayer(\n (self_attn): MarianAttention(\n (k_proj): Linear(in_features=512, out_features=512, bias=True)\n (v_proj): Linear(in_features=512, out_features=512, bias=True)\n (q_proj): Linear(in_features=512, out_features=512, bias=True)\n (out_proj): Linear(in_features=512, out_features=512, bias=True)\n )\n (activation_fn): SiLU()\n (self_attn_layer_norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)\n (encoder_attn): MarianAttention(\n (k_proj): Linear(in_features=512, out_features=512, bias=True)\n (v_proj): Linear(in_features=512, out_features=512, bias=True)\n (q_proj): Linear(in_features=512, out_features=512, bias=True)\n (out_proj): Linear(in_features=512, out_features=512, bias=True)\n )\n (encoder_attn_layer_norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)\n (fc1): Linear(in_features=512, out_features=2048, bias=True)\n (fc2): Linear(in_features=2048, out_features=512, bias=True)\n (final_layer_norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)\n )\n )\n )\n )\n (lm_head): Linear(in_features=512, out_features=61950, bias=False)\n)"},"metadata":{}}],"execution_count":19},{"cell_type":"code","source":"#Convert to PyTorch format AFTER preprocessing\n\n#tokenized_train.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])\ntokenized_val.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])\ntokenized_test.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])\n \n#print(f\"Training examples: {len(raw_datasets['train'])}\")\nprint(f\"Validation examples: {len(raw_datasets['validation'])}\")\nprint(f\"Test examples: {len(raw_datasets['test'])}\")","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-02-19T16:11:14.232562Z","iopub.execute_input":"2025-02-19T16:11:14.232913Z","iopub.status.idle":"2025-02-19T16:11:14.239675Z","shell.execute_reply.started":"2025-02-19T16:11:14.232883Z","shell.execute_reply":"2025-02-19T16:11:14.238789Z"}},"outputs":[{"name":"stdout","text":"Validation examples: 520\nTest examples: 2507\n","output_type":"stream"}],"execution_count":21},{"cell_type":"code","source":"#Hugging Face Username\nhugging_face_user_name = \"kash06\"\n\n#Training Arguments\ntraining_args = TrainingArguments(\n output_dir=f\"./english-hindi-translator-{datetime.now().strftime('%Y%m%d-%H%M%S')}\",\n per_device_train_batch_size=8,\n per_device_eval_batch_size=8,\n num_train_epochs=3,\n learning_rate=5e-5,\n weight_decay=0.01,\n logging_dir='./logs',\n logging_steps=100,\n evaluation_strategy=\"steps\",\n eval_steps=200,\n save_strategy=\"steps\",\n save_steps=400,\n load_best_model_at_end=True,\n push_to_hub=True,\n hub_model_id=f\"{hugging_face_user_name}/english-hindi-translator\",\n fp16=torch.cuda.is_available(),\n gradient_accumulation_steps=1,\n warmup_steps=200,\n report_to=[\"none\"],\n optim=\"paged_adamw_8bit\",\n dataloader_pin_memory=True,\n 
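#memory-friendly settings: torch.compile stays off and gradient checkpointing recomputes activations in the backward pass\n 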
torch_compile=False,\n gradient_checkpointing=True\n)","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-02-19T16:12:35.511644Z","iopub.execute_input":"2025-02-19T16:12:35.512011Z","iopub.status.idle":"2025-02-19T16:12:35.549652Z","shell.execute_reply.started":"2025-02-19T16:12:35.511985Z","shell.execute_reply":"2025-02-19T16:12:35.547956Z"}},"outputs":[{"name":"stderr","text":"/usr/local/lib/python3.10/dist-packages/transformers/training_args.py:1594: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead\n warnings.warn(\n","output_type":"stream"}],"execution_count":22},{"cell_type":"code","source":"#Login to Hugging Face\nfrom huggingface_hub import notebook_login\nnotebook_login()","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-02-19T16:12:45.322692Z","iopub.execute_input":"2025-02-19T16:12:45.323137Z","iopub.status.idle":"2025-02-19T16:12:45.352203Z","shell.execute_reply.started":"2025-02-19T16:12:45.323104Z","shell.execute_reply":"2025-02-19T16:12:45.351138Z"}},"outputs":[{"output_type":"display_data","data":{"text/plain":"VBox(children=(HTML(value='<center> <img\\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"e90707643bea4d34b213fd8b79850076"}},"metadata":{}}],"execution_count":23},{"cell_type":"code","source":"#https://huggingface.co/docs/transformers/en/main_classes/data_collator\n\nfrom transformers import DataCollatorForSeq2Seq\n\ndata_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, padding=True)\n\ntrainer = Trainer(\n model=model,\n args=training_args,\n train_dataset=tokenized_test,\n eval_dataset=tokenized_val,\n data_collator=data_collator,\n)\n\ntrainer.train()","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-02-19T16:14:01.295476Z","iopub.execute_input":"2025-02-19T16:14:01.295825Z","iopub.status.idle":"2025-02-19T16:22:34.712198Z","shell.execute_reply.started":"2025-02-19T16:14:01.295796Z","shell.execute_reply":"2025-02-19T16:22:34.711425Z"}},"outputs":[{"name":"stderr","text":"/usr/local/lib/python3.10/dist-packages/transformers/data/data_collator.py:740: UserWarning: Creating a tensor from a list of numpy.ndarrays is extremely slow. Please consider converting the list to a single numpy.ndarray with numpy.array() before converting to a tensor. 
(Triggered internally at /pytorch/torch/csrc/utils/tensor_new.cpp:254.)\n batch[\"labels\"] = torch.tensor(batch[\"labels\"], dtype=torch.int64)\n/usr/local/lib/python3.10/dist-packages/torch/nn/parallel/_functions.py:70: UserWarning: Was asked to gather along dimension 0, but all input tensors were scalars; will instead unsqueeze and return a vector.\n warnings.warn(\n","output_type":"stream"},{"output_type":"display_data","data":{"text/plain":"<IPython.core.display.HTML object>","text/html":"\n <div>\n \n <progress value='471' max='471' style='width:300px; height:20px; vertical-align: middle;'></progress>\n [471/471 08:23, Epoch 3/3]\n </div>\n <table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: left;\">\n <th>Step</th>\n <th>Training Loss</th>\n <th>Validation Loss</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <td>200</td>\n <td>3.491200</td>\n <td>3.892606</td>\n </tr>\n <tr>\n <td>400</td>\n <td>2.832500</td>\n <td>3.805978</td>\n </tr>\n </tbody>\n</table><p>"},"metadata":{}},{"name":"stderr","text":"/usr/local/lib/python3.10/dist-packages/transformers/modeling_utils.py:2810: UserWarning: Moving the following attributes in the config to the generation config: {'max_length': 512, 'num_beams': 4, 'bad_words_ids': [[61949]]}. You are seeing this warning because you've set generation parameters in the model config, as opposed to in the generation config.\n warnings.warn(\n/usr/local/lib/python3.10/dist-packages/torch/nn/parallel/_functions.py:70: UserWarning: Was asked to gather along dimension 0, but all input tensors were scalars; will instead unsqueeze and return a vector.\n warnings.warn(\nThere were missing keys in the checkpoint model loaded: ['model.encoder.embed_tokens.weight', 'model.encoder.embed_positions.weight', 'model.decoder.embed_tokens.weight', 'model.decoder.embed_positions.weight', 'lm_head.weight'].\n","output_type":"stream"},{"execution_count":24,"output_type":"execute_result","data":{"text/plain":"TrainOutput(global_step=471, training_loss=3.273390654545681, metrics={'train_runtime': 505.2603, 'train_samples_per_second': 14.885, 'train_steps_per_second': 0.932, 'total_flos': 115959420813312.0, 'train_loss': 3.273390654545681, 'epoch': 3.0})"},"metadata":{}}],"execution_count":24},{"cell_type":"code","source":"def translate_to_hindi_translator(text):\n prompt = f\"\"\"### Human: You are a Hindi colloquial language translator. Your ONLY job is to translate the given English text to Hindi. Do not include the original English text in your response. 
Just the pure Hindi translation.\n\nEnglish: {text}\n### Assistant: \"\"\"\n\n #NOTE: MarianMT is a plain seq2seq translator, not an instruction-following model, so it translates this entire prompt (instructions included); the sample output below shows the effect\n inputs = tokenizer(prompt, return_tensors=\"pt\", max_length=128, truncation=True)\n inputs = {k: v.to(model.device) for k, v in inputs.items()}\n\n outputs = model.generate(\n **inputs,\n max_length=128,\n do_sample=True,\n temperature=0.9,\n num_return_sequences=1,\n pad_token_id=tokenizer.pad_token_id,\n eos_token_id=tokenizer.eos_token_id,\n num_beams=1,\n top_p=0.95,\n repetition_penalty=1.1,\n )\n\n response = tokenizer.decode(outputs[0], skip_special_tokens=True)\n translation = response.split(\"### Assistant:\")[-1].strip()\n translation = translation.replace(text, \"\").strip() \n translation = translation.replace(\"English:\", \"\").strip()\n translation = translation.replace(\"Hindi Translator:\", \"\").strip()\n translation = translation.replace(\"->\", \"\").strip()\n translation = translation.replace('\"', '').strip()\n translation = translation.replace('#', '').strip()\n translation = translation.replace('08:', '').strip()\n\n return translation \n\ntest_sentences = [\n \"I Live in India\",\n]\n\nprint(\"\\nTesting the model:\\n\")\nfor sentence in test_sentences:\n translation = translate_to_hindi_translator(sentence)\n print(f\"English: {sentence}\")\n print(f\"Hindi Translator: {translation}\")\n print(\"-\" * 50)","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-02-19T16:27:52.530150Z","iopub.execute_input":"2025-02-19T16:27:52.530459Z","iopub.status.idle":"2025-02-19T16:27:53.244484Z","shell.execute_reply.started":"2025-02-19T16:27:52.530437Z","shell.execute_reply":"2025-02-19T16:27:53.243801Z"}},"outputs":[{"name":"stdout","text":"\nTesting the model:\n\nEnglish: I Live in India\nHindi Translator: ♪!क्या मानवः आप एक अंग्रेजी भाषा स्रोत हैं? आपकी ओपूर्ण रचना के लिए है दिए गए अंग्रेजी पाठ को हिंदी में और भी अनुवाद करना। अपनी प्रतिक्रिया में मात्र शुद्ध हिन्दी अनुवाद में मत दें। वर्तमान पश्चिमी अंग्रेजी अनुवाद मे, सिर्फ मैं भारत में जीता हूँ 2:\n--------------------------------------------------\n","output_type":"stream"}],"execution_count":31},{"cell_type":"code","source":"from IPython.display import HTML, display\n\ndef interactive_translation(text_input):\n if text_input:\n translation = translate_to_hindi_translator(text_input)\n display(HTML(f\"\"\"\n <div style='padding: 10px; border-radius: 5px;'>\n <b>English:</b> {text_input}<br>\n <b>Hindi Translator:</b> {translation}\n </div>\n \"\"\"))\n\n# Create interactive widget\nfrom ipywidgets import interact, widgets\ninteract(interactive_translation,\n text_input=widgets.Text(description='English:', placeholder='Enter text to translate'))","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-02-19T16:23:16.543406Z","iopub.execute_input":"2025-02-19T16:23:16.543698Z","iopub.status.idle":"2025-02-19T16:23:16.570666Z","shell.execute_reply.started":"2025-02-19T16:23:16.543676Z","shell.execute_reply":"2025-02-19T16:23:16.569797Z"}},"outputs":[{"output_type":"display_data","data":{"text/plain":"interactive(children=(Text(value='', description='English:', placeholder='Enter text to translate'), Output())…","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"019f65401b2442658c0baf3c62ba0d4a"}},"metadata":{}},{"execution_count":30,"output_type":"execute_result","data":{"text/plain":"<function __main__.interactive_translation(text_input)>"},"metadata":{}}],"execution_count":30}]}