# indic-lid_trans2 / setup_models.py
# Noumida's picture
# Create setup_models.py
# 211bdc2 verified
import os
import subprocess
import sys
import zipfile
import urllib.request
from pathlib import Path
def download_file(url, filename):
    """Download ``url`` to ``filename``, printing a message before and after.

    No incremental progress is reported; only a start and a completion
    message are printed.

    The data is first written to ``<filename>.part`` and moved to
    ``filename`` only once the transfer has finished, so a failed or
    interrupted download never leaves a truncated file under the final name
    (and never clobbers a file that was already there).

    Args:
        url: URL to fetch; anything ``urllib.request.urlretrieve`` accepts.
        filename: Destination path of the downloaded file.

    Raises:
        Exception: whatever ``urllib.request.urlretrieve`` or ``os.replace``
            raises (e.g. ``urllib.error.URLError``, ``OSError``); the
            exception is propagated unchanged after the partial file has
            been removed.
    """
    print(f"Downloading {filename}...")
    partial = f"{filename}.part"
    try:
        urllib.request.urlretrieve(url, partial)
        # Atomic on the same filesystem: the final name appears only when
        # the payload is complete.
        os.replace(partial, filename)
    finally:
        # After a successful replace the partial file no longer exists;
        # this only fires when the transfer or the move failed.
        if os.path.exists(partial):
            os.remove(partial)
    print(f"βœ… Downloaded {filename}")
def setup_indiclid():
    """Fetch the IndicLID source file and model archives into the current directory.

    Steps, all relative to the current working directory:

    1. Create the ``ai4bharat/`` and ``models/`` directories.
    2. Shallow-clone the IndicLID repository into ``temp_indiclid/``, copy
       ``Inference/ai4bharat/IndicLID.py`` to ``ai4bharat/IndicLID.py``,
       write an empty ``ai4bharat/__init__.py`` and delete the clone.
    3. Download each model zip into ``models/``, extract it there and
       delete the zip.

    Model downloads are best-effort: a failure for one archive is printed,
    the remaining archives are still attempted, and a summary of the failed
    archives is printed at the end.

    Raises:
        subprocess.CalledProcessError: if ``git clone`` exits non-zero.
        OSError: if ``git`` cannot be started or a directory/file operation
            outside the per-archive download loop fails.
    """
    import shutil

    print("=== Setting up IndicLID ===")

    # Create directory structure
    os.makedirs("ai4bharat", exist_ok=True)
    os.makedirs("models", exist_ok=True)

    clone_dir = "temp_indiclid"
    # A checkout left behind by an interrupted earlier run would make
    # `git clone` fail because the target directory already exists.
    shutil.rmtree(clone_dir, ignore_errors=True)

    # Download IndicLID source code
    print("Downloading IndicLID source...")
    try:
        subprocess.run([
            "git", "clone", "--depth", "1",
            "https://github.com/AI4Bharat/IndicLID.git",
            clone_dir,
        ], check=True)

        # Copy necessary files
        source_file = "temp_indiclid/Inference/ai4bharat/IndicLID.py"
        if os.path.exists(source_file):
            shutil.copy2(source_file, "ai4bharat/IndicLID.py")
            print("βœ… Copied IndicLID.py")
        else:
            # Keep going (best-effort), but make the missing file visible
            # instead of silently skipping the copy.
            print(f"Warning: {source_file} not found in the cloned repository")

        # Create __init__.py so that ai4bharat/ is importable as a package
        with open("ai4bharat/__init__.py", "w") as f:
            f.write("")
    finally:
        # Clean up the clone even when the copy or the write above raised
        shutil.rmtree(clone_dir, ignore_errors=True)

    # Download model files
    model_urls = [
        "https://github.com/AI4Bharat/IndicLID/releases/download/v1.0/indiclid-bert.zip",
        "https://github.com/AI4Bharat/IndicLID/releases/download/v1.0/indiclid-ftn.zip",
        "https://github.com/AI4Bharat/IndicLID/releases/download/v1.0/indiclid-ftr.zip",
    ]

    failed = []
    # Remember the absolute starting directory: chdir("..") is not a
    # reliable way back (e.g. when "models" is a symlink), and the restore
    # must also happen if the loop is interrupted.
    start_dir = os.getcwd()
    os.chdir("models")
    try:
        for url in model_urls:
            filename = url.split("/")[-1]
            try:
                download_file(url, filename)

                # Extract zip file
                print(f"Extracting {filename}...")
                with zipfile.ZipFile(filename, 'r') as zip_ref:
                    zip_ref.extractall('.')

                # Remove zip file
                os.remove(filename)
                print(f"βœ… Extracted and cleaned {filename}")
            except Exception as e:
                # Deliberately best-effort: report and continue with the
                # next archive.
                print(f"❌ Error with {filename}: {e}")
                failed.append(filename)
                # Do not leave a partial or corrupt archive behind.
                try:
                    os.remove(filename)
                except OSError:
                    pass
    finally:
        os.chdir(start_dir)

    if failed:
        print(
            f"Warning: {len(failed)} model archive(s) could not be installed: "
            f"{', '.join(failed)}"
        )
    print("βœ… IndicLID setup complete!")
def patch_indiclid():
    """Patch ``ai4bharat/IndicLID.py`` so its ``torch.load`` call passes ``weights_only=False``.

    The file is looked up relative to the current working directory. The
    function:

    * prints an error and returns if the file does not exist;
    * rewrites the file and prints a success message when the unpatched
      ``torch.load(...)`` call is present;
    * leaves the file untouched and says so when the call is already
      patched, or when the expected call cannot be found at all.

    SECURITY NOTE: ``weights_only=False`` makes ``torch.load`` unpickle
    arbitrary objects, which can execute code embedded in the checkpoint.
    Only use the patched loader with model files from a trusted source.

    Returns:
        None in every case; the outcome is reported on stdout.
    """
    indiclid_file = "ai4bharat/IndicLID.py"

    if not os.path.exists(indiclid_file):
        print("❌ IndicLID.py not found!")
        return

    print("Applying patches to IndicLID.py...")

    unpatched_call = "torch.load(self.IndicLID_BERT_path, map_location = self.device)"
    patched_call = (
        "torch.load(self.IndicLID_BERT_path, map_location=self.device, weights_only=False)"
    )

    # Python source files are UTF-8 by default; do not depend on the
    # platform's locale encoding when reading or writing one.
    with open(indiclid_file, "r", encoding="utf-8") as f:
        content = f.read()

    if unpatched_call not in content:
        # Nothing to replace: do not rewrite the file or claim success.
        if patched_call in content:
            print("Patches already applied; nothing to do.")
        else:
            print("Warning: expected torch.load call not found in IndicLID.py; no patch applied.")
        return

    # Apply patches
    content = content.replace(unpatched_call, patched_call)

    # Write back
    with open(indiclid_file, "w", encoding="utf-8") as f:
        f.write(content)

    print("βœ… Patches applied successfully!")
if __name__ == "__main__":
    # Script entry point: run every setup step in order. Any exception is
    # reported on stdout and turned into exit status 1.
    try:
        for step in (setup_indiclid, patch_indiclid):
            step()
        print("\nπŸŽ‰ Setup completed successfully!")
    except Exception as error:
        print(f"\n❌ Setup failed: {error}")
        sys.exit(1)