hellosindh
/

indus-script-models

@@ -31,7 +31,7 @@ import torch.nn.functional as F
 # ── Auto-download from HuggingFace ────────────────────────────
-HF_REPO = "YOUR_USERNAME/indus-script-models"   # update after upload
 def download_models(repo_id=HF_REPO, local_dir="indus_models"):
     """Download all model files from HuggingFace."""
@@ -48,18 +48,25 @@ def download_models(repo_id=HF_REPO, local_dir="indus_models"):
 def get_model_dir():
-    """Find model directory — local DATA/models or downloaded."""
-    # Try local development path first
     local = Path("DATA/models")
     if local.exists():
         return local, Path("DATA")
-    # Try downloaded path
-    downloaded = Path("indus_models")
-    if downloaded.exists():
-        return downloaded / "models", downloaded
-    # Auto-download
     path = download_models()
-    return Path(path) / "models", Path(path)
 # ── Device ─────────────────────────────────────────────────────
@@ -73,8 +80,11 @@ PAD_ID = 816
 # ── Load helpers ───────────────────────────────────────────────
 def load_tokenizer(data_dir):
     from transformers import PreTrainedTokenizerFast
-    return PreTrainedTokenizerFast.from_pretrained(
-        str(data_dir / "indus_tokenizer"))
 def load_bert_mlm(model_dir):

 # ── Auto-download from HuggingFace ────────────────────────────
+HF_REPO = "hellosindh/indus-script-models"   # HuggingFace repo id; default repo_id for download_models()
 def download_models(repo_id=HF_REPO, local_dir="indus_models"):
     """Download all model files from HuggingFace."""
 def get_model_dir():
+    """
+    Find the model directory and the data directory that goes with it.
+    Priority:
+      1. ./models/  (running from cloned HuggingFace repo); only used when
+         models/nanogpt_indus.pt is present. The data directory is ./data
+         if it exists, otherwise the current directory.
+      2. DATA/models/  (running from original indus_script folder); the
+         data directory is DATA.
+      3. Auto-download from HuggingFace via download_models(); the models/
+         and data/ subfolders of the returned path are used.
+    Returns:
+      A (model_dir, data_dir) tuple of Path objects.
+    """
+    # Running from cloned repo — models/ is right here (checkpoint file must be present too)
+    cloned = Path("models")
+    if cloned.exists() and (cloned / "nanogpt_indus.pt").exists():
+        data = Path("data") if Path("data").exists() else Path(".")
+        return cloned, data
+    # Running from original indus_script folder
     local = Path("DATA/models")
     if local.exists():
         return local, Path("DATA")
+    # Auto-download from HuggingFace (reached only when neither local layout was found)
     path = download_models()
+    return Path(path) / "models", Path(path) / "data"  # NOTE(review): unlike the cloned-repo branch, no fallback if data/ is missing — confirm the downloaded layout
 # ── Device ─────────────────────────────────────────────────────
 # ── Load helpers ───────────────────────────────────────────────
 def load_tokenizer(data_dir):
     from transformers import PreTrainedTokenizerFast  # function-local import
+    # Prefer the data_dir/indus_tokenizer subfolder; if it does not exist, load from data_dir itself
+    tok_path = data_dir / "indus_tokenizer"  # data_dir must support "/" and .exists() — presumably a pathlib.Path; confirm against caller
+    if not tok_path.exists():
+        tok_path = data_dir
+    return PreTrainedTokenizerFast.from_pretrained(str(tok_path))  # the chosen path is passed as a plain string
 def load_bert_mlm(model_dir):