Spaces:
Sleeping
Sleeping
| # rag/models/initializer.py | |
| from transformers import AutoTokenizer | |
| import onnxruntime as ort | |
| from huggingface_hub import hf_hub_download | |
| from fastapi import FastAPI | |
| from config import HF_MODEL_REPO_ID, EMBED_MODEL, EMBED_DIR, RERANK_MODEL, RERANK_DIR | |
| # Official upstream model IDs (specified so the original tokenizers can be loaded) | |
| EMBEDDER_ORIGINAL_ID = "Qwen/Qwen3-Embedding-0.6B" | |
| RERANKER_ORIGINAL_ID = "Qwen/Qwen3-Reranker-0.6B" | |
def _load_tokenizer(model_id: str):
    """Return the tokenizer loaded from the upstream model ID *model_id*."""
    # NOTE(review): trust_remote_code=True is kept exactly as in the original.
    # It allows transformers to run Python code shipped in the model repo --
    # confirm it is still required for these model IDs.
    return AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)


def _load_onnx_session(filename: str, subfolder: str) -> ort.InferenceSession:
    """Download an ONNX file from HF_MODEL_REPO_ID and open a CPU session on it.

    Args:
        filename: Name of the model file inside the repository.
        subfolder: Repository subfolder that contains *filename*.

    Returns:
        An ``InferenceSession`` created with only ``CPUExecutionProvider``.
    """
    model_path = hf_hub_download(
        repo_id=HF_MODEL_REPO_ID,
        filename=filename,
        subfolder=subfolder,
    )
    return ort.InferenceSession(model_path, providers=["CPUExecutionProvider"])


def initialize_models(app: FastAPI) -> None:
    """Load the embedder and reranker and attach them to ``app.state``.

    For each model, the tokenizer is loaded from its upstream model ID and the
    ONNX file is downloaded from ``HF_MODEL_REPO_ID`` and opened in a CPU-only
    onnxruntime session. The four resulting objects are stored on
    ``app.state`` as ``embedder_tokenizer``, ``embedder_sess``,
    ``reranker_tokenizer`` and ``reranker_sess``.

    Args:
        app: The FastAPI application whose ``state`` receives the objects.

    Raises:
        Any exception raised by the tokenizer load, the hub download or the
        session creation propagates unchanged. ``app.state`` is only written
        after every load has succeeded, so a failure leaves it untouched.
    """
    # Embedder
    embedder_tokenizer = _load_tokenizer(EMBEDDER_ORIGINAL_ID)
    embedder_sess = _load_onnx_session(EMBED_MODEL, EMBED_DIR)

    # Reranker
    reranker_tokenizer = _load_tokenizer(RERANKER_ORIGINAL_ID)
    reranker_sess = _load_onnx_session(RERANK_MODEL, RERANK_DIR)

    # Store on FastAPI app.state so the objects are shared across the app.
    app.state.embedder_tokenizer = embedder_tokenizer
    app.state.embedder_sess = embedder_sess
    app.state.reranker_tokenizer = reranker_tokenizer
    app.state.reranker_sess = reranker_sess