| from fastapi import FastAPI, HTTPException |
| from pydantic import BaseModel |
| import uvicorn |
| import os |
| import time |
| import hashlib |
| from contextlib import asynccontextmanager |
|
|
| from src.ingestion.semantic_splitter import ActivaSemanticSplitter |
| from src.extraction.extractor import NeuroSymbolicExtractor |
| from src.validation.validator import SemanticValidator |
| from src.graph.graph_loader import KnowledgeGraphPersister |
| from src.graph.entity_resolver import EntityResolver |
|
|
| |
| |
# Global registry of heavyweight singletons (splitter, extractor, persister,
# resolver, validator). Populated once at startup by lifespan() and cleared
# on shutdown; request handlers read from it instead of re-loading models.
ml_models = {}
|
|
@asynccontextmanager
async def lifespan(app: FastAPI):
    """FastAPI lifespan handler: load ML components once at startup,
    release them at shutdown.

    Populates the module-level ``ml_models`` registry with the semantic
    splitter, neuro-symbolic extractor, Neo4j persister, entity resolver
    and SHACL validator, then yields control to the running application.

    The shutdown section is wrapped in ``try/finally`` so the Neo4j driver
    is closed and memory released even if an exception propagates through
    the application while it is running (the original code skipped cleanup
    in that case, leaking the driver connection).
    """
    print("⏳ Inizializzazione modelli (SentenceTransformers e Llama3) nel Lifespan...")

    ml_models["splitter"] = ActivaSemanticSplitter(model_name="all-MiniLM-L6-v2")

    schema_path = os.path.join("data", "schemas", "ARCO_schema.json")
    ml_models["extractor"] = NeuroSymbolicExtractor(model_name="llama3", schema_path=schema_path)

    ml_models["persister"] = KnowledgeGraphPersister()
    # Resolver reuses the persister's Neo4j driver; 0.85 is the cosine-similarity
    # merge threshold for considering two entities the same.
    ml_models["resolver"] = EntityResolver(neo4j_driver=ml_models["persister"].driver, similarity_threshold=0.85)
    ml_models["validator"] = SemanticValidator()

    print("✅ Modelli caricati e pronti a ricevere richieste!")

    try:
        yield
    finally:
        # Always runs, even on abnormal shutdown: close the DB driver first,
        # then drop every model reference so memory can be reclaimed.
        print("🛑 Spegnimento in corso... chiusura connessioni e pulizia memoria.")
        if "persister" in ml_models and ml_models["persister"]:
            ml_models["persister"].close()
        ml_models.clear()
|
|
# FastAPI application instance; `lifespan` wires model loading at startup
# and connection cleanup at shutdown.
app = FastAPI(
    title="Automated Semantic Discovery API",
    description="Endpoint per l'ingestion testuale e l'estrazione neuro-simbolica",
    version="1.0",
    lifespan=lifespan
)
|
|
class DiscoveryRequest(BaseModel):
    """Request body for POST /api/discover."""

    # Raw document text to chunk, extract triples from, and persist.
    documentText: str
|
|
def _serialize_triples(triples):
    """Convert extracted triples (objects or plain tuples) into JSON-safe dicts.

    Each item may be an object exposing .subject/.predicate/.object (and
    optionally .confidence) or a positional tuple (subject, predicate,
    object[, confidence]). Short tuples no longer crash: missing positions
    default to '' and confidence to 1.0 (the original evaluated t[1]/t[2]
    eagerly as getattr defaults, raising IndexError on tuples of length < 3).
    """
    graph_data = []
    for t in triples:
        if isinstance(t, tuple):
            subj = t[0] if len(t) > 0 else ''
            pred = t[1] if len(t) > 1 else ''
            obj = t[2] if len(t) > 2 else ''
            conf = t[3] if len(t) > 3 else 1.0
        else:
            subj = getattr(t, 'subject', str(t))
            pred = getattr(t, 'predicate', '')
            obj = getattr(t, 'object', '')
            conf = getattr(t, 'confidence', 1.0)

        subj_str = str(subj)
        # Deterministic node id: the same subject string always maps to the
        # same node. md5 is used as a stable non-cryptographic hash here.
        node_id = hashlib.md5(subj_str.encode('utf-8')).hexdigest()

        graph_data.append({
            "start_node_id": node_id,
            "start_node_label": subj_str,
            "relationship_type": str(pred),
            "end_node_label": str(obj),
            "confidence": float(conf)
        })
    return graph_data


@app.post("/api/discover")
def run_discovery(payload: DiscoveryRequest):
    """Run the full semantic-discovery pipeline on the posted document text.

    Pipeline: semantic chunking -> neuro-symbolic extraction -> entity
    resolution -> SHACL validation -> Neo4j persistence -> JSON serialization.
    Validation and persistence are best-effort: their failures are logged
    but do not fail the request.

    Raises:
        HTTPException: 400 if the document text is empty or whitespace-only.
    """
    start_time = time.time()
    raw_text = payload.documentText

    if not raw_text or not raw_text.strip():
        raise HTTPException(status_code=400, detail="Il testo fornito è vuoto.")

    # Singletons created once in the lifespan handler.
    splitter = ml_models["splitter"]
    extractor = ml_models["extractor"]
    validator = ml_models["validator"]
    resolver = ml_models["resolver"]
    persister = ml_models["persister"]

    # 1) Split the document into semantically coherent chunks.
    chunks, _, _ = splitter.create_chunks(raw_text, percentile_threshold=90)

    # 2) Extract entities and triples from every chunk.
    all_triples = []
    all_entities = []
    for i, chunk in enumerate(chunks):
        chunk_id = f"api_req_chunk_{i+1}"
        extraction_result = extractor.extract(chunk, source_id=chunk_id)

        if extraction_result:
            if extraction_result.triples:
                all_triples.extend(extraction_result.triples)
            if hasattr(extraction_result, 'entities') and extraction_result.entities:
                all_entities.extend(extraction_result.entities)

    if not all_triples:
        return {
            "status": "success",
            "message": "Nessuna entità trovata.",
            "graph_data": []
        }

    # 3) Merge duplicate entities. On resolver failure, fall back to the raw
    #    extracted entities instead of an empty list (previously a resolver
    #    error silently dropped every entity from validation and persistence).
    entities_to_save = all_entities
    try:
        all_entities, all_triples, entities_to_save = resolver.resolve_entities(all_entities, all_triples)
    except Exception as e:
        print(f"⚠️ Errore nel resolver (skip): {e}")

    # 4) SHACL validation — result is reported in the response, never fatal.
    is_valid, report, _ = validator.validate_batch(entities_to_save, all_triples)
    if not is_valid:
        print("\n❌ [SHACL VALIDATION FAILED] Rilevate entità o relazioni non conformi all'ontologia:")
        print(report)
        print("-" * 60)
    else:
        print("\n✅ [SHACL VALIDATION SUCCESS] Tutte le triple ed entità rispettano i vincoli.")

    # 5) Persist to Neo4j (best-effort: the API response is returned regardless).
    try:
        persister.save_entities_and_triples(entities_to_save, all_triples)
    except Exception as e:
        print(f"⚠️ Errore salvataggio Neo4j: {e}")

    # 6) Serialize the triples for the JSON response.
    graph_data = _serialize_triples(all_triples)

    return {
        "status": "success",
        "message": "Estrazione semantica completata",
        "execution_time_seconds": round(time.time() - start_time, 2),
        "chunks_processed": len(chunks),
        "triples_extracted": len(graph_data),
        "shacl_valid": is_valid,
        "graph_data": graph_data
    }
|
|
# Dev entry point; `reload=True` is for local development only.
# NOTE(review): "api:app" assumes this file is saved as api.py — confirm.
if __name__ == "__main__":
    uvicorn.run("api:app", host="0.0.0.0", port=5000, reload=True)