Spaces:
No application file
No application file
File size: 3,495 Bytes
28a1786 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 | import os
import tempfile
import subprocess
import numpy as np
import librosa
from fastapi import UploadFile, HTTPException
from typing import Tuple
import logging
from app.config import settings
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
class AudioProcessor:
    """Audio preprocessing helpers: upload validation, WAV conversion, metadata.

    All methods are static; the class only serves as a namespace.
    """

    @staticmethod
    async def validate_file(file: UploadFile) -> Tuple[bytes, str]:
        """Validate the size and extension of an uploaded file.

        Args:
            file: The uploaded file. Its whole content is read into memory.

        Returns:
            A ``(contents, ext)`` tuple where ``ext`` is the lower-cased
            extension without the leading dot.

        Raises:
            HTTPException: 413 if the payload exceeds
                ``settings.MAX_FILE_SIZE_MB``; 415 if the file name is
                missing, has no extension, or the extension is not in
                ``settings.SUPPORTED_FORMATS``.
        """
        # Check file size
        contents = await file.read()
        size_mb = len(contents) / (1024 * 1024)
        if size_mb > settings.MAX_FILE_SIZE_MB:
            raise HTTPException(
                status_code=413,
                detail=f"File too large. Max {settings.MAX_FILE_SIZE_MB}MB",
            )

        # Check format. ``filename`` may be None for uploads sent without a
        # name; treat that (and names without any dot) as "no extension"
        # instead of crashing or mistaking the whole name for an extension.
        _, dot, suffix = (file.filename or "").rpartition(".")
        ext = suffix.lower() if dot else ""
        # The explicit emptiness check matters if SUPPORTED_FORMATS is a
        # string, where ``"" in "wav,mp3"`` would be True.
        if not ext or ext not in settings.SUPPORTED_FORMATS:
            raise HTTPException(
                status_code=415,
                detail=f"Unsupported format. Supported: {settings.SUPPORTED_FORMATS}",
            )
        return contents, ext

    @staticmethod
    async def convert_to_wav(input_bytes: bytes, input_ext: str) -> bytes:
        """Convert audio bytes to mono 16-bit PCM WAV using FFmpeg.

        The output is resampled to ``settings.TARGET_SAMPLE_RATE``.

        Args:
            input_bytes: Raw bytes of the source audio file.
            input_ext: Extension of the source format without the dot; it is
                used as the suffix of the temporary input file.

        Returns:
            The bytes of the converted WAV file.

        Raises:
            HTTPException: 422 if FFmpeg exits with a non-zero status;
                408 if the conversion takes longer than 30 seconds.
        """
        # Local import so this block stays self-contained with respect to the
        # file's top-level import list.
        import asyncio

        input_path = None
        output_path = None
        try:
            # Both temp files are created inside the ``try`` so that a failure
            # while creating the second one still cleans up the first.
            with tempfile.NamedTemporaryFile(
                delete=False, suffix=f".{input_ext}"
            ) as f_in:
                input_path = f_in.name
                f_in.write(input_bytes)
            with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f_out:
                output_path = f_out.name

            # FFmpeg conversion
            cmd = [
                "ffmpeg",
                "-i", input_path,
                "-ar", str(settings.TARGET_SAMPLE_RATE),
                "-ac", "1",
                "-acodec", "pcm_s16le",
                "-y",  # Overwrite output (the temp file already exists)
                output_path,
            ]

            def _run_ffmpeg():
                # errors="replace": FFmpeg may echo metadata that is not
                # valid UTF-8; decoding must not fail because of it.
                return subprocess.run(
                    cmd,
                    capture_output=True,
                    text=True,
                    errors="replace",
                    timeout=30,
                )

            # subprocess.run blocks; run it in the default executor so the
            # event loop keeps serving other requests meanwhile.
            loop = asyncio.get_running_loop()
            result = await loop.run_in_executor(None, _run_ffmpeg)

            if result.returncode != 0:
                logger.error("FFmpeg error: %s", result.stderr)
                raise HTTPException(
                    status_code=422,
                    detail="Audio conversion failed",
                )

            # Read converted file
            with open(output_path, "rb") as f:
                return f.read()
        except subprocess.TimeoutExpired as exc:
            # Status code kept exactly as in the original implementation.
            raise HTTPException(
                status_code=408,
                detail="Audio conversion timeout",
            ) from exc
        finally:
            # Cleanup
            for path in (input_path, output_path):
                if path is not None and os.path.exists(path):
                    os.unlink(path)

    @staticmethod
    def get_audio_info(audio_bytes: bytes) -> dict:
        """Return basic metadata for an audio payload.

        The bytes are written to a temporary ``.wav`` file and decoded with
        ``librosa.load`` at the file's native sample rate.

        Args:
            audio_bytes: Raw bytes of the audio file.

        Returns:
            A dict with the keys ``duration_seconds`` (rounded to 2 decimals),
            ``sample_rate``, ``channels`` and ``samples`` (per channel).
        """
        path = None
        try:
            with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
                path = tmp.name
                tmp.write(audio_bytes)

            # mono=False keeps the channel axis; with the default mono=True
            # the signal is always down-mixed to 1-D and the channel count
            # could never be anything but 1.
            y, sr = librosa.load(path, sr=None, mono=False)

            # librosa returns shape (n,) for mono and (channels, n) otherwise.
            channels = 1 if y.ndim == 1 else y.shape[0]
            samples = y.shape[-1]
            duration = samples / sr
            return {
                "duration_seconds": round(duration, 2),
                "sample_rate": sr,
                "channels": channels,
                "samples": samples,
            }
        finally:
            if path is not None and os.path.exists(path):
                os.unlink(path)
# Module-level AudioProcessor instance.
audio_processor = AudioProcessor()