dbatcode28's picture
fixex
3e50fa2
Raw
History Blame Contribute Delete
324 Bytes
from __future__ import annotations
from typing import Dict
from .models import RewardModel
# Small negative constant; not referenced anywhere in the visible part of this
# module. NOTE(review): presumably a per-step reward component that callers
# include in the `components` mapping passed to build_reward — confirm.
STEP_PENALTY = -0.01
def build_reward(components: Dict[str, float], rationale: str) -> RewardModel:
    """Assemble a ``RewardModel`` from named reward components.

    The model's ``value`` is the sum of all values in ``components``,
    rounded to four decimal places.

    Args:
        components: Mapping of component name to its numeric contribution.
            The same mapping object is handed to the model (no copy is made).
        rationale: Text passed through unchanged to the model.

    Returns:
        A ``RewardModel`` built from the rounded total, ``components`` and
        ``rationale``.
    """
    total = sum(components.values())
    return RewardModel(
        value=round(total, 4),
        components=components,
        rationale=rationale,
    )