Source code for chemistrylab.util.reward

'''
This module provides a default set of reward functions.
'''

import copy
import math
import numpy as np
import sys

from chemistrylab import material
from chemistrylab.vessel import Vessel
from typing import NamedTuple, Tuple, Callable, Optional, List




[docs]
def get_dissolved_amounts(vessel: Vessel, desired_material: str):
    """
    Estimate how much of ``desired_material`` is present in ``vessel`` in dissolved form.

    The desired material is looked up in ``material.REGISTRY``, instantiated, and
    its ``dissolve()`` result is used as a mapping from dissolved component to
    coefficient.

    Args:
        vessel (Vessel): The vessel to inspect. Its ``solute_dict`` is used to check
            which components are dissolved and its ``material_dict`` to read their amounts.
        desired_material (str): Key of the target material in ``material.REGISTRY``.

    Returns:
        Tuple[float,float]:
            - The amount material that could be produced if you removed the solvent. This is the minimum of (quantity/stoich_coeff) for each dissolved component
            - The amount of mols of solutes to subtract from the total material amount

        ``(0, 0)`` is returned when ``dissolve()`` yields fewer than two components,
        or when any component is missing from ``vessel.solute_dict``.
    """
    dis_mats = material.REGISTRY[desired_material]().dissolve()
    # Dissolved version is already the target material
    if len(dis_mats) < 2:
        return 0, 0

    min_amount = float("inf")
    coeff_total = 0
    for mat in dis_mats:
        # Can't make any with what's dissolved
        if mat._name not in vessel.solute_dict:
            return 0, 0
        coeff = dis_mats[mat]
        # Determine how much of the target the solute would make; the scarcest
        # component limits the total.
        amount = vessel.material_dict[mat._name].mol / coeff
        if amount < min_amount:
            min_amount = amount
        coeff_total += coeff

    # coeff_total mols of solutes stand in for one mol of the target, so counting
    # them as the target leaves (coeff_total - 1) surplus mols per mol to subtract.
    contributions = min_amount * (coeff_total - 1)
    return min_amount, contributions

        

[docs]
class RewardGenerator():
    """
    RewardGenerator class generates rewards for a given set of vessels and desired materials.

    Args:
        use_purity (bool): True if reward is based on purity, False if reward is based on the amount of desired material.
        exclude_solvents (bool): True if solvents should be excluded from the total amount of materials for purity calculations.
        include_dissolved (bool): True if reward should include dissolved material components in the vessels as the desired material.
        exclude_mat (str, optional): A string representing a material which gives a negative reward.

    This class returns callable objects, which serve as reward functions
    """
    def __init__(self, use_purity, exclude_solvents, include_dissolved, exclude_mat=None):
        self.exclude_solvents = exclude_solvents
        self.include_dissolved = include_dissolved
        self.use_purity = use_purity
        self.exclude_mat = exclude_mat

    def __call__(self, vessels: Tuple[Vessel], desired_material: str, exclude_material: Optional[str] = None):
        """
        Assign a reward to a set of vessels based off of what is desired/undesired

        For each vessel the net amount is the mols of ``desired_material`` (plus the
        dissolved equivalent when ``include_dissolved`` is set) minus the mols of
        ``self.exclude_mat`` (when set and different from the desired material).
        Without purity the net amount is the vessel's reward. With purity the net
        amount is scaled by ``|net amount| / total``, so the sign of the net amount
        is preserved; vessels whose total is not positive contribute nothing.

        Args:
            vessels (Tuple[Vessel]): A list of Vessel objects.
            desired_material (str): A string representing the desired material for which the reward should be calculated.
            exclude_material (Optional[str]): Currently unused

        Returns:
            float: A floating point number representing the calculated reward.

        """
        reward = 0
        for v in vessels:
            mat_dict = v.material_dict
            # Get the amount of target material that could be extracted from dissolved components
            if self.include_dissolved:
                dissolved, exclude = get_dissolved_amounts(v, desired_material)
            else:
                dissolved = exclude = 0

            # Grab the amount of target material
            amount = dissolved
            if desired_material in mat_dict:
                amount += mat_dict[desired_material].mol
            # The undesired material counts against the target amount
            if self.exclude_mat is not None and self.exclude_mat != desired_material:
                if self.exclude_mat in mat_dict:
                    amount -= mat_dict[self.exclude_mat].mol

            if not self.use_purity:
                reward += amount
                continue

            # Purity requires you to multiply by (desired_amount)/(total_amount)
            # Total amount is either calculated using all materials, or just the non-solvents
            if self.exclude_solvents:
                all_mats = sum(mat.mol for mat in mat_dict.values() if not mat.is_solvent())
            else:
                all_mats = sum(mat.mol for mat in mat_dict.values())
            # Solutes already counted as target material must not be counted twice
            all_mats -= exclude
            if all_mats > 0:
                weighted = amount**2 / all_mats
                # Squaring drops the sign: keep a net-negative amount (more undesired
                # than desired material) as a penalty instead of a positive reward.
                reward += weighted if amount >= 0 else -weighted
        return reward