last changes
This commit is contained in:
@@ -12,7 +12,6 @@ from mathema.actors.cortex import Cortex
|
||||
from mathema.actors.sensor import Sensor
|
||||
from mathema.actors.neuron import Neuron
|
||||
from mathema.actors.actuator import Actuator
|
||||
from mathema.scape.scape import XorScape
|
||||
from mathema.scape.car_racing import CarRacingScape
|
||||
from mathema.envs.openai_car_racing import CarRacing
|
||||
|
||||
@@ -20,6 +19,24 @@ log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class Exoself(Actor):
|
||||
"""
|
||||
Exoself actor coordinating genotype-driven agent evaluation and learning.
|
||||
|
||||
The Exoself represents the *outer control loop* of an agent in the mathema
|
||||
framework. It is responsible for:
|
||||
- loading a genotype snapshot from persistent storage (Neo4j),
|
||||
- constructing the executable phenotype (Sensors, Neurons, Actuators,
|
||||
Cortex, and Scape),
|
||||
- running repeated evaluation episodes,
|
||||
- applying evolutionary weight perturbations,
|
||||
- tracking and reporting fitness statistics,
|
||||
- persisting improved parameters back to the genotype store.
|
||||
|
||||
Conceptually, Exoself corresponds to the “body/executive self” around a
|
||||
cortex:
|
||||
- the Cortex handles step-by-step execution and fitness accumulation,
|
||||
- the Exoself handles episode-level control, learning, and persistence.
|
||||
"""
|
||||
def __init__(self, genotype: Dict[str, Any], file_name: Optional[str] = None):
|
||||
super().__init__("Exoself")
|
||||
self.monitor = None
|
||||
@@ -46,7 +63,14 @@ class Exoself(Actor):
|
||||
self._perturbed: List[Neuron] = []
|
||||
|
||||
@classmethod
|
||||
async def start(cls, agent_id: str, monitor) -> "Exoself":
|
||||
async def start(cls, agent_id: str, monitor):
|
||||
"""
|
||||
|
||||
Method start takes agent_id and monitor as parameters and is a class method. It initializes some attributes of
|
||||
the class and creates a task to run the _runner coroutine. If an exception is caught during execution, a placeholder
|
||||
_Dummy class is returned.
|
||||
|
||||
"""
|
||||
try:
|
||||
g = await load_genotype_snapshot(agent_id)
|
||||
except Exception as e:
|
||||
@@ -73,8 +97,8 @@ class Exoself(Actor):
|
||||
elapsed = 0.0
|
||||
try:
|
||||
fitness, evals, cycles, elapsed = await self.train_until_stop()
|
||||
except Exception as e:
|
||||
log.error(f"[Exoself {self.agent_id}] CRASH in train_until_stop(): {e!r}")
|
||||
except Exception as err:
|
||||
log.error(f"[Exoself {self.agent_id}] CRASH in train_until_stop(): {err!r}")
|
||||
fitness = float("-inf")
|
||||
evals = int(self.eval_acc)
|
||||
cycles = int(self.cycle_acc)
|
||||
@@ -82,8 +106,8 @@ class Exoself(Actor):
|
||||
finally:
|
||||
try:
|
||||
await monitor.cast(("terminated", self.agent_id, fitness, evals, cycles, elapsed))
|
||||
except Exception as e:
|
||||
log.error(f"[Exoself {self.agent_id}] FAILED to notify monitor: {e!r}")
|
||||
except Exception as err:
|
||||
log.error(f"[Exoself {self.agent_id}] FAILED to notify monitor: {err!r}")
|
||||
|
||||
loop = asyncio.get_running_loop()
|
||||
self._runner_task = loop.create_task(_runner(), name=f"Exoself-{self.agent_id}")
|
||||
@@ -96,6 +120,10 @@ class Exoself(Actor):
|
||||
return Exoself(g, file_name=path)
|
||||
|
||||
async def run(self):
|
||||
"""
|
||||
Run loop of the exoself. Builds the network (mapping from genotype to phenotype)
|
||||
and waits for messages from the cortex.
|
||||
"""
|
||||
self.build_pid_map_and_spawn()
|
||||
|
||||
self._link_cortex()
|
||||
@@ -116,6 +144,20 @@ class Exoself(Actor):
|
||||
return
|
||||
|
||||
async def run_evaluation(self):
|
||||
"""
|
||||
|
||||
Description:
|
||||
Method to run evaluation of exoself by building network and linking the components,
|
||||
spawning PID map, linking cortex, and running sensor, neuron, actuator actors.
|
||||
It processes messages from the inbox and terminates upon specific tags.
|
||||
|
||||
Parameters:
|
||||
None
|
||||
|
||||
Return:
|
||||
Tuple containing evaluation results in the format (fitness: float, flag: int, cycles: int, elapsed: float)
|
||||
|
||||
"""
|
||||
log.debug(f"exoself: build network and link...")
|
||||
self.build_pid_map_and_spawn()
|
||||
log.debug(f"exoself: link cortex...")
|
||||
@@ -140,6 +182,17 @@ class Exoself(Actor):
|
||||
return float("-inf"), 0, 0, 0.0
|
||||
|
||||
def build_pid_map_and_spawn(self):
|
||||
"""
|
||||
|
||||
Builds the PID map for the Cortex actor and spawns Neuron, Actuator, and Sensor actors.
|
||||
|
||||
Parameters:
|
||||
- self: reference to the class instance
|
||||
|
||||
Returns:
|
||||
- None
|
||||
|
||||
"""
|
||||
cx = self.g["cortex"]
|
||||
self.cx_actor = Cortex(
|
||||
cid=cx["id"],
|
||||
@@ -254,6 +307,7 @@ class Exoself(Actor):
|
||||
return []
|
||||
|
||||
def _link_cortex(self):
|
||||
|
||||
self.cx_actor.sensors = [a for a in self.sensor_actors if a]
|
||||
self.cx_actor.neurons = [a for a in self.neuron_actors if a]
|
||||
self.cx_actor.actuators = [a for a in self.actuator_actors if a]
|
||||
@@ -263,6 +317,23 @@ class Exoself(Actor):
|
||||
self.tasks.append(asyncio.create_task(self.cx_actor.run()))
|
||||
|
||||
async def train_until_stop(self):
|
||||
"""
|
||||
|
||||
train_until_stop method runs the training until the stop condition is met. It builds the PID map and spawns
|
||||
necessary components, including sensor actors, neuron actors, and actuator actors. If an actuator scape is present,
|
||||
it runs the actuator scape as well.
|
||||
|
||||
The method continuously waits for incoming messages from the inbox and processes them based on the message tag.
|
||||
If the tag is "evaluation_completed," it calls the _on_evaluation_completed method with the received fitness, cycles,
|
||||
and elapsed time. If the _on_evaluation_completed method returns a dictionary, the method returns a tuple containing
|
||||
the highest fitness and the accumulated evaluation count, cycle count, and elapsed time.
|
||||
|
||||
If the message tag is "terminate," the method calls the _terminate_all method to stop the training process and
|
||||
returns a tuple with negative infinity for fitness and zeros for other metrics.
|
||||
|
||||
While training is still in progress the method does not return; it keeps waiting for the next message and returns only in the two cases described above.
|
||||
|
||||
"""
|
||||
self.build_pid_map_and_spawn()
|
||||
self._link_cortex()
|
||||
|
||||
@@ -291,6 +362,30 @@ class Exoself(Actor):
|
||||
return float("-inf"), 0, 0, 0.0
|
||||
|
||||
async def _on_evaluation_completed(self, fitness: float, cycles: int, elapsed: float):
|
||||
"""
|
||||
This method _on_evaluation_completed is an asynchronous function that handles the completion
|
||||
of an evaluation process.
|
||||
|
||||
Parameters:
|
||||
- fitness: a float representing the fitness value obtained from the evaluation process.
|
||||
- cycles: an integer indicating the number of cycles involved in the evaluation.
|
||||
- elapsed: a float representing the elapsed time for the evaluation process.
|
||||
|
||||
This method updates internal counters and logs information about the evaluation process. It also performs
|
||||
actions based on the evaluation results, such as updating the highest fitness value, backing up weights,
|
||||
or restoring weights of neuron actors.
|
||||
|
||||
If the number of attempts reaches the maximum allowed attempts, it stops the evaluation process,
|
||||
backs up the genotype, terminates all actors, and returns a dictionary containing information
|
||||
about the best fitness value, evaluation count, cycle count, and accumulated time.
|
||||
|
||||
Finally, it calculates the perturbation probability, selects a subset of neuron actors for weight perturbation,
|
||||
sends perturbation commands to selected neuron actors, and reactivates the cx_actor.
|
||||
|
||||
Note: This method does not have a return statement for successful execution. If an error occurs during the
|
||||
episode_done message sending, it will ignore the exception. No additional
|
||||
errors or exceptions are caught or handled in this method.
|
||||
"""
|
||||
self.eval_acc += 1
|
||||
self.cycle_acc += int(cycles)
|
||||
self.time_acc += float(elapsed)
|
||||
|
||||
Reference in New Issue
Block a user