last changes

This commit is contained in:
2026-02-21 10:58:05 +01:00
parent 841bc7c805
commit 0902732f60
136 changed files with 10387 additions and 2015 deletions

View File

@@ -12,7 +12,6 @@ from mathema.actors.cortex import Cortex
from mathema.actors.sensor import Sensor
from mathema.actors.neuron import Neuron
from mathema.actors.actuator import Actuator
from mathema.scape.scape import XorScape
from mathema.scape.car_racing import CarRacingScape
from mathema.envs.openai_car_racing import CarRacing
@@ -20,6 +19,24 @@ log = logging.getLogger(__name__)
class Exoself(Actor):
"""
Exoself actor coordinating genotype-driven agent evaluation and learning.
The Exoself represents the *outer control loop* of an agent in the mathema
framework. It is responsible for:
- loading a genotype snapshot from persistent storage (Neo4j),
- constructing the executable phenotype (Sensors, Neurons, Actuators,
Cortex, and Scape),
- running repeated evaluation episodes,
- applying evolutionary weight perturbations,
- tracking and reporting fitness statistics,
- persisting improved parameters back to the genotype store.
Conceptually, Exoself corresponds to the “body/executive self” around a
cortex:
- the Cortex handles step-by-step execution and fitness accumulation,
- the Exoself handles episode-level control, learning, and persistence.
"""
def __init__(self, genotype: Dict[str, Any], file_name: Optional[str] = None):
super().__init__("Exoself")
self.monitor = None
@@ -46,7 +63,14 @@ class Exoself(Actor):
self._perturbed: List[Neuron] = []
@classmethod
async def start(cls, agent_id: str, monitor) -> "Exoself":
async def start(cls, agent_id: str, monitor):
"""
Class method that takes agent_id and monitor as parameters. It loads the genotype snapshot for agent_id, initializes
some attributes of the class, and creates a task to run the _runner coroutine. If an exception is caught during
execution, a placeholder _Dummy is returned instead.
"""
try:
g = await load_genotype_snapshot(agent_id)
except Exception as e:
@@ -73,8 +97,8 @@ class Exoself(Actor):
elapsed = 0.0
try:
fitness, evals, cycles, elapsed = await self.train_until_stop()
except Exception as e:
log.error(f"[Exoself {self.agent_id}] CRASH in train_until_stop(): {e!r}")
except Exception as err:
log.error(f"[Exoself {self.agent_id}] CRASH in train_until_stop(): {err!r}")
fitness = float("-inf")
evals = int(self.eval_acc)
cycles = int(self.cycle_acc)
@@ -82,8 +106,8 @@ class Exoself(Actor):
finally:
try:
await monitor.cast(("terminated", self.agent_id, fitness, evals, cycles, elapsed))
except Exception as e:
log.error(f"[Exoself {self.agent_id}] FAILED to notify monitor: {e!r}")
except Exception as err:
log.error(f"[Exoself {self.agent_id}] FAILED to notify monitor: {err!r}")
loop = asyncio.get_running_loop()
self._runner_task = loop.create_task(_runner(), name=f"Exoself-{self.agent_id}")
@@ -96,6 +120,10 @@ class Exoself(Actor):
return Exoself(g, file_name=path)
async def run(self):
"""
Run loop of the Exoself. Builds the network (mapping from genotype to phenotype)
and waits for messages from the cortex.
"""
self.build_pid_map_and_spawn()
self._link_cortex()
@@ -116,6 +144,20 @@ class Exoself(Actor):
return
async def run_evaluation(self):
"""
Description:
Method to run evaluation of exoself by building network and linking the components,
spawning PID map, linking cortex, and running sensor, neuron, actuator actors.
It processes messages from the inbox and terminates upon specific tags.
Parameters:
None
Return:
Tuple containing evaluation results in the format (fitness: float, flag: int, cycles: int, elapsed: float)
"""
log.debug(f"exoself: build network and link...")
self.build_pid_map_and_spawn()
log.debug(f"exoself: link cortex...")
@@ -140,6 +182,17 @@ class Exoself(Actor):
return float("-inf"), 0, 0, 0.0
def build_pid_map_and_spawn(self):
"""
Builds the PID map for the Cortex actor and spawns Neuron, Actuator, and Sensor actors.
Parameters:
- self: reference to the class instance
Returns:
- None
"""
cx = self.g["cortex"]
self.cx_actor = Cortex(
cid=cx["id"],
@@ -254,6 +307,7 @@ class Exoself(Actor):
return []
def _link_cortex(self):
self.cx_actor.sensors = [a for a in self.sensor_actors if a]
self.cx_actor.neurons = [a for a in self.neuron_actors if a]
self.cx_actor.actuators = [a for a in self.actuator_actors if a]
@@ -263,6 +317,23 @@ class Exoself(Actor):
self.tasks.append(asyncio.create_task(self.cx_actor.run()))
async def train_until_stop(self):
"""
train_until_stop method runs the training until the stop condition is met. It builds the PID map and spawns
necessary components, including sensor actors, neuron actors, and actuator actors. If an actuator scape is present,
it runs the actuator scape as well.
The method continuously waits for incoming messages from the inbox and processes them based on the message tag.
If the tag is "evaluation_completed," it calls the _on_evaluation_completed method with the received fitness, cycles,
and elapsed time. If the _on_evaluation_completed method returns a dictionary, the method returns a tuple containing
the highest fitness, the accumulated evaluation count, the accumulated cycle count, and the accumulated time.
If the message tag is "terminate," the method calls the _terminate_all method to stop the training process and
returns a tuple with negative infinity for fitness and zeros for other metrics.
While training is still in progress the method does not return; it keeps processing messages until one of the return paths described above is reached.
"""
self.build_pid_map_and_spawn()
self._link_cortex()
@@ -291,6 +362,30 @@ class Exoself(Actor):
return float("-inf"), 0, 0, 0.0
async def _on_evaluation_completed(self, fitness: float, cycles: int, elapsed: float):
"""
This method _on_evaluation_completed is an asynchronous function that handles the completion
of an evaluation process.
Parameters:
- fitness: a float representing the fitness value obtained from the evaluation process.
- cycles: an integer indicating the number of cycles involved in the evaluation.
- elapsed: a float representing the elapsed time for the evaluation process.
This method updates internal counters and logs information about the evaluation process. It also performs
actions based on the evaluation results, such as updating the highest fitness value, backing up weights,
or restoring weights of neuron actors.
If the number of attempts reaches the maximum allowed attempts, it stops the evaluation process,
backs up the genotype, terminates all actors, and returns a dictionary containing information
about the best fitness value, evaluation count, cycle count, and accumulated time.
Finally, it calculates the perturbation probability, selects a subset of neuron actors for weight perturbation,
sends perturbation commands to selected neuron actors, and reactivates the cx_actor.
Note: When training continues, this method ends without an explicit return statement (only the stop case described
above returns a dictionary). If an error occurs while sending the episode_done message, the exception is ignored.
No other errors or exceptions are caught or handled in this method.
"""
self.eval_acc += 1
self.cycle_acc += int(cycles)
self.time_acc += float(elapsed)