347 lines
13 KiB
Python
347 lines
13 KiB
Python
# exoself.py
|
||
import asyncio
|
||
import json
|
||
import math
|
||
import random
|
||
from collections import defaultdict
|
||
from typing import Any, Dict, List, Tuple, Optional
|
||
|
||
from actor import Actor
|
||
from cortex import Cortex
|
||
from sensor import Sensor
|
||
from neuron import Neuron
|
||
from actuator import Actuator
|
||
from scape import XorScape
|
||
|
||
|
||
class Exoself(Actor):
    """Translate a genotype into a running phenotype and drive its training.

    The genotype is a JSON-style dict; the phenotype is the set of cortex,
    sensor, neuron and actuator actors spawned from it.  Training is a simple
    neuroevolution loop: back up / restore / perturb neuron weights, then
    reactivate the cortex.
    """

    def __init__(self, genotype: Dict[str, Any], file_name: Optional[str] = None):
        """Store the genotype and initialise empty runtime state.

        Args:
            genotype: Parsed genotype description of the network.
            file_name: Optional path the genotype is written back to when
                the genotype is backed up; a falsy value disables saving.
        """
        super().__init__("Exoself")
        self.file_name = file_name
        self.g = genotype

        # Phenotype actors; filled in by _build_pid_map_and_spawn().
        self.cx_actor: Optional[Cortex] = None
        self.actuator_scape = None
        self.sensor_actors: List[Sensor] = []
        self.neuron_actors: List[Neuron] = []
        self.actuator_actors: List[Actuator] = []

        # Asyncio tasks of every started actor.
        self.tasks: List[asyncio.Task] = []

        # Neurons perturbed most recently (the restore targets).
        self._perturbed: List[Neuron] = []

        # Training statistics.
        self.MAX_ATTEMPTS = 50
        self.attempt = 0
        self.highest_fitness = -math.inf
        self.eval_acc = 0
        self.cycle_acc = 0
        self.time_acc = 0.0
|
||
|
||
# ---------- Convenience ----------
|
||
@classmethod
def from_file(cls, path: str) -> "Exoself":
    """Build an instance from a genotype stored as a JSON file.

    Args:
        path: Location of the JSON genotype file; it is also passed on as
            the new instance's ``file_name``.

    Returns:
        A new instance of ``cls`` wrapping the decoded genotype.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # Decode explicitly as UTF-8 instead of relying on the locale default.
    with open(path, "r", encoding="utf-8") as handle:
        genotype = json.load(handle)
    # Instantiate through cls so subclasses get objects of their own type.
    return cls(genotype, file_name=path)
|
||
|
||
# ---------- Public API ----------
|
||
async def run(self):
    """Build the network, start every actor and drive the training loop.

    Two inbox messages are handled; anything else is ignored:

    * ``("evaluation_completed", fitness, cycles, elapsed)`` is forwarded
      to ``_on_evaluation_completed``.
    * ``("terminate",)`` shuts all actors down.

    Returns:
        The statistics dict produced by ``_on_evaluation_completed`` once
        the stop criterion is reached, or ``None`` after an explicit
        ``"terminate"`` message.
    """
    # 1) Build the network.
    self._build_pid_map_and_spawn()

    # 2) Link the cortex; this also starts the cortex task.
    self._link_cortex()

    # 3) Start sensors, neurons, actuators and, if present, the scape.
    runnables = [*self.sensor_actors, *self.neuron_actors, *self.actuator_actors]
    if self.actuator_scape:
        runnables.append(self.actuator_scape)
    for actor in runnables:
        self.tasks.append(asyncio.create_task(actor.run()))

    # 4) Main loop: react to cortex events.
    while True:
        msg = await self.inbox.get()
        tag = msg[0]

        if tag == "evaluation_completed":
            _, fitness, cycles, elapsed = msg
            summary = await self._on_evaluation_completed(fitness, cycles, elapsed)
            if summary is not None:
                # Stop criterion reached: the handler has already terminated
                # every actor, so waiting for further messages would block
                # forever.
                return summary

        elif tag == "terminate":
            await self._terminate_all()
            return None
|
||
|
||
# in exoself.py, inside the Exoself class
|
||
|
||
async def run_evaluation(self):
    """Run exactly one episode and report its outcome.

    Builds and links the network, starts all actors, waits for the cortex
    to report ``"evaluation_completed"`` and then shuts everything down.

    Returns:
        ``(fitness, evals, cycles, elapsed)``.  ``evals`` is 1 for a
        completed episode.  ``(-inf, 0, 0, 0.0)`` is returned when a
        ``"terminate"`` message arrives before the episode completes.
    """
    # 1) Build the network and link the cortex.
    print("build network and link...")
    self._build_pid_map_and_spawn()
    print("link cortex...")
    self._link_cortex()

    # 2) Start the remaining actors; _link_cortex() already started the
    #    cortex task.
    runnables = [*self.sensor_actors, *self.neuron_actors, *self.actuator_actors]
    if self.actuator_scape:
        runnables.append(self.actuator_scape)
    self.tasks.extend(asyncio.create_task(actor.run()) for actor in runnables)

    print("network actors are running...")

    # 3) Wait for the episode to finish (or to be aborted).
    while True:
        msg = await self.inbox.get()
        print("message in exsoself: ", msg)
        tag = msg[0]

        if tag == "terminate":
            # Terminated before the episode completed.
            await self._terminate_all()
            return float("-inf"), 0, 0, 0.0

        if tag != "evaluation_completed":
            continue

        _, fitness, cycles, elapsed = msg
        # 4) Shut down cleanly; one episode counts as one evaluation.
        await self._terminate_all()
        return float(fitness), 1, int(cycles), float(elapsed)
|
||
|
||
# ---------- Build ----------
|
||
def _build_pid_map_and_spawn(self):
|
||
"""
|
||
Baut Cortex, dann alle Neuronen (mit cx_pid=self.cx_actor), dann verlinkt Outputs schichtweise,
|
||
dann Sensoren/Aktuatoren (mit cx_pid=self.cx_actor). Achtung: Reihenfolge wichtig.
|
||
"""
|
||
cx = self.g["cortex"]
|
||
# Cortex zuerst (damit wir cx_pid an Kinder übergeben können)
|
||
self.cx_actor = Cortex(
|
||
cid=cx["id"],
|
||
exoself_pid=self,
|
||
sensor_pids=[], # werden gleich gesetzt
|
||
neuron_pids=[],
|
||
actuator_pids=[]
|
||
)
|
||
|
||
self.actuator_scape = XorScape()
|
||
|
||
# Neuronen nach Layer gruppieren (damit outputs korrekt gesetzt werden)
|
||
layers: Dict[int, List[Dict[str, Any]]] = defaultdict(list)
|
||
for n in self.g["neurons"]:
|
||
layers[n["layer_index"]].append(n)
|
||
ordered_layers = [layers[i] for i in sorted(layers)]
|
||
|
||
# Platzhalter: wir benötigen später Referenzen nach ID
|
||
id2neuron_actor: Dict[Any, Neuron] = {}
|
||
|
||
# Zuerst alle Neuronen erzeugen (ohne Outputs), damit wir Referenzen haben
|
||
for layer in ordered_layers:
|
||
for n in layer:
|
||
input_idps = [(iw["input_id"], iw["weights"]) for iw in n["input_weights"]]
|
||
neuron = Neuron(
|
||
nid=n["id"],
|
||
cx_pid=self.cx_actor,
|
||
af_name=n.get("activation_function", "tanh"),
|
||
input_idps=input_idps,
|
||
output_pids=[] # füllen wir gleich
|
||
)
|
||
id2neuron_actor[n["id"]] = neuron
|
||
self.neuron_actors.append(neuron)
|
||
|
||
# Jetzt Outputs pro Layer setzen:
|
||
# - für Nicht-Output-Layer: Outputs = Neuronen der nächsten Schicht
|
||
# - für Output-Layer: Outputs = Aktuator(en) (setzen wir nachdem Aktuatoren erzeugt sind)
|
||
for li in range(len(ordered_layers) - 1):
|
||
next_pids = [id2neuron_actor[nx["id"]] for nx in ordered_layers[li + 1]]
|
||
for n in ordered_layers[li]:
|
||
id2neuron_actor[n["id"]].outputs = next_pids
|
||
|
||
# Aktuatoren anlegen (brauchen cx_pid und fanin_ids)
|
||
# Genotyp kann "actuator" (ein Objekt) oder "actuators" (Liste) haben – wir unterstützen beides.
|
||
actuators = self._get_actuators_block()
|
||
if not actuators:
|
||
raise ValueError("Genotype must include 'actuator' or 'actuators'.")
|
||
|
||
for a in actuators:
|
||
fanin_ids = a.get("fanin_ids", [])
|
||
expect = len(fanin_ids) if fanin_ids else 0
|
||
actuator = Actuator(
|
||
aid=a["id"],
|
||
cx_pid=self.cx_actor,
|
||
name=a["name"],
|
||
fanin_ids=fanin_ids,
|
||
expect_count=expect,
|
||
scape=self.actuator_scape
|
||
)
|
||
self.actuator_actors.append(actuator)
|
||
|
||
# Output-Layer Neuronen → Outputs = Aktuatoren
|
||
if ordered_layers:
|
||
last_layer = ordered_layers[-1]
|
||
out_targets = self.actuator_actors # Liste
|
||
for n in last_layer:
|
||
id2neuron_actor[n["id"]].outputs = out_targets
|
||
|
||
# Sensor(en) anlegen (brauchen cx_pid und fanout auf erste Schicht)
|
||
sensors = self._get_sensors_block()
|
||
if not sensors:
|
||
raise ValueError("Genotype must include 'sensor' or 'sensors'.")
|
||
|
||
first_layer = ordered_layers[0] if ordered_layers else []
|
||
first_layer_pids = [id2neuron_actor[n["id"]] for n in first_layer]
|
||
|
||
for s in sensors:
|
||
sensor = Sensor(
|
||
sid=s["id"],
|
||
cx_pid=self.cx_actor,
|
||
name=s["name"],
|
||
vector_length=s["vector_length"],
|
||
fanout_pids=first_layer_pids,
|
||
scape=self.actuator_scape
|
||
)
|
||
self.sensor_actors.append(sensor)
|
||
|
||
def _get_sensors_block(self) -> List[Dict[str, Any]]:
|
||
if "sensors" in self.g:
|
||
return list(self.g["sensors"])
|
||
if "sensor" in self.g:
|
||
return [self.g["sensor"]]
|
||
return []
|
||
|
||
def _get_actuators_block(self) -> List[Dict[str, Any]]:
|
||
if "actuators" in self.g:
|
||
return list(self.g["actuators"])
|
||
if "actuator" in self.g:
|
||
return [self.g["actuator"]]
|
||
return []
|
||
|
||
# ---------- Link ----------
|
||
def _link_cortex(self):
|
||
"""
|
||
Übergibt dem Cortex die fertigen Listen und setzt awaiting_sync.
|
||
Startet dann den Cortex-Task.
|
||
"""
|
||
self.cx_actor.sensors = [a for a in self.sensor_actors if a]
|
||
self.cx_actor.neurons = [a for a in self.neuron_actors if a]
|
||
self.cx_actor.actuators = [a for a in self.actuator_actors if a]
|
||
|
||
# Wichtig: vor Start die erwarteten AIDs setzen,
|
||
# damit der erste Sensor-Trigger nicht in eine leere awaiting_sync läuft.
|
||
self.cx_actor.awaiting_sync = set(a.aid for a in self.cx_actor.actuators)
|
||
|
||
# Cortex starten
|
||
self.tasks.append(asyncio.create_task(self.cx_actor.run()))
|
||
|
||
# ---------- Training-Loop Reaction ----------
|
||
async def _on_evaluation_completed(self, fitness: float, cycles: int, elapsed: float):
|
||
self.eval_acc += 1
|
||
self.cycle_acc += int(cycles)
|
||
self.time_acc += float(elapsed)
|
||
|
||
print(f"[Exoself] evaluation_completed: fitness={fitness:.6f} cycles={cycles} time={elapsed:.3f}s")
|
||
|
||
if fitness > self.highest_fitness:
|
||
self.highest_fitness = fitness
|
||
self.attempt = 0
|
||
# Backup aller Neuronen
|
||
for n in self.neuron_actors:
|
||
await n.send(("weight_backup",))
|
||
else:
|
||
self.attempt += 1
|
||
# Restore nur der zuletzt perturbierten Neuronen
|
||
for n in self._perturbed:
|
||
await n.send(("weight_restore",))
|
||
|
||
# Stop-Kriterium?
|
||
if self.attempt >= self.MAX_ATTEMPTS:
|
||
print(f"[Exoself] STOP. Best fitness={self.highest_fitness:.6f} evals={self.eval_acc} cycles={self.cycle_acc}")
|
||
await self._backup_genotype()
|
||
await self._terminate_all()
|
||
return {
|
||
"best_fitness": self.highest_fitness,
|
||
"eval_acc": self.eval_acc,
|
||
"cycle_acc": self.cycle_acc,
|
||
"time_acc": self.time_acc,
|
||
}
|
||
|
||
# Perturbiere Teilmenge der Neuronen
|
||
tot = len(self.neuron_actors)
|
||
mp = 1.0 / math.sqrt(max(1, tot))
|
||
self._perturbed = [n for n in self.neuron_actors if random.random() < mp]
|
||
|
||
for n in self._perturbed:
|
||
await n.send(("weight_perturb",))
|
||
|
||
# Nächste Episode starten
|
||
await self.cx_actor.send(("reactivate",))
|
||
|
||
# ---------- Backup Genotype ----------
|
||
async def _backup_genotype(self):
|
||
"""
|
||
Holt von allen Neuronen die aktuellen Weights und schreibt sie in self.g.
|
||
Speichert optional in self.file_name.
|
||
"""
|
||
# 1) Request
|
||
remaining = len(self.neuron_actors)
|
||
for n in self.neuron_actors:
|
||
await n.send(("get_backup",))
|
||
|
||
# 2) Collect vom Cortex-Postfach (Neuronen senden an cx_pid → cx leitet an Exoself weiter
|
||
# oder du hast sie direkt an Exoself schicken lassen; falls direkt an Cortex, dann
|
||
# lausche hier stattdessen auf self.cx_actor.inbox. In deinem Neuron-Code geht es an cx_pid,
|
||
# und in deiner bisherigen Implementierung hast du aus dem Cortex-Postfach gelesen.
|
||
# Hier vereinfachen wir: Neuronen senden direkt an EXOSELF (passe Neuron ggf. an).
|
||
backups: List[Tuple[Any, List[Tuple[Any, List[float]]]]] = []
|
||
|
||
while remaining > 0:
|
||
msg = await self.inbox.get()
|
||
if msg[0] == "backup_from_neuron":
|
||
_, nid, idps = msg
|
||
backups.append((nid, idps))
|
||
remaining -= 1
|
||
|
||
# 3) Update JSON
|
||
# exoself.py -> in _backup_genotype()
|
||
id2n = {n["id"]: n for n in self.g["neurons"]}
|
||
for nid, idps in backups:
|
||
if nid not in id2n:
|
||
continue
|
||
new_iw = []
|
||
bias_val = None
|
||
for item in idps:
|
||
if isinstance(item[0], str) and item[0] == "bias":
|
||
bias_val = float(item[1]) if not isinstance(item[1], list) else float(item[1][0])
|
||
else:
|
||
input_id, weights = item
|
||
new_iw.append({"input_id": input_id, "weights": list(weights)})
|
||
id2n[nid]["input_weights"] = new_iw
|
||
# Bias mit abspeichern (Variante B):
|
||
if bias_val is not None:
|
||
id2n[nid].setdefault("input_weights", []).append({"input_id": "bias", "weights": [bias_val]})
|
||
|
||
# 4) Save
|
||
if self.file_name:
|
||
with open(self.file_name, "w") as f:
|
||
json.dump(self.g, f, indent=2)
|
||
print(f"[Exoself] Genotype updated → {self.file_name}")
|
||
|
||
# ---------- Termination ----------
|
||
async def _terminate_all(self):
|
||
for a in self.sensor_actors + self.neuron_actors + self.actuator_actors:
|
||
await a.send(("terminate",))
|
||
if self.cx_actor:
|
||
await self.cx_actor.send(("terminate",))
|
||
for t in self.tasks:
|
||
if not t.done():
|
||
t.cancel()
|
||
self.tasks.clear()
|