Final implementation of the stochastic hill climber (SHC)

2025-09-26 14:18:00 +02:00
parent 0ace1cec3c
commit 90e67652ab
12 changed files with 186 additions and 462 deletions
--- a/experiments/stochastic_hillclimber/actors/exoself.py
+++ b/experiments/stochastic_hillclimber/actors/exoself.py
@@ -1,4 +1,3 @@
-# exoself.py
 import asyncio
 import json
 import math
@@ -15,10 +14,6 @@ from scape import XorScape


 class Exoself(Actor):
-    """
-    Exoself übersetzt den Genotyp (JSON) in einen laufenden Phenotyp (Actors) und
-    steuert das simple Neuroevolution-Training (Backup/Restore/Perturb + Reactivate).
-    """
    def __init__(self, genotype: Dict[str, Any], file_name: Optional[str] = None):
        super().__init__("Exoself")
        self.g = genotype
@@ -40,29 +35,22 @@ class Exoself(Actor):
        self.MAX_ATTEMPTS = 50
        self.actuator_scape = None

-        # zuletzt perturbierte Neuronen (für Restore)
        self._perturbed: List[Neuron] = []

-    # ---------- Convenience ----------
    @staticmethod
    def from_file(path: str) -> "Exoself":
        with open(path, "r") as f:
            g = json.load(f)
        return Exoself(g, file_name=path)

-    # ---------- Public API ----------
    async def run(self):
-        # 1) Netzwerk bauen
        self._build_pid_map_and_spawn()

-        # 2) Cortex verlinken + starten
        self._link_cortex()

-        # 3) Actors starten (Sensoren/Neuronen/Aktuatoren)
        for a in self.sensor_actors + self.neuron_actors + self.actuator_actors + [self.actuator_scape]:
            self.tasks.append(asyncio.create_task(a.run()))

-        # 4) Hauptloop: auf Cortex-Events hören
        while True:
            msg = await self.inbox.get()
            tag = msg[0]
@@ -75,23 +63,12 @@ class Exoself(Actor):
                await self._terminate_all()
                return

-        # in exoself.py, innerhalb der Klasse Exoself
-
    async def run_evaluation(self):
-        """
-        Eine einzelne Episode/Evaluation:
-        - baut & verlinkt das Netz
-        - startet alle Actors
-        - wartet auf 'evaluation_completed' vom Cortex
-        - beendet alles und liefert (fitness, evals, cycles, elapsed)
-        """
-        # 1) Netzwerk bauen & Cortex verlinken
        print("build network and link...")
        self._build_pid_map_and_spawn()
        print("link cortex...")
        self._link_cortex()

-        # 2) Sensor/Neuron/Aktuator-Tasks starten (Cortex startete _link_cortex bereits)
        for a in self.sensor_actors + self.neuron_actors + self.actuator_actors:
            self.tasks.append(asyncio.create_task(a.run()))

@@ -100,20 +77,16 @@ class Exoself(Actor):

        print("network actors are running...")

-        # 3) Auf Abschluss warten
        while True:
            msg = await self.inbox.get()
            print("message in exsoself: ", msg)
            tag = msg[0]
            if tag == "evaluation_completed":
                _, fitness, cycles, elapsed = msg
-                # 4) Sauber terminieren
                await self._terminate_all()
-                # Evals = 1 (eine Episode)
                return float(fitness), 1, int(cycles), float(elapsed)
            elif tag == "terminate":
                await self._terminate_all()
-                # Falls vorzeitig terminiert wurde
                return float("-inf"), 0, 0, 0.0

    # ---------- Build ----------
@@ -127,23 +100,20 @@ class Exoself(Actor):
        self.cx_actor = Cortex(
            cid=cx["id"],
            exoself_pid=self,
-            sensor_pids=[],   # werden gleich gesetzt
+            sensor_pids=[],
            neuron_pids=[],
            actuator_pids=[]
        )

        self.actuator_scape = XorScape()

-        # Neuronen nach Layer gruppieren (damit outputs korrekt gesetzt werden)
        layers: Dict[int, List[Dict[str, Any]]] = defaultdict(list)
        for n in self.g["neurons"]:
            layers[n["layer_index"]].append(n)
        ordered_layers = [layers[i] for i in sorted(layers)]

-        # Platzhalter: wir benötigen später Referenzen nach ID
        id2neuron_actor: Dict[Any, Neuron] = {}

-        # Zuerst alle Neuronen erzeugen (ohne Outputs), damit wir Referenzen haben
        for layer in ordered_layers:
            for n in layer:
                input_idps = [(iw["input_id"], iw["weights"]) for iw in n["input_weights"]]
@@ -157,16 +127,11 @@ class Exoself(Actor):
                id2neuron_actor[n["id"]] = neuron
                self.neuron_actors.append(neuron)

-        # Jetzt Outputs pro Layer setzen:
-        # - für Nicht-Output-Layer: Outputs = Neuronen der nächsten Schicht
-        # - für Output-Layer: Outputs = Aktuator(en) (setzen wir nachdem Aktuatoren erzeugt sind)
        for li in range(len(ordered_layers) - 1):
            next_pids = [id2neuron_actor[nx["id"]] for nx in ordered_layers[li + 1]]
            for n in ordered_layers[li]:
                id2neuron_actor[n["id"]].outputs = next_pids

-        # Aktuatoren anlegen (brauchen cx_pid und fanin_ids)
-        # Genotyp kann "actuator" (ein Objekt) oder "actuators" (Liste) haben – wir unterstützen beides.
        actuators = self._get_actuators_block()
        if not actuators:
            raise ValueError("Genotype must include 'actuator' or 'actuators'.")
@@ -184,14 +149,12 @@ class Exoself(Actor):
            )
            self.actuator_actors.append(actuator)

-        # Output-Layer Neuronen → Outputs = Aktuatoren
        if ordered_layers:
            last_layer = ordered_layers[-1]
-            out_targets = self.actuator_actors  # Liste
+            out_targets = self.actuator_actors
            for n in last_layer:
                id2neuron_actor[n["id"]].outputs = out_targets

-        # Sensor(en) anlegen (brauchen cx_pid und fanout auf erste Schicht)
        sensors = self._get_sensors_block()
        if not sensors:
            raise ValueError("Genotype must include 'sensor' or 'sensors'.")
@@ -224,24 +187,46 @@ class Exoself(Actor):
            return [self.g["actuator"]]
        return []

-    # ---------- Link ----------
    def _link_cortex(self):
-        """
-        Übergibt dem Cortex die fertigen Listen und setzt awaiting_sync.
-        Startet dann den Cortex-Task.
-        """
        self.cx_actor.sensors = [a for a in self.sensor_actors if a]
        self.cx_actor.neurons = [a for a in self.neuron_actors if a]
        self.cx_actor.actuators = [a for a in self.actuator_actors if a]

-        # Wichtig: vor Start die erwarteten AIDs setzen,
-        # damit der erste Sensor-Trigger nicht in eine leere awaiting_sync läuft.
        self.cx_actor.awaiting_sync = set(a.aid for a in self.cx_actor.actuators)

-        # Cortex starten
        self.tasks.append(asyncio.create_task(self.cx_actor.run()))

-    # ---------- Training-Loop Reaction ----------
+    async def train_until_stop(self):
+        """Build the network, start its actors and train until a stop condition.
+
+        Returns:
+            A ``(fitness, evals, cycles, time)`` tuple: ``highest_fitness``,
+            ``eval_acc``, ``cycle_acc`` and ``time_acc`` once
+            ``_on_evaluation_completed()`` returns a dict, or
+            ``(float("-inf"), 0, 0, 0.0)`` when a "terminate" message arrives.
+        """
+        self._build_pid_map_and_spawn()
+        self._link_cortex()
+
+        # Start the sensor, neuron and actuator tasks (plus the scape, if set);
+        # the cortex task was already created by _link_cortex().
+        for a in self.sensor_actors + self.neuron_actors + self.actuator_actors:
+            self.tasks.append(asyncio.create_task(a.run()))
+        if self.actuator_scape:
+            self.tasks.append(asyncio.create_task(self.actuator_scape.run()))
+
+        while True:
+            msg = await self.inbox.get()
+            tag = msg[0]
+
+            if tag == "evaluation_completed":
+                _, fitness, cycles, elapsed = msg
+                maybe_stats = await self._on_evaluation_completed(fitness, cycles, elapsed)
+                # On stop, _on_evaluation_completed() has already called _backup_genotype() and _terminate_all()
+                if isinstance(maybe_stats, dict):
+                    # End of training – return the data from self.* (as in the book: Fitness/Evals/Cycles/Time)
+                    return (
+                        float(self.highest_fitness),
+                        int(self.eval_acc),
+                        int(self.cycle_acc),
+                        float(self.time_acc),
+                    )
+
+            elif tag == "terminate":
+                await self._terminate_all()
+                return float("-inf"), 0, 0, 0.0
+
    async def _on_evaluation_completed(self, fitness: float, cycles: int, elapsed: float):
        self.eval_acc += 1
        self.cycle_acc += int(cycles)
@@ -249,21 +234,20 @@ class Exoself(Actor):

        print(f"[Exoself] evaluation_completed: fitness={fitness:.6f} cycles={cycles} time={elapsed:.3f}s")

-        if fitness > self.highest_fitness:
+        REL = 1e-6
+        if fitness > self.highest_fitness * (1.0 + REL):
            self.highest_fitness = fitness
            self.attempt = 0
-            # Backup aller Neuronen
            for n in self.neuron_actors:
                await n.send(("weight_backup",))
        else:
            self.attempt += 1
-            # Restore nur der zuletzt perturbierten Neuronen
            for n in self._perturbed:
                await n.send(("weight_restore",))

-        # Stop-Kriterium?
        if self.attempt >= self.MAX_ATTEMPTS:
-            print(f"[Exoself] STOP. Best fitness={self.highest_fitness:.6f} evals={self.eval_acc} cycles={self.cycle_acc}")
+            print(
+                f"[Exoself] STOP. Best fitness={self.highest_fitness:.6f} evals={self.eval_acc} cycles={self.cycle_acc}")
            await self._backup_genotype()
            await self._terminate_all()
            return {
@@ -273,7 +257,6 @@ class Exoself(Actor):
                "time_acc": self.time_acc,
            }

-        # Perturbiere Teilmenge der Neuronen
        tot = len(self.neuron_actors)
        mp = 1.0 / math.sqrt(max(1, tot))
        self._perturbed = [n for n in self.neuron_actors if random.random() < mp]
@@ -281,25 +264,13 @@ class Exoself(Actor):
        for n in self._perturbed:
            await n.send(("weight_perturb",))

-        # Nächste Episode starten
        await self.cx_actor.send(("reactivate",))

-    # ---------- Backup Genotype ----------
    async def _backup_genotype(self):
-        """
-        Holt von allen Neuronen die aktuellen Weights und schreibt sie in self.g.
-        Speichert optional in self.file_name.
-        """
-        # 1) Request
        remaining = len(self.neuron_actors)
        for n in self.neuron_actors:
            await n.send(("get_backup",))

-        # 2) Collect vom Cortex-Postfach (Neuronen senden an cx_pid → cx leitet an Exoself weiter
-        # oder du hast sie direkt an Exoself schicken lassen; falls direkt an Cortex, dann
-        # lausche hier stattdessen auf self.cx_actor.inbox. In deinem Neuron-Code geht es an cx_pid,
-        # und in deiner bisherigen Implementierung hast du aus dem Cortex-Postfach gelesen.
-        # Hier vereinfachen wir: Neuronen senden direkt an EXOSELF (passe Neuron ggf. an).
        backups: List[Tuple[Any, List[Tuple[Any, List[float]]]]] = []

        while remaining > 0:
@@ -309,8 +280,6 @@ class Exoself(Actor):
                backups.append((nid, idps))
                remaining -= 1

-        # 3) Update JSON
-        # exoself.py -> in _backup_genotype()
        id2n = {n["id"]: n for n in self.g["neurons"]}
        for nid, idps in backups:
            if nid not in id2n:
@@ -324,17 +293,14 @@ class Exoself(Actor):
                    input_id, weights = item
                    new_iw.append({"input_id": input_id, "weights": list(weights)})
            id2n[nid]["input_weights"] = new_iw
-            # Bias mit abspeichern (Variante B):
            if bias_val is not None:
                id2n[nid].setdefault("input_weights", []).append({"input_id": "bias", "weights": [bias_val]})

-        # 4) Save
        if self.file_name:
            with open(self.file_name, "w") as f:
                json.dump(self.g, f, indent=2)
            print(f"[Exoself] Genotype updated → {self.file_name}")

-    # ---------- Termination ----------
    async def _terminate_all(self):
        for a in self.sensor_actors + self.neuron_actors + self.actuator_actors:
            await a.send(("terminate",))