# TODO: this will be one env for both systems
"""Feature-based CarRacing environment (Box2D physics, pygame rendering).

The environment exposes a low-dimensional feature vector (look-ahead track
curvature plus a handful of car-state scalars) instead of pixels, and follows
the Gymnasium ``reset() -> (obs, info)`` /
``step() -> (obs, reward, terminated, truncated, info)`` convention.
"""
import logging
import math

import Box2D
import numpy as np
from Box2D import b2ContactListener, b2FixtureDef, b2PolygonShape

log = logging.getLogger(__name__)

# Optional gym import for spaces only; code runs without strict gym registration
try:
    import gymnasium as gym
    from gymnasium import spaces
    from gymnasium.utils import seeding

    GYM_AVAILABLE = True
except Exception:
    GYM_AVAILABLE = False
    gym = None
    spaces = None
    seeding = None

# Car dynamics from classic gym (Box2D)
# NOTE(review): this import is unconditional, so gymnasium is effectively a
# hard requirement despite the optional import above.
from gymnasium.envs.box2d.car_dynamics import Car

# --- pygame renderer ---
import pygame

DEBUG_DRAWING = False
LOOK_AHEAD = 10

STATE_W = 96
STATE_H = 96
VIDEO_W = 1200
VIDEO_H = 800
WINDOW_W = 1350
WINDOW_H = 950

SCALE = 6.0
TRACK_RAD = 900 / SCALE
PLAYFIELD = 2000 / SCALE
FPS = 60
ZOOM = 2.7
ZOOM_FOLLOW = True

TRACK_DETAIL_STEP = 21 / SCALE
TRACK_TURN_RATE = 0.31
TRACK_WIDTH = 40 / SCALE
BORDER = 8 / SCALE
BORDER_MIN_COUNT = 4

ROAD_COLOR = [0.4, 0.4, 0.4]

# limits & timeouts
# NOTE(review): MAX_STEPS and FUEL_LIMIT are defined but not enforced anywhere
# in this file.
MAX_TIME_SEC = 90.0
MAX_STEPS = int(FPS * MAX_TIME_SEC)
NO_PROGRESS_SEC = 8.0
NO_PROGRESS_STEPS = int(FPS * NO_PROGRESS_SEC)
STALL_MIN_SPEED = 4.0
STALL_SEC = 4.0
STALL_STEPS = int(FPS * STALL_SEC)
FUEL_LIMIT = 120.0


# ----------------------------
# Utilities
# ----------------------------
def standardize_angle(theta: float) -> float:
    """Wrap ``theta`` (radians) into the half-open interval [-pi, pi)."""
    return np.remainder(theta + np.pi, 2 * np.pi) - np.pi


def f2c(rgb_float):
    """float [0..1] -> int [0..255] color tuple"""
    return tuple(max(0, min(255, int(255 * x))) for x in rgb_float)


# ----------------------------
# MyState: feature container
# ----------------------------
class MyState:
    """Plain container for the per-step features computed by the environment."""

    def __init__(self):
        self.angle_deltas = None
        self.reward = None
        self.on_road = None
        self.laps = None
        self.wheel_angle = None
        self.car_angle = None
        self.angular_vel = None
        self.true_speed = None
        self.off_center = None
        self.vel_angle = None

    def as_array(self, n: int):
        """Return the first ``n`` angle deltas followed by the six scalars.

        The result is a float32 array of length ``n + 6`` in the order:
        angle deltas, wheel_angle, car_angle, angular_vel, true_speed,
        off_center, vel_angle.
        """
        return np.append(
            self.angle_deltas[:n],
            [
                self.wheel_angle,
                self.car_angle,
                self.angular_vel,
                self.true_speed,
                self.off_center,
                self.vel_angle,
            ],
        ).astype(np.float32)

    def as_feature_vector(self, lookahead: int = LOOK_AHEAD):
        """Alias for :meth:`as_array` using the default look-ahead."""
        return self.as_array(lookahead)


# ----------------------------
# Contact listener: counts tiles, progress & reward
# ----------------------------
class FrictionDetector(b2ContactListener):
    """Box2D contact listener that tracks tile contacts for the environment."""

    def __init__(self, env):
        super().__init__()
        self.env = env

    def BeginContact(self, contact):
        self._contact(contact, True)

    def EndContact(self, contact):
        self._contact(contact, False)

    def _contact(self, contact, begin):
        """Update tile bookkeeping for one begin/end contact event."""
        tile = None
        obj = None
        u1 = contact.fixtureA.body.userData
        u2 = contact.fixtureB.body.userData
        # A road tile is recognised by its ``road_friction`` attribute.
        if u1 and "road_friction" in u1.__dict__:
            tile = u1
            obj = u2
        if u2 and "road_friction" in u2.__dict__:
            tile = u2
            obj = u1
        if not tile:
            return

        # Mutated in place: the same list object is referenced by road_poly,
        # so the renderer picks up the new color.
        tile.color[0] = ROAD_COLOR[0]
        tile.color[1] = ROAD_COLOR[1]
        tile.color[2] = ROAD_COLOR[2]
        if not obj or "tiles" not in obj.__dict__:
            return

        if begin:
            obj.tiles.add(tile)
            # Only the next expected tile counts, which enforces driving order.
            if tile.index_on_track == self.env.next_road_tile:
                self.env.reward += 1000.0 / len(self.env.track)
                self.env.tile_visited_count += 1
                self.env.next_road_tile += 1
                if self.env.next_road_tile >= len(self.env.road):
                    self.env.next_road_tile = 0
                    self.env.laps += 1
        else:
            if tile in obj.tiles:
                obj.tiles.remove(tile)
        # NOTE(review): this reflects only the object involved in the latest
        # contact event, not all wheels - confirm that this is intended.
        self.env.on_road = len(obj.tiles) > 0


# ----------------------------
# CarRacing with pygame rendering and Gym(nasium) 0.26+ compatible step/reset
# ----------------------------
class CarRacing(gym.Env if GYM_AVAILABLE else object):
    """Car racing on a randomly generated closed track with feature observations.

    Actions are ``[steer (-1..1), gas (0..1), brake (0..1)]``. Observations are
    float32 vectors of length ``LOOK_AHEAD + 6`` (see :class:`MyState`).
    """

    metadata = {
        "render_modes": ["human", "rgb_array", None],
        "render_fps": FPS,
    }

    def __init__(self, seed_value: int = 5, render_mode: str | None = "human"):
        """Create the physics world, build a first track and spawn the car.

        Args:
            seed_value: Seed for the track-generation RNG.
            render_mode: ``"human"`` opens a window; anything else renders to
                an offscreen surface and ``render()`` returns an RGB array.
        """
        self._prev_s = 0.0
        self._interp = 0.0

        # RNG
        self.offroad_frames = None
        if seeding is not None:
            self.np_random, _ = seeding.np_random(seed_value)
        else:
            self.np_random = np.random.RandomState(seed_value)

        # Physics world
        self.contactListener_keepref = FrictionDetector(self)
        self.world = Box2D.b2World((0, 0), contactListener=self.contactListener_keepref)

        # Gym-style spaces (optional)
        if GYM_AVAILABLE:
            self.action_space = spaces.Box(
                np.array([-1, 0, 0], dtype=np.float32),
                np.array([+1, +1, +1], dtype=np.float32),
                dtype=np.float32,
            )
            feat_dim = LOOK_AHEAD + 6
            self.observation_space = spaces.Box(
                low=-np.inf, high=np.inf, shape=(feat_dim,), dtype=np.float32
            )

        # State
        self.viewer = None  # unused (pyglet placeholder)
        self.road = None
        self.car = None
        self.reward = 0.0
        self.prev_reward = 0.0
        self.laps = 0
        self.on_road = True
        self.ctrl_pts = None
        self.outward_vectors = None
        self.angles = None
        self.angle_deltas = None
        self.original_road_poly = None
        self.indices = None
        self.my_state = MyState()
        self.next_road_tile = 0

        # Rendering
        self.render_mode = render_mode
        self._pg = None  # pygame objects container

        # Episode control
        self.tile_visited_count = 0
        self.t = 0.0
        self.human_render = False

        # Build initial track + car
        self._build_new_episode()

        self.offroad_frames = 0
        self.offroad_grace_frames = int(0.7 * FPS)
        self.offroad_penalty_per_frame = 2.0

        self.steps = 0
        self._last_progress_count = 0
        self._no_progress_steps = 0
        self._stall_steps = 0

    # ------------------------
    # Helpers: pygame
    # ------------------------
    class _PygameCtx:
        """Holder for the lazily created pygame objects."""

        def __init__(self):
            self.initialized = False
            self.screen = None
            self.clock = None
            self.font = None
            self.rgb_surface = None  # offscreen for rgb_array

    def _ensure_pygame(self):
        """Initialise pygame and the drawing surface on first use."""
        if self._pg is None:
            self._pg = self._PygameCtx()
        if self._pg.initialized:
            return

        if not pygame.get_init():
            pygame.init()
        if self.render_mode == "human":
            self._pg.screen = pygame.display.set_mode((WINDOW_W, WINDOW_H))
        else:
            # offscreen surface; we can still blit/draw onto it
            self._pg.screen = pygame.Surface((WINDOW_W, WINDOW_H))
        self._pg.clock = pygame.time.Clock()
        try:
            pygame.font.init()
            self._pg.font = pygame.font.SysFont("Arial", 20)
        except Exception:
            # The HUD text is optional; render() skips it when font is None.
            log.debug("pygame font unavailable; HUD text disabled", exc_info=True)
            self._pg.font = None
        self._pg.initialized = True

    def _world_to_screen(self, x, y, zoom, angle, scroll_x, scroll_y):
        """Map a world point to integer screen coordinates for the camera."""
        ca, sa = math.cos(angle), math.sin(angle)
        # rotate around (scroll_x, scroll_y)
        rx = (x - scroll_x) * ca + (y - scroll_y) * sa
        ry = -(x - scroll_x) * sa + (y - scroll_y) * ca
        # scale & translate (match original camera placement)
        sx = int(WINDOW_W / 2 + rx * zoom)
        sy = int(WINDOW_H / 4 + ry * zoom)
        return sx, sy

    def get_feature_vector(self, lookahead: int = LOOK_AHEAD) -> list[float]:
        """Return the current feature vector as a plain Python list."""
        return self.my_state.as_feature_vector(lookahead).tolist()

    def _draw_polygon_world(self, poly, color, zoom, angle, scroll_x, scroll_y):
        """Draw a filled polygon given in world coordinates."""
        pts = [
            self._world_to_screen(px, py, zoom, angle, scroll_x, scroll_y)
            for (px, py) in poly
        ]
        pygame.draw.polygon(self._pg.screen, f2c(color), pts)

    def _draw_body(self, body, color=(0.7, 0.7, 0.7), zoom=1.0, angle=0.0,
                   scroll_x=0.0, scroll_y=0.0):
        """Draw every polygon fixture of a Box2D body in a single color."""
        col = f2c(color)
        for fixture in body.fixtures:
            shape = fixture.shape
            if not isinstance(shape, b2PolygonShape):
                continue
            verts = [body.transform * v for v in shape.vertices]
            pts = [
                self._world_to_screen(v[0], v[1], zoom, angle, scroll_x, scroll_y)
                for v in verts
            ]
            pygame.draw.polygon(self._pg.screen, col, pts, width=0)

    # ------------------------
    # Track & episode setup
    # ------------------------
    def _destroy(self):
        """Remove all road tiles and the car from the physics world."""
        # Detach userData first, then destroy the bodies, so that contact
        # callbacks fired during destruction no longer see the tile.
        for tile in self.road or []:
            try:
                tile.userData = None
            except Exception:
                log.debug("could not clear tile userData", exc_info=True)
            try:
                self.world.DestroyBody(tile)
            except Exception:
                log.debug("could not destroy tile body", exc_info=True)
        self.road = []

        # Handled independently of the road so a car is never left behind.
        if self.car is not None:
            try:
                self.car.destroy()
            except Exception:
                log.debug("could not destroy car", exc_info=True)
            self.car = None

    def _create_track(self):
        """Generate a random closed track and create its tile bodies.

        Returns:
            True on success, False when the generated loop could not be closed
            (the caller is expected to retry).
        """
        CHECKPOINTS = 12

        # Create checkpoints
        checkpoints = []
        for c in range(CHECKPOINTS):
            alpha = 2 * math.pi * c / CHECKPOINTS + self.np_random.uniform(
                0, 2 * math.pi * 1 / CHECKPOINTS
            )
            rad = self.np_random.uniform(TRACK_RAD / 3, TRACK_RAD)
            if c == 0:
                alpha = 0
                rad = 1.5 * TRACK_RAD
            if c == CHECKPOINTS - 1:
                alpha = 2 * math.pi * c / CHECKPOINTS
                self.start_alpha = 2 * math.pi * (-0.5) / CHECKPOINTS
                rad = 1.5 * TRACK_RAD
            checkpoints.append((alpha, rad * math.cos(alpha), rad * math.sin(alpha)))
        self.road = []

        # Go from one checkpoint to another to create the track
        x, y, beta = 1.5 * TRACK_RAD, 0, 0
        dest_i = 0
        laps = 0
        track = []
        no_freeze = 2500
        visited_other_side = False
        while True:
            alpha = math.atan2(y, x)
            if visited_other_side and alpha > 0:
                laps += 1
                visited_other_side = False
            if alpha < 0:
                visited_other_side = True
                alpha += 2 * math.pi

            while True:  # find the destination checkpoint
                failed = True
                while True:
                    dest_alpha, dest_x, dest_y = checkpoints[dest_i % len(checkpoints)]
                    if alpha <= dest_alpha:
                        failed = False
                        break
                    dest_i += 1
                    if dest_i % len(checkpoints) == 0:
                        break
                if not failed:
                    break
                alpha -= 2 * math.pi
                continue

            r1x = math.cos(beta)
            r1y = math.sin(beta)
            p1x = -r1y
            p1y = r1x
            dest_dx = dest_x - x
            dest_dy = dest_y - y
            proj = r1x * dest_dx + r1y * dest_dy
            while beta - alpha > 1.5 * math.pi:
                beta -= 2 * math.pi
            while beta - alpha < -1.5 * math.pi:
                beta += 2 * math.pi
            prev_beta = beta
            proj *= SCALE
            if proj > 0.3:
                beta -= min(TRACK_TURN_RATE, abs(0.001 * proj))
            if proj < -0.3:
                beta += min(TRACK_TURN_RATE, abs(0.001 * proj))
            x += p1x * TRACK_DETAIL_STEP
            y += p1y * TRACK_DETAIL_STEP
            track.append((alpha, prev_beta * 0.5 + beta * 0.5, x, y))
            if laps > 4:
                break
            no_freeze -= 1
            if no_freeze == 0:
                break

        # Scan backwards for two consecutive crossings of start_alpha; the
        # points between them form the closed loop i1..i2.
        i1, i2 = -1, -1
        i = len(track)
        while True:
            i -= 1
            if i == 0:
                return False
            pass_through_start = track[i][0] > self.start_alpha >= track[i - 1][0]
            if pass_through_start and i2 == -1:
                i2 = i
            elif pass_through_start and i1 == -1:
                i1 = i
                break
        print(f"Track generation: {i1}..{i2} -> {i2 - i1}-tiles track")
        assert i1 != -1 and i2 != -1

        track = track[i1: i2 - 1]

        first_beta = track[0][1]
        first_perp_x = math.cos(first_beta)
        first_perp_y = math.sin(first_beta)
        # Size of the jump needed to join the head and tail of the loop
        well_glued_together = np.sqrt(
            np.square(first_perp_x * (track[0][2] - track[-1][2]))
            + np.square(first_perp_y * (track[0][3] - track[-1][3]))
        )
        if well_glued_together > TRACK_DETAIL_STEP:
            return False

        # Border markers on runs of BORDER_MIN_COUNT same-direction hard turns
        border = [False] * len(track)
        for i in range(len(track)):
            good = True
            oneside = 0
            for neg in range(BORDER_MIN_COUNT):
                beta1 = track[i - neg - 0][1]
                beta2 = track[i - neg - 1][1]
                good &= abs(beta1 - beta2) > TRACK_TURN_RATE * 0.2
                oneside += np.sign(beta1 - beta2)
            good &= abs(oneside) == BORDER_MIN_COUNT
            border[i] = bool(good)
        for i in range(len(track)):
            for neg in range(BORDER_MIN_COUNT):
                border[i - neg] |= border[i]

        # Create tiles
        self.road_poly = []
        for i in range(len(track)):
            alpha1, beta1, x1, y1 = track[i]
            alpha2, beta2, x2, y2 = track[i - 1]
            road1_l = (x1 - TRACK_WIDTH * math.cos(beta1), y1 - TRACK_WIDTH * math.sin(beta1))
            road1_r = (x1 + TRACK_WIDTH * math.cos(beta1), y1 + TRACK_WIDTH * math.sin(beta1))
            road2_l = (x2 - TRACK_WIDTH * math.cos(beta2), y2 - TRACK_WIDTH * math.sin(beta2))
            road2_r = (x2 + TRACK_WIDTH * math.cos(beta2), y2 + TRACK_WIDTH * math.sin(beta2))
            t = self.world.CreateStaticBody(
                fixtures=b2FixtureDef(
                    shape=b2PolygonShape(vertices=[road1_l, road1_r, road2_r, road2_l])
                )
            )
            t.userData = t
            t.index_on_track = i
            c = 0.01 * (i % 3)
            t.color = [ROAD_COLOR[0] + c, ROAD_COLOR[1] + c, ROAD_COLOR[2] + c]
            t.road_visited = False
            t.road_friction = 1.0
            t.fixtures[0].sensor = True
            # road_poly shares the tile's color list so that in-place color
            # changes made by the contact listener show up when rendering.
            self.road_poly.append(([road1_l, road1_r, road2_r, road2_l], t.color))
            self.road.append(t)
            if border[i]:
                side = np.sign(beta2 - beta1)
                b1_l = (
                    x1 + side * TRACK_WIDTH * math.cos(beta1),
                    y1 + side * TRACK_WIDTH * math.sin(beta1),
                )
                b1_r = (
                    x1 + side * (TRACK_WIDTH + BORDER) * math.cos(beta1),
                    y1 + side * (TRACK_WIDTH + BORDER) * math.sin(beta1),
                )
                b2_l = (
                    x2 + side * TRACK_WIDTH * math.cos(beta2),
                    y2 + side * TRACK_WIDTH * math.sin(beta2),
                )
                b2_r = (
                    x2 + side * (TRACK_WIDTH + BORDER) * math.cos(beta2),
                    y2 + side * (TRACK_WIDTH + BORDER) * math.sin(beta2),
                )
                self.road_poly.append(
                    ([b1_l, b1_r, b2_r, b2_l], (1, 1, 1) if i % 2 == 0 else (1, 0, 0))
                )

        self.track = track
        self.original_road_poly = [
            (list(poly), list(color)) for (poly, color) in self.road_poly
        ]
        self.ctrl_pts = np.array([point[2:] for point in self.track])
        self.angles = np.array([point[1] for point in self.track])
        self.outward_vectors = [
            np.array([np.cos(theta), np.sin(theta)]) for theta in self.angles
        ]
        angle_deltas = self.angles - np.roll(self.angles, 1)
        self.angle_deltas = np.array([standardize_angle(d) for d in angle_deltas])
        self.indices = np.array(range(len(self.ctrl_pts)))
        return True

    def _track_progress(self) -> float:
        """Return the continuous track coordinate s (tile index + interpolation)."""
        return ((self.next_road_tile - 1) % len(self.ctrl_pts)) + float(self._interp)

    def _reset_episode_bookkeeping(self):
        """Reset per-episode counters and re-baseline the progress coordinate.

        Must be called after the car has been spawned. Without re-baselining,
        the first step of an episode would compare against the previous
        episode's position and hand out a spurious progress reward.
        """
        self.steps = 0
        self._last_progress_count = 0
        self._no_progress_steps = 0
        self._stall_steps = 0
        self.offroad_frames = 0
        self._update_features()  # refreshes self._interp for the new car pose
        self._prev_s = self._track_progress()

    def _build_new_episode(self):
        """Destroy the old world content, generate a new track and spawn the car."""
        self._destroy()
        self.reward = 0.0
        self.prev_reward = 0.0
        self.tile_visited_count = 0
        self.t = 0.0
        self.road_poly = []
        self.human_render = False
        self.laps = 0
        self.on_road = True
        self.next_road_tile = 0

        # build track (may retry)
        while True:
            success = self._create_track()
            if success:
                break
            print("retry to generate track (normal if there are not many of this messages)")

        self.car = Car(self.world, *self.track[0][1:4])
        # attach tiles set to car for contact tracking
        self.car.tiles = set()

        self._reset_episode_bookkeeping()

    # ------------------------
    # Public API (Gym 0.26+/Gymnasium style)
    # ------------------------
    def _update_features(self):
        """Recompute ``self.my_state`` and ``self._interp`` from the car pose."""
        v1 = self.outward_vectors[self.next_road_tile - 2]
        v2 = np.array(self.car.hull.position) - self.ctrl_pts[self.next_road_tile - 1]
        off_center = float(np.dot(v1, v2))
        angular_vel = float(self.car.hull.angularVelocity)
        vel = self.car.hull.linearVelocity
        true_speed = float(np.linalg.norm(vel))
        car_angle = float(self.car.hull.angle - self.angles[self.next_road_tile])
        wheel_angle = float(self.car.wheels[0].joint.angle)
        if true_speed < 0.2:
            # Velocity direction is meaningless when (almost) standing still.
            vel_angle = 0.0
        else:
            vel_angle = float(
                math.atan2(vel[1], vel[0]) - (self.angles[self.next_road_tile] + np.pi / 2)
            )
        wheel_angle = standardize_angle(wheel_angle)
        car_angle = standardize_angle(car_angle)
        vel_angle = standardize_angle(vel_angle)

        # Fractional position of the midpoint of wheels 0 and 1 along the
        # current track segment, in units of TRACK_DETAIL_STEP.
        tip = np.array((self.car.wheels[0].position + self.car.wheels[1].position) / 2)
        p1 = self.ctrl_pts[self.next_road_tile - 1]
        p2 = self.ctrl_pts[self.next_road_tile - 2]
        u = (p1 - p2) / TRACK_DETAIL_STEP
        v = (tip - p2) / TRACK_DETAIL_STEP
        interp = float(np.dot(v, u))
        interp_angle_deltas = np.interp(self.indices + interp, self.indices, self.angle_deltas)

        self.my_state.angle_deltas = np.roll(interp_angle_deltas, -self.next_road_tile)
        self.my_state.reward = self.reward
        self.my_state.on_road = self.on_road
        self.my_state.laps = self.laps
        self.my_state.true_speed = true_speed
        self.my_state.off_center = off_center
        self.my_state.wheel_angle = wheel_angle
        self.my_state.car_angle = car_angle
        self.my_state.angular_vel = angular_vel
        self.my_state.vel_angle = vel_angle

        # Normalization
        self.my_state.angle_deltas *= 2.3
        self.my_state.true_speed /= 100.0
        self.my_state.off_center /= TRACK_WIDTH
        self.my_state.wheel_angle *= 2.1
        self.my_state.car_angle *= 1.5
        self.my_state.vel_angle *= 1.5
        self.my_state.angular_vel /= 3.74

        self._interp = interp  # used by step() for the progress reward

    def reset(self, *, seed: int | None = None, options: dict | None = None):
        """Start a new episode on a freshly generated track.

        Args:
            seed: Optional seed; when given, the RNG is re-created from it.
            options: Accepted for API compatibility; not used.

        Returns:
            ``(obs, info)`` with the initial feature vector and an empty dict.
        """
        if seed is not None:
            if seeding is not None:
                self.np_random, _ = seeding.np_random(seed)
            else:
                self.np_random = np.random.RandomState(seed)
        # Also populates the initial features (via the bookkeeping reset).
        self._build_new_episode()
        obs = self.my_state.as_feature_vector(LOOK_AHEAD)
        info = {}
        return obs, info

    def fast_reset(self):
        """Respawn the car on the same track.

        Returns:
            The 5-tuple produced by one ``step`` with a zero action.
        """
        # Destroy the old car BEFORE dropping the reference; otherwise its
        # bodies stay in the physics world. This may fire EndContact callbacks,
        # so the episode state is reset only afterwards.
        if self.car is not None:
            try:
                self.car.destroy()
            except Exception:
                log.debug("could not destroy car", exc_info=True)
        self.car = None

        self.laps = 0
        self.on_road = True
        self.next_road_tile = 0
        self.reward = 0.0
        self.prev_reward = 0.0
        self.tile_visited_count = 0
        self.t = 0.0
        self.human_render = False
        for tile in self.road:
            tile.road_visited = False

        # Restore colors in place so tiles and road_poly keep sharing the same
        # list objects; tuple colors are never mutated and need no restore.
        for (_, color), (_, original_color) in zip(self.road_poly, self.original_road_poly):
            if isinstance(color, list):
                color[:] = original_color

        self.car = Car(self.world, *self.track[0][1:4])
        self.car.tiles = set()

        self._reset_episode_bookkeeping()
        return self.step(np.array([0.0, 0.0, 0.0], dtype=np.float32))

    def step(self, action):
        """Advance the simulation by one frame.

        Args:
            action: ``[steer (-1..1), gas (0..1), brake (0..1)]`` or ``None``
                to advance the physics without control input or reward.

        Returns:
            ``(obs, step_reward, terminated, truncated, info)``.
        """
        if action is not None:
            # TODO: this was changed from -float(action[0])
            self.car.steer(float(action[0]))
            self.car.gas(float(action[1]))
            self.car.brake(float(action[2]))

        self.car.step(1.0 / FPS)
        self.world.Step(1.0 / FPS, 6 * 30, 2 * 30)
        self.t += 1.0 / FPS
        self.steps += 1

        terminated = False
        truncated = False

        # --- stall logic ---
        speed = float(np.linalg.norm(self.car.hull.linearVelocity))
        if speed < STALL_MIN_SPEED:
            self._stall_steps += 1
        else:
            self._stall_steps = 0
        if self._stall_steps >= STALL_STEPS:
            self.reward -= 15.0
            terminated = True

        if action is not None:
            # (1) Apply ALL reward changes first
            self.reward -= 1.0 / FPS  # time penalty

            # Lap completed: keep driving and start counting tiles afresh.
            # The watchdog baseline is reset too; otherwise the no-progress
            # timeout would fire during every lap after the first.
            if self.tile_visited_count == len(self.track):
                self.tile_visited_count = 0
                self._last_progress_count = 0

            # Out of bounds: add the penalty to the running reward
            x, y = self.car.hull.position
            if abs(x) > PLAYFIELD or abs(y) > PLAYFIELD:
                self.reward -= 100.0
                terminated = True

            # Off-road: continuous penalty plus a grace window; once the
            # window is exceeded an extra penalty ends the episode.
            if not self.on_road:
                self.offroad_frames += 1
                self.reward -= self.offroad_penalty_per_frame / FPS
                if self.offroad_frames > self.offroad_grace_frames:
                    self.reward -= 20.0
                    terminated = True
            else:
                self.offroad_frames = 0

            # --- Dense signal: velocity along the track tangent ---
            theta = float(self.angles[self.next_road_tile])
            t_hat = np.array([-math.sin(theta), math.cos(theta)], dtype=np.float32)
            vel = np.array(self.car.hull.linearVelocity, dtype=np.float32)
            forward = float(np.dot(vel, t_hat))  # may be negative
            if forward > 0.0:
                self.reward += 0.2 * forward / FPS

            # --- Continuous track progress (s-coordinate) ---
            n = float(len(self.ctrl_pts))
            s_now = self._track_progress()
            ds = s_now - self._prev_s
            # cyclic correction when wrapping around the lap boundary
            if ds < -0.5 * n:
                ds += n
            if ds > 0.0:
                self.reward += 4.0 * ds / FPS
            self._prev_s = s_now

            # No-progress watchdog
            if self.tile_visited_count > self._last_progress_count:
                self._last_progress_count = self.tile_visited_count
                self._no_progress_steps = 0
            else:
                self._no_progress_steps += 1
            if self._no_progress_steps >= NO_PROGRESS_STEPS:
                truncated = True

            # (2) Form the per-step delta exactly once
            step_reward = self.reward - self.prev_reward
            self.prev_reward = self.reward
        else:
            step_reward = 0.0

        # --- Feature computation ---
        self._update_features()
        obs = self.my_state.as_feature_vector(LOOK_AHEAD)
        info = {}
        return obs, step_reward, terminated, truncated, info

    def _get_observation(self):
        # This env is feature-first; return None unless user asks for rgb_array via render()
        return None

    # ------------------------
    # Rendering (pygame)
    # ------------------------
    def render(self):
        """Draw the current frame.

        Returns:
            ``None`` in human mode, otherwise an ``(H, W, 3)`` RGB array.
        """
        self._ensure_pygame()

        # Handle window events only in human mode
        if self.render_mode == "human":
            for event in pygame.event.get():
                if event.type == pygame.QUIT:
                    self.close()
                    return None

        # Camera math (match original)
        zoom = 0.1 * SCALE * max(1 - self.t, 0) + ZOOM * SCALE * min(self.t, 1)
        scroll_x = self.car.hull.position[0]
        scroll_y = self.car.hull.position[1]
        angle = -self.car.hull.angle
        vel = self.car.hull.linearVelocity
        if np.linalg.norm(vel) > 0.5:
            angle = math.atan2(vel[0], vel[1])

        # Draw grass background
        self._pg.screen.fill((102, 230, 102))

        # simple grid for texture
        k = PLAYFIELD / 20.0
        grid_color = (110, 240, 110)
        for x in range(-20, 20, 2):
            for y in range(-20, 20, 2):
                x0, y0 = k * x + 0, k * y + 0
                x1, y1 = k * x + k, k * y + k
                p0 = self._world_to_screen(x0, y0, zoom, angle, scroll_x, scroll_y)
                p1 = self._world_to_screen(x1, y0, zoom, angle, scroll_x, scroll_y)
                p2 = self._world_to_screen(x1, y1, zoom, angle, scroll_x, scroll_y)
                p3 = self._world_to_screen(x0, y1, zoom, angle, scroll_x, scroll_y)
                pygame.draw.polygon(self._pg.screen, grid_color, [p0, p1, p2, p3])

        # Road polygons
        for poly, color in self.road_poly:
            self._draw_polygon_world(poly, color, zoom, angle, scroll_x, scroll_y)

        # Draw car hull + wheels (approx)
        car_col = (0.25, 0.25, 0.25)
        self._draw_body(self.car.hull, car_col, zoom, angle, scroll_x, scroll_y)
        for w in self.car.wheels:
            self._draw_body(w, (0.15, 0.15, 0.15), zoom, angle, scroll_x, scroll_y)

        # simple HUD text
        if self._pg.font is not None:
            txt = f"reward={self.reward:0.1f} laps={self.laps}"
            surf = self._pg.font.render(txt, True, (255, 255, 255))
            self._pg.screen.blit(surf, (10, 10))

        # Output
        if self.render_mode == "human":
            pygame.display.flip()
            self._pg.clock.tick(FPS)
            return None

        # Offscreen: return RGB array like gym does
        arr = pygame.surfarray.array3d(self._pg.screen)  # (W,H,3)
        return np.transpose(arr, (1, 0, 2))  # -> (H,W,3)

    def close(self):
        """Shut down pygame (best effort) and drop the rendering context."""
        try:
            if self._pg and self._pg.initialized:
                pygame.display.quit()
                pygame.quit()
        except Exception:
            log.debug("error while shutting down pygame", exc_info=True)
        self._pg = None


# ----------------------------
# Keyboard demo (pygame)
# ----------------------------
def main():
    """Drive the car with the arrow keys; RETURN respawns on the same track."""
    pygame.init()
    env = CarRacing(render_mode="human")

    action = np.array([0.0, 0.0, 0.0], dtype=np.float32)
    running = True

    def handle_keys(a):
        keys = pygame.key.get_pressed()
        steer = 0.0
        if keys[pygame.K_LEFT]:
            steer -= 1.0
        if keys[pygame.K_RIGHT]:
            steer += 1.0
        gas = 1.0 if keys[pygame.K_UP] else 0.0
        brake = 0.5 if keys[pygame.K_DOWN] else 0.0
        a[0], a[1], a[2] = steer, gas, brake

    # initial reset
    env.reset()

    try:
        while running:
            for event in pygame.event.get():
                if event.type == pygame.QUIT:
                    running = False
                if event.type == pygame.KEYDOWN and event.key == pygame.K_RETURN:
                    env.fast_reset()
            if not running:
                break

            handle_keys(action)
            obs, r, terminated, truncated, info = env.step(action)

            # print every 200 frames; `info` is always empty, so the features
            # are read from the environment directly
            if env.steps % 200 == 0:
                ms = env.my_state
                print(
                    f"speed={ms.true_speed:5.2f} off_center={ms.off_center:+.2f} "
                    f"car_ang={ms.car_angle:+.2f} "
                    f"step_reward={r:+.2f} total_reward={ms.reward:+.2f}"
                )

            env.render()
            if terminated or truncated:
                env.fast_reset()
    finally:
        env.close()


if __name__ == "__main__":
    main()