# app.py
# RL Gridworld (Q-learning) – Warehouse Robot Demo
# Styling: dark industrial robotics theme (option 2)

import time
import numpy as np
import gradio as gr
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle, FancyBboxPatch
from io import BytesIO
from PIL import Image
from collections import deque

# =========================================================
# 🎨 CUSTOM CSS (industrial, calm, dark)
# =========================================================
CUSTOM_CSS = """
body {
    background: radial-gradient(circle at top left, #1b2b4a 0%, #050814 45%, #02030a 100%);
    color: #e7f3ff;
}

.gradio-container {
    max-width: 1100px !important;
    margin: 0 auto !important;
    padding-top: 28px;
    font-family: system-ui, -apple-system, BlinkMacSystemFont, "SF Pro Text", "Segoe UI", sans-serif;
}

/* Headings */
h1, h2, h3 {
    color: #ffd27d;
    letter-spacing: 0.06em;
}

/* Text */
p, li {
    color: #d6e6ff;
}

/* Panels / cards */
.gr-group, .gr-box, .gr-panel {
    background: radial-gradient(circle at top left, rgba(255, 200, 120, 0.06), rgba(4, 9, 29, 0.98));
    border-radius: 22px;
    border: 1px solid rgba(255, 200, 120, 0.28);
    box-shadow: 0 0 22px rgba(255, 180, 80, 0.12);
}

/* Labels */
label {
    color: #ffddaa !important;
    text-transform: uppercase;
    font-size: 0.75rem !important;
    letter-spacing: 0.12em;
}

/* Inputs */
textarea, input[type="text"] {
    background: rgba(2, 8, 26, 0.92) !important;
    border-radius: 16px !important;
    border: 1px solid rgba(255, 190, 100, 0.35) !important;
    color: #e7f3ff !important;
}

/* Sliders */
input[type="range"] {
    accent-color: #ffb347;
}

/* Buttons */
button {
    border-radius: 999px !important;
    font-weight: 600 !important;
}
button.primary {
    background: linear-gradient(90deg, #ffb347, #ffcc80) !important;
    color: #1a0f02 !important;
    border: none !important;
    box-shadow: 0 0 16px rgba(255, 180, 80, 0.45);
}
button.secondary {
    background: rgba(12, 20, 40, 0.9) !important;
    color: #ffd9a0 !important;
    border: 1px solid rgba(255, 200, 120, 0.35) !important;
}
"""

# =========================================================
# 🤖 GRIDWORLD ENVIRONMENT
# =========================================================
ACTIONS = ["↑", "→", "↓", "←"]
ACTION_DELTAS = {
    0: (-1, 0),  # up
    1: (0, 1),   # right
    2: (1, 0),   # down
    3: (0, -1),  # left
}


def neighbors(r, c, n):
    """Yield the in-bounds 4-neighbours of cell (r, c) on an n x n grid."""
    for dr, dc in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
        nr, nc = r + dr, c + dc
        if 0 <= nr < n and 0 <= nc < n:
            yield nr, nc


def has_path(size, start, goal, blocked):
    """BFS reachability check from start to goal, avoiding blocked cells."""
    q = deque([start])
    seen = {start}
    while q:
        cur = q.popleft()
        if cur == goal:
            return True
        for nxt in neighbors(*cur, size):
            if nxt not in seen and nxt not in blocked:
                seen.add(nxt)
                q.append(nxt)
    return False


def generate_obstacles(size, start, goal, density, rng):
    """Sample walls and lava cells, retrying with lower density until the goal stays reachable."""
    walls, lava = set(), set()
    for _ in range(60):
        walls.clear()
        lava.clear()
        for r in range(size):
            for c in range(size):
                if (r, c) in (start, goal):
                    continue
                if rng.random() < density:
                    (walls if rng.random() < 0.7 else lava).add((r, c))
        if has_path(size, start, goal, walls | lava):
            return walls, lava
        density = max(0, density - 0.02)
    return set(), set()


class GridWorld:
    def __init__(self, size, walls, lava):
        self.size = size
        self.start = (0, 0)
        self.goal = (size - 1, size - 1)
        self.walls = walls
        self.lava = lava
        self.reset()

    def reset(self):
        self.pos = self.start
        return self.pos

    def step(self, action):
        dr, dc = ACTION_DELTAS[action]
        r, c = self.pos
        nr, nc = r + dr, c + dc
        # Bumping into a wall or the grid edge leaves the robot in place.
        if (nr < 0 or nr >= self.size or nc < 0 or nc >= self.size
                or (nr, nc) in self.walls):
            nr, nc = r, c
        self.pos = (nr, nc)
        if self.pos == self.goal:
            return self.pos, 10.0, True
        if self.pos in self.lava:
            return self.pos, -10.0, True
        return self.pos, -0.1, False


# =========================================================
# 🧠 Q-LEARNING AGENT
# =========================================================
class QAgent:
    def __init__(self, size, alpha, gamma):
        self.Q = np.zeros((size, size, 4), dtype=np.float32)
        self.alpha = alpha
        self.gamma = gamma

    def act(self, s, eps):
        # Epsilon-greedy: explore with probability eps, otherwise exploit.
        if np.random.rand() < eps:
            return np.random.randint(4)
        return int(np.argmax(self.Q[s]))

    def act_greedy(self, s):
        return int(np.argmax(self.Q[s]))

    def update(self, s, a, r, s2, done):
        # Q-learning target: r + gamma * max_a' Q(s', a'); just r on terminal steps.
        target = r if done else r + self.gamma * np.max(self.Q[s2])
        self.Q[s + (a,)] += self.alpha * (target - self.Q[s + (a,)])


# =========================================================
# 🎨 RENDERING
# =========================================================
def fig_to_pil(fig):
    buf = BytesIO()
    fig.savefig(buf, format="png", dpi=160, bbox_inches="tight")
    plt.close(fig)
    buf.seek(0)
    return Image.open(buf)


def draw(env, agent=None, episode=None, step=None, ret=None):
    n = env.size
    fig, ax = plt.subplots(figsize=(5.4, 5.4))
    ax.set_xlim(0, n)
    ax.set_ylim(0, n)
    ax.axis("off")
    ax.add_patch(Rectangle((0, 0), n, n, facecolor="#0b1020"))

    for r in range(n):
        for c in range(n):
            x, y = c, n - 1 - r
            color = "#121a33"
            if (r, c) == env.goal:
                color = "#0f2f1f"
            if (r, c) in env.lava:
                color = "#3a1414"
            if (r, c) in env.walls:
                color = "#1b1b1b"
            ax.add_patch(FancyBboxPatch(
                (x + 0.05, y + 0.05), 0.9, 0.9,
                boxstyle="round,pad=0.02",
                facecolor=color, edgecolor="#2a355f"
            ))

    def icon(rc, txt):
        r, c = rc
        ax.text(c + 0.5, n - 1 - r + 0.5, txt,
                ha="center", va="center", fontsize=22)

    icon(env.goal, "🏁")
    for p in env.lava:
        icon(p, "🔥")
    for p in env.walls:
        icon(p, "🧱")
    icon(env.pos, "🤖")

    title = f"Episode {episode} | Step {step} | Return {ret:+.2f}" if episode else ""
    ax.text(0, n + 0.2, title, color="#ffd27d")
    return fig_to_pil(fig)


# =========================================================
# 🚀 TRAIN / PLAY
# =========================================================
def train(grid, density, alpha, gamma, eps_s, eps_e, eps_d,
          episodes, max_steps, speed):
    # Slider values arrive as floats; grid/episode counts must be ints.
    grid, episodes, max_steps = int(grid), int(episodes), int(max_steps)
    rng = np.random.default_rng()
    walls, lava = generate_obstacles(grid, (0, 0), (grid - 1, grid - 1), density, rng)
    env = GridWorld(grid, walls, lava)
    agent = QAgent(grid, alpha, gamma)
    eps = eps_s

    for ep in range(1, episodes + 1):
        s = env.reset()
        ret = 0
        for t in range(1, max_steps + 1):
            a = agent.act(s, eps)
            s2, r, d = env.step(a)
            agent.update(s, a, r, s2, d)
            s = s2
            ret += r
            yield draw(env, agent, ep, t, ret), agent.Q, (walls, lava)
            time.sleep(speed)
            if d:
                break
        eps = max(eps_e, eps * eps_d)


def play(Q, env_state, grid, max_steps, speed):
    # Nothing to play back until the agent has been trained at least once.
    if Q is None or env_state is None:
        raise gr.Error("Train the agent first.")
    grid, max_steps = int(grid), int(max_steps)
    walls, lava = env_state
    env = GridWorld(grid, walls, lava)
    agent = QAgent(grid, 0, 0)
    agent.Q = Q
    s = env.reset()
    ret = 0
    for t in range(1, max_steps + 1):
        a = agent.act_greedy(s)
        s, r, d = env.step(a)
        ret += r
        yield draw(env, agent, "PLAY", t, ret)
        time.sleep(speed)
        if d:
            break


# =========================================================
# 🖥️ UI
# =========================================================
with gr.Blocks(css=CUSTOM_CSS, theme=gr.themes.Soft()) as demo:
    with gr.Row():
        with gr.Column(scale=3):
            gr.Markdown("""
### 🤖 A robot in the warehouse
The robot learns on its own how to move safely and efficiently through the warehouse, without rules or a map.
""")
        with gr.Column(scale=2):
            gr.Image("humanoid-robot-apptronic-1024x684.jpg.webp", show_label=False)

    q_state = gr.State(None)
    env_state = gr.State(None)

    with gr.Row():
        with gr.Column():
            grid = gr.Slider(4, 10, 5, step=1, label="Grid size")
            density = gr.Slider(0, 0.45, 0.15, label="Obstacle density")
            alpha = gr.Slider(0.01, 1, 0.45, label="Alpha")
            gamma = gr.Slider(0, 0.999, 0.97, label="Gamma")
            eps_s = gr.Slider(0, 1, 0.9, label="Epsilon start")
            eps_e = gr.Slider(0, 0.2, 0.02, label="Epsilon end")
            eps_d = gr.Slider(0.9, 0.999, 0.985, label="Epsilon decay")
            episodes = gr.Slider(1, 300, 200, step=1, label="Episodes")
            max_steps = gr.Slider(5, 200, 60, step=1, label="Max steps")
            speed = gr.Slider(0, 0.1, 0.02, label="Speed")
            train_btn = gr.Button("🚀 Train", variant="primary")
            play_btn = gr.Button("▶️ Play", variant="secondary")
        with gr.Column():
            frame = gr.Image(height=520)

    train_btn.click(
        train,
        inputs=[grid, density, alpha, gamma, eps_s, eps_e, eps_d,
                episodes, max_steps, speed],
        outputs=[frame, q_state, env_state],
    )
    play_btn.click(
        play,
        inputs=[q_state, env_state, grid, max_steps, speed],
        outputs=frame,
    )

demo.launch()