Intelligent Explorer – Q-Learning Robot Navigation in a Grid

profileF1234567891
COE292_Intelligent_Explorer_Starter.ipynb

{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# COE292 — Homework 1: Intelligent Explorer (Starter)\n", "\n", "Complete the **TODO** sections only. Do **not** rename functions or change their arguments.\n", "\n", "You will implement the core Q-learning logic while the scaffolding (seeding, BFS reachability, environment helpers, printing utilities) is provided for you.\n", "\n", "## Parts\n", "- **Part 1 (Training)**: Learn for 1000 episodes from zero knowledge; print knowledge matrices and the final greedy path + energy.\n", "- **Part 2 (Step-by-Step)**: Apply Q-updates along a fixed path with a given initial knowledge vector; print per-step updates and the final matrix.\n", "\n", "**Rules**\n", "- Only the Python standard library and `numpy` are allowed.\n", "- Keep output formats **exact** (spacing/labels) because the autograder parses them.\n", "- Set your `STUDENT_ID` in the first code cell.\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# =========================\n", "# 1) CONFIG / SEEDING\n", "# =========================\n", "\n", "import hashlib\n", "import random\n", "from collections import deque\n", "from typing import Tuple, Set, List, Dict, Optional\n", "import numpy as np\n", "import re\n", "\n", "STUDENT_ID = \"PUT_YOUR_ID_HERE\" # <-- TODO: set to your KFUPM ID as a string\n", "\n", "def seed_from_student_id(student_id: str) -> int:\n", " \"\"\"\n", " Deterministic seed from student ID (string or integer-like).\n", " Seeds both Python 'random' and NumPy RNG. Returns the seed used.\n", " \"\"\"\n", " sid = str(student_id).strip()\n", " if not sid:\n", " raise ValueError(\"Student ID is required and must be non-empty.\")\n", " if sid.isdigit():\n", " seed = int(sid) % (2**32 - 1)\n", " else:\n", " seed = int(hashlib.sha256(sid.encode(\"utf-8\")).hexdigest(), 16) % (2**32 - 1)\n", " random.seed(seed)\n", " np.random.seed(seed)\n", " return seed\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# =========================\n", "# 2) BFS REACHABILITY\n", "# =========================\n", "\n", "def is_path_possible(n: int, obstacles: Set[Tuple[int, int]]) -> bool:\n", " \"\"\"BFS to check if (0,0) can reach (n-1,n-1) avoiding obstacles.\"\"\"\n", " start, goal = (0, 0), (n - 1, n - 1)\n", " if start in obstacles or goal in obstacles:\n", " return False\n", " q = deque([start])\n", " seen = {start}\n", " while q:\n", " r, c = q.popleft()\n", " if (r, c) == goal:\n", " return True\n", " for dr, dc in [(-1,0),(1,0),(0,-1),(0,1)]:\n", " nr, nc = r + dr, c + dc\n", " if 0 <= nr < n and 0 <= nc < n:\n", " nxt = (nr, nc)\n", " if nxt not in seen and nxt not in obstacles:\n", " seen.add(nxt)\n", " q.append(nxt)\n", " return False\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# =========================\n", "# 3) ENVIRONMENT\n", "# =========================\n", "\n", "ACTIONS = ['N', 'S', 'E', 'W']\n", "DIRS = {'N': (-1, 0), 'S': (1, 0), 'E': (0, 1), 'W': (0, -1)}\n", "\n", "def in_bounds(r: int, c: int, n: int) -> bool:\n", " return 0 <= r < n and 0 <= c < n\n", "\n", "def step_once(pos: Tuple[int,int], action: str, n: int) -> Tuple[int,int]:\n", " dr, dc = DIRS[action]\n", " nr, nc = pos[0] + dr, pos[1] + dc\n", " return (nr, nc) if in_bounds(nr, nc, n) else pos # wall -> stay\n", "\n", "def energy_cost(cell: Tuple[int,int], obstacles: Set[Tuple[int,int]]) -> int:\n", " # +3 if obstacle, else +1\n", " return 3 if cell in obstacles else 1\n", "\n", "class MiniMineEnv:\n", " \"\"\"\n", " n×n grid; start (0,0), goal (n-1,n-1); 'obstacles' are high-energy cells.\n", " Step reward = -energy_cost (normal -1, obstacle -3); +10 on entering goal.\n", " Episode ends when goal is reached OR after n^2 steps.\n", " \"\"\"\n", " def __init__(self, n: int, obstacles: Set[Tuple[int,int]], goal: Tuple[int,int]=None, max_steps: Optional[int]=None):\n", " self.n = int(n)\n", " self.obstacles = set(obstacles)\n", " self.goal = goal if goal is not None else (self.n - 1, self.n - 1)\n", " self.max_steps = self.n * self.n if max_steps is None else int(max_steps)\n", " self.reset()\n", "\n", " def reset(self):\n", " self.pos = (0,0); self.steps = 0; self.done = False\n", " return self.pos\n", "\n", " def reward_for(self, landing: Tuple[int,int]) -> float:\n", " r = -float(energy_cost(landing, self.obstacles))\n", " if landing == self.goal:\n", " r += 10.0\n", " return r\n", "\n", " def step(self, action: str):\n", " if self.done:\n", " return self.pos, 0.0, True, {}\n", " nxt = step_once(self.pos, action, self.n)\n", " self.steps += 1\n", " r = self.reward_for(nxt)\n", " self.done = (nxt == self.goal) or (self.steps >= self.max_steps)\n", " self.pos = nxt\n", " return nxt, r, self.done, {}\n", "\n", " def state_to_index(self, pos: Tuple[int,int]) -> int:\n", " return pos[0] * self.n + pos[1]\n", "\n", " def index_to_state(self, idx: int) -> Tuple[int,int]:\n", " return (idx // self.n, idx % self.n)\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# =========================\n", "# 4) PART 1 — TRAINING (TODOs)\n", "# =========================\n", "\n", "def init_q(n: int, num_actions: int = 4, init_value: float = 0.0) -> List[List[float]]:\n", " \"\"\"\n", " TODO: return an n*n by num_actions Q-table initialized to init_value.\n", " \"\"\"\n", " # --- YOUR CODE HERE ---\n", " raise NotImplementedError\n", "\n", "def epsilon_greedy(Q: List[List[float]], s_idx: int, epsilon: float) -> int:\n", " \"\"\"\n", " TODO: with prob epsilon choose a random action; else choose an argmax action.\n", " \"\"\"\n", " # --- YOUR CODE HERE ---\n", " raise NotImplementedError\n", "\n", "def q_update(Q: List[List[float]], s_idx: int, a_idx: int, reward: float, s2_idx: int,\n", " gamma: float, alpha: float, done: bool):\n", " \"\"\"\n", " TODO: Q-learning update:\n", " target = reward + gamma * (0 if done else max(Q[s2_idx]))\n", " Q[s_idx][a_idx] += alpha * (target - Q[s_idx][a_idx])\n", " \"\"\"\n", " # --- YOUR CODE HERE ---\n", " raise NotImplementedError\n", "\n", "def value_grid(Q: List[List[float]], n: int, round_to: int=1) -> List[List[float]]:\n", " \"\"\"\n", " TODO: return an n×n grid where each cell is max_a Q[s,a] for that state, rounded.\n", " State index s = r*n + c\n", " \"\"\"\n", " # --- YOUR CODE HERE ---\n", " raise NotImplementedError\n", "\n", "def pretty_print_grid(grid: List[List[float]], round_cells: bool=False):\n", " \"\"\"\n", " Provided helper for printing grids. Keep format exact.\n", " \"\"\"\n", " for row in grid:\n", " if round_cells:\n", " print(\"[\" + \", \".join(f\"{float(x):.1f}\" for x in row) + \"]\")\n", " else:\n", " print(\" \".join(str(x) for x in row))\n", "\n", "def train_with_fixed_eps(env: MiniMineEnv,\n", " Q: List[List[float]],\n", " episodes: int,\n", " gamma: float,\n", " alpha: float,\n", " epsilon: float,\n", " snapshot_points: Set[int]) -> Dict[int, List[List[float]]]:\n", " \"\"\"\n", " TODO:\n", " - For ep in 1..episodes:\n", " * env.reset()\n", " * while not env.done:\n", " - s = state_to_index\n", " - choose action via epsilon_greedy\n", " - step env; compute s2\n", " - q_update\n", " Save snapshots for episodes in snapshot_points and also for 0 and 'episodes'.\n", " - Return {episode: V-grid}\n", " \"\"\"\n", " # --- YOUR CODE HERE ---\n", " raise NotImplementedError\n", "\n", "def greedy_rollout(env: MiniMineEnv, Q: List[List[float]]):\n", " \"\"\"\n", " TODO: Starting from (0,0), repeatedly pick a best (argmax) action and step\n", " until env.done or a loop is detected. Return (path, total_reward, steps, reached_goal_bool).\n", " \"\"\"\n", " # --- YOUR CODE HERE ---\n", " raise NotImplementedError\n", "\n", "def parse_assignment_input(line: str):\n", " \"\"\"\n", " Format:\n", " n ; gamma ; alpha ; epsilon ; obstacles={(r,c),(...)} ; Seed=12345 (Seed is optional)\n", " TODO: parse and return (n, gamma, alpha, epsilon, obstacles, seed:int)\n", " Use seed_from_student_id(STUDENT_ID) if Seed= is not provided.\n", " \"\"\"\n", " # --- YOUR CODE HERE ---\n", " raise NotImplementedError\n", "\n", "def solve(input_line: str, episodes: int = 1000, seed: Optional[int] = None, max_steps: Optional[int] = None):\n", " \"\"\"\n", " TODO:\n", " - Parse input\n", " - Seed RNG with 'seed' (if given) or seed_from_student_id(STUDENT_ID)\n", " - Build env, Q = init_q(...)\n", " - Train with snapshot points at multiples of 100 plus 0 and final\n", " - Print:\n", " Episode 0: Knowledge Matrix\n", " ... (Episode 100, 200, ..., 1000)\n", " Episode 1000: Knowledge Matrix\n", " Then greedy path, total reward, reached goal flag, and energy consumption (per spec).\n", " \"\"\"\n", " # --- YOUR CODE HERE ---\n", " raise NotImplementedError\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# =========================\n", "# 5) PART 2 — STEP-BY-STEP (TODOs)\n", "# =========================\n", "\n", "def init_q_table(n: int) -> List[List[float]]:\n", " \"\"\"\n", " TODO: return an n*n x 4 table initialized with zeros.\n", " \"\"\"\n", " # --- YOUR CODE HERE ---\n", " raise NotImplementedError\n", "\n", "def q_update_rule(q_table: List[List[float]], state_idx: int, action_idx: int,\n", " reward: float, next_state_idx: int, gamma: float, alpha: float, is_done: bool):\n", " \"\"\"\n", " TODO: Apply the Bellman update to q_table[state_idx][action_idx] using:\n", " target = reward + gamma * (0 if is_done else max(q_table[next_state_idx]))\n", " \"\"\"\n", " # --- YOUR CODE HERE ---\n", " raise NotImplementedError\n", "\n", "def get_value_grid(q_table: List[List[float]], n: int) -> List[List[float]]:\n", " \"\"\"\n", " TODO: same as value_grid, but for the part-2 q_table. Round to 1 decimal.\n", " \"\"\"\n", " # --- YOUR CODE HERE ---\n", " raise NotImplementedError\n", "\n", "def parse_part2_input(input_line: str) -> Dict:\n", " \"\"\"\n", " Format:\n", " n ; gamma ; alpha ; obstacles={(r,c),...} ; knowledge=v1,...,v_{n*n} ; path=A1,A2,...\n", " TODO: parse into a dict with keys: n, gamma, alpha, obstacles, knowledge (list of floats), path (list of str)\n", " \"\"\"\n", " # --- YOUR CODE HERE ---\n", " raise NotImplementedError\n", "\n", "def solve_part2(input_line: str):\n", " \"\"\"\n", " TODO:\n", " - Parse input\n", " - Build q_table where each state's 4 actions start from its knowledge value\n", " - Simulate along the provided path; at each step:\n", " print: \"Step k: Move X -> Cell (r,c), Updated Value=V, Cumulative Energy=E\"\n", " - At the end:\n", " print \"\\nFinal Knowledge Matrix:\" then the formatted rows from get_value_grid(...)\n", " \"\"\"\n", " # --- YOUR CODE HERE ---\n", " raise NotImplementedError\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# =========================\n", "# 6) SAMPLE RUN (LOCAL)\n", "# =========================\n", "if __name__ == \"__main__\":\n", " # Small local smoke tests (you can comment out before submission)\n", "\n", " # --- Part 1 sample ---\n", " seed_from_student_id(STUDENT_ID if (STUDENT_ID and STUDENT_ID != \"PUT_YOUR_ID_HERE\") else \"433\")\n", " n = 4\n", " obstacles = {(0,1),(2,2)}\n", " sample_line1 = f\"{n} ; 0.9 ; 0.3 ; 0.2 ; obstacles={{{','.join(map(str, sorted(obstacles)))}}}\"\n", " try:\n", " print(\"\\n[Sample] Part 1 solve():\\n\")\n", " solve(sample_line1, episodes=10) # shorter for a quick check\n", " except NotImplementedError:\n", " print(\"Implement Part 1 functions to run this sample.\")\n", "\n", " # --- Part 2 sample ---\n", " knowledge = \",\".join(str(x) for x in [5,12,3,7,8,0,10,15,6,14,1,9,11,13,2,4])\n", " path = \"S,E,E,N\"\n", " sample_line2 = f\"4 ; 0.9 ; 0.3 ; obstacles={(0,1),(2,2)} ; knowledge={knowledge} ; path={path}\"\n", " try:\n", " print(\"\\n[Sample] Part 2 solve_part2():\\n\")\n", " solve_part2(sample_line2)\n", " except NotImplementedError:\n", " print(\"Implement Part 2 functions to run this sample.\")\n" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "name": "python", "version": "3.11.8", "mimetype": "text/x-python", "file_extension": ".py", "pygments_lexer": "ipython3", "nbconvert_exporter": "python", "codemirror_mode": { "name": "ipython", "version": 3 } } }, "nbformat": 4, "nbformat_minor": 5 }