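massive improvements and bug fixes

- Switch callers from `retract_distance` to `offset_distance` (visual servoing utils, unitree_go2).
- Frontier exploration: add `lookahead_distance` (default 5.0) and a distance score weighted 20%;
  lower the info-gain weight from 0.5 to 0.3; raise the `safe_distance` default from 2.0 to 3.0.
- Object tracker: keep ORB keypoints and good matches, overlay REID matches on the tracked
  image, and add an `is_tracking()` RPC backed by a per-frame `reid_confirmed` flag.
- SpatialMemory: `odom` input is now `PoseStamped`; video and odom are wired through LCM
  transports (`/go2/color_image`, `/go2/camera_pose`).
- Camera module: add `world_frame_id`, look up the camera pose relative to it, and change the
  `gt_depth_scale` default from 2.2 to 2.0.
- `navigate_to_object`: default `distance` of 0.5, skip goal updates while the tracker is not
  confirmed, and poll every 0.25 s instead of 0.3 s.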

alexlin2 · alexlin2 · commit 76c822f620f5 · 2025-08-11T17:43:06.000-07:00
diff --git a/dimos/manipulation/visual_servoing/utils.py b/dimos/manipulation/visual_servoing/utils.py
@@ -30,7 +30,7 @@
     compose_transforms,
     yaw_towards_point,
     get_distance,
-    retract_distance,
+    offset_distance,
 )
 
 
@@ -261,7 +261,7 @@ def update_target_grasp_pose(
     updated_pose = Pose(target_pos, target_orientation)
 
     if grasp_distance > 0.0:
-        return retract_distance(updated_pose, grasp_distance)
+        return offset_distance(updated_pose, grasp_distance)
     else:
         return updated_pose
 
diff --git a/dimos/navigation/frontier_exploration/wavefront_frontier_goal_selector.py b/dimos/navigation/frontier_exploration/wavefront_frontier_goal_selector.py
@@ -32,6 +32,7 @@
 from dimos.msgs.nav_msgs import OccupancyGrid, CostValues
 from dimos.utils.logging_config import setup_logger
 from dimos_lcm.std_msgs import Bool
+from dimos.utils.transform_utils import get_distance
 
 logger = setup_logger("dimos.robot.unitree.frontier_exploration")
 
@@ -100,7 +101,8 @@ def __init__(
         self,
         min_frontier_perimeter: float = 0.5,
         occupancy_threshold: int = 99,
-        safe_distance: float = 2.0,
+        safe_distance: float = 3.0,
+        lookahead_distance: float = 5.0,
         max_explored_distance: float = 10.0,
         info_gain_threshold: float = 0.03,
         num_no_gain_attempts: int = 4,
@@ -122,6 +124,7 @@ def __init__(
         self.occupancy_threshold = occupancy_threshold
         self.safe_distance = safe_distance
         self.max_explored_distance = max_explored_distance
+        self.lookahead_distance = lookahead_distance
         self.info_gain_threshold = info_gain_threshold
         self.num_no_gain_attempts = num_no_gain_attempts
         self._cache = FrontierCache()
@@ -496,35 +499,43 @@ def _compute_comprehensive_frontier_score(
     ) -> float:
         """Compute comprehensive score considering multiple criteria."""
 
-        # 1. Information gain (frontier size)
+        # 1. Distance from robot (preference for moderate distances)
+        robot_distance = get_distance(frontier, robot_pose)
+
+        # Distance score: prefer moderate distances (not too close, not too far)
+        # Normalized to 0-1 range
+        distance_score = 1.0 / (1.0 + abs(robot_distance - self.lookahead_distance))
+
+        # 2. Information gain (frontier size)
         # Normalize by a reasonable max frontier size
         max_expected_frontier_size = self.min_frontier_perimeter / costmap.resolution * 10
         info_gain_score = min(frontier_size / max_expected_frontier_size, 1.0)
 
-        # 2. Distance to explored goals (bonus for being far from explored areas)
+        # 3. Distance to explored goals (bonus for being far from explored areas)
         # Normalize by a reasonable max distance (e.g., 10 meters)
         explored_goals_distance = self._compute_distance_to_explored_goals(frontier)
         explored_goals_score = min(explored_goals_distance / self.max_explored_distance, 1.0)
 
-        # 3. Distance to obstacles (score based on safety)
+        # 4. Distance to obstacles (score based on safety)
         # 0 = too close to obstacles, 1 = at or beyond safe distance
         obstacles_distance = self._compute_distance_to_obstacles(frontier, costmap)
         if obstacles_distance >= self.safe_distance:
             obstacles_score = 1.0  # Fully safe
         else:
             obstacles_score = obstacles_distance / self.safe_distance  # Linear penalty
 
-        # 4. Direction momentum (already in 0-1 range from dot product)
+        # 5. Direction momentum (already in 0-1 range from dot product)
         momentum_score = self._compute_direction_momentum_score(frontier, robot_pose)
 
         logger.info(
-            f"Info gain score: {info_gain_score}, Explored goals score: {explored_goals_score}, Obstacles score: {obstacles_score}, Momentum score: {momentum_score}"
+            f"Distance score: {distance_score:.2f}, Info gain: {info_gain_score:.2f}, Explored goals: {explored_goals_score:.2f}, Obstacles: {obstacles_score:.2f}, Momentum: {momentum_score:.2f}"
         )
 
-        # Combine scores with consistent scaling (no arbitrary multipliers)
+        # Combine scores with consistent scaling
         total_score = (
-            0.5 * info_gain_score  # 30% information gain
+            0.3 * info_gain_score  # 30% information gain
             + 0.3 * explored_goals_score  # 30% distance from explored goals
+            + 0.2 * distance_score  # 20% distance optimization
             + 0.15 * obstacles_score  # 15% distance from obstacles
             + 0.05 * momentum_score  # 5% direction momentum
         )
diff --git a/dimos/perception/object_tracker.py b/dimos/perception/object_tracker.py
@@ -90,7 +90,12 @@ def __init__(
         self.orb = cv2.ORB_create()
         self.bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=False)
         self.original_des = None  # Store original ORB descriptors
+        self.original_kps = None  # Store original ORB keypoints
         self.reid_fail_count = 0  # Counter for consecutive re-id failures
+        self.last_good_matches = []  # Store good matches for visualization
+        self.last_roi_kps = None  # Store last ROI keypoints for visualization
+        self.last_roi_bbox = None  # Store last ROI bbox for visualization
+        self.reid_confirmed = False  # Store current reid confirmation state
 
         # For tracking latest frame data
         self._latest_rgb_frame: Optional[np.ndarray] = None
@@ -182,7 +187,7 @@ def track(
         # Extract initial features
         roi = self._latest_rgb_frame[y1:y2, x1:x2]
         if roi.size > 0:
-            _, self.original_des = self.orb.detectAndCompute(roi, None)
+            self.original_kps, self.original_des = self.orb.detectAndCompute(roi, None)
             if self.original_des is None:
                 logger.warning("No ORB features found in initial ROI.")
                 self.stop_track()
@@ -217,23 +222,31 @@ def reid(self, frame, current_bbox) -> bool:
         if roi.size == 0:
             return False  # Empty ROI cannot match
 
-        _, des_current = self.orb.detectAndCompute(roi, None)
+        kps_current, des_current = self.orb.detectAndCompute(roi, None)
         if des_current is None or len(des_current) < 2:
             return False  # Need at least 2 descriptors for knnMatch
 
+        # Store ROI keypoints and bbox for visualization
+        self.last_roi_kps = kps_current
+        self.last_roi_bbox = [x1, y1, x2, y2]
+
         # Handle case where original_des has only 1 descriptor (cannot use knnMatch with k=2)
         if len(self.original_des) < 2:
             matches = self.bf.match(self.original_des, des_current)
+            self.last_good_matches = matches  # Store all matches for visualization
             good_matches = len(matches)
         else:
             matches = self.bf.knnMatch(self.original_des, des_current, k=2)
             # Apply Lowe's ratio test robustly
+            good_matches_list = []
             good_matches = 0
             for match_pair in matches:
                 if len(match_pair) == 2:
                     m, n = match_pair
                     if m.distance < 0.75 * n.distance:
+                        good_matches_list.append(m)
                         good_matches += 1
+            self.last_good_matches = good_matches_list  # Store good matches for visualization
 
         return good_matches >= self.reid_threshold
 
@@ -261,7 +274,12 @@ def _reset_tracking_state(self):
         self.tracking_bbox = None
         self.tracking_initialized = False
         self.original_des = None
+        self.original_kps = None
         self.reid_fail_count = 0  # Reset counter
+        self.last_good_matches = []
+        self.last_roi_kps = None
+        self.last_roi_bbox = None
+        self.reid_confirmed = False  # Reset reid confirmation state
 
         # Publish empty detections to clear any visualizations
         empty_2d = Detection2DArray(detections_length=0, header=Header(), detections=[])
@@ -298,6 +316,16 @@ def stop_track(self) -> bool:
         logger.info("Tracking stopped")
         return True
 
+    @rpc
+    def is_tracking(self) -> bool:
+        """
+        Check if the tracker is initialized and its most recent re-ID check passed.
+
+        Returns:
+            bool: True if tracking_initialized is set and reid_confirmed is True (it is updated per processed frame and cleared on tracker failure or reset), False otherwise
+        """
+        return self.tracking_initialized and self.reid_confirmed
+
     def _process_tracking(self):
         """Process current frame for tracking and publish detections."""
         if self._latest_rgb_frame is None or self.tracker is None or not self.tracking_initialized:
@@ -316,11 +344,14 @@ def _process_tracking(self):
             current_bbox_x1y1x2y2 = [x, y, x + w, y + h]
             # Perform re-ID check
             reid_confirmed_this_frame = self.reid(frame, current_bbox_x1y1x2y2)
+            self.reid_confirmed = reid_confirmed_this_frame  # Store for is_tracking() RPC
 
             if reid_confirmed_this_frame:
                 self.reid_fail_count = 0
             else:
                 self.reid_fail_count += 1
+        else:
+            self.reid_confirmed = False  # No tracking if tracker failed
 
         # Determine final success
         if tracker_succeeded:
@@ -480,10 +511,53 @@ def _process_tracking(self):
                     self._latest_rgb_frame, detections_3d, show_coordinates=True, bboxes_2d=bbox_2d
                 )
 
+                # Overlay REID feature matches if available
+                if self.last_good_matches and self.last_roi_kps and self.last_roi_bbox:
+                    viz_image = self._draw_reid_matches(viz_image)
+
                 # Convert to Image message and publish
                 viz_msg = Image.from_numpy(viz_image)
                 self.tracked_overlay.publish(viz_msg)
 
+    def _draw_reid_matches(self, image: np.ndarray) -> np.ndarray:
+        """Return a copy of `image` with the stored REID keypoints, good matches and status text drawn on it."""
+        viz_image = image.copy()
+
+        x1, y1, x2, y2 = self.last_roi_bbox
+
+        # Draw every keypoint stored from the last ROI as a filled green dot, shifted by the bbox origin (x1, y1)
+        for kp in self.last_roi_kps:
+            pt = (int(kp.pt[0] + x1), int(kp.pt[1] + y1))
+            cv2.circle(viz_image, pt, 3, (0, 255, 0), -1)
+
+        for match in self.last_good_matches:
+            current_kp = self.last_roi_kps[match.trainIdx]
+            pt_current = (int(current_kp.pt[0] + x1), int(current_kp.pt[1] + y1))
+
+            # Outline each matched keypoint with a larger circle (radius 5, thickness 2)
+            cv2.circle(viz_image, pt_current, 5, (0, 255, 255), 2)  # Yellow if the image is BGR - NOTE(review): confirm channel order
+
+            # Draw match strength indicator (smaller filled circle whose first two channels depend on distance)
+            # Lower distance = better match = higher intensity; distances of 100 or more clamp to 0
+            intensity = int(255 * (1.0 - min(match.distance / 100.0, 1.0)))
+            cv2.circle(viz_image, pt_current, 2, (intensity, intensity, 255), -1)
+
+        text = f"REID Matches: {len(self.last_good_matches)}/{len(self.last_roi_kps) if self.last_roi_kps else 0}"
+        cv2.putText(viz_image, text, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 255), 2)
+
+        if len(self.last_good_matches) >= self.reid_threshold:
+            status_text = "REID: CONFIRMED"
+            status_color = (0, 255, 0)  # Green
+        else:
+            status_text = f"REID: WEAK ({self.reid_fail_count}/{self.reid_fail_tolerance})"
+            status_color = (0, 165, 255)  # Orange if the image is BGR - NOTE(review): confirm channel order
+
+        cv2.putText(
+            viz_image, status_text, (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.7, status_color, 2
+        )
+
+        return viz_image
+
     def _get_depth_from_bbox(self, bbox: List[int]) -> Optional[float]:
         """Calculate depth from bbox using the 25th percentile of closest points."""
         if self._latest_depth_frame is None:
@@ -504,8 +578,6 @@ def _get_depth_from_bbox(self, bbox: List[int]) -> Optional[float]:
         valid_depths = roi_depth[np.isfinite(roi_depth) & (roi_depth > 0)]
 
         if len(valid_depths) > 0:
-            # Take the 25th percentile of the closest (smallest) depth values
-            # This helps get a robust depth estimate for the front surface of the object
             depth_25th_percentile = float(np.percentile(valid_depths, 25))
             return depth_25th_percentile
 
diff --git a/dimos/perception/spatial_perception.py b/dimos/perception/spatial_perception.py
@@ -29,7 +29,6 @@
 from dimos.core import In, Module, Out, rpc
 from dimos.msgs.sensor_msgs import Image
 from dimos.msgs.geometry_msgs import Vector3, Quaternion, Pose, PoseStamped
-from dimos.robot.unitree_webrtc.type.odometry import Odometry
 from dimos.utils.logging_config import setup_logger
 from dimos.agents.memory.spatial_vector_db import SpatialVectorDB
 from dimos.agents.memory.image_embedding import ImageEmbeddingProvider
@@ -52,7 +51,7 @@ class SpatialMemory(Module):
 
     # LCM inputs
     video: In[Image] = None
-    odom: In[Odometry] = None
+    odom: In[PoseStamped] = None
 
     def __init__(
         self,
@@ -168,7 +167,7 @@ def __init__(
 
         # Track latest data for processing
         self._latest_video_frame: Optional[np.ndarray] = None
-        self._latest_odom: Optional[Odometry] = None
+        self._latest_odom: Optional[PoseStamped] = None
         self._process_interval = 1
 
         logger.info(f"SpatialMemory initialized with model {embedding_model}")
@@ -185,7 +184,7 @@ def set_video(image_msg: Image):
             else:
                 logger.warning("Received image message without data attribute")
 
-        def set_odom(odom_msg: Odometry):
+        def set_odom(odom_msg: PoseStamped):
             self._latest_odom = odom_msg
 
         self.video.subscribe(set_video)
diff --git a/dimos/robot/unitree_webrtc/camera_module.py b/dimos/robot/unitree_webrtc/camera_module.py
@@ -61,9 +61,10 @@ class UnitreeCameraModule(Module):
     def __init__(
         self,
         camera_intrinsics: List[float],
+        world_frame_id: str = "world",
         camera_frame_id: str = "camera_link",
         base_frame_id: str = "base_link",
-        gt_depth_scale: float = 2.2,
+        gt_depth_scale: float = 2.0,
         **kwargs,
     ):
         """
@@ -82,6 +83,7 @@ def __init__(
         self.camera_intrinsics = camera_intrinsics
         self.camera_frame_id = camera_frame_id
         self.base_frame_id = base_frame_id
+        self.world_frame_id = world_frame_id
 
         # Initialize components
         from dimos.models.depth.metric3d import Metric3D
@@ -296,7 +298,7 @@ def _publish_camera_pose(self, header: Header):
         try:
-            # Look up transform from base_link to camera_link
+            # Look up transform from the world frame to the camera frame
             transform = self.tf.get(
-                parent_frame=self.base_frame_id,
+                parent_frame=self.world_frame_id,
                 child_frame=self.camera_frame_id,
                 time_point=header.ts,
                 time_tolerance=1.0,
@@ -306,7 +308,7 @@ def _publish_camera_pose(self, header: Header):
                 # Create PoseStamped from transform
                 pose_msg = PoseStamped(
                     ts=header.ts,
-                    frame_id=self.base_frame_id,
+                    frame_id=self.camera_frame_id,
                     position=transform.translation,
                     orientation=transform.rotation,
                 )
diff --git a/dimos/robot/unitree_webrtc/unitree_go2.py b/dimos/robot/unitree_webrtc/unitree_go2.py
@@ -50,7 +50,7 @@
 from dimos.utils.data import get_data
 from dimos.utils.logging_config import setup_logger
 from dimos.utils.testing import TimedSensorReplay
-from dimos.utils.transform_utils import retract_distance
+from dimos.utils.transform_utils import offset_distance
 from dimos.perception.common.utils import extract_pose_from_detection3d
 from dimos.perception.object_tracker import ObjectTracking
 from dimos_lcm.std_msgs import Bool
@@ -361,8 +361,10 @@ def _deploy_perception(self):
             output_dir=self.spatial_memory_dir,
         )
 
-        self.spatial_memory_module.video.connect(self.connection.video)
-        self.spatial_memory_module.odom.connect(self.connection.odom)
+        self.spatial_memory_module.video.transport = core.LCMTransport("/go2/color_image", Image)
+        self.spatial_memory_module.odom.transport = core.LCMTransport(
+            "/go2/camera_pose", PoseStamped
+        )
 
         logger.info("Spatial memory module deployed and connected")
 
@@ -531,7 +533,7 @@ def get_odom(self) -> PoseStamped:
         """
         return self.connection.get_odom()
 
-    def navigate_to_object(self, bbox: List[float], distance: float, timeout: float = 30.0):
+    def navigate_to_object(self, bbox: List[float], distance: float = 0.5, timeout: float = 30.0):
         """Navigate to an object by tracking it and maintaining a specified distance.
 
         Args:
@@ -563,13 +565,18 @@ def navigate_to_object(self, bbox: List[float], distance: float, timeout: float
                     logger.info("Object tracking goal reached")
                     return True
 
+            if not self.object_tracker.is_tracking():
+                continue
+
             detection_topic = Topic("/go2/detection3d", Detection3DArray)
             detection_msg = self.lcm.wait_for_message(detection_topic, timeout=1.0)
 
             if detection_msg and len(detection_msg.detections) > 0:
                 target_pose = extract_pose_from_detection3d(detection_msg.detections[0])
 
-                retracted_pose = retract_distance(target_pose, distance)
+                retracted_pose = offset_distance(
+                    target_pose, distance, approach_vector=Vector3(-1, 0, 0)
+                )
 
                 goal_pose = PoseStamped(
                     frame_id=detection_msg.header.frame_id,
@@ -579,7 +586,7 @@ def navigate_to_object(self, bbox: List[float], distance: float, timeout: float
                 self.navigator.set_goal(goal_pose)
                 goal_set = True
 
-            time.sleep(0.3)
+            time.sleep(0.25)
 
         logger.info("Object tracking timed out")
         return False
diff --git a/dimos/skills/navigation.py b/dimos/skills/navigation.py
diff --git a/dimos/utils/test_transform_utils.py b/dimos/utils/test_transform_utils.py
diff --git a/dimos/utils/transform_utils.py b/dimos/utils/transform_utils.py