Dev/adjust tcn coco saving #460

Merged 43 commits on Oct 30, 2024
Commits
1989d18
change coco saving to use the annotation
josephvanpeltkw Oct 21, 2024
f3cf62a
Merge branch 'PTG-Kitware:master' into dev/adjust_tcn_coco_saving
josephvanpeltkw Oct 23, 2024
ccf8032
add a debug option to the TCN node in order to see the inputs it has …
josephvanpeltkw Oct 25, 2024
24b3080
adjust saving of coco output and add score
josephvanpeltkw Oct 25, 2024
a1e3bef
add collection of no activity classification
josephvanpeltkw Oct 25, 2024
6ef0d71
add a note about usage to the video/image to bag conversion script
josephvanpeltkw Oct 25, 2024
f242ea2
black formatting a clean copy error
josephvanpeltkw Oct 25, 2024
54e8104
check the frame number first before ignoring a frame (for playing bac…
josephvanpeltkw Oct 25, 2024
3650376
drop unneeded argument
josephvanpeltkw Oct 25, 2024
afec82d
change coco saving to add an image regardless of whether the activity…
josephvanpeltkw Oct 25, 2024
cc257d8
fix for beginning frame issue
josephvanpeltkw Oct 25, 2024
aec3eb1
simplify handling images that were already added
josephvanpeltkw Oct 25, 2024
4736376
cleanup debug input so it is easier to read in normal viewers
josephvanpeltkw Oct 28, 2024
81dc612
change the way that timestamps are saved to bags
josephvanpeltkw Oct 29, 2024
36d97d9
remove the need to check frame number now that converting videos to b…
josephvanpeltkw Oct 29, 2024
c40adfc
black formatting
josephvanpeltkw Oct 29, 2024
6105312
Incremental updates to train README for data and precursor data gener…
Purg Oct 21, 2024
412b8b7
Move TCN vectorization into the TCN package
Purg Oct 21, 2024
3d31631
Add colorlog dep to support TCN training
Purg Oct 22, 2024
c6a36f1
Minor dep order reorg, pin hydra-colorlog version like in tcn-hpl
Purg Oct 23, 2024
2f1c2a3
Updates to training README
Purg Oct 23, 2024
d8bff16
Fix archive extraction script
Purg Oct 23, 2024
502322e
Add final grep to probe helper script
Purg Oct 23, 2024
7fd0a26
Revert TensorRT Engine model usage
Purg Oct 24, 2024
21d4423
Update TCN related things to use common dataset and vector computation
Purg Oct 24, 2024
c2bc709
Update yolo v7 submodule for CLI updates
Purg Oct 25, 2024
1110b8c
Fix formatting
Purg Oct 25, 2024
ffb0d43
Finish documentation sentence
Purg Oct 25, 2024
7c4d693
Cache old configurations into an "old" directory for now
Purg Oct 25, 2024
65eb715
add a debug option to the TCN node in order to see the inputs it has …
josephvanpeltkw Oct 25, 2024
0839d1c
cleanup debug input so it is easier to read in normal viewers
josephvanpeltkw Oct 28, 2024
fd55d1f
Update TCN related things to use common dataset and vector computation
Purg Oct 24, 2024
59c5598
cleanup debug input so it is easier to read in normal viewers
josephvanpeltkw Oct 28, 2024
b6895f0
Update TCN related things to use common dataset and vector computation
Purg Oct 24, 2024
fe065ef
cleanup debug input so it is easier to read in normal viewers
josephvanpeltkw Oct 28, 2024
49319ff
Merge branch 'master' into dev/adjust_tcn_coco_saving
josephvanpeltkw Oct 29, 2024
64ae6c0
Merge branch 'PTG-Kitware:master' into dev/adjust_tcn_coco_saving
josephvanpeltkw Oct 29, 2024
7d5230e
fix for classes not in model
josephvanpeltkw Oct 29, 2024
ef38e74
Update ros/angel_system_nodes/angel_system_nodes/activity_classificat…
josephvanpeltkw Oct 30, 2024
a3ce112
remove unneeded code and change var name
josephvanpeltkw Oct 30, 2024
3441c18
change to collect image outside of try statement so it always collects
josephvanpeltkw Oct 30, 2024
c1f7fbb
Update ros/angel_system_nodes/angel_system_nodes/activity_classificat…
josephvanpeltkw Oct 30, 2024
db2820e
Update ros/angel_system_nodes/angel_system_nodes/activity_classificat…
josephvanpeltkw Oct 30, 2024
58 changes: 44 additions & 14 deletions angel_system/activity_classification/tcn_hpl/predict.py
@@ -56,36 +56,66 @@ def set_video(self, video_name: str) -> None:
else:
self._vid = self._dset.add_video(name=video_name)

def collect(
def add_image(
self,
frame_index: int,
activity_pred: int,
activity_conf_vec: Sequence[float],
name: Optional[str] = None,
file_name: Optional[str] = None,
activity_gt: Optional[int] = None,
) -> None:
) -> int:
"""
See `CocoDataset.add_image` for more details.
Add an image to the dataset. Returns the global image id.
If the image was already added (by name or file name), returns -1.
"""
with self._lock:
if self._vid is None:
raise RuntimeError(
"No video set before results collection. See `set_video` method."
)
packet = dict(

# get the global id for the image from the frame number
# add the image
img = dict(
video_id=self._vid,
frame_index=frame_index,
activity_pred=activity_pred,
activity_conf=list(activity_conf_vec),
)
if name is not None:
packet["name"] = name
img["name"] = name
if file_name is not None:
packet["file_name"] = file_name
if activity_gt is not None:
packet["activity_gt"] = activity_gt
self._dset.add_image(**packet)
img["file_name"] = file_name
# save the gid from the image to link to the annot
try:
gid = self._dset.add_image(**img)
except Exception:
return -1 # image already exists

return gid

def collect(
self,
gid: int,
activity_pred: int,
activity_conf_vec: Sequence[float],
) -> None:
"""
See `CocoDataset.add_annotation` for more details.

:param gid: Global image id.
:param activity_pred: Predicted activity class index.
:param activity_conf_vec: Confidence vector for all activity classes.
"""
with self._lock:
if self._vid is None:
raise RuntimeError(
"No video set before results collection. See `set_video` method."
)

# add the annotation
self._dset.add_annotation(
image_id=gid,
category_id=activity_pred,
score=activity_conf_vec[activity_pred],
prob=list(activity_conf_vec),
)

def write_file(self):
"""
Expand Down
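The refactor above splits the old single `collect` call into `add_image` (register the window's end frame, returning its global image id, or -1 if it was already added) followed by `collect` (attach the classification as an annotation on that image). A minimal sketch of the intended call pattern, using an invented stand-in class rather than the actual kwcoco-backed `ResultsCollector`:

```python
from typing import Dict, List, Optional, Sequence


class MiniCollector:
    """Illustrative stand-in mirroring the add_image/collect split."""

    def __init__(self) -> None:
        self._images: Dict[str, int] = {}   # image name -> global id
        self._annotations: List[dict] = []  # collected annotations
        self._next_gid = 0

    def add_image(self, frame_index: int, name: Optional[str] = None) -> int:
        # Mirror the PR behavior: return -1 when the image already exists.
        if name in self._images:
            return -1
        gid = self._next_gid
        self._next_gid += 1
        self._images[name] = gid
        return gid

    def collect(
        self,
        gid: int,
        activity_pred: int,
        activity_conf_vec: Sequence[float],
    ) -> None:
        # The annotation links back to the image via its gid and stores
        # the winning-class score plus the full probability vector.
        self._annotations.append(
            dict(
                image_id=gid,
                category_id=activity_pred,
                score=activity_conf_vec[activity_pred],
                prob=list(activity_conf_vec),
            )
        )


c = MiniCollector()
gid = c.add_image(frame_index=100, name="ros-frame-nsec-100")
if gid != -1:
    c.collect(gid=gid, activity_pred=2, activity_conf_vec=[0.1, 0.2, 0.7])
```

This ordering is why `rt_loop` can register the image before knowing whether a classification will be produced: the image is always recorded, and the annotation is added only when processing succeeds.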
@@ -10,6 +10,7 @@
from typing import Callable
from typing import List
from typing import Optional
import re
from typing import Tuple

import kwcoco
@@ -99,6 +100,10 @@
# activity prediction for the "live" image will not occur until object
# detections are predicted for that frame.
PARAM_WINDOW_LEADS_WITH_OBJECTS = "window_leads_with_objects"
# Debug file saved out to the filesystem for understanding the node's
# inputs when it decides not to create an activity classification.
# The format will be CSV with a list of the object detections and the pose.
PARAM_DEBUG_FILE = "debug_file"


class NoActivityClassification(Exception):
@@ -156,6 +161,7 @@ def __init__(self):
(PARAM_TOPIC, "medical"),
(PARAM_POSE_REPEAT_RATE, 0),
(PARAM_WINDOW_LEADS_WITH_OBJECTS, False),
(PARAM_DEBUG_FILE, ""),
],
)
self._img_ts_topic = param_values[PARAM_IMG_TS_TOPIC]
@@ -512,6 +518,7 @@ def _window_criterion_correct_size(self, window: InputBuffer) -> bool:
f"Window is not the appropriate size "
f"(actual:{len(window)} != {self._window_size}:expected)"
)

return window_ok

def _window_criterion_new_leading_frame(self, window: InputWindow) -> bool:
@@ -585,6 +592,9 @@ def rt_loop(self):
have_leading_object=self._window_lead_with_objects,
)

window_end_frame = window.frames[-1][0]
image_gid = self._collect_image(window_end_frame)

# log.info(f"buffer contents: {window.obj_dets}")

# if enable_time_trace_logging:
@@ -615,7 +625,7 @@ def rt_loop(self):
act_msg = self._process_window(window)
# log.info(f"activity message: {act_msg}")

self._collect_results(act_msg)
self._collect_results(act_msg, image_gid)
# set the header right before publishing so that the time is after processing
act_msg.header.frame_id = "Activity Classification"
act_msg.header.stamp = self.get_clock().now().to_msg()
@@ -628,6 +638,14 @@ def rt_loop(self):
"not yield an activity classification for "
"publishing."
)
if self._debug_file != "":
# save the info for why this window was not processed
repr = window.__repr__()
# clean this output for easier viewing (CSV)
repr = "index" + repr # add a column for the index
repr = re.sub(" +", ",", repr) # replace spaces with commas
with open(self._debug_file, "a") as f:
f.write(f"{repr}\n")

# This window has completed processing - record its leading
# timestamp now.
@@ -762,7 +780,7 @@ def _process_window(self, window: InputWindow) -> ActivityDetection:

return activity_msg

def _collect_results(self, msg: ActivityDetection):
def _collect_image(self, end_frame_time: Time) -> int:
"""
Collect into our ResultsCollector instance from the produced activity
classification message if we were initialized to do that.
@@ -777,11 +795,31 @@ def _collect_results(self, msg: ActivityDetection):
# Use window end timestamp nanoseconds as the frame index.
# When reading from an input COCO file, this aligns with the input
# `image` `frame_index` attributes.
frame_index = time_to_int(msg.source_stamp_end_frame)
pred_cls_idx = int(np.argmax(msg.conf_vec))
rc.collect(
frame_index = time_to_int(end_frame_time)
gid = rc.add_image(
frame_index=frame_index,
name=f"ros-frame-nsec-{frame_index}",
)
return gid
return -1

def _collect_results(self, msg: ActivityDetection, gid: int) -> None:
"""
Collect into our ResultsCollector instance from the produced activity
classification message if we were initialized to do that.

This method does nothing if this node has not been initialized to
collect results.

:param msg: ROS2 activity classification message that would be output.
:param gid: Global ID of the image associated with the activity
"""
rc = self._results_collector
if rc is not None:
# use the gid that was created when the image was added
pred_cls_idx = int(np.argmax(msg.conf_vec))
rc.collect(
gid=gid,
activity_pred=pred_cls_idx,
activity_conf_vec=list(msg.conf_vec),
)
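The debug branch in `rt_loop` turns the window's space-padded `repr` into something a CSV viewer can open: it prepends an `index` column header and collapses runs of spaces into single commas. A standalone sketch of that substitution (the repr string here is invented for illustration; the real one comes from `InputWindow.__repr__`):

```python
import re

# Hypothetical window repr: whitespace-padded columns, one row per frame.
window_repr = (
    "  ts  n_dets  pose\n"
    "  100  3  yes\n"
    "  101  0  no"
)

# Mirror the node's cleanup: prepend an index column header, then
# collapse each run of spaces into a single comma for CSV viewing.
csv_text = "index" + re.sub(" +", ",", window_repr)
print(csv_text)
```

Note that the leading spaces on each repr row become the empty first (index) cell of that CSV row, which is why concatenating `"index"` directly in front works without an explicit separator.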
25 changes: 17 additions & 8 deletions ros/angel_utils/scripts/convert_video_to_ros_bag.py
@@ -1,4 +1,12 @@
#!/usr/bin/env python3
"""
Convert a video (mp4) or a series of images into a ROS bag.

Example running (inside ROS environment):
ros2 run angel_utils convert_video_to_ros_bag.py \
--video-fn video.mp4 \
--output-bag-folder ros_bags/new_bag
"""
import argparse
from glob import glob
from pathlib import Path
@@ -104,31 +112,32 @@ def convert_video_to_bag(

# Starting at this so our first increment starts us at frame ID 0.
frame_id = -1
start_ts = rclpy.time.Time(nanoseconds=time.time_ns())
for frame, frame_rel_ts in frame_iter:
frame_id += 1
# Only proceed if we don't have a down-sample rate specified or if the
# current frame aligns with the down-sample rate.
if downsample_rate is not None and frame_id % downsample_rate != 0:
continue
print(f"==== FRAME {frame_id} ====")
# Create timestamp

frame_ts = start_ts + rclpy.duration.Duration(seconds=frame_rel_ts)
frame_ts_msg = frame_ts.to_msg()
print("timestamp", frame_ts)

# Create image message
image_msg = bridge.cv2_to_imgmsg(frame, encoding="bgr8")
image_msg.header.stamp = frame_ts_msg
# split the frame timestamp into sec and nsec
seconds = frame_rel_ts
nsec = int((seconds - int(seconds)) * 1_000_000_000)
seconds = int(seconds)
image_msg.header.stamp.sec = seconds
image_msg.header.stamp.nanosec = nsec
print(f"timestamp: {image_msg.header.stamp}")

image_msg.header.frame_id = "PVFramesBGR"

# Write to bag
try:
bag_writer.write(
output_image_topic,
serialize_message(image_msg),
frame_ts.nanoseconds,
image_msg.header.stamp.nanosec,
)
except Exception as err:
# Truncating the error message because it printed out the whole image_msg input
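The new stamp handling in the conversion script splits each frame's relative float-second timestamp into the integer `sec`/`nanosec` fields of the message header. A small sketch of that arithmetic, plus the reverse recombination into a single nanosecond count of the kind a bag timestamp expects (both function names are mine, not from the script):

```python
def split_stamp(rel_seconds: float):
    """Split a relative timestamp in float seconds into the (sec, nanosec)
    pair used by a ROS header stamp, mirroring the conversion done in
    convert_video_to_ros_bag.py."""
    sec = int(rel_seconds)
    nanosec = int((rel_seconds - sec) * 1_000_000_000)
    return sec, nanosec


def stamp_to_total_ns(sec: int, nanosec: int) -> int:
    # The two fields recombine into one integer nanosecond count as
    # sec * 1e9 + nanosec.
    return sec * 1_000_000_000 + nanosec


sec, nanosec = split_stamp(2.25)
print(sec, nanosec, stamp_to_total_ns(sec, nanosec))
```

For timestamps with many fractional digits the float subtraction can drop the last few nanoseconds of precision; for frame times derived from video FPS this is typically negligible.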