riscv-collab · lz-bro · Nov 20, 2024 · aap-sc · Nov 20, 2024 · lz-bro
diff --git a/src/target/riscv/riscv.c b/src/target/riscv/riscv.c
@@ -37,6 +37,8 @@
 
 #define RISCV_TRIGGER_HIT_NOT_FOUND ((int64_t)-1)
 
+#define RISCV_HALT_GROUP_REPOLL_LIMIT 5
+
 static uint8_t ir_dtmcontrol[4] = {DTMCONTROL};
 struct scan_field select_dtmcontrol = {
 	.in_value = NULL,
@@ -3722,6 +3724,8 @@ int riscv_openocd_poll(struct target *target)
 {
 	LOG_TARGET_DEBUG(target, "Polling all harts.");
 
+	struct riscv_info *i = riscv_info(target);
+
 	struct list_head *targets;
 
 	LIST_HEAD(single_target_list);
@@ -3743,6 +3747,7 @@ int riscv_openocd_poll(struct target *target)
 	unsigned int should_resume = 0;
 	unsigned int halted = 0;
 	unsigned int running = 0;
+	unsigned int cause_groups = 0;
 	struct target_list *entry;
 	foreach_smp_target(entry, targets) {
 		struct target *t = entry->target;
@@ -3790,6 +3795,52 @@ int riscv_openocd_poll(struct target *target)
 		LOG_TARGET_DEBUG(target, "resume all");
 		riscv_resume(target, true, 0, 0, 0, false);
 	} else if (halted && running) {
+		LOG_TARGET_DEBUG(target, "SMP group is in inconsistent state: %u halted, %u running",
+					halted, running);
+
+		/* The SMP group is in an inconsistent state - some harts in the group have halted
+		 * whereas others are still running. The possible reasons for that (and the
+		 * corresponding OpenOCD actions) are:
+		 * 1) The targets are in the process of halting due to halt groups,
+		 *    but not all of them have halted yet --> poll again so that the halt reason
+		 *    of every hart can be accurately determined (e.g. semihosting).
+		 * 2) The targets do not support halt groups --> OpenOCD must halt
+		 *    the remaining harts by a standard halt request.
+		 * 3) The hart states got out of sync for some other, unknown reason (possibly
+		 *    a problem) --> same as the previous case: try to halt the harts by a
+		 *    standard halt request to get them back in sync. */
+
+		/* Detect if the harts are just in the process of halting due to a halt group */
+		foreach_smp_target(entry, targets)
+		{
+			struct target *t = entry->target;
+			if (t->state == TARGET_HALTED) {
+				riscv_reg_t dcsr;
+				if (riscv_reg_get(t, &dcsr, GDB_REGNO_DCSR) != ERROR_OK)
+					return ERROR_FAIL;
+				if (get_field(dcsr, CSR_DCSR_CAUSE) == CSR_DCSR_CAUSE_GROUP)
+					cause_groups++;
+				else
+					/* This hart has halted due to something other than a halt group.
+					 * Don't continue checking the rest - exit early. */
+					break;
+			}
+		}
 } else if (halted && running) { 
 	LOG_TARGET_DEBUG(target, "halt all; halted=%d", 
 		halted); 
 	riscv_halt(target); 
 } else { 
 				if (halt_reason == RISCV_HALT_EBREAK) { 
 					int retval; 
 					/* Detect if this EBREAK is a semihosting request. If so, handle it. */ 
 					switch (riscv_semihosting(target, &retval)) { 
 						case SEMIHOSTING_NONE: 
 							break; 
 						case SEMIHOSTING_WAITING: 
 							/* This hart should remain halted. */ 
 							*next_action = RPH_REMAIN_HALTED; 
 							break; 
 						case SEMIHOSTING_HANDLED: 
 							/* This hart should be resumed, along with any other 
 							* harts that halted due to haltgroups. */ 
 							*next_action = RPH_RESUME; 
 							return ERROR_OK; 
 						case SEMIHOSTING_ERROR: 
 							return retval; 
 					} 
 				} 
 } else if (halted && running) { 
 	LOG_TARGET_DEBUG(target, "halt all; halted=%d", 
 		halted); 
 	riscv_halt(target); 
 } else { 
 				if (halt_reason == RISCV_HALT_EBREAK) { 
 					int retval; 
 					/* Detect if this EBREAK is a semihosting request. If so, handle it. */ 
 					switch (riscv_semihosting(target, &retval)) { 
 						case SEMIHOSTING_NONE: 
 							break; 
 						case SEMIHOSTING_WAITING: 
 							/* This hart should remain halted. */ 
 							*next_action = RPH_REMAIN_HALTED; 
 							break; 
 						case SEMIHOSTING_HANDLED: 
 							/* This hart should be resumed, along with any other 
 							* harts that halted due to haltgroups. */ 
 							*next_action = RPH_RESUME; 
 							return ERROR_OK; 
 						case SEMIHOSTING_ERROR: 
 							return retval; 
 					} 
 				} 
+		if (halted == cause_groups) {
+			LOG_TARGET_DEBUG(target, "The harts appear to just be in the process of halting due to a halt group.");
+			if (i->halt_group_repoll_count < RISCV_HALT_GROUP_REPOLL_LIMIT) {
+				/* Wait a little, then re-poll. */
+				i->halt_group_repoll_count++;
+				alive_sleep(10);
+				LOG_TARGET_DEBUG(target, "Re-polling the state of the SMP group.");
+				return riscv_openocd_poll(target);
+			}
+			/* We have already re-polled multiple times but the halt group is still inconsistent. */
+			LOG_TARGET_DEBUG(target, "Re-polled the SMP group %d times it is still not in a consistent state.",
+					RISCV_HALT_GROUP_REPOLL_LIMIT);
+		}
+
+		/* Halting the whole SMP group to bring it in sync. */
 		LOG_TARGET_DEBUG(target, "halt all; halted=%d",
 			halted);
 		riscv_halt(target);
@@ -3807,6 +3858,8 @@ int riscv_openocd_poll(struct target *target)
 		}
 	}
 
+	i->halt_group_repoll_count = 0;
+
 	/* Call tick() for every hart. What happens in tick() is opaque to this
 	 * layer. The reason it's outside the previous loop is that at this point
 	 * the state of every hart has settled, so any side effects happening in

diff --git a/src/target/riscv/riscv.h b/src/target/riscv/riscv.h
@@ -193,6 +193,7 @@ struct riscv_info {
 	/* Used by riscv_openocd_poll(). */
 	bool halted_needs_event_callback;
 	enum target_event halted_callback_event;
+	unsigned int halt_group_repoll_count;
 
 	enum riscv_isrmasking_mode isrmask_mode;