diff --git a/isic/ingest/utils/metadata.py b/isic/ingest/utils/metadata.py
index ddb95166..d73ad7d6 100644
--- a/isic/ingest/utils/metadata.py
+++ b/isic/ingest/utils/metadata.py
@@ -65,10 +65,15 @@ def _validate_df_consistency(batch: Iterable[dict]) -> tuple[ColumnRowErrors, li
     column_error_rows: ColumnRowErrors = defaultdict(list)
     batch_problems: list[Problem] = []
 
-    # Since rows have to be evaluated twice, we need to convert the iterator to a list
-    batch = list(batch)
+    # batch may be a single-use iterator, so collect the patient/lesion fields of each row
+    # during this one pass; the batch-level checks run on them after the loop finishes.
+    metadata_rows: list[MetadataRow] = []
 
     for i, row in enumerate(batch):
+        metadata_rows.append(
+            MetadataRow(patient_id=row.get("patient_id"), lesion_id=row.get("lesion_id"))
+        )
+
         try:
             MetadataRow.model_validate(row)
         except PydanticValidationError as e:
@@ -80,12 +85,7 @@ def _validate_df_consistency(batch: Iterable[dict]) -> tuple[ColumnRowErrors, li
     # currently only applies to patient/lesion checks, we can sparsely populate the MetadataRow
     # objects to save on memory.
     try:
-        MetadataBatch(
-            items=[
-                MetadataRow(patient_id=row.get("patient_id"), lesion_id=row.get("lesion_id"))
-                for row in batch
-            ]
-        )
+        MetadataBatch(items=metadata_rows)
     except PydanticValidationError as e:
         for error in e.errors():
             examples = error["ctx"]["examples"] if "ctx" in error else []
@@ -111,7 +111,7 @@ def validate_archive_consistency(
     a lesion doesn't belong to more than one patient.
     """
     # this is used to speed up the random access we need below
-    df = df.set_index("filename")
+    df.set_index("filename", inplace=True)
 
     def cohort_df_merged_metadata_rows():
         """