Skip to content

Commit

Permalink
additional sanity check
Browse files (browse the repository at this point in the history)
  • Loading branch information
AkshitaB committed Oct 2, 2024
1 parent 9ce96e6 commit 0689fe8
Showing 1 changed file with 4 additions and 2 deletions.
6 changes: 4 additions & 2 deletions olmo/data/iterable_dataset.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -80,14 +80,16 @@ def _build_and_save_global_indices(self):
global_indices = self._build_global_indices()
log.info(f"dataset 0: {self.dataset[0]}")
log.info(f"dataset 1: {self.dataset[1]}")
log.info(f"global indices 0: {global_indices[0]}")
log.info(f"global indices 1: {global_indices[1]}")
log.info(f"global indices: {global_indices}")
global_indices_mmap = np.memmap(
self.global_indices_file, dtype=np.uint32, mode="w+", shape=(len(global_indices),)
)
global_indices_mmap[:] = global_indices
global_indices_mmap.flush()
del global_indices_mmap
sanity = np.memmap(self.global_indices_file, mode="r+", dtype=np.uint32)
log.info(f"sanity check: {sanity}")
del sanity
log.info("Global data order indices saved to '%s'", self.global_indices_file)
barrier()

Expand Down

0 comments on commit 0689fe8

Please sign in to comment.