From 209bf2403d4bc8d431df191eb629afd132daea39 Mon Sep 17 00:00:00 2001 From: Tibo De Peuter Date: Tue, 9 Dec 2025 22:50:59 +0100 Subject: [PATCH] fix: Avoid copy list conversion --- src/dataset_loaders/Dataset.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/dataset_loaders/Dataset.py b/src/dataset_loaders/Dataset.py index 63763af..7bd3c2d 100644 --- a/src/dataset_loaders/Dataset.py +++ b/src/dataset_loaders/Dataset.py @@ -2,6 +2,7 @@ from abc import abstractmethod, ABC from os.path import join, curdir from typing import Callable +import numpy as np import torch from torch import Tensor from torch.utils.data import Dataset as TorchDataset @@ -57,7 +58,8 @@ class Dataset(TorchDataset, ABC): self.chunk_offsets = self.get_offsets() self.bytes = ''.join(tqdm(self.data[:len(self.chunk_offsets)], desc="Encoding data")).encode('utf-8', errors='replace') - self.tensor = torch.tensor(list(self.bytes), dtype=torch.long) + bytes_array = np.frombuffer(self.bytes, dtype=np.uint8) # Zero-copy, read-only view of self.bytes; the .to(torch.long) below makes the only copy + self.tensor = torch.from_numpy(bytes_array).to(torch.long) def get_offsets(self): """