# Reconstructed from a git diff that adds CNN-model/datasets/EnWik9.py.
# NOTE(review): this module lives in a directory named `datasets`; if that
# directory's parent is on sys.path, `from datasets import ...` may resolve to
# the local package rather than the third-party `datasets` library -- confirm.
from os.path import curdir, join

from datasets import load_dataset


class EnWik9DataSet:
    """Wrapper around the "train" split of the "haukur/enwik9" dataset."""

    def __init__(self, cache_dir=None):
        """Load the dataset through ``load_dataset`` and keep it on ``self.data``.

        Args:
            cache_dir: Directory handed to ``load_dataset`` as ``cache_dir``.
                When omitted, ``join(curdir, "data")`` is used, i.e. a
                ``data`` folder relative to the current working directory
                of the process (the original hard-coded behavior).
        """
        if cache_dir is None:
            # Keep the historical default so existing callers and any
            # already-populated cache keep working unchanged.
            cache_dir = join(curdir, "data")
        self.data = load_dataset(
            "haukur/enwik9",
            cache_dir=cache_dir,
            split="train",
        )

    def __len__(self):
        """Return ``len(self.data)``."""
        return len(self.data)