diff --git a/.gitignore b/.gitignore index 35ba91e..c9b2746 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ __pycache__ .idea/ -data/ \ No newline at end of file +data/ +saved_models/ +results/ diff --git a/.python-version b/.python-version index e4fba21..2c07333 100644 --- a/.python-version +++ b/.python-version @@ -1 +1 @@ -3.12 +3.11 diff --git a/README.md b/README.md index e69de29..2b0b5f7 100644 --- a/README.md +++ b/README.md @@ -0,0 +1,26 @@ +# neural compression + +Example usage: + +```shell +python main.py --debug train --dataset enwik9 --data-root ~/data/datasets/ml --method optuna --model transformer --model-save-path ~/data/ml-models/test-transformer.pt + +python benchmark.py --debug train --dataset enwik9 --data-root ~/data/datasets/ml --method optuna --model cnn --model-save-path ~/data/ml-models/test-cnn.pt +``` + +## Running locally + +``` +uv sync --all-extras +``` + +## Running on the Ghent University HPC + +See the [Infrastructure docs](https://docs.hpc.ugent.be/infrastructure/#gpu-clusters) for more information about the clusters. + +``` +module swap cluster/joltik # Specify the (GPU) cluster, {joltik,accelgor,litleo} + +qsub job.pbs # Submit job +qstat # Check status +``` diff --git a/benchmark.py b/benchmark.py new file mode 100644 index 0000000..452d79e --- /dev/null +++ b/benchmark.py @@ -0,0 +1,12 @@ +from main import main +from src.utils.benchmark import execute_benchmark +from src.utils.benchmark_dataclasses import BenchmarkItem + + +def benchmark(): + # Just calling `main` is the easiest way to allow all functionality + execute_benchmark(benchmark_item=BenchmarkItem(task=main, arguments={}), results_dir="results") + + +if __name__ == "__main__": + benchmark() \ No newline at end of file diff --git a/dataset_loaders/Dataset.py b/dataset_loaders/Dataset.py deleted file mode 100644 index fbac1a6..0000000 --- a/dataset_loaders/Dataset.py +++ /dev/null @@ -1,26 +0,0 @@ -from abc import abstractmethod, ABC -from os.path import join, curdir -from typing import Callable - -from torch.utils.data import Dataset as TorchDataset - -""" -Author: Tibo De Peuter -""" -class Dataset(TorchDataset, ABC): - """Abstract base class for datasets.""" - @abstractmethod - def __init__(self, root: str, transform: Callable = None): - """ - :param root: Relative path to the dataset root directory - """ - self._root: str = join(curdir, 'data', root) - self.transform = transform - self.dataset = None - - @property - def root(self): - return self._root - - def __len__(self): - return len(self.dataset) \ No newline at end of file diff --git a/dataset_loaders/EnWik9.py b/dataset_loaders/EnWik9.py deleted file mode 100644 index bef57a1..0000000 --- a/dataset_loaders/EnWik9.py +++ /dev/null @@ -1,43 +0,0 @@ -from datasets import load_dataset -from torch.utils.data import Dataset -import torch -from os.path import curdir, join -from typing import Callable - - -class EnWik9DataSet(Dataset): - def __init__(self, root: str = "data", transform: Callable | None = None): - super().__init__() - self.transform = transform - - # HuggingFace dataset: string text - path = join(curdir, root) - data = load_dataset("haukur/enwik9", cache_dir=path, split="train") - - # Extract raw text - text = data["text"] - - # Convert text (Python string) → bytes → tensor of ints 0–255 - # UTF-8 but non-ASCII bytes may exceed 255, so enforce modulo or ignore errors - byte_data = "".join(text).encode("utf-8", errors="replace") - self.data = torch.tensor(list(byte_data), dtype=torch.long) - - # Model uses fixed 
128-length context
-        self.context_length = 128
-
-    def __len__(self):
-        # number of sliding windows
-        return len(self.data) - self.context_length
-
-    def __getitem__(self, idx):
-        # context window
-        x = self.data[idx : idx + self.context_length]
-
-        # next byte target
-        y = self.data[idx + self.context_length]
-
-        if self.transform:
-            x = self.transform(x)
-
-        return x, y
-
diff --git a/dataset_loaders/LoremIpsumDataset.py b/dataset_loaders/LoremIpsumDataset.py
deleted file mode 100644
index 6ea0a85..0000000
--- a/dataset_loaders/LoremIpsumDataset.py
+++ /dev/null
@@ -1,34 +0,0 @@
-from typing import Callable
-
-import torch
-from os.path import curdir, join
-from lorem.text import TextLorem
-from .Dataset import Dataset
-
-
-class LoremIpsumDataset(Dataset):
-    def __init__(self, root: str = "data", transform: Callable = None):
-        super().__init__(root, transform)
-
-        # Generate text and convert to bytes
-        _lorem = TextLorem()
-        _text = ' '.join(_lorem._word() for _ in range(512))
-
-        path = join(curdir, "data")
-        self._root = path
-        # Convert text to bytes (UTF-8 encoded)
-        self.dataset = torch.tensor([ord(c) % 256 for c in list(_text)], dtype=torch.long)
-        self.context_length = 128
-
-    def __len__(self):
-        # Number of possible sequences of length sequence_length
-        return self.dataset.size(0) - self.context_length
-
-    def __getitem__(self, idx):
-        x = self.dataset[idx: idx + self.context_length]
-        y = self.dataset[idx + self.context_length]
-
-        if self.transform is not None:
-            x = self.transform(x)
-
-        return x, y
diff --git a/dataset_loaders/__init__.py b/dataset_loaders/__init__.py
deleted file mode 100644
index 58336a2..0000000
--- a/dataset_loaders/__init__.py
+++ /dev/null
@@ -1,3 +0,0 @@
-from .EnWik9 import EnWik9DataSet
-from .LoremIpsumDataset import LoremIpsumDataset
-from .Dataset import Dataset
\ No newline at end of file
diff --git a/job.pbs b/job.pbs
index dc6509e..154dc9e 100644
--- a/job.pbs
+++ b/job.pbs
@@ -1,15 +1,40 @@
 #!/bin/bash
-
+#PBS -N nc-cnn-enwik9-optuna
 #PBS -l gpus=1
-#PBS -l walltime=03:00:00
+#PBS -l walltime=08:00:00
+#PBS -l mem=60gb
 #PBS -m abe
 
-module load PyTorch/2.1.2-foss-2023a-CUDA-12.1.1
+CACHE_DIR="${VSC_SCRATCH}/.cache"  # Directory to use as cache
+UV_DIR="${VSC_SCRATCH}/uv"  # Directory to install packages
+VENV="${UV_DIR}/venv"
+
+DATA_DIR="${VSC_DATA}/datasets"
+RESULTS_DIR="${VSC_DATA}/neural-compression/$(date +%Y%m%d-%H%M-%S%N)-results"
+
+mkdir -p "${DATA_DIR}" "${RESULTS_DIR}" || true
+
+module purge
+module load PyTorch-bundle/2.1.2-foss-2023a-CUDA-12.1.1
 module load Optuna/3.5.0-foss-2023a
+module load matplotlib/3.7.2-gfbf-2023a  # assumed 2023a build; a Python-2.7 matplotlib module cannot load next to this toolchain
 
-cd $PBS_O_WORKDIR
+cd "${PBS_O_WORKDIR}" || exit
 
-source training_env/bin/activate
+UV_PYTHON_INSTALL_DIR="${UV_DIR}/python" \
+    uv --cache-dir="${CACHE_DIR}/uv" \
+    venv "${VENV}" --clear
 
-python main_cnn.py --method train
\ No newline at end of file
+source "${VENV}/bin/activate"
+
+UV_PYTHON_INSTALL_DIR="${UV_DIR}/python" \
+    uv --cache-dir="${CACHE_DIR}/uv" \
+    sync --active --no-dev
+
+python main.py train \
+    --method=optuna \
+    --dataset=enwik9 --data-root="${DATA_DIR}" \
+    --model=cnn --model-save-path="${RESULTS_DIR}/cnn-enwik9-optuna.pt"
diff --git a/main.py b/main.py
index 621a17d..41fdb8e 100644
--- a/main.py
+++ b/main.py
@@ -1,64 +1,49 @@
-from argparse import ArgumentParser
-from math import ceil
+from src.args import parse_arguments
+from src.process import compress
+from src.train import train +from src.utils import determine_device -import torch -from torch.utils.data import DataLoader -from dataset_loaders import EnWik9DataSet, LoremIpsumDataset, Dataset -from trainers import OptunaTrainer, Trainer, FullTrainer +def main(): + args, print_help = parse_arguments() -BATCH_SIZE = 64 + device = determine_device() + print(f"Running on device: {device}...") -if torch.cuda.is_available(): - DEVICE = "cuda" -elif torch.backends.mps.is_available(): - DEVICE = "mps" -else: - DEVICE = "cpu" + match args.mode: + case 'train': + size = int(args.size) if args.size else None + if args.method == 'optuna': + size = 2 ** 12 + print(f"Using size {size} for optuna (was {args.size})") + if args.debug: + size = 2 ** 10 + print(f"Using size {size} for debug (was {args.size})") + + train( + device=device, + dataset=args.dataset, + data_root=args.data_root, + n_trials=3 if args.debug else None, + size=size, + method=args.method, + model_name=args.model, + model_path=args.model_load_path, + model_out=args.model_save_path + ) + + case 'compress': + compress(device=device, + model_path=args.model_load_path, + input_file=args.input_file, + output_file=args.output_file + ) + + case _: + raise NotImplementedError(f"Mode {args.mode} is not implemented yet") + + print("Done") -# hyper parameters -context_length = 128 if __name__ == "__main__": - print(f"Running on device: {DEVICE}...") - parser = ArgumentParser() - parser.add_argument("--method", choices=["optuna", "train"], required=True) - parser.add_argument("--models-path", type=str, required=False) - args = parser.parse_args() - - print("Loading in the dataset...") - if args.method == "train": - dataset: Dataset = EnWik9DataSet(transform=lambda x: x.to(DEVICE)) - elif args.method == "optuna": - dataset: Dataset = LoremIpsumDataset(transform=lambda x: x.to(DEVICE)) - else: - raise ValueError(f"Unknown method: {args.method}") - - dataset_length = len(dataset) - print(f"Dataset size = {dataset_length}") - - training_size = ceil(0.8 * dataset_length) - - print(f"Training set size = {training_size}, Validation set size {dataset_length - training_size}") - - train_set, validate_set = torch.utils.data.random_split(dataset, - [training_size, dataset_length - training_size]) - training_loader = DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True) - validation_loader = DataLoader(validate_set, batch_size=BATCH_SIZE, shuffle=False) - loss_fn = torch.nn.CrossEntropyLoss() - - model = None - if args.model_path is not None: - print("Loading the models...") - model = torch.load(args.model_path) - - trainer: Trainer = OptunaTrainer() if args.method == "optuna" else FullTrainer() - - trainer.execute( - model=model, - train_loader=training_loader, - validation_loader=validation_loader, - loss_fn=loss_fn, - n_epochs=200, - device=DEVICE - ) + main() diff --git a/models/__init__.py b/models/__init__.py deleted file mode 100644 index 42e6d4c..0000000 --- a/models/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -from .cnn import CNNPredictor -from .transformer import Transformer \ No newline at end of file diff --git a/models/transformer/__init__.py b/models/transformer/__init__.py deleted file mode 100644 index 6ff14d5..0000000 --- a/models/transformer/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .transformer import Transformer \ No newline at end of file diff --git a/models/transformer/transformer.py b/models/transformer/transformer.py deleted file mode 100644 index 63032eb..0000000 --- a/models/transformer/transformer.py +++ /dev/null @@ -1,60 
+0,0 @@ -from typing import Optional - -import torch.nn as nn -from torch import Tensor - - -class Transformer(nn.Transformer): - def __init__( - self, - d_model=512, - nhead=8, - num_encoder_layers=6, - num_decoder_layers=6, - dim_feedforward=2048, - dropout=0.1, - activation="relu", - layer_norm_eps=1e-05 - ): - super().__init__( - d_model=d_model, - nhead=nhead, - num_encoder_layers=num_encoder_layers, - num_decoder_layers=num_decoder_layers, - dim_feedforward=dim_feedforward, - dropout=dropout, - activation=activation, - layer_norm_eps=layer_norm_eps, - batch_first=False, - norm_first=False, - device=None, - dtype=None - ) - - def forward( - self, - src: Tensor, - tgt: Tensor, - src_mask: Optional[Tensor] = None, - tgt_mask: Optional[Tensor] = None, - memory_mask: Optional[Tensor] = None, - src_key_padding_mask: Optional[Tensor] = None, - tgt_key_padding_mask: Optional[Tensor] = None, - memory_key_padding_mask: Optional[Tensor] = None, - src_is_causal: Optional[bool] = None, - tgt_is_causal: Optional[bool] = None, - memory_is_causal: bool = False, - ) -> Tensor: - return super().forward( - src, - tgt, - src_mask, - tgt_mask, - memory_mask, - src_key_padding_mask, - tgt_key_padding_mask, - memory_key_padding_mask, - src_is_causal, - tgt_is_causal, - memory_is_causal, - ) diff --git a/pyproject.toml b/pyproject.toml index 232451d..fa21be3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,14 +3,21 @@ name = "project-ml" version = "0.1.0" description = "Add your description here" readme = "README.md" -requires-python = ">=3.12" +requires-python = ">=3.11" dependencies = [ - "datasets>=4.4.1", + "datasets>=3.2.0", + "huggingface_hub==0.27.0", + "fsspec==2024.9.0", "lorem>=0.1.1", - "matplotlib>=3.10.7", - "numpy>=2.3.4", - "optuna>=4.5.0", - "torch>=2.9.0", - "torchdata>=0.11.0", - "torchvision>=0.24.0", +] + +[project.optional-dependencies] +dev = [ + "hydra-core>=1.3.2", + "matplotlib>=3.10.7", + "memray>=1.19.1", + "optuna==4.5.0", + "torch==2.9.0", + "torchdata==0.7.1", + "torchvision==0.24.0", ] diff --git a/simple/.keep b/results/.keep similarity index 100% rename from simple/.keep rename to results/.keep diff --git a/saved_models/final_model.pt b/saved_models/final_model.pt deleted file mode 100644 index 3b0aae5..0000000 Binary files a/saved_models/final_model.pt and /dev/null differ diff --git a/src/args.py b/src/args.py new file mode 100644 index 0000000..3fc8325 --- /dev/null +++ b/src/args.py @@ -0,0 +1,45 @@ +from argparse import ArgumentParser + +from src.dataset_loaders import dataset_called + + +def parse_arguments(): + parser = ArgumentParser(prog="NeuralCompression") + parser.add_argument("--debug", "-d", action="store_true", required=False, + help="Enable debug mode: smaller datasets, more information") + parser.add_argument("--verbose", "-v", action="store_true", required=False, + help="Enable verbose mode") + + dataparser = ArgumentParser(add_help=False) + dataparser.add_argument("--data-root", type=str, required=False) + dataparser.add_argument("--dataset", choices=dataset_called.keys(), required=True) + + modelparser = ArgumentParser(add_help=False) + modelparser.add_argument("--model", "-m", type=str, required=False, + help="Which model to use") + modelparser.add_argument("--model-load-path", type=str, required=False, + help="Filepath to the model to load") + modelparser.add_argument("--model-save-path", type=str, required=False, + help="Filepath to the model to save") + + fileparser = ArgumentParser(add_help=False) + fileparser.add_argument("--input-file", "-i", 
required=False, type=str)
+    fileparser.add_argument("--output-file", "-o", required=False, type=str)
+
+    subparsers = parser.add_subparsers(dest="mode", required=True,
+                                       help="Mode to run in")
+
+    train_parser = subparsers.add_parser("train",
+                                         parents=[dataparser, modelparser],
+                                         help="Do a full training")
+    train_parser.add_argument("--method",
+                              choices=["fetch", "optuna", "full"], required=True,
+                              help="Method to use for training")
+    train_parser.add_argument("--size", "-s", type=int, required=False,
+                              help="Size of the subset of the dataset to use")
+
+    compress_parser = subparsers.add_parser("compress", parents=[modelparser, fileparser])
+
+    decompress_parser = subparsers.add_parser("decompress", parents=[modelparser, fileparser])
+
+    return parser.parse_args(), parser.print_help
diff --git a/src/dataset_loaders/Dataset.py b/src/dataset_loaders/Dataset.py
new file mode 100644
index 0000000..f3c5786
--- /dev/null
+++ b/src/dataset_loaders/Dataset.py
@@ -0,0 +1,128 @@
+from abc import abstractmethod, ABC
+from itertools import accumulate
+from os.path import join, curdir
+from typing import Callable
+
+import numpy as np
+import torch
+from torch import Tensor
+from torch.utils.data import Dataset as TorchDataset
+from tqdm import tqdm
+
+"""
+Author: Tibo De Peuter
+"""
+
+
+class Dataset(TorchDataset, ABC):
+    """Abstract base class for datasets."""
+
+    @abstractmethod
+    def __init__(self,
+                 name: str,
+                 root: str | None,
+                 split: str = 'train',
+                 transform: Callable | None = None,
+                 size: int = -1
+                 ):
+        """
+        :param root: Path to the dataset root directory
+        :param split: The dataset split, e.g. 'train', 'validation', 'test'
+        :param size: Override the maximum size of the dataset, useful for debugging
+        """
+        if root is None:
+            root = join(curdir, 'data')
+
+        self._root = join(root, name)
+        self.split = split
+        self.transform = transform
+        self.size = size
+        self.data = None
+
+        self.chunk_offsets: list[int] = []
+        self.bytes: bytes = bytes()
+        self.tensor: Tensor = torch.tensor([])
+
+    @property
+    def root(self):
+        return self._root
+
+    def __len__(self):
+        return len(self.data)
+
+    def process_data(self):
+        self.chunk_offsets = self.get_offsets()
+        if self.size == -1:
+            # Just use the whole dataset
+            self.bytes = ''.join(tqdm(self.data, desc="Encoding data", leave=False)).encode('utf-8', errors='replace')
+        else:
+            # Use only the chunks covered by the offsets
+            # (len(chunk_offsets) is the number of used chunks plus one, because of the leading 0)
+            self.bytes = (''.join(tqdm(self.data[:len(self.chunk_offsets) - 1], desc="Encoding data", leave=False))
+                          .encode('utf-8', errors='replace'))
+
+        bytes_array = np.frombuffer(self.bytes, dtype=np.uint8)  # Zero-copy
+        self.tensor = torch.from_numpy(bytes_array).to(torch.long, non_blocking=True)
+
+    def get_offsets(self):
+        """
+        Calculate for each chunk how many bytes came before it
+        """
+        data = self.data
+        size = self.size
+
+        if size == -1:
+            return [0, *accumulate(tqdm(map(len, data), desc="Calculating offsets", leave=False, total=len(data)))]
+
+        offsets = [0]
+        total = 0
+        append = offsets.append
+        for chunk in tqdm(data):
+            if total >= size:
+                break
+            total += len(chunk)
+            append(total)
+        return offsets
+
+    def get_chunked_item(self, idx: int, offsets: list[int], context_length: int):
+        item = ''
+
+        # Determine first chunk in which item is located
+        chunk_idx = 0
+        while idx >= offsets[chunk_idx]:
+            chunk_idx += 1
+        chunk_idx -= 1
+
+        # Extract item from chunks
+        chunk = str(self.data[chunk_idx])
+        chunk_start = offsets[chunk_idx]
+
+        chunk_item_start = idx - chunk_start
+        item_len_remaining = context_length + 1
+
+        assert 
len(item) + item_len_remaining == context_length + 1 + + while chunk_item_start + item_len_remaining > len(chunk): + adding_now_len = len(chunk) - chunk_item_start + item += chunk[chunk_item_start:] + + chunk_idx += 1 + chunk = str(self.data[chunk_idx]) + + chunk_item_start = 0 + item_len_remaining -= adding_now_len + + assert len(item) + item_len_remaining == context_length + 1 + + item += chunk[chunk_item_start: chunk_item_start + item_len_remaining] + + assert len(item) == context_length + 1, f"Expected item of length {context_length + 1}, was {len(item)}" + + # Transform to tensor + data = ''.join(item).encode('utf-8', errors='replace') + t = torch.tensor(list(data), dtype=torch.long) + x, y = t[:-1], t[-1] + + if self.transform: + x = self.transform(x) + + return x, y diff --git a/src/dataset_loaders/EnWik9.py b/src/dataset_loaders/EnWik9.py new file mode 100644 index 0000000..0af0be3 --- /dev/null +++ b/src/dataset_loaders/EnWik9.py @@ -0,0 +1,59 @@ +from math import ceil +from typing import Callable + +from datasets import load_dataset, Features, Value + +from .Dataset import Dataset + + +class EnWik9DataSet(Dataset): + """ + Hugging Face: https://huggingface.co/datasets/haukur/enwik9 + """ + + def __init__(self, + root: str | None = None, + split: str = 'train', + transform: Callable | None = None, + size: int = -1 + ): + super().__init__('enwik9', root, split, transform, size) + + print(f"Loading from HuggingFace") + ft = Features({'text': Value('string')}) + # Don't pass split here, dataset only contains training + text_chunks = load_dataset("haukur/enwik9", cache_dir=self.root, split='train', features=ft) + self.data = text_chunks['text'] + self.size = size + + # Model uses fixed 128-length context + self.context_length = 128 + + self.process_data() + + # Define splits manually, because they do not exist in the dataset + split_point = ceil(self.chunk_offsets[-1] * 0.8) + + if self.split == 'train': + self.start_byte = 0 + self.end_byte = split_point + elif self.split == 'validation': + self.start_byte = split_point + self.end_byte = self.chunk_offsets[-1] + else: + raise ValueError("split must be 'train' or 'validation'") + + print("Done initializing dataset") + + def __len__(self): + return self.end_byte - self.start_byte - self.context_length + + def __getitem__(self, idx): + # return self.get_chunked_item(idx, self.chunk_offsets, self.context_length) + x = self.tensor[self.start_byte + idx:self.start_byte + idx + self.context_length] + y = self.tensor[self.start_byte + idx + self.context_length] + + if self.transform: + x = self.transform(x) + + return x, y diff --git a/src/dataset_loaders/LoremIpsumDataset.py b/src/dataset_loaders/LoremIpsumDataset.py new file mode 100644 index 0000000..5dece41 --- /dev/null +++ b/src/dataset_loaders/LoremIpsumDataset.py @@ -0,0 +1,63 @@ +from math import ceil +from typing import Callable + +from lorem.text import TextLorem +from tqdm import tqdm + +from .Dataset import Dataset + + +class LoremIpsumDataset(Dataset): + def __init__(self, + root: str | None = None, + split: str = 'train', + transform: Callable = None, + size: int = 2**30 + ): + super().__init__('lorem_ipsum', root, split, transform, size) + + _lorem = TextLorem() + + self.data = ' '.join(_lorem._word() for _ in tqdm(range(size), desc="Generating data")) + self.size = size + + self.context_length = 128 + + self.process_data() + + split_point = ceil(self.chunk_offsets[-1] * 0.8) + + if self.split == 'train': + self.start_byte = 0 + self.end_byte = split_point + elif self.split 
== 'validation':
+            self.start_byte = split_point
+            self.end_byte = self.chunk_offsets[-1]
+        else:
+            raise ValueError("split must be 'train' or 'validation'")
+
+        print("Done initializing dataset")
+
+    def __len__(self):
+        return self.end_byte - self.start_byte - self.context_length
+
+    def __getitem__(self, idx):
+        x = self.tensor[self.start_byte + idx:self.start_byte + idx + self.context_length]
+        y = self.tensor[self.start_byte + idx + self.context_length]
+
+        if self.transform:
+            x = self.transform(x)
+
+        return x, y
diff --git a/src/dataset_loaders/OpenGenomeDataset.py b/src/dataset_loaders/OpenGenomeDataset.py
new file mode 100644
index 0000000..05ee2b5
--- /dev/null
+++ b/src/dataset_loaders/OpenGenomeDataset.py
@@ -0,0 +1,51 @@
+from typing import Callable
+
+from datasets import load_dataset, Value, Features
+
+from .Dataset import Dataset
+
+
+class OpenGenomeDataset(Dataset):
+    """
+    Hugging Face: https://huggingface.co/datasets/LongSafari/open-genome
+
+    :param split: Either 'train', 'test' or 'validation'
+    :param stage: Either 'sample', 'stage1' or 'stage2'.
+        'sample' only provides a 'validation' split
+    """
+
+    def __init__(self,
+                 root: str | None = None,
+                 split: str = 'train',
+                 transform: Callable | None = None,
+                 size: int = -1,
+                 stage: str = 'stage2'
+                 ):
+        super().__init__('open_genome', root, split, transform, size)
+
+        print(f"Loading from HuggingFace (stage: {stage}, split: {split})")
+        ft = Features({'text': Value('string')})
+        data = load_dataset("LongSafari/open-genome", stage, split=split, cache_dir=self.root, features=ft)
+        self.data = data['text']
+        self.size = size
+
+        # Model uses fixed 128-length context
+        self.context_length = 128
+
+        self.process_data()
+
+        print("Done initializing dataset")
+
+    def __len__(self):
+        return self.chunk_offsets[-1] - self.context_length
+
+    def __getitem__(self, idx):
+        x = self.tensor[idx:idx + self.context_length]
+        y = self.tensor[idx + self.context_length]
+
+        if self.transform:
+            x = self.transform(x)
+
+        return x, y
diff --git a/src/dataset_loaders/__init__.py b/src/dataset_loaders/__init__.py
new file mode 100644
index 0000000..f23312c
--- /dev/null
+++ b/src/dataset_loaders/__init__.py
@@ -0,0 +1,10 @@
+from .Dataset import Dataset
+from .EnWik9 import EnWik9DataSet
+from .LoremIpsumDataset import LoremIpsumDataset
+from .OpenGenomeDataset import OpenGenomeDataset
+
+dataset_called: dict[str, type[Dataset]] = {
+    'enwik9': EnWik9DataSet,
+    'lorem_ipsum': LoremIpsumDataset,
+    'opengenome': OpenGenomeDataset
+}
diff --git a/src/models/Model.py b/src/models/Model.py
new file mode 100644
index 0000000..af8d5d3
--- /dev/null
+++ b/src/models/Model.py
@@ -0,0 +1,14 @@
+from abc import ABC, abstractmethod
+
+from torch import nn
+
+
+class Model(nn.Module, ABC):
+    @abstractmethod
+    def __init__(self, loss_function=None):
+        super().__init__()
+        self._loss_function = loss_function
+
+    @property
+    def loss_function(self):
+        return self._loss_function
diff --git a/src/models/__init__.py b/src/models/__init__.py
new file mode 100644
index 0000000..e329dbc
--- /dev/null
+++ b/src/models/__init__.py
@@ -0,0 +1,8 @@
+from .Model import Model
+from .cnn import CNNPredictor
+from .transformer import ByteTransformer
+
+model_called: dict[str, type[Model]] = {
+    'cnn': CNNPredictor,
+    'transformer': ByteTransformer
+}
diff --git a/models/cnn/__init__.py b/src/models/cnn/__init__.py
similarity index 100%
rename from models/cnn/__init__.py
rename to src/models/cnn/__init__.py
diff --git a/models/cnn/cnn.py b/src/models/cnn/cnn.py
similarity index 93%
rename from models/cnn/cnn.py
rename to src/models/cnn/cnn.py
index 05768d7..22e8843 100644
--- a/models/cnn/cnn.py
+++ b/src/models/cnn/cnn.py
@@ -1,14 +1,16 @@
-import torch
 import torch.nn as nn
 
-class CNNPredictor(nn.Module):
+from src.models import Model
+
+
+class CNNPredictor(Model):
     def __init__(
         self,
         vocab_size=256,
         embed_dim=64,
         hidden_dim=128,
     ):
-        super().__init__()
+        super().__init__(nn.CrossEntropyLoss())
 
         # 1. Embedding: maps bytes (0–255) → vectors
         self.embed = nn.Embedding(vocab_size, embed_dim)
diff --git a/src/models/transformer/__init__.py b/src/models/transformer/__init__.py
new file mode 100644
index 0000000..9817800
--- /dev/null
+++ b/src/models/transformer/__init__.py
@@ -0,0 +1 @@
+from .transformer import ByteTransformer
\ No newline at end of file
diff --git a/src/models/transformer/transformer.py b/src/models/transformer/transformer.py
new file mode 100644
index 0000000..f85e60d
--- /dev/null
+++ b/src/models/transformer/transformer.py
@@ -0,0 +1,70 @@
+import torch.nn as nn
+from torch import Tensor, arange
+
+from src.models import Model
+
+
+class LearnedPositionalEncoding(nn.Module):
+    """Learned per-position embedding; a plain submodule rather than a Model."""
+
+    def __init__(self, max_len, d_model):
+        super().__init__()
+        self.pos_emb = nn.Embedding(max_len, d_model)
+
+    def forward(self, x):
+        # x: [batch, seq, d_model] (the transformer below runs batch-first)
+        seq_len = x.size(1)
+        positions = arange(seq_len, device=x.device).unsqueeze(0)  # [1, seq]
+        return x + self.pos_emb(positions)  # broadcast over batch
+
+
+class ByteTransformer(Model):
+    def __init__(
+            self,
+            d_model=512,
+            nhead=8,
+            num_encoder_layers=6,
+            num_decoder_layers=6,
+            dim_feedforward=2048,
+            dropout=0.1,
+            activation="relu",
+            layer_norm_eps=1e-05,
+            max_len=128
+    ):
+        super().__init__(nn.CrossEntropyLoss())
+        self.src_embedding = nn.Embedding(256, d_model)
+        self.tgt_embedding = nn.Embedding(256, d_model)
+
+        self.src_pos = LearnedPositionalEncoding(max_len, d_model)
+        self.tgt_pos = LearnedPositionalEncoding(max_len, d_model)
+
+        self.transformer = nn.Transformer(
+            d_model=d_model,
+            nhead=nhead,
+            num_encoder_layers=num_encoder_layers,
+            num_decoder_layers=num_decoder_layers,
+            dim_feedforward=dim_feedforward,
+            dropout=dropout,
+            activation=activation,
+            layer_norm_eps=layer_norm_eps,
+            batch_first=True,  # data loaders yield (batch, seq) byte tensors
+            norm_first=False,
+            device=None,
+            dtype=None,
+        )
+
+        self.output_proj = nn.Linear(d_model, 256)
+
+    def forward(
+            self,
+            src: Tensor,
+            tgt: Tensor,
+    ) -> Tensor:
+        src_embeds = self.src_embedding(src)
+        tgt_embeds = self.tgt_embedding(tgt)
+
+        src_pos = self.src_pos(src_embeds)
+        tgt_pos = self.tgt_pos(tgt_embeds)
+
+        return self.output_proj(self.transformer(src_pos, tgt_pos))
diff --git a/src/process.py b/src/process.py
new file mode 100644
index 0000000..b2edda3
--- /dev/null
+++ b/src/process.py
@@ -0,0 +1,30 @@
+import torch
+
+
+def compress(
+        device,
+        model_path: str,
+        output_file: str,
+        input_file: str | None = None
+):
+    # Get input to compress
+    if input_file:
+        with open(input_file, "rb") as file:
+            byte_data = file.read()
+    else:
+        # Read from stdin
+        text = input()
+        byte_data = text.encode('utf-8', errors='replace')
+
+    tensor = torch.tensor(list(byte_data), dtype=torch.long)
+    print(f"Read {tensor.numel()} bytes to compress")
+
+    # Get model
+    model = torch.load(model_path, weights_only=False)
+
+    # TODO Feed to model for compression, store result
+    return
+
+
+def decompress():
+    raise NotImplementedError("Decompression is not implemented yet")
diff --git a/src/train.py b/src/train.py
new file mode 100644
index 0000000..ee4a99a
--- /dev/null
+++ b/src/train.py
@@ -0,0 +1,74 @@
+from pathlib import Path
+
+import torch
+from torch.utils.data import DataLoader
+
+from src.dataset_loaders import dataset_called
+from src.models import model_called
+from src.trainers import OptunaTrainer, Trainer, FullTrainer
+
+
+def train(
+        device,
+        dataset: str,
+        data_root: str,
+        n_trials: int | None = None,
+        size: int | None = None,
+        method: str = 'optuna',
+        model_name: str | None = None,
+        model_path: str | None = None,
+        model_out: str | None = None
+):
+    batch_size = 2
+
+    assert model_name or model_path, "Either a model to train or a model to load from model_path must be provided"
+
+    if model_name:
+        print("Creating model")
+        # The Optuna trainer instantiates the class itself, once per trial;
+        # a full training run needs an actual instance.
+        model = model_called[model_name] if method == 'optuna' else model_called[model_name]()
+    else:
+        print("Loading model from disk")
+        model = torch.load(model_path, weights_only=False)
+
+    dataset_common_args = {
+        'root': data_root,
+        'transform': lambda x: x.to(device),
+    }
+
+    if size:
+        dataset_common_args['size'] = size
+
+    print("Loading in the dataset...")
+    if dataset in dataset_called:
+        training_set = dataset_called[dataset](split='train', **dataset_common_args)
+        validate_set = dataset_called[dataset](split='validation', **dataset_common_args)
+    else:
+        # TODO Allow to import arbitrary files
+        raise NotImplementedError("Importing external datasets is not implemented yet")
+
+    if method == 'fetch':
+        # TODO Move to earlier in the chain, because now everything is converted into tensors as well?
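+        # 'fetch' is a download-only mode: constructing the datasets above has
+        # already cached everything to disk, so stop before any training work.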
+        exit(0)
+
+    print(f"Training set size = {len(training_set)}, Validation set size = {len(validate_set)}")
+    training_loader = DataLoader(training_set, batch_size=batch_size, shuffle=True)
+    validation_loader = DataLoader(validate_set, batch_size=batch_size, shuffle=False)
+
+    trainer: Trainer = OptunaTrainer(n_trials=n_trials) if method == "optuna" else FullTrainer()
+
+    print("Training")
+    best_model = trainer.execute(
+        model=model,
+        train_loader=training_loader,
+        validation_loader=validation_loader,
+        n_epochs=n_trials,  # doubles as an epoch cap for debug runs; None lets the trainer pick its default
+        device=device
+    )
+
+    print("Saving model...")
+    f = model_out or f"saved_models/{best_model.__class__.__name__}.pt"
+    # Make sure path exists
+    Path(f).parent.mkdir(parents=True, exist_ok=True)
+    torch.save(best_model, f)
+    print(f"Saved model to '{f}'")
diff --git a/trainers/FullTrainer.py b/src/trainers/FullTrainer.py
similarity index 62%
rename from trainers/FullTrainer.py
rename to src/trainers/FullTrainer.py
index 7f7882a..cfe9b08 100644
--- a/trainers/FullTrainer.py
+++ b/src/trainers/FullTrainer.py
@@ -1,26 +1,26 @@
-from typing import Callable
-
-import torch
-from torch import nn as nn
+from torch import nn
 from torch.utils.data import DataLoader
 
-from .trainer import Trainer
 from .train import train
+from .trainer import Trainer
+from ..models import Model
 from ..utils import print_losses
 
+
 class FullTrainer(Trainer):
     def execute(
         self,
-        model: nn.Module | None,
+        model: Model,
         train_loader: DataLoader,
         validation_loader: DataLoader,
-        loss_fn: Callable[[torch.Tensor, torch.Tensor], torch.Tensor],
-        n_epochs: int,
+        n_epochs: int | None,
        device: str
-    ) -> None:
+    ) -> nn.Module:
         if model is None:
             raise ValueError("Model must be provided: run optuna optimizations first")
 
         model.to(device)
-        train_loss, val_loss = train(model, train_loader, validation_loader, loss_fn, n_epochs)
-        print_losses(train_loss, val_loss)
\ No newline at end of file
+        train_loss, val_loss = train(model, train_loader, validation_loader, model.loss_function, n_epochs, device=device)
+        print_losses(train_loss, val_loss)
+
+        return model
diff --git a/src/trainers/OptunaTrainer.py b/src/trainers/OptunaTrainer.py
new file mode 100644
index 0000000..e40aeeb
--- /dev/null
+++ b/src/trainers/OptunaTrainer.py
@@ -0,0 +1,72 @@
+import optuna
+import optuna.trial as tr
+from torch import nn
+from torch.utils.data import DataLoader
+
+from .train import train
+from .trainer import Trainer
+from ..models import Model, CNNPredictor, ByteTransformer
+
+
+def create_model(trial: tr.Trial, model: type[Model] | Model):
+    # `model` is normally the model class; accept a loaded instance as well.
+    # Comparing with issubclass avoids matching on `model.__class__`, which
+    # for a class object is just its metaclass.
+    cls = model if isinstance(model, type) else type(model)
+    if issubclass(cls, CNNPredictor):
+        return cls(
+            hidden_dim=trial.suggest_int("hidden_dim", 64, 512, log=True),
+            embed_dim=trial.suggest_int("embed_dim", 64, 512, log=True),
+            vocab_size=256,
+        )
+    if issubclass(cls, ByteTransformer):
+        nhead = trial.suggest_categorical("nhead", [2, 4, 8])  # Only powers of 2
+        # d_model_dim = nhead * trial.suggest_int("d_model_mult", 64 // nhead, 512 // nhead)
+        return cls(
+            d_model=128,  # hard-coded for now; must stay divisible by nhead
+            nhead=nhead,
+            num_encoder_layers=trial.suggest_int("num_encoder_layers", 2, 6, log=True),
+            num_decoder_layers=trial.suggest_int("num_decoder_layers", 2, 6, log=True),
+            dim_feedforward=trial.suggest_int("dim_feedforward", 64, 512, log=True),
+            dropout=trial.suggest_float("dropout", 0.01, 0.5, log=True),
+            activation=trial.suggest_categorical("activation", ["relu", "gelu"]),
+            layer_norm_eps=trial.suggest_float("layer_norm_eps", 1e-8, 1e-6, log=True),
+        )
+    raise ValueError(f"Cannot create a model of type {cls.__name__}")
+
+
+def objective_function(
+        trial: tr.Trial,
+        training_loader: DataLoader,
+        validation_loader: DataLoader,
+        model: Model,
+        device: str
+):
+    model = create_model(trial, model).to(device)
+    _, validation_loss = train(model, training_loader, validation_loader, model.loss_function, device=device)
+    return min(validation_loss)
+
+
+class OptunaTrainer(Trainer):
+    def __init__(self, n_trials: int | None = None):
+        super().__init__()
+        self.n_trials = n_trials if n_trials else 20
+        print(f"Creating Optuna trainer (n_trials = {self.n_trials})")
+
+    def execute(
+        self,
+        model: Model,
+        train_loader: DataLoader,
+        validation_loader: DataLoader,
+        n_epochs: int | None,
+        device: str
+    ) -> nn.Module:
+        study = optuna.create_study(direction="minimize")
+        study.optimize(
+            lambda trial: objective_function(trial, train_loader, validation_loader, model, device),
+            n_trials=self.n_trials
+        )
+
+        best_params = study.best_trial.params
+        # Rebuild the winning architecture by replaying the best parameters
+        # through create_model; passing them straight to the constructor would
+        # skip fixed values such as d_model.
+        best_model = create_model(tr.FixedTrial(best_params), model)
+
+        return best_model
diff --git a/trainers/__init__.py b/src/trainers/__init__.py
similarity index 100%
rename from trainers/__init__.py
rename to src/trainers/__init__.py
diff --git a/trainers/train.py b/src/trainers/train.py
similarity index 61%
rename from trainers/train.py
rename to src/trainers/train.py
index be4aa34..61a6d09 100644
--- a/trainers/train.py
+++ b/src/trainers/train.py
@@ -1,38 +1,60 @@
+from typing import Callable
+
 import torch
-import torch.nn as nn
 from torch.utils.data.dataloader import DataLoader
 from tqdm import tqdm
-from typing import Callable
+
+from ..models import ByteTransformer, Model
+
+
+def _forward(model: Model, x: torch.Tensor, device: str) -> torch.Tensor:
+    if isinstance(model, ByteTransformer):
+        tgt_in = torch.cat([
+            torch.zeros(x.shape[0], 1, device=device, dtype=torch.long),
+            x[:, :-1]
+        ], dim=1)
+        logits = model(x, tgt_in)
+
+        # only consider the last time step of the model where the full context
+        # is available
+        return logits[:, -1, :]
+    return model(x)
 
 
 def train(
-    model: nn.Module,
+    model: Model,
     training_loader: DataLoader,
     validation_loader: DataLoader,
-    loss_fn: Callable[[torch.Tensor, torch.Tensor], torch.Tensor],
-    epochs: int = 100,
+    loss_fn: Callable,
+    epochs: int | None = None,
     learning_rate: float = 1e-3,
     weight_decay: float = 1e-8,
    device="cuda"
 ) -> tuple[list[float], list[float]]:
-    model.to(device)
     optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
-
+
     avg_training_losses = []
     avg_validation_losses = []
 
+    if epochs is None:
+        epochs = 100
+
     for epoch in range(epochs):
         model.train()
         total_loss = []
 
         for x, y in tqdm(training_loader):
-            x = x.long().to(device)  # important for Embedding
-            y = y.long().to(device)  # must be (B,) for CE
+            # size (B, 128)
+            x = x.long().to(device)
+
+            # size (B)
+            y = y.long().to(device)
 
             optimizer.zero_grad()
-            logits = model(x)  # (B, 256)
+            logits = _forward(model, x, device)
+
             loss = loss_fn(logits, y)
             loss.backward()
             optimizer.step()
@@ -49,7 +71,7 @@
             x = x.long().to(device)
             y = y.long().to(device)
 
-            logits = model(x)
+            logits = _forward(model, x, device)
             loss = loss_fn(logits, y)
             losses.append(loss.item())
diff --git a/trainers/trainer.py b/src/trainers/trainer.py
similarity index 67%
rename from trainers/trainer.py
rename to src/trainers/trainer.py
index 8543589..19e6480 100644
--- a/trainers/trainer.py
+++ b/src/trainers/trainer.py
@@ -1,7 +1,5 @@
 from abc import ABC, abstractmethod
-from typing import Callable
 
-import torch
 import torch.nn as nn
 from torch.utils.data import DataLoader
@@ -15,8 +13,7 @@ class Trainer(ABC): model: nn.Module | None, train_loader: DataLoader, validation_loader: DataLoader, - loss_fn: Callable[[torch.Tensor, torch.Tensor], torch.Tensor], - n_epochs: int, + n_epochs: int | None, device: str - ) -> None: - pass \ No newline at end of file + ) -> nn.Module: + pass diff --git a/utils/__init__.py b/src/utils/__init__.py similarity index 100% rename from utils/__init__.py rename to src/utils/__init__.py diff --git a/src/utils/benchmark.py b/src/utils/benchmark.py new file mode 100644 index 0000000..be9acb1 --- /dev/null +++ b/src/utils/benchmark.py @@ -0,0 +1,175 @@ +"""Utilities functions for benchmarking.""" +import json +import string +from concurrent.futures import ThreadPoolExecutor +from logging import getLogger +from os import getpid, path +from pathlib import Path +from random import choices +from subprocess import DEVNULL, PIPE, CalledProcessError, TimeoutExpired, run +from timeit import timeit +from typing import Callable + +from memray import Tracker + +from ..utils.benchmark_dataclasses import BenchmarkItem, BenchmarkResult + +log = getLogger(__name__) + + +def get_commit_hash() -> str: + """ + Get the commit hash of the current git repository. + + If not working in a git repository, return a random string that looks like a commit hash. + """ + try: + return run( + ["git", "rev-parse", "--short", "HEAD"], + check=True, + stdout=PIPE, + stderr=DEVNULL, + text=True, + ).stdout.strip() + except CalledProcessError as e: + log.error( + "Could not determine the commit hash. Are you using a git repository?:\n%s", + e, + ) + log.error("Using a random string as commit hash.") + return "".join(choices(string.hexdigits[:-6], k=40)) + + +def init_stat_file(stat_file: Path, header: str) -> int: + """Initialize a statistics file with a header.""" + # Check if the parent directory exists + stat_file.parent.mkdir(parents=True, exist_ok=True) + + # Check if the file exists + if stat_file.exists(): + # Nothing left to do + return 0 + + # Initialize the file by writing the header to it. + log.debug("Initializing statistics file %s", stat_file) + stat_file.touch() + stat_file.write_text(f"{header}\n", encoding="utf-8") + return 1 + + +def track_time_memory(task: Callable, result: BenchmarkResult, mem_file: Path, mem_json_file: Path): + """Track the time and memory consumption of a task.""" + + def task_with_result(): + result.value = task() + + # Measure memory consumption + with Tracker(file_name=mem_file, native_traces=True, follow_fork=True, memory_interval_ms=1): + try: + # Measure runtime + result.runtime = timeit(task_with_result, number=1, globals=globals()) + except BaseException as e: + log.error("Error while timing the program:\n%s", e, exc_info=True) + return None + + # Convert binary memory file into JSON. 
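+    # `memray stats --json` summarizes the capture file; the peak usage is
+    # read back below from the "metadata" -> "peak_memory" field.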
+    try:
+        run(
+            [
+                "python",
+                "-m",
+                "memray",
+                "stats",
+                "--json",
+                "--num-largest",
+                "1",
+                "--output",
+                mem_json_file,
+                mem_file,
+            ],
+            check=True,
+            timeout=100,
+            stdout=DEVNULL,
+        )
+        # Parse JSON to get peak_memory
+        mem_results = json.loads(mem_json_file.read_text(encoding="utf-8"))
+        result.peak_memory = mem_results["metadata"]["peak_memory"]
+
+    except CalledProcessError as e:
+        log.error(
+            "Something went wrong while processing the memray memory file %s:\n%s",
+            mem_file,
+            e,
+        )
+    except TimeoutExpired as e:
+        log.error(
+            "Timeout expired while processing the memray memory file %s:\n%s",
+            mem_file,
+            e,
+        )
+
+    return result
+
+
+def execute_benchmark(
+    benchmark_item: BenchmarkItem,
+    results_dir: str | Path,
+    timeout: int = 100,
+) -> BenchmarkResult:
+    """Execute a benchmark and track its runtime and peak memory consumption."""
+    mem_file = Path(path.join(results_dir, f"memray-{benchmark_item.task.__name__}.mem"))
+    mem_json_file = Path(path.join(results_dir, f"memray-{benchmark_item.task.__name__}.json"))
+
+    result = BenchmarkResult(benchmark_item)
+
+    try:
+        # Time and track memory usage
+        # Kill after timeout in seconds
+        with ThreadPoolExecutor() as executor:
+            future = executor.submit(
+                lambda: track_time_memory(
+                    lambda: benchmark_item.task(**benchmark_item.arguments), result, mem_file, mem_json_file
+                )
+            )
+            executed_result = future.result(timeout=timeout)
+
+        if executed_result is not None:
+            result = executed_result
+
+        log.info(
+            "PID %d: %s finished [%.6f seconds, %d bytes]",
+            getpid(),
+            benchmark_item.get_method(),
+            result.runtime,
+            result.peak_memory,
+        )
+    except TimeoutError:
+        log.error("Timeout expired while running the benchmark_suite, cleaning up now.")
+
+        log.info(
+            "PID %d: %s failed after timeout (%d seconds)",
+            getpid(),
+            benchmark_item.get_method(),
+            timeout,
+        )
+    finally:
+        # Clean up memory dump file to save disk space.
+        mem_file.unlink(missing_ok=True)
+
+    return result
+
+
+if __name__ == "__main__":
+    import hydra
+
+    # Dummy example, read the contents of the dataset
+    def _read_contents(filename):
+        with open(filename, encoding="utf-8") as f:
+            log.info("Dataset content: %s", f.read())
+
+    def _read_contents_wrapper(cfg):
+        return _read_contents(cfg.dataset.path)
+
+    hydra_wrapped = hydra.main(config_path="../../config", config_name="config", version_base="1.2")(
+        _read_contents_wrapper
+    )()
diff --git a/src/utils/benchmark_dataclasses.py b/src/utils/benchmark_dataclasses.py
new file mode 100644
index 0000000..5dadee4
--- /dev/null
+++ b/src/utils/benchmark_dataclasses.py
@@ -0,0 +1,79 @@
+"""
+Benchmark data classes.
+
+This module contains the BenchmarkResult class which is used to store and print the results of a
+benchmark_suite.
+"""
+
+from dataclasses import dataclass
+from typing import Any, Callable
+
+
+@dataclass(init=True)
+class BenchmarkItem:
+    """A class used to represent a benchmark_suite (iteration)."""
+
+    task: Callable
+    arguments: dict
+
+    def __str__(self) -> str:
+        """String representation of the BenchmarkItem object."""
+        return self.get_in_data_format()
+
+    def get_method(self) -> str:
+        """
+        Format the method as if it were a function call.
+        """
+        method_name = self.task.__name__
+        arguments = ", ".join(
+            f'{key}={str(value)[:15]}'
+            for key, value in self.arguments.items()
+        )
+        return f"{method_name}({arguments})"
+
+    def get_in_data_format(self) -> str:
+        """
+        Format the benchmark_suite item to be printed to a .dat file. 
+ """ + # Flatten out arguments + values = list(self.__dict__.values()) + values[1:2] = values[1].values() + + return " ".join(map(str, values)) + + def get_header(self) -> str: + """ + Returns the header which is just the names of the fields separated by spaces. + """ + return " ".join(self.__dict__.keys()) + + +@dataclass(init=True) +class BenchmarkResult: + """A class used to represent the result of a benchmark_suite.""" + + benchmark_item: BenchmarkItem + runtime: float = 0 + peak_memory: int = 0 + value: Any = None + + def __str__(self) -> str: + """String representation of the BenchmarkResult object.""" + return self.get_in_data_format() + + def get_in_data_format(self) -> str: + """ + Format the benchmark_suite result to be printed to a .dat file. + """ + return " ".join(map(str, self.__dict__.values())) + + def get_header(self) -> str: + """ + Returns the header which is just the names of the fields separated by spaces. + """ + # Get header of the BenchmarkItem + keys = list(self.__annotations__.keys()) + keys[0:1] = self.benchmark_item.__annotations__.keys() + keys[1:2] = self.benchmark_item.arguments.keys() + + return " ".join(keys) diff --git a/utils/utils.py b/src/utils/utils.py similarity index 57% rename from utils/utils.py rename to src/utils/utils.py index df27ee5..4929f20 100644 --- a/utils/utils.py +++ b/src/utils/utils.py @@ -1,6 +1,8 @@ +from os import path + +import matplotlib.pyplot as plt import torch from torch.utils.data import TensorDataset -import matplotlib.pyplot as plt def make_context_pairs(data: bytes, context_length: int) -> TensorDataset: @@ -10,11 +12,13 @@ def make_context_pairs(data: bytes, context_length: int) -> TensorDataset: y = data[context_length:] return TensorDataset(x, y) + def print_distribution(from_to: tuple[int, int], probabilities: list[float]): plt.hist(range(from_to[0], from_to[1]), weights=probabilities) plt.show() -def print_losses(train_losses: list[float], validation_losses: list[float], show=False): + +def print_losses(train_losses: list[float], validation_losses: list[float], filename: str | None = None, show=False): plt.plot(train_losses, label="Training loss") plt.plot(validation_losses, label="Validation loss") plt.xlabel("Epoch") @@ -23,7 +27,26 @@ def print_losses(train_losses: list[float], validation_losses: list[float], show if show: plt.show() - plt.savefig("losses.png") + + if filename is None: + filename = path.join("results", "losses.png") + + print(f"Saving losses to {filename}...") + plt.savefig(filename) + + +def determine_device(): + # NVIDIA GPUs (most HPC clusters) + if torch.cuda.is_available(): + return torch.device("cuda") + # Apple Silicon (macOS) + elif getattr(torch.backends, "mps", None) and torch.backends.mps.is_available(): + return torch.device("mps") + # Intel GPUs (oneAPI) + elif hasattr(torch, "xpu") and torch.xpu.is_available(): + return torch.device("xpu") + else: + return torch.device("cpu") def load_data(path: str) -> bytes: diff --git a/trainers/OptunaTrainer.py b/trainers/OptunaTrainer.py deleted file mode 100644 index 28253a5..0000000 --- a/trainers/OptunaTrainer.py +++ /dev/null @@ -1,57 +0,0 @@ -from typing import Callable - -import optuna -import optuna.trial as tr -import torch -from torch import nn as nn -from torch.utils.data import DataLoader - -from .trainer import Trainer -from ..models.cnn import CNNPredictor -from .train import train - - -def create_model(trial: tr.Trial, vocab_size: int = 256): - hidden_dim = trial.suggest_int("hidden_dim", 64, 512, log=True) - embedding_dim = 
trial.suggest_int("embed_dim", 64, 512, log=True) - - return CNNPredictor( - vocab_size=vocab_size, - hidden_dim=hidden_dim, - embed_dim=embedding_dim, - ) - - -def objective_function( - trial: tr.Trial, - training_loader: DataLoader, - validation_loader: DataLoader, - loss_fn: Callable[[torch.Tensor, torch.Tensor], torch.Tensor], - device: str -): - model = create_model(trial).to(device) - _, validation_loss = train(model, training_loader, validation_loader, loss_fn) - return min(validation_loss) - - -class OptunaTrainer(Trainer): - def execute( - self, - model: nn.Module | None, - train_loader: DataLoader, - validation_loader: DataLoader, - loss_fn: Callable[[torch.Tensor, torch.Tensor], torch.Tensor], - n_epochs: int, - device: str - ) -> None: - study = optuna.create_study(study_name="CNN network", direction="minimize") - study.optimize( - lambda trial: objective_function(trial, train_loader, validation_loader, loss_fn, device), - n_trials=20 - ) - - best_params = study.best_trial.params - best_model = CNNPredictor( - **best_params - ) - torch.save(best_model, f"saved_models/{model.__class__.__name__}.pt") diff --git a/transformer-xl/LICENSE b/transformer-xl/LICENSE deleted file mode 100644 index 261eeb9..0000000 --- a/transformer-xl/LICENSE +++ /dev/null @@ -1,201 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. 
- - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/transformer-xl/README.md b/transformer-xl/README.md deleted file mode 100644 index 9f12978..0000000 --- a/transformer-xl/README.md +++ /dev/null @@ -1,34 +0,0 @@ -# Transformer-XL: Attentive Language Models Beyond a Fixed-Length Context - -This repository contains the code in both **PyTorch** and **TensorFlow** for our paper ->[Transformer-XL: Attentive Language Models Beyond a Fixed-Length Context](http://arxiv.org/abs/1901.02860) - ->Zihang Dai\*, Zhilin Yang\*, Yiming Yang, Jaime Carbonell, Quoc V. Le, Ruslan Salakhutdinov (*: equal contribution) - ->Preprint 2018 - -## TensorFlow - -- The source code is in the `tf/` folder, supporting (1) single-node multi-gpu training, and (2) multi-host TPU training. -- Besides the source code, we also provide pretrained "TensorFlow" models with state-of-the-art (SoTA) performances reported in the paper. -- Please refer to `tf/README.md` for details. 
-
-## PyTorch
-
-- The source code is in the `pytorch/` folder, supporting single-node multi-gpu training via the module `nn.DataParallel`.
-- Please refer to `pytorch/README.md` for details.
-
-## Results
-
-Transformer-XL achieves new state-of-the-art results on multiple language modeling benchmarks. Transformer-XL is also the first to break through the 1.0 barrier on char-level language modeling. Below is a summary.
-
-Method | enwik8 | text8 | One Billion Word | WT-103 | PTB (w/o finetuning)
-- | -- | -- | -- | -- | --
-Previous Best | 1.06 | 1.13 | 23.7 | 20.5 | 55.5
-Transformer-XL | **0.99** | **1.08** | **21.8** | **18.3** | **54.5**
-
-
-
-## Acknowledgement
-
-A large portion of the `getdata.sh` script comes from the [awd-lstm](https://github.com/salesforce/awd-lstm-lm/) repo. Happy Language Modeling :)
diff --git a/transformer-xl/getdata.sh b/transformer-xl/getdata.sh
deleted file mode 100755
index 7804757..0000000
--- a/transformer-xl/getdata.sh
+++ /dev/null
@@ -1,90 +0,0 @@
-echo "=== Acquiring datasets ==="
-echo "---"
-
-mkdir -p data
-cd data
-
-if [[ ! -d 'wikitext-2' ]]; then
-    echo "- Downloading WikiText-2 (WT2)"
-    wget --quiet --continue https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-v1.zip
-    unzip -q wikitext-2-v1.zip
-    cd wikitext-2
-    mv wiki.train.tokens train.txt
-    mv wiki.valid.tokens valid.txt
-    mv wiki.test.tokens test.txt
-    cd ..
-fi
-
-echo "- Downloading WikiText-103 (WT103)"
-if [[ ! -d 'wikitext-103' ]]; then
-    wget --continue https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-103-v1.zip
-    unzip -q wikitext-103-v1.zip
-    cd wikitext-103
-    mv wiki.train.tokens train.txt
-    mv wiki.valid.tokens valid.txt
-    mv wiki.test.tokens test.txt
-    cd ..
-fi
-
-echo "- Downloading enwik8 (Character)"
-if [[ ! -d 'enwik8' ]]; then
-    mkdir -p enwik8
-    cd enwik8
-    wget --continue http://mattmahoney.net/dc/enwik8.zip
-    wget https://raw.githubusercontent.com/salesforce/awd-lstm-lm/master/data/enwik8/prep_enwik8.py
-    python3 prep_enwik8.py
-    cd ..
-fi
-
-echo "- Downloading text8 (Character)"
-if [[ ! -d 'text8' ]]; then
-    mkdir -p text8
-    cd text8
-    wget --continue http://mattmahoney.net/dc/text8.zip
-    python ../../prep_text8.py
-    cd ..
-fi
-
-echo "- Downloading Penn Treebank (PTB)"
-if [[ ! -d 'penn' ]]; then
-    wget --quiet --continue http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz
-    tar -xzf simple-examples.tgz
-
-    mkdir -p penn
-    cd penn
-    mv ../simple-examples/data/ptb.train.txt train.txt
-    mv ../simple-examples/data/ptb.test.txt test.txt
-    mv ../simple-examples/data/ptb.valid.txt valid.txt
-    cd ..
-
-    echo "- Downloading Penn Treebank (Character)"
-    mkdir -p pennchar
-    cd pennchar
-    mv ../simple-examples/data/ptb.char.train.txt train.txt
-    mv ../simple-examples/data/ptb.char.test.txt test.txt
-    mv ../simple-examples/data/ptb.char.valid.txt valid.txt
-    cd ..
-
-    rm -rf simple-examples/
-fi
-
-echo "- Downloading 1B words"
-
-if [[ ! -d 'one-billion-words' ]]; then
-    mkdir -p one-billion-words
-    cd one-billion-words
-
-    wget --no-proxy http://www.statmt.org/lm-benchmark/1-billion-word-language-modeling-benchmark-r13output.tar.gz
-    tar xzvf 1-billion-word-language-modeling-benchmark-r13output.tar.gz
-
-    path="1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/"
-    cat ${path}/news.en.heldout-00000-of-00050 > valid.txt
-    cat ${path}/news.en.heldout-00000-of-00050 > test.txt
-
-    wget https://github.com/rafaljozefowicz/lm/raw/master/1b_word_vocab.txt
-
-    cd ..
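-    # Note: valid.txt and test.txt above are both cut from the same heldout
-    # shard (news.en.heldout-00000-of-00050).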
-fi
-
-echo "---"
-echo "Happy language modeling :)"
diff --git a/transformer-xl/prep_text8.py b/transformer-xl/prep_text8.py
deleted file mode 100644
index 65b1ce7..0000000
--- a/transformer-xl/prep_text8.py
+++ /dev/null
@@ -1,32 +0,0 @@
-#!/usr/bin/env python
-# coding=utf-8
-
-import os
-import sys
-import zipfile
-
-from io import open
-
-if os.path.exists('train.txt'):
-    print('Tokenized text8 already exists - skipping processing')
-    sys.exit()
-
-# extractall() returns None, so extract first, then read the unzipped file
-zipfile.ZipFile('text8.zip').extractall()
-data = open('text8', 'r', encoding='utf-8').read()
-
-print('Length of text8: {}'.format(len(data)))
-
-num_test_chars = 5000000
-
-train_data = data[: -2 * num_test_chars]
-valid_data = data[-2 * num_test_chars: -num_test_chars]
-test_data = data[-num_test_chars:]
-
-for fn, part in [('train.txt', train_data), ('valid.txt', valid_data), ('test.txt', test_data)]:
-    print('{} will have {} bytes'.format(fn, len(part)))
-    print('- Tokenizing...')
-    # Change space ' ' to underscore '_'
-    part_str = ' '.join(['_' if c == ' ' else c for c in part.strip()])
-    print('- Writing...')
-    open(fn, 'w').write(part_str)
-    open(fn + '.raw', 'w', encoding='utf-8').write(part)
diff --git a/transformer-xl/pytorch/.DS_Store b/transformer-xl/pytorch/.DS_Store
deleted file mode 100644
index 5008ddf..0000000
Binary files a/transformer-xl/pytorch/.DS_Store and /dev/null differ
diff --git a/transformer-xl/pytorch/README.md b/transformer-xl/pytorch/README.md
deleted file mode 100644
index 965422b..0000000
--- a/transformer-xl/pytorch/README.md
+++ /dev/null
@@ -1,62 +0,0 @@
-## Introduction
-
-This directory contains our PyTorch implementation of Transformer-XL. Note that our state-of-the-art results reported in the paper were obtained by training the model on a large-scale TPU cluster, and our PyTorch codebase currently does not support distributed training. Here we provide two sets of hyperparameters and scripts:
-- `*large.sh` are for the SoTA setting with large models which might not be directly runnable on a local GPU machine.
-- `*base.sh` are for the base models which can be run on a few GPUs.
-
-The PyTorch implementation produces similar results to the TF codebase under the same settings in our preliminary experiments.
-
-
-## Prerequisite
-
-- PyTorch 0.4: `conda install pytorch torchvision -c pytorch`
-
-
-## Data Preparation
-
-`bash getdata.sh`
-
-## Training and Evaluation
-
-#### Replicate the "bpc = 1.06" result on `enwik8` with a 12-layer Transformer-XL
-
-- Make sure the machine has **4 GPUs**, each with **at least 11GB of memory**
-
-- Training
-
-  `bash run_enwik8_base.sh train --work_dir PATH_TO_WORK_DIR`
-
-- Evaluation
-
-  `bash run_enwik8_base.sh eval --work_dir PATH_TO_WORK_DIR`
-
-
-
-#### Replicate the "PPL = 24.03" result on `wikitext-103` with Transformer-XL
-
-- Make sure the machine has **4 GPUs**, each with **at least 11GB of memory**
-
-- Training
-
-  `bash run_wt103_base.sh train --work_dir PATH_TO_WORK_DIR`
-
-- Evaluation
-
-  `bash run_wt103_base.sh eval --work_dir PATH_TO_WORK_DIR`
-
-
-
-#### Other options:
-
-- `--batch_chunk`: this option allows one to trade speed for memory. For `batch_chunk > 1`, the program will split each training batch into `batch_chunk` sub-batches and perform forward and backward on each sub-batch sequentially, with the gradient accumulated and divided by `batch_chunk`. Hence, the memory usage will be proportionally lower, while the computation time will be correspondingly higher (see the sketch below).
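A minimal, self-contained sketch of the gradient accumulation described above; the `nn.Linear` model, loss, and tensor shapes are stand-ins for illustration, not the actual loop in `train.py`:

```python
import torch
import torch.nn as nn

model = nn.Linear(16, 16)              # stand-in for the language model
criterion = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

batch_chunk = 4
data = torch.randn(70, 8, 16)          # dummy [tgt_len x bsz x d_model] batch
target = torch.randn(70, 8, 16)

optimizer.zero_grad()
for data_i, target_i in zip(torch.chunk(data, batch_chunk, 1),
                            torch.chunk(target, batch_chunk, 1)):
    # forward/backward one sub-batch; dividing by batch_chunk makes the
    # accumulated gradient match the full-batch gradient
    loss = criterion(model(data_i), target_i) / batch_chunk
    loss.backward()
optimizer.step()
```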
-- `--div_val`: when using adaptive softmax and embedding, the embedding dimension is divided by `div_val` from bin $i$ to bin $i+1$. This saves both GPU memory and the parameter budget. -- `--fp16` and `--dynamic-loss-scale`: Run in pseudo-fp16 mode (fp16 storage fp32 math) with dynamic loss scaling. - - Note: to explore the `--fp16` option, please make sure the `apex` package is installed (https://github.com/NVIDIA/apex/). -- To see performance without the recurrence mechanism, simply use `mem_len=0` in all your scripts. -- To see performance of a standard Transformer without relative positional encodings or recurrence mechanisms, use `attn_type=2` and `mem_len=0`. - - -#### Other datasets: - -- `Text8` character-level language modeling: check out `run_text8_base.sh` -- `lm1b` word-level language modeling: check out `run_lm1b_base.sh` diff --git a/transformer-xl/pytorch/data_utils.py b/transformer-xl/pytorch/data_utils.py deleted file mode 100644 index df762a7..0000000 --- a/transformer-xl/pytorch/data_utils.py +++ /dev/null @@ -1,273 +0,0 @@ -import os, sys -import glob - -from collections import Counter, OrderedDict -import numpy as np -import torch - -from utils.vocabulary import Vocab - -class LMOrderedIterator(object): - def __init__(self, data, bsz, bptt, device='cpu', ext_len=None): - """ - data -- LongTensor -- the LongTensor is strictly ordered - """ - self.bsz = bsz - self.bptt = bptt - self.ext_len = ext_len if ext_len is not None else 0 - - self.device = device - - # Work out how cleanly we can divide the dataset into bsz parts. - self.n_step = data.size(0) // bsz - - # Trim off any extra elements that wouldn't cleanly fit (remainders). - data = data.narrow(0, 0, self.n_step * bsz) - - # Evenly divide the data across the bsz batches. - self.data = data.view(bsz, -1).t().contiguous().to(device) - - # Number of mini-batches - self.n_batch = (self.n_step + self.bptt - 1) // self.bptt - - def get_batch(self, i, bptt=None): - if bptt is None: bptt = self.bptt - seq_len = min(bptt, self.data.size(0) - 1 - i) - - end_idx = i + seq_len - beg_idx = max(0, i - self.ext_len) - - data = self.data[beg_idx:end_idx] - target = self.data[i+1:i+1+seq_len] - - return data, target, seq_len - - def get_fixlen_iter(self, start=0): - for i in range(start, self.data.size(0) - 1, self.bptt): - yield self.get_batch(i) - - def get_varlen_iter(self, start=0, std=5, min_len=5, max_deviation=3): - max_len = self.bptt + max_deviation * std - i = start - while True: - bptt = self.bptt if np.random.random() < 0.95 else self.bptt / 2. 
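-            # 95% of segments keep the full bptt and 5% use half of it; the
-            # draw below then adds Gaussian jitter (std=5 by default) and
-            # clamps the result to [min_len, max_len].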
- bptt = min(max_len, max(min_len, int(np.random.normal(bptt, std)))) - data, target, seq_len = self.get_batch(i, bptt) - i += seq_len - yield data, target, seq_len - if i >= self.data.size(0) - 2: - break - - def __iter__(self): - return self.get_fixlen_iter() - - -class LMShuffledIterator(object): - def __init__(self, data, bsz, bptt, device='cpu', ext_len=None, shuffle=False): - """ - data -- list[LongTensor] -- there is no order among the LongTensors - """ - self.data = data - - self.bsz = bsz - self.bptt = bptt - self.ext_len = ext_len if ext_len is not None else 0 - - self.device = device - self.shuffle = shuffle - - def get_sent_stream(self): - # index iterator - epoch_indices = np.random.permutation(len(self.data)) if self.shuffle \ - else np.array(range(len(self.data))) - - # sentence iterator - for idx in epoch_indices: - yield self.data[idx] - - def stream_iterator(self, sent_stream): - # streams for each data in the batch - streams = [None] * self.bsz - - data = torch.LongTensor(self.bptt, self.bsz) - target = torch.LongTensor(self.bptt, self.bsz) - - n_retain = 0 - - while True: - # data : [n_retain+bptt x bsz] - # target : [bptt x bsz] - data[n_retain:].fill_(-1) - target.fill_(-1) - - valid_batch = True - - for i in range(self.bsz): - n_filled = 0 - try: - while n_filled < self.bptt: - if streams[i] is None or len(streams[i]) <= 1: - streams[i] = next(sent_stream) - # number of new tokens to fill in - n_new = min(len(streams[i]) - 1, self.bptt - n_filled) - # first n_retain tokens are retained from last batch - data[n_retain+n_filled:n_retain+n_filled+n_new, i] = \ - streams[i][:n_new] - target[n_filled:n_filled+n_new, i] = \ - streams[i][1:n_new+1] - streams[i] = streams[i][n_new:] - n_filled += n_new - except StopIteration: - valid_batch = False - break - - if not valid_batch: - return - - data = data.to(self.device) - target = target.to(self.device) - - yield data, target, self.bptt - - n_retain = min(data.size(0), self.ext_len) - if n_retain > 0: - data[:n_retain] = data[-n_retain:] - data.resize_(n_retain + self.bptt, data.size(1)) - - def __iter__(self): - # sent_stream is an iterator - sent_stream = self.get_sent_stream() - - for batch in self.stream_iterator(sent_stream): - yield batch - - -class LMMultiFileIterator(LMShuffledIterator): - def __init__(self, paths, vocab, bsz, bptt, device='cpu', ext_len=None, - shuffle=False): - - self.paths = paths - self.vocab = vocab - - self.bsz = bsz - self.bptt = bptt - self.ext_len = ext_len if ext_len is not None else 0 - - self.device = device - self.shuffle = shuffle - - def get_sent_stream(self, path): - sents = self.vocab.encode_file(path, add_double_eos=True) - if self.shuffle: - np.random.shuffle(sents) - sent_stream = iter(sents) - - return sent_stream - - def __iter__(self): - if self.shuffle: - np.random.shuffle(self.paths) - - for path in self.paths: - # sent_stream is an iterator - sent_stream = self.get_sent_stream(path) - for batch in self.stream_iterator(sent_stream): - yield batch - - -class Corpus(object): - def __init__(self, path, dataset, *args, **kwargs): - self.dataset = dataset - self.vocab = Vocab(*args, **kwargs) - - if self.dataset in ['ptb', 'wt2', 'enwik8', 'text8']: - self.vocab.count_file(os.path.join(path, 'train.txt')) - self.vocab.count_file(os.path.join(path, 'valid.txt')) - self.vocab.count_file(os.path.join(path, 'test.txt')) - elif self.dataset == 'wt103': - self.vocab.count_file(os.path.join(path, 'train.txt')) - elif self.dataset == 'lm1b': - train_path_pattern = os.path.join( - path, 
'1-billion-word-language-modeling-benchmark-r13output',
-                'training-monolingual.tokenized.shuffled', 'news.en-*')
-            train_paths = glob.glob(train_path_pattern)
-            # the vocab will load from file when build_vocab() is called
-
-        self.vocab.build_vocab()
-
-        if self.dataset in ['ptb', 'wt2', 'wt103']:
-            self.train = self.vocab.encode_file(
-                os.path.join(path, 'train.txt'), ordered=True)
-            self.valid = self.vocab.encode_file(
-                os.path.join(path, 'valid.txt'), ordered=True)
-            self.test = self.vocab.encode_file(
-                os.path.join(path, 'test.txt'), ordered=True)
-        elif self.dataset in ['enwik8', 'text8']:
-            self.train = self.vocab.encode_file(
-                os.path.join(path, 'train.txt'), ordered=True, add_eos=False)
-            self.valid = self.vocab.encode_file(
-                os.path.join(path, 'valid.txt'), ordered=True, add_eos=False)
-            self.test = self.vocab.encode_file(
-                os.path.join(path, 'test.txt'), ordered=True, add_eos=False)
-        elif self.dataset == 'lm1b':
-            self.train = train_paths
-            self.valid = self.vocab.encode_file(
-                os.path.join(path, 'valid.txt'), ordered=False, add_double_eos=True)
-            self.test = self.vocab.encode_file(
-                os.path.join(path, 'test.txt'), ordered=False, add_double_eos=True)
-
-    def get_iterator(self, split, *args, **kwargs):
-        if split == 'train':
-            if self.dataset in ['ptb', 'wt2', 'wt103', 'enwik8', 'text8']:
-                data_iter = LMOrderedIterator(self.train, *args, **kwargs)
-            elif self.dataset == 'lm1b':
-                kwargs['shuffle'] = True
-                data_iter = LMMultiFileIterator(self.train, self.vocab, *args, **kwargs)
-        elif split in ['valid', 'test']:
-            data = self.valid if split == 'valid' else self.test
-            if self.dataset in ['ptb', 'wt2', 'wt103', 'enwik8', 'text8']:
-                data_iter = LMOrderedIterator(data, *args, **kwargs)
-            elif self.dataset == 'lm1b':
-                data_iter = LMShuffledIterator(data, *args, **kwargs)
-
-        return data_iter
-
-
-def get_lm_corpus(datadir, dataset):
-    fn = os.path.join(datadir, 'cache.pt')
-    if os.path.exists(fn):
-        print('Loading cached dataset...')
-        corpus = torch.load(fn)
-    else:
-        print('Producing dataset {}...'.format(dataset))
-        kwargs = {}
-        if dataset in ['wt103', 'wt2']:
-            kwargs['special'] = ['<eos>']
-            kwargs['lower_case'] = False
-        elif dataset == 'ptb':
-            kwargs['special'] = ['<eos>']
-            kwargs['lower_case'] = True
-        elif dataset == 'lm1b':
-            kwargs['special'] = []
-            kwargs['lower_case'] = False
-            kwargs['vocab_file'] = os.path.join(datadir, '1b_word_vocab.txt')
-        elif dataset in ['enwik8', 'text8']:
-            pass
-
-        corpus = Corpus(datadir, dataset, **kwargs)
-        torch.save(corpus, fn)
-
-    return corpus
-
-if __name__ == '__main__':
-    import argparse
-    parser = argparse.ArgumentParser(description='unit test')
-    parser.add_argument('--datadir', type=str, default='../data/text8',
-                        help='location of the data corpus')
-    parser.add_argument('--dataset', type=str, default='text8',
-                        choices=['ptb', 'wt2', 'wt103', 'lm1b', 'enwik8', 'text8'],
-                        help='dataset name')
-    args = parser.parse_args()
-
-    corpus = get_lm_corpus(args.datadir, args.dataset)
-    print('Vocab size : {}'.format(len(corpus.vocab.idx2sym)))
diff --git a/transformer-xl/pytorch/eval.py b/transformer-xl/pytorch/eval.py
deleted file mode 100644
index eff3618..0000000
--- a/transformer-xl/pytorch/eval.py
+++ /dev/null
@@ -1,122 +0,0 @@
-# coding: utf-8
-import argparse
-import time
-import math
-import os, sys
-
-import torch
-
-from data_utils import get_lm_corpus
-from mem_transformer import MemTransformerLM
-from utils.exp_utils import get_logger
-
-parser = argparse.ArgumentParser(description='PyTorch Transformer Language Model')
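-# The evaluation-time context settings (tgt_len/ext_len/mem_len/clamp_len) may
-# differ from the ones used during training; they are applied further down via
-# model.reset_length() and the clamp_len/same_length attributes.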
-parser.add_argument('--data', type=str, default='../data/wikitext-103', - help='location of the data corpus') -parser.add_argument('--dataset', type=str, default='wt103', - choices=['wt103', 'lm1b', 'enwik8', 'text8'], - help='dataset name') -parser.add_argument('--split', type=str, default='all', - choices=['all', 'valid', 'test'], - help='which split to evaluate') -parser.add_argument('--batch_size', type=int, default=10, - help='batch size') -parser.add_argument('--tgt_len', type=int, default=5, - help='number of tokens to predict') -parser.add_argument('--ext_len', type=int, default=0, - help='length of the extended context') -parser.add_argument('--mem_len', type=int, default=0, - help='length of the retained previous heads') -parser.add_argument('--clamp_len', type=int, default=-1, - help='max positional embedding index') -parser.add_argument('--cuda', action='store_true', - help='use CUDA') -parser.add_argument('--work_dir', type=str, required=True, - help='path to the work_dir') -parser.add_argument('--no_log', action='store_true', - help='do not log the eval result') -parser.add_argument('--same_length', action='store_true', - help='set same length attention with masking') -args = parser.parse_args() -assert args.ext_len >= 0, 'extended context length must be non-negative' - -device = torch.device("cuda" if args.cuda else "cpu") - -# Get logger -logging = get_logger(os.path.join(args.work_dir, 'log.txt'), - log_=not args.no_log) - -# Load dataset -corpus = get_lm_corpus(args.data, args.dataset) -ntokens = len(corpus.vocab) - -va_iter = corpus.get_iterator('valid', args.batch_size, args.tgt_len, - device=device, ext_len=args.ext_len) -te_iter = corpus.get_iterator('test', args.batch_size, args.tgt_len, - device=device, ext_len=args.ext_len) - -# Load the best saved model. -with open(os.path.join(args.work_dir, 'model.pt'), 'rb') as f: - model = torch.load(f) -model.backward_compatible() -model = model.to(device) - -logging('Evaluating with bsz {} tgt_len {} ext_len {} mem_len {} clamp_len {}'.format( - args.batch_size, args.tgt_len, args.ext_len, args.mem_len, args.clamp_len)) - -model.reset_length(args.tgt_len, args.ext_len, args.mem_len) -if args.clamp_len > 0: - model.clamp_len = args.clamp_len -if args.same_length: - model.same_length = True - -############################################################################### -# Evaluation code -############################################################################### -def evaluate(eval_iter): - # Turn on evaluation mode which disables dropout. - model.eval() - total_len, total_loss = 0, 0. - start_time = time.time() - with torch.no_grad(): - mems = tuple() - for idx, (data, target, seq_len) in enumerate(eval_iter): - ret = model(data, target, *mems) - loss, mems = ret[0], ret[1:] - loss = loss.mean() - total_loss += seq_len * loss.item() - total_len += seq_len - total_time = time.time() - start_time - logging('Time : {:.2f}s, {:.2f}ms/segment'.format( - total_time, 1000 * total_time / (idx+1))) - return total_loss / total_len - -# Run on test data. 
-if args.split == 'all': - test_loss = evaluate(te_iter) - valid_loss = evaluate(va_iter) -elif args.split == 'valid': - valid_loss = evaluate(va_iter) - test_loss = None -elif args.split == 'test': - test_loss = evaluate(te_iter) - valid_loss = None - -def format_log(loss, split): - if args.dataset in ['enwik8', 'text8']: - log_str = '| {0} loss {1:5.2f} | {0} bpc {2:9.5f} '.format( - split, loss, loss / math.log(2)) - else: - log_str = '| {0} loss {1:5.2f} | {0} ppl {2:9.3f} '.format( - split, loss, math.exp(loss)) - return log_str - -log_str = '' -if valid_loss is not None: - log_str += format_log(valid_loss, 'valid') -if test_loss is not None: - log_str += format_log(test_loss, 'test') - -logging('=' * 100) -logging(log_str) -logging('=' * 100) diff --git a/transformer-xl/pytorch/mem_transformer.py b/transformer-xl/pytorch/mem_transformer.py deleted file mode 100644 index ed02ee9..0000000 --- a/transformer-xl/pytorch/mem_transformer.py +++ /dev/null @@ -1,812 +0,0 @@ -import sys -import math -import functools - -import numpy as np - -import torch -import torch.nn as nn -import torch.nn.functional as F - -sys.path.append('utils') -from proj_adaptive_softmax import ProjectedAdaptiveLogSoftmax -from log_uniform_sampler import LogUniformSampler, sample_logits - -class PositionalEmbedding(nn.Module): - def __init__(self, demb): - super(PositionalEmbedding, self).__init__() - - self.demb = demb - - inv_freq = 1 / (10000 ** (torch.arange(0.0, demb, 2.0) / demb)) - self.register_buffer('inv_freq', inv_freq) - - def forward(self, pos_seq, bsz=None): - sinusoid_inp = torch.ger(pos_seq, self.inv_freq) - pos_emb = torch.cat([sinusoid_inp.sin(), sinusoid_inp.cos()], dim=-1) - - if bsz is not None: - return pos_emb[:,None,:].expand(-1, bsz, -1) - else: - return pos_emb[:,None,:] - - -class PositionwiseFF(nn.Module): - def __init__(self, d_model, d_inner, dropout, pre_lnorm=False): - super(PositionwiseFF, self).__init__() - - self.d_model = d_model - self.d_inner = d_inner - self.dropout = dropout - - self.CoreNet = nn.Sequential( - nn.Linear(d_model, d_inner), nn.ReLU(inplace=True), - nn.Dropout(dropout), - nn.Linear(d_inner, d_model), - nn.Dropout(dropout), - ) - - self.layer_norm = nn.LayerNorm(d_model) - - self.pre_lnorm = pre_lnorm - - def forward(self, inp): - if self.pre_lnorm: - ##### layer normalization + positionwise feed-forward - core_out = self.CoreNet(self.layer_norm(inp)) - - ##### residual connection - output = core_out + inp - else: - ##### positionwise feed-forward - core_out = self.CoreNet(inp) - - ##### residual connection + layer normalization - output = self.layer_norm(inp + core_out) - - return output - -class MultiHeadAttn(nn.Module): - def __init__(self, n_head, d_model, d_head, dropout, dropatt=0, - pre_lnorm=False): - super(MultiHeadAttn, self).__init__() - - self.n_head = n_head - self.d_model = d_model - self.d_head = d_head - self.dropout = dropout - - self.q_net = nn.Linear(d_model, n_head * d_head, bias=False) - self.kv_net = nn.Linear(d_model, 2 * n_head * d_head, bias=False) - - self.drop = nn.Dropout(dropout) - self.dropatt = nn.Dropout(dropatt) - self.o_net = nn.Linear(n_head * d_head, d_model, bias=False) - - self.layer_norm = nn.LayerNorm(d_model) - - self.scale = 1 / (d_head ** 0.5) - - self.pre_lnorm = pre_lnorm - - def forward(self, h, attn_mask=None, mems=None): - ##### multihead attention - # [hlen x bsz x n_head x d_head] - - if mems is not None: - c = torch.cat([mems, h], 0) - else: - c = h - - if self.pre_lnorm: - ##### layer normalization - c = 
self.layer_norm(c)
-
-        head_q = self.q_net(h)
-        head_k, head_v = torch.chunk(self.kv_net(c), 2, -1)
-
-        head_q = head_q.view(h.size(0), h.size(1), self.n_head, self.d_head)
-        head_k = head_k.view(c.size(0), c.size(1), self.n_head, self.d_head)
-        head_v = head_v.view(c.size(0), c.size(1), self.n_head, self.d_head)
-
-        # [qlen x klen x bsz x n_head]
-        attn_score = torch.einsum('ibnd,jbnd->ijbn', (head_q, head_k))
-        attn_score.mul_(self.scale)
-        if attn_mask is not None and attn_mask.any().item():
-            if attn_mask.dim() == 2:
-                attn_score.masked_fill_(attn_mask[None,:,:,None], -float('inf'))
-            elif attn_mask.dim() == 3:
-                attn_score.masked_fill_(attn_mask[:,:,:,None], -float('inf'))
-
-        # [qlen x klen x bsz x n_head]
-        attn_prob = F.softmax(attn_score, dim=1)
-        attn_prob = self.dropatt(attn_prob)
-
-        # [qlen x klen x bsz x n_head] + [klen x bsz x n_head x d_head] -> [qlen x bsz x n_head x d_head]
-        attn_vec = torch.einsum('ijbn,jbnd->ibnd', (attn_prob, head_v))
-        attn_vec = attn_vec.contiguous().view(
-            attn_vec.size(0), attn_vec.size(1), self.n_head * self.d_head)
-
-        ##### linear projection
-        attn_out = self.o_net(attn_vec)
-        attn_out = self.drop(attn_out)
-
-        if self.pre_lnorm:
-            ##### residual connection
-            output = h + attn_out
-        else:
-            ##### residual connection + layer normalization
-            output = self.layer_norm(h + attn_out)
-
-        return output
-
-class RelMultiHeadAttn(nn.Module):
-    def __init__(self, n_head, d_model, d_head, dropout, dropatt=0,
-                 tgt_len=None, ext_len=None, mem_len=None, pre_lnorm=False):
-        super(RelMultiHeadAttn, self).__init__()
-
-        self.n_head = n_head
-        self.d_model = d_model
-        self.d_head = d_head
-        self.dropout = dropout
-
-        self.qkv_net = nn.Linear(d_model, 3 * n_head * d_head, bias=False)
-
-        self.drop = nn.Dropout(dropout)
-        self.dropatt = nn.Dropout(dropatt)
-        self.o_net = nn.Linear(n_head * d_head, d_model, bias=False)
-
-        self.layer_norm = nn.LayerNorm(d_model)
-
-        self.scale = 1 / (d_head ** 0.5)
-
-        self.pre_lnorm = pre_lnorm
-
-    def _parallelogram_mask(self, h, w, left=False):
-        mask = torch.ones((h, w)).byte()
-        m = min(h, w)
-        mask[:m,:m] = torch.triu(mask[:m,:m])
-        mask[-m:,-m:] = torch.tril(mask[-m:,-m:])
-
-        if left:
-            return mask
-        else:
-            return mask.flip(0)
-
-    def _shift(self, x, qlen, klen, mask, left=False):
-        if qlen > 1:
-            zero_pad = torch.zeros((x.size(0), qlen-1, x.size(2), x.size(3)),
-                                   device=x.device, dtype=x.dtype)
-        else:
-            zero_pad = torch.zeros(0, device=x.device, dtype=x.dtype)
-
-        if left:
-            mask = mask.flip(1)
-            x_padded = torch.cat([zero_pad, x], dim=1).expand(qlen, -1, -1, -1)
-        else:
-            x_padded = torch.cat([x, zero_pad], dim=1).expand(qlen, -1, -1, -1)
-
-        x = x_padded.masked_select(mask[:,:,None,None]) \
-                    .view(qlen, klen, x.size(2), x.size(3))
-
-        return x
-
-    def _rel_shift(self, x, zero_triu=False):
-        zero_pad = torch.zeros((x.size(0), 1, *x.size()[2:]),
-                               device=x.device, dtype=x.dtype)
-        x_padded = torch.cat([zero_pad, x], dim=1)
-
-        x_padded = x_padded.view(x.size(1) + 1, x.size(0), *x.size()[2:])
-
-        x = x_padded[1:].view_as(x)
-
-        if zero_triu:
-            ones = torch.ones((x.size(0), x.size(1)))
-            x = x * torch.tril(ones, x.size(1) - x.size(0))[:,:,None,None]
-
-        return x
-
-    def forward(self, w, r, attn_mask=None, mems=None):
-        raise NotImplementedError
-
-class RelPartialLearnableMultiHeadAttn(RelMultiHeadAttn):
-    def __init__(self, *args, **kwargs):
-        super(RelPartialLearnableMultiHeadAttn, self).__init__(*args, **kwargs)
-
-        self.r_net = nn.Linear(self.d_model, self.n_head * self.d_head, bias=False)
-
-    def forward(self, w, 
r, r_w_bias, r_r_bias, attn_mask=None, mems=None): - qlen, rlen, bsz = w.size(0), r.size(0), w.size(1) - - if mems is not None: - cat = torch.cat([mems, w], 0) - if self.pre_lnorm: - w_heads = self.qkv_net(self.layer_norm(cat)) - else: - w_heads = self.qkv_net(cat) - r_head_k = self.r_net(r) - - w_head_q, w_head_k, w_head_v = torch.chunk(w_heads, 3, dim=-1) - w_head_q = w_head_q[-qlen:] - else: - if self.pre_lnorm: - w_heads = self.qkv_net(self.layer_norm(w)) - else: - w_heads = self.qkv_net(w) - r_head_k = self.r_net(r) - - w_head_q, w_head_k, w_head_v = torch.chunk(w_heads, 3, dim=-1) - - klen = w_head_k.size(0) - - w_head_q = w_head_q.view(qlen, bsz, self.n_head, self.d_head) # qlen x bsz x n_head x d_head - w_head_k = w_head_k.view(klen, bsz, self.n_head, self.d_head) # qlen x bsz x n_head x d_head - w_head_v = w_head_v.view(klen, bsz, self.n_head, self.d_head) # qlen x bsz x n_head x d_head - - r_head_k = r_head_k.view(rlen, self.n_head, self.d_head) # qlen x n_head x d_head - - #### compute attention score - rw_head_q = w_head_q + r_w_bias # qlen x bsz x n_head x d_head - AC = torch.einsum('ibnd,jbnd->ijbn', (rw_head_q, w_head_k)) # qlen x klen x bsz x n_head - - rr_head_q = w_head_q + r_r_bias - BD = torch.einsum('ibnd,jnd->ijbn', (rr_head_q, r_head_k)) # qlen x klen x bsz x n_head - BD = self._rel_shift(BD) - - # [qlen x klen x bsz x n_head] - attn_score = AC + BD - attn_score.mul_(self.scale) - - #### compute attention probability - if attn_mask is not None and attn_mask.any().item(): - if attn_mask.dim() == 2: - attn_score = attn_score.float().masked_fill( - attn_mask[None,:,:,None], -float('inf')).type_as(attn_score) - elif attn_mask.dim() == 3: - attn_score = attn_score.float().masked_fill( - attn_mask[:,:,:,None], -float('inf')).type_as(attn_score) - - # [qlen x klen x bsz x n_head] - attn_prob = F.softmax(attn_score, dim=1) - attn_prob = self.dropatt(attn_prob) - - #### compute attention vector - attn_vec = torch.einsum('ijbn,jbnd->ibnd', (attn_prob, w_head_v)) - - # [qlen x bsz x n_head x d_head] - attn_vec = attn_vec.contiguous().view( - attn_vec.size(0), attn_vec.size(1), self.n_head * self.d_head) - - ##### linear projection - attn_out = self.o_net(attn_vec) - attn_out = self.drop(attn_out) - - if self.pre_lnorm: - ##### residual connection - output = w + attn_out - else: - ##### residual connection + layer normalization - output = self.layer_norm(w + attn_out) - - return output - -class RelLearnableMultiHeadAttn(RelMultiHeadAttn): - def __init__(self, *args, **kwargs): - super(RelLearnableMultiHeadAttn, self).__init__(*args, **kwargs) - - def forward(self, w, r_emb, r_w_bias, r_bias, attn_mask=None, mems=None): - # r_emb: [klen, n_head, d_head], used for term B - # r_w_bias: [n_head, d_head], used for term C - # r_bias: [klen, n_head], used for term D - - qlen, bsz = w.size(0), w.size(1) - - if mems is not None: - cat = torch.cat([mems, w], 0) - if self.pre_lnorm: - w_heads = self.qkv_net(self.layer_norm(cat)) - else: - w_heads = self.qkv_net(cat) - w_head_q, w_head_k, w_head_v = torch.chunk(w_heads, 3, dim=-1) - - w_head_q = w_head_q[-qlen:] - else: - if self.pre_lnorm: - w_heads = self.qkv_net(self.layer_norm(w)) - else: - w_heads = self.qkv_net(w) - w_head_q, w_head_k, w_head_v = torch.chunk(w_heads, 3, dim=-1) - - klen = w_head_k.size(0) - - w_head_q = w_head_q.view(qlen, bsz, self.n_head, self.d_head) - w_head_k = w_head_k.view(klen, bsz, self.n_head, self.d_head) - w_head_v = w_head_v.view(klen, bsz, self.n_head, self.d_head) - - if klen > r_emb.size(0): - 
r_emb_pad = r_emb[0:1].expand(klen-r_emb.size(0), -1, -1) - r_emb = torch.cat([r_emb_pad, r_emb], 0) - r_bias_pad = r_bias[0:1].expand(klen-r_bias.size(0), -1) - r_bias = torch.cat([r_bias_pad, r_bias], 0) - else: - r_emb = r_emb[-klen:] - r_bias = r_bias[-klen:] - - #### compute attention score - rw_head_q = w_head_q + r_w_bias[None] # qlen x bsz x n_head x d_head - - AC = torch.einsum('ibnd,jbnd->ijbn', (rw_head_q, w_head_k)) # qlen x klen x bsz x n_head - B_ = torch.einsum('ibnd,jnd->ijbn', (w_head_q, r_emb)) # qlen x klen x bsz x n_head - D_ = r_bias[None, :, None] # 1 x klen x 1 x n_head - BD = self._rel_shift(B_ + D_) - - # [qlen x klen x bsz x n_head] - attn_score = AC + BD - attn_score.mul_(self.scale) - - #### compute attention probability - if attn_mask is not None and attn_mask.any().item(): - if attn_mask.dim() == 2: - attn_score.masked_fill_(attn_mask[None,:,:,None], -float('inf')) - elif attn_mask.dim() == 3: - attn_score.masked_fill_(attn_mask[:,:,:,None], -float('inf')) - - # [qlen x klen x bsz x n_head] - attn_prob = F.softmax(attn_score, dim=1) - attn_prob = self.dropatt(attn_prob) - - #### compute attention vector - attn_vec = torch.einsum('ijbn,jbnd->ibnd', (attn_prob, w_head_v)) - - # [qlen x bsz x n_head x d_head] - attn_vec = attn_vec.contiguous().view( - attn_vec.size(0), attn_vec.size(1), self.n_head * self.d_head) - - ##### linear projection - attn_out = self.o_net(attn_vec) - attn_out = self.drop(attn_out) - - if self.pre_lnorm: - ##### residual connection - output = w + attn_out - else: - ##### residual connection + layer normalization - output = self.layer_norm(w + attn_out) - - return output - -class DecoderLayer(nn.Module): - def __init__(self, n_head, d_model, d_head, d_inner, dropout, **kwargs): - super(DecoderLayer, self).__init__() - - self.dec_attn = MultiHeadAttn(n_head, d_model, d_head, dropout, **kwargs) - self.pos_ff = PositionwiseFF(d_model, d_inner, dropout, - pre_lnorm=kwargs.get('pre_lnorm')) - - def forward(self, dec_inp, dec_attn_mask=None, mems=None): - - output = self.dec_attn(dec_inp, attn_mask=dec_attn_mask, - mems=mems) - output = self.pos_ff(output) - - return output - -class RelLearnableDecoderLayer(nn.Module): - def __init__(self, n_head, d_model, d_head, d_inner, dropout, - **kwargs): - super(RelLearnableDecoderLayer, self).__init__() - - self.dec_attn = RelLearnableMultiHeadAttn(n_head, d_model, d_head, dropout, - **kwargs) - self.pos_ff = PositionwiseFF(d_model, d_inner, dropout, - pre_lnorm=kwargs.get('pre_lnorm')) - - def forward(self, dec_inp, r_emb, r_w_bias, r_bias, dec_attn_mask=None, mems=None): - - output = self.dec_attn(dec_inp, r_emb, r_w_bias, r_bias, - attn_mask=dec_attn_mask, - mems=mems) - output = self.pos_ff(output) - - return output - -class RelPartialLearnableDecoderLayer(nn.Module): - def __init__(self, n_head, d_model, d_head, d_inner, dropout, - **kwargs): - super(RelPartialLearnableDecoderLayer, self).__init__() - - self.dec_attn = RelPartialLearnableMultiHeadAttn(n_head, d_model, - d_head, dropout, **kwargs) - self.pos_ff = PositionwiseFF(d_model, d_inner, dropout, - pre_lnorm=kwargs.get('pre_lnorm')) - - def forward(self, dec_inp, r, r_w_bias, r_r_bias, dec_attn_mask=None, mems=None): - - output = self.dec_attn(dec_inp, r, r_w_bias, r_r_bias, - attn_mask=dec_attn_mask, - mems=mems) - output = self.pos_ff(output) - - return output - - -class AdaptiveEmbedding(nn.Module): - def __init__(self, n_token, d_embed, d_proj, cutoffs, div_val=1, - sample_softmax=False): - super(AdaptiveEmbedding, self).__init__() - - 
self.n_token = n_token - self.d_embed = d_embed - - self.cutoffs = cutoffs + [n_token] - self.div_val = div_val - self.d_proj = d_proj - - self.emb_scale = d_proj ** 0.5 - - self.cutoff_ends = [0] + self.cutoffs - - self.emb_layers = nn.ModuleList() - self.emb_projs = nn.ParameterList() - if div_val == 1: - self.emb_layers.append( - nn.Embedding(n_token, d_embed, sparse=sample_softmax>0) - ) - if d_proj != d_embed: - self.emb_projs.append(nn.Parameter(torch.Tensor(d_proj, d_embed))) - else: - for i in range(len(self.cutoffs)): - l_idx, r_idx = self.cutoff_ends[i], self.cutoff_ends[i+1] - d_emb_i = d_embed // (div_val ** i) - self.emb_layers.append(nn.Embedding(r_idx-l_idx, d_emb_i)) - self.emb_projs.append(nn.Parameter(torch.Tensor(d_proj, d_emb_i))) - - def forward(self, inp): - if self.div_val == 1: - embed = self.emb_layers[0](inp) - if self.d_proj != self.d_embed: - embed = F.linear(embed, self.emb_projs[0]) - else: - param = next(self.parameters()) - inp_flat = inp.view(-1) - emb_flat = torch.zeros([inp_flat.size(0), self.d_proj], - dtype=param.dtype, device=param.device) - for i in range(len(self.cutoffs)): - l_idx, r_idx = self.cutoff_ends[i], self.cutoff_ends[i + 1] - - mask_i = (inp_flat >= l_idx) & (inp_flat < r_idx) - indices_i = mask_i.nonzero().squeeze() - - if indices_i.numel() == 0: - continue - - inp_i = inp_flat.index_select(0, indices_i) - l_idx - emb_i = self.emb_layers[i](inp_i) - emb_i = F.linear(emb_i, self.emb_projs[i]) - - emb_flat.index_copy_(0, indices_i, emb_i) - - embed = emb_flat.view(*inp.size(), self.d_proj) - - embed.mul_(self.emb_scale) - - return embed - -class MemTransformerLM(nn.Module): - def __init__(self, n_token, n_layer, n_head, d_model, d_head, d_inner, - dropout, dropatt, tie_weight=True, d_embed=None, - div_val=1, tie_projs=[False], pre_lnorm=False, - tgt_len=None, ext_len=None, mem_len=None, - cutoffs=[], adapt_inp=False, - same_length=False, attn_type=0, clamp_len=-1, - sample_softmax=-1): - super(MemTransformerLM, self).__init__() - self.n_token = n_token - - d_embed = d_model if d_embed is None else d_embed - self.d_embed = d_embed - self.d_model = d_model - self.n_head = n_head - self.d_head = d_head - - self.word_emb = AdaptiveEmbedding(n_token, d_embed, d_model, cutoffs, - div_val=div_val) - - self.drop = nn.Dropout(dropout) - - self.n_layer = n_layer - - self.tgt_len = tgt_len - self.mem_len = mem_len - self.ext_len = ext_len - self.max_klen = tgt_len + ext_len + mem_len - - self.attn_type = attn_type - - self.layers = nn.ModuleList() - if attn_type == 0: # the default attention - for i in range(n_layer): - self.layers.append( - RelPartialLearnableDecoderLayer( - n_head, d_model, d_head, d_inner, dropout, - tgt_len=tgt_len, ext_len=ext_len, mem_len=mem_len, - dropatt=dropatt, pre_lnorm=pre_lnorm) - ) - elif attn_type == 1: # learnable embeddings - for i in range(n_layer): - self.layers.append( - RelLearnableDecoderLayer( - n_head, d_model, d_head, d_inner, dropout, - tgt_len=tgt_len, ext_len=ext_len, mem_len=mem_len, - dropatt=dropatt, pre_lnorm=pre_lnorm) - ) - elif attn_type in [2, 3]: # absolute embeddings - for i in range(n_layer): - self.layers.append( - DecoderLayer( - n_head, d_model, d_head, d_inner, dropout, - dropatt=dropatt, pre_lnorm=pre_lnorm) - ) - - self.sample_softmax = sample_softmax - # use sampled softmax - if sample_softmax > 0: - self.out_layer = nn.Linear(d_model, n_token) - if tie_weight: - self.out_layer.weight = self.word_emb.weight - self.tie_weight = tie_weight - self.sampler = LogUniformSampler(n_token, 
sample_softmax) - - # use adaptive softmax (including standard softmax) - else: - self.crit = ProjectedAdaptiveLogSoftmax(n_token, d_embed, d_model, - cutoffs, div_val=div_val) - - if tie_weight: - for i in range(len(self.crit.out_layers)): - self.crit.out_layers[i].weight = self.word_emb.emb_layers[i].weight - - if tie_projs: - for i, tie_proj in enumerate(tie_projs): - if tie_proj and div_val == 1 and d_model != d_embed: - self.crit.out_projs[i] = self.word_emb.emb_projs[0] - elif tie_proj and div_val != 1: - self.crit.out_projs[i] = self.word_emb.emb_projs[i] - - self.same_length = same_length - self.clamp_len = clamp_len - - self._create_params() - - def backward_compatible(self): - self.sample_softmax = -1 - - def _create_params(self): - if self.attn_type == 0: # default attention - self.pos_emb = PositionalEmbedding(self.d_model) - self.r_w_bias = nn.Parameter(torch.Tensor(self.n_head, self.d_head)) - self.r_r_bias = nn.Parameter(torch.Tensor(self.n_head, self.d_head)) - elif self.attn_type == 1: # learnable - self.r_emb = nn.Parameter(torch.Tensor( - self.n_layer, self.max_klen, self.n_head, self.d_head)) - self.r_w_bias = nn.Parameter(torch.Tensor( - self.n_layer, self.n_head, self.d_head)) - self.r_bias = nn.Parameter(torch.Tensor( - self.n_layer, self.max_klen, self.n_head)) - elif self.attn_type == 2: # absolute standard - self.pos_emb = PositionalEmbedding(self.d_model) - elif self.attn_type == 3: # absolute deeper SA - self.r_emb = nn.Parameter(torch.Tensor( - self.n_layer, self.max_klen, self.n_head, self.d_head)) - - def reset_length(self, tgt_len, ext_len, mem_len): - self.tgt_len = tgt_len - self.mem_len = mem_len - self.ext_len = ext_len - - def init_mems(self): - if self.mem_len > 0: - mems = [] - param = next(self.parameters()) - for i in range(self.n_layer+1): - empty = torch.empty(0, dtype=param.dtype, device=param.device) - mems.append(empty) - - return mems - else: - return None - - def _update_mems(self, hids, mems, qlen, mlen): - # does not deal with None - if mems is None: return None - - # mems is not None - assert len(hids) == len(mems), 'len(hids) != len(mems)' - - # There are `mlen + qlen` steps that can be cached into mems - # For the next step, the last `ext_len` of the `qlen` tokens - # will be used as the extended context. Hence, we only cache - # the tokens from `mlen + qlen - self.ext_len - self.mem_len` - # to `mlen + qlen - self.ext_len`. 
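-        # Example: with mlen=4, qlen=3, ext_len=0 and mem_len=4, this caches
-        # positions [3:7), since end_idx = 4 + 3 = 7 and beg_idx = max(0, 7 - 4) = 3.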
-        with torch.no_grad():
-            new_mems = []
-            end_idx = mlen + max(0, qlen - 0 - self.ext_len)
-            beg_idx = max(0, end_idx - self.mem_len)
-            for i in range(len(hids)):
-
-                cat = torch.cat([mems[i], hids[i]], dim=0)
-                new_mems.append(cat[beg_idx:end_idx].detach())
-
-        return new_mems
-
-    def _forward(self, dec_inp, mems=None):
-        qlen, bsz = dec_inp.size()
-
-        word_emb = self.word_emb(dec_inp)
-
-        mlen = mems[0].size(0) if mems is not None else 0
-        klen = mlen + qlen
-        if self.same_length:
-            all_ones = word_emb.new_ones(qlen, klen)
-            mask_len = klen - self.mem_len
-            if mask_len > 0:
-                mask_shift_len = qlen - mask_len
-            else:
-                mask_shift_len = qlen
-            dec_attn_mask = (torch.triu(all_ones, 1+mlen)
-                    + torch.tril(all_ones, -mask_shift_len)).byte()[:, :, None] # -1
-        else:
-            dec_attn_mask = torch.triu(
-                word_emb.new_ones(qlen, klen), diagonal=1+mlen).byte()[:,:,None]
-
-        hids = []
-        if self.attn_type == 0: # default
-            pos_seq = torch.arange(klen - 1, -1, -1.0, device=word_emb.device,
-                                   dtype=word_emb.dtype)
-            if self.clamp_len > 0:
-                pos_seq.clamp_(max=self.clamp_len)
-            pos_emb = self.pos_emb(pos_seq)
-
-            core_out = self.drop(word_emb)
-            pos_emb = self.drop(pos_emb)
-
-            hids.append(core_out)
-            for i, layer in enumerate(self.layers):
-                mems_i = None if mems is None else mems[i]
-                core_out = layer(core_out, pos_emb, self.r_w_bias,
-                        self.r_r_bias, dec_attn_mask=dec_attn_mask, mems=mems_i)
-                hids.append(core_out)
-        elif self.attn_type == 1: # learnable
-            core_out = self.drop(word_emb)
-            hids.append(core_out)
-            for i, layer in enumerate(self.layers):
-                if self.clamp_len > 0:
-                    r_emb = self.r_emb[i][-self.clamp_len :]
-                    r_bias = self.r_bias[i][-self.clamp_len :]
-                else:
-                    r_emb, r_bias = self.r_emb[i], self.r_bias[i]
-
-                mems_i = None if mems is None else mems[i]
-                core_out = layer(core_out, r_emb, self.r_w_bias[i],
-                        r_bias, dec_attn_mask=dec_attn_mask, mems=mems_i)
-                hids.append(core_out)
-        elif self.attn_type == 2: # absolute
-            pos_seq = torch.arange(klen - 1, -1, -1.0, device=word_emb.device,
-                                   dtype=word_emb.dtype)
-            if self.clamp_len > 0:
-                pos_seq.clamp_(max=self.clamp_len)
-            pos_emb = self.pos_emb(pos_seq)
-
-            core_out = self.drop(word_emb + pos_emb[-qlen:])
-
-            hids.append(core_out)
-            for i, layer in enumerate(self.layers):
-                mems_i = None if mems is None else mems[i]
-                if mems_i is not None and i == 0:
-                    mems_i += pos_emb[:mlen]
-                core_out = layer(core_out, dec_attn_mask=dec_attn_mask,
-                                 mems=mems_i)
-                hids.append(core_out)
-        elif self.attn_type == 3:
-            core_out = self.drop(word_emb)
-
-            hids.append(core_out)
-            for i, layer in enumerate(self.layers):
-                mems_i = None if mems is None else mems[i]
-                if mems_i is not None and mlen > 0:
-                    cur_emb = self.r_emb[i][:-qlen]
-                    cur_size = cur_emb.size(0)
-                    if cur_size < mlen:
-                        cur_emb_pad = cur_emb[0:1].expand(mlen-cur_size, -1, -1)
-                        cur_emb = torch.cat([cur_emb_pad, cur_emb], 0)
-                    else:
-                        cur_emb = cur_emb[-mlen:]
-                    mems_i += cur_emb.view(mlen, 1, -1)
-                core_out += self.r_emb[i][-qlen:].view(qlen, 1, -1)
-
-                core_out = layer(core_out, dec_attn_mask=dec_attn_mask,
-                                 mems=mems_i)
-                hids.append(core_out)
-
-        core_out = self.drop(core_out)
-
-        new_mems = self._update_mems(hids, mems, mlen, qlen)
-
-        return core_out, new_mems
-
-    def forward(self, data, target, *mems):
-        # nn.DataParallel does not allow size(0) tensors to be broadcasted.
-        # So, have to initialize size(0) mems inside the model forward.
-        # Moreover, have to return new_mems to allow nn.DataParallel to piece
-        # them together.
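-        # `mems` arrives as a (possibly empty) tuple of per-layer caches; an
-        # empty tuple on the first segment triggers fresh initialization below.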
- if not mems: mems = self.init_mems() - - tgt_len = target.size(0) - hidden, new_mems = self._forward(data, mems=mems) - - pred_hid = hidden[-tgt_len:] - if self.sample_softmax > 0 and self.training: - assert self.tie_weight - logit = sample_logits(self.word_emb, - self.out_layer.bias, target, pred_hid, self.sampler) - loss = -F.log_softmax(logit, -1)[:, :, 0] - else: - loss = self.crit(pred_hid.view(-1, pred_hid.size(-1)), target.view(-1)) - loss = loss.view(tgt_len, -1) - - if new_mems is None: - return [loss] - else: - return [loss] + new_mems - -if __name__ == '__main__': - import argparse - - parser = argparse.ArgumentParser(description='unit test') - - parser.add_argument('--n_layer', type=int, default=4, help='') - parser.add_argument('--n_rel_layer', type=int, default=4, help='') - parser.add_argument('--n_head', type=int, default=2, help='') - parser.add_argument('--d_head', type=int, default=2, help='') - parser.add_argument('--d_model', type=int, default=200, help='') - parser.add_argument('--d_embed', type=int, default=200, help='') - parser.add_argument('--d_inner', type=int, default=200, help='') - parser.add_argument('--dropout', type=float, default=0.0, help='') - parser.add_argument('--cuda', action='store_true', help='') - parser.add_argument('--seed', type=int, default=1111, help='') - parser.add_argument('--multi_gpu', action='store_true', help='') - - args = parser.parse_args() - - device = torch.device("cuda" if args.cuda else "cpu") - - B = 4 - tgt_len, mem_len, ext_len = 36, 36, 0 - data_len = tgt_len * 20 - args.n_token = 10000 - - import data_utils - - data = torch.LongTensor(data_len*B).random_(0, args.n_token).to(device) - diter = data_utils.LMOrderedIterator(data, B, tgt_len, device=device, ext_len=ext_len) - - cutoffs = [args.n_token // 2] - tie_projs = [False] + [True] * len(cutoffs) - - for div_val in [1, 2]: - for d_embed in [200, 100]: - model = MemTransformerLM(args.n_token, args.n_layer, args.n_head, - args.d_model, args.d_head, args.d_inner, args.dropout, - dropatt=args.dropout, tie_weight=True, - d_embed=d_embed, div_val=div_val, - tie_projs=tie_projs, pre_lnorm=True, - tgt_len=tgt_len, ext_len=ext_len, mem_len=mem_len, - cutoffs=cutoffs, attn_type=0).to(device) - - print(sum(p.numel() for p in model.parameters())) - - mems = tuple() - for idx, (inp, tgt, seqlen) in enumerate(diter): - print('batch {}'.format(idx)) - out = model(inp, tgt, *mems) - mems = out[1:] diff --git a/transformer-xl/pytorch/run_enwik8_base.sh b/transformer-xl/pytorch/run_enwik8_base.sh deleted file mode 100644 index db542a8..0000000 --- a/transformer-xl/pytorch/run_enwik8_base.sh +++ /dev/null @@ -1,41 +0,0 @@ -#!/bin/bash - -if [[ $1 == 'train' ]]; then - echo 'Run training...' - python train.py \ - --cuda \ - --data ../data/enwik8/ \ - --dataset enwik8 \ - --n_layer 12 \ - --d_model 512 \ - --n_head 8 \ - --d_head 64 \ - --d_inner 2048 \ - --dropout 0.1 \ - --dropatt 0.0 \ - --optim adam \ - --lr 0.00025 \ - --warmup_step 0 \ - --max_step 400000 \ - --tgt_len 512 \ - --mem_len 512 \ - --eval_tgt_len 128 \ - --batch_size 22 \ - --multi_gpu \ - --gpu0_bsz 4 \ - ${@:2} -elif [[ $1 == 'eval' ]]; then - echo 'Run evaluation...' 
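-    # Note that evaluation uses a much longer memory than training here
-    # (mem_len=2100 vs. 512) together with positional clamping (clamp_len=820).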
- python eval.py \ - --cuda \ - --data ../data/enwik8/ \ - --dataset enwik8 \ - --tgt_len 80 \ - --mem_len 2100 \ - --clamp_len 820 \ - --same_length \ - --split test \ - ${@:2} -else - echo 'unknown argment 1' -fi diff --git a/transformer-xl/pytorch/run_enwik8_large.sh b/transformer-xl/pytorch/run_enwik8_large.sh deleted file mode 100644 index 5db67bf..0000000 --- a/transformer-xl/pytorch/run_enwik8_large.sh +++ /dev/null @@ -1,41 +0,0 @@ -#!/bin/bash - -if [[ $1 == 'train' ]]; then - echo 'Run training...' - python train.py \ - --cuda \ - --data ../data/enwik8/ \ - --dataset enwik8 \ - --n_layer 24 \ - --d_model 1024 \ - --n_head 8 \ - --d_head 128 \ - --d_inner 3072 \ - --dropout 0.15 \ - --dropatt 0.15 \ - --optim adam \ - --lr 0.00025 \ - --warmup_step 4000 \ - --max_step 400000 \ - --tgt_len 768 \ - --mem_len 768 \ - --eval_tgt_len 128 \ - --batch_size 64 \ - --multi_gpu \ - --gpu0_bsz 0 \ - ${@:2} -elif [[ $1 == 'eval' ]]; then - echo 'Run evaluation...' - python eval.py \ - --cuda \ - --data ../data/enwik8/ \ - --dataset enwik8 \ - --tgt_len 128 \ - --mem_len 3800 \ - --clamp_len 1000 \ - --same_length \ - --split test \ - ${@:2} -else - echo 'unknown argment 1' -fi diff --git a/transformer-xl/pytorch/run_lm1b_base.sh b/transformer-xl/pytorch/run_lm1b_base.sh deleted file mode 100644 index e4aebef..0000000 --- a/transformer-xl/pytorch/run_lm1b_base.sh +++ /dev/null @@ -1,43 +0,0 @@ -#!/bin/bash - -if [[ $1 == 'train' ]]; then - echo 'Run training...' - python train.py \ - --cuda \ - --data ../data/one-billion-words/ \ - --dataset lm1b \ - --adaptive \ - --n_layer 18 \ - --d_model 1024 \ - --div_val 4 \ - --n_head 8 \ - --d_head 128 \ - --d_inner 4096 \ - --dropout 0.0 \ - --dropatt 0.0 \ - --optim adam \ - --warmup_step 20000 \ - --max_step 500000 \ - --lr 0.00025 \ - --tgt_len 32 \ - --mem_len 32 \ - --eval_tgt_len 32 \ - --batch_size 224 \ - --multi_gpu \ - --gpu0_bsz 32 \ - ${@:2} -elif [[ $1 == 'eval' ]]; then - echo 'Run evaluation...' - python eval.py \ - --cuda \ - --data ../data/one-billion-words/ \ - --dataset lm1b \ - --batch_size 64 \ - --tgt_len 32 \ - --mem_len 128 \ - --split test \ - --same_length \ - ${@:2} -else - echo 'unknown argment 1' -fi diff --git a/transformer-xl/pytorch/run_lm1b_large.sh b/transformer-xl/pytorch/run_lm1b_large.sh deleted file mode 100644 index f8b330a..0000000 --- a/transformer-xl/pytorch/run_lm1b_large.sh +++ /dev/null @@ -1,43 +0,0 @@ -#!/bin/bash - -if [[ $1 == 'train' ]]; then - echo 'Run training...' - python train.py \ - --cuda \ - --data ../data/one-billion-words/ \ - --dataset lm1b \ - --adaptive \ - --div_val 4 \ - --n_layer 24 \ - --d_model 1280 \ - --n_head 16 \ - --d_head 80 \ - --d_inner 8192 \ - --dropout 0.05 \ - --dropatt 0.05 \ - --optim adam \ - --warmup_step 30000 \ - --max_step 1200000 \ - --lr 0.00025 \ - --tgt_len 32 \ - --mem_len 32 \ - --eval_tgt_len 32 \ - --batch_size 512 \ - --multi_gpu \ - --gpu0_bsz 0 \ - ${@:2} -elif [[ $1 == 'eval' ]]; then - echo 'Run evaluation...' - python eval.py \ - --cuda \ - --data ../data/one-billion-words/ \ - --dataset lm1b \ - --batch_size 8 \ - --tgt_len 32 \ - --mem_len 128 \ - --split test \ - --same_length \ - ${@:2} -else - echo 'unknown argment 1' -fi diff --git a/transformer-xl/pytorch/run_text8_base.sh b/transformer-xl/pytorch/run_text8_base.sh deleted file mode 100644 index 7058f77..0000000 --- a/transformer-xl/pytorch/run_text8_base.sh +++ /dev/null @@ -1,41 +0,0 @@ -#!/bin/bash - -if [[ $1 == 'train' ]]; then - echo 'Run training...' 
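-    # Same 12-layer base configuration as run_enwik8_base.sh, pointed at text8.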
- python train.py \ - --cuda \ - --data ../data/text8/ \ - --dataset text8 \ - --n_layer 12 \ - --d_model 512 \ - --n_head 8 \ - --d_head 64 \ - --d_inner 2048 \ - --dropout 0.1 \ - --dropatt 0.0 \ - --optim adam \ - --lr 0.00025 \ - --warmup_step 0 \ - --max_step 400000 \ - --tgt_len 512 \ - --mem_len 512 \ - --eval_tgt_len 128 \ - --batch_size 22 \ - --multi_gpu \ - --gpu0_bsz 4 \ - ${@:2} -elif [[ $1 == 'eval' ]]; then - echo 'Run evaluation...' - python eval.py \ - --cuda \ - --data ../data/text8/ \ - --dataset text8 \ - --tgt_len 80 \ - --mem_len 2100 \ - --clamp_len 820 \ - --same_length \ - --split test \ - ${@:2} -else - echo 'unknown argment 1' -fi diff --git a/transformer-xl/pytorch/run_text8_large.sh b/transformer-xl/pytorch/run_text8_large.sh deleted file mode 100644 index cfc84df..0000000 --- a/transformer-xl/pytorch/run_text8_large.sh +++ /dev/null @@ -1,38 +0,0 @@ -#!/bin/bash - -if [[ $1 == 'train' ]]; then - echo 'Run training...' - python train.py \ - --cuda \ - --data ../data/text8/ \ - --dataset text8 \ - --n_layer 24 \ - --d_model 1024 \ - --n_head 8 \ - --d_head 128 \ - --d_inner 3072 \ - --dropout 0.15 \ - --dropatt 0.15 \ - --optim adam \ - --lr 0.00025 \ - --tgt_len 768 \ - --mem_len 768 \ - --eval_tgt_len 128 \ - --batch_size 64 \ - --max_step 400000 \ - ${@:2} -elif [[ $1 == 'eval' ]]; then - echo 'Run evaluation...' - python eval.py \ - --cuda \ - --data ../data/text8/ \ - --dataset text8 \ - --tgt_len 128 \ - --mem_len 3800 \ - --clamp_len 1000 \ - --same_length \ - --split test \ - ${@:2} -else - echo 'unknown argment 1' -fi diff --git a/transformer-xl/pytorch/run_wt103_base.sh b/transformer-xl/pytorch/run_wt103_base.sh deleted file mode 100644 index 22c7550..0000000 --- a/transformer-xl/pytorch/run_wt103_base.sh +++ /dev/null @@ -1,42 +0,0 @@ -#!/bin/bash - -if [[ $1 == 'train' ]]; then - echo 'Run training...' - python train.py \ - --cuda \ - --data ../data/wikitext-103/ \ - --dataset wt103 \ - --adaptive \ - --n_layer 16 \ - --d_model 410 \ - --n_head 10 \ - --d_head 41 \ - --d_inner 2100 \ - --dropout 0.1 \ - --dropatt 0.0 \ - --optim adam \ - --lr 0.00025 \ - --warmup_step 0 \ - --max_step 200000 \ - --tgt_len 150 \ - --mem_len 150 \ - --eval_tgt_len 150 \ - --batch_size 60 \ - --multi_gpu \ - --gpu0_bsz 4 \ - ${@:2} -elif [[ $1 == 'eval' ]]; then - echo 'Run evaluation...' - python eval.py \ - --cuda \ - --data ../data/wikitext-103/ \ - --dataset wt103 \ - --tgt_len 64 \ - --mem_len 640 \ - --clamp_len 400 \ - --same_length \ - --split test \ - ${@:2} -else - echo 'unknown argment 1' -fi diff --git a/transformer-xl/pytorch/run_wt103_large.sh b/transformer-xl/pytorch/run_wt103_large.sh deleted file mode 100644 index a4e701b..0000000 --- a/transformer-xl/pytorch/run_wt103_large.sh +++ /dev/null @@ -1,43 +0,0 @@ -#!/bin/bash - -if [[ $1 == 'train' ]]; then - echo 'Run training...' - python train.py \ - --cuda \ - --data ../data/wikitext-103/ \ - --dataset wt103 \ - --adaptive \ - --div_val 4 \ - --n_layer 18 \ - --d_model 1024 \ - --n_head 16 \ - --d_head 64 \ - --d_inner 4096 \ - --dropout 0.2 \ - --dropatt 0.2 \ - --optim adam \ - --lr 0.00025 \ - --warmup_step 16000 \ - --max_step 4000000 \ - --tgt_len 384 \ - --mem_len 384 \ - --eval_tgt_len 128 \ - --batch_size 128 \ - --multi_gpu \ - --gpu0_bsz 0 \ - ${@:2} -elif [[ $1 == 'eval' ]]; then - echo 'Run evaluation...' 
- python eval.py \ - --cuda \ - --data ../data/wikitext-103/ \ - --dataset wt103 \ - --tgt_len 128 \ - --mem_len 1600 \ - --clamp_len 1000 \ - --same_length \ - --split test \ - ${@:2} -else - echo 'unknown argment 1' -fi diff --git a/transformer-xl/pytorch/train.py b/transformer-xl/pytorch/train.py deleted file mode 100644 index 0e00e82..0000000 --- a/transformer-xl/pytorch/train.py +++ /dev/null @@ -1,562 +0,0 @@ -# coding: utf-8 -import argparse -import time -import math -import os, sys -import itertools - -import numpy as np - -import torch -import torch.nn as nn -import torch.optim as optim - -from data_utils import get_lm_corpus -from mem_transformer import MemTransformerLM -from utils.exp_utils import create_exp_dir -from utils.data_parallel import BalancedDataParallel - -parser = argparse.ArgumentParser(description='PyTorch Transformer Language Model') -parser.add_argument('--data', type=str, default='../data/wikitext-103', - help='location of the data corpus') -parser.add_argument('--dataset', type=str, default='wt103', - choices=['wt103', 'lm1b', 'enwik8', 'text8'], - help='dataset name') -parser.add_argument('--n_layer', type=int, default=12, - help='number of total layers') -parser.add_argument('--n_head', type=int, default=10, - help='number of heads') -parser.add_argument('--d_head', type=int, default=50, - help='head dimension') -parser.add_argument('--d_embed', type=int, default=-1, - help='embedding dimension') -parser.add_argument('--d_model', type=int, default=500, - help='model dimension') -parser.add_argument('--d_inner', type=int, default=1000, - help='inner dimension in FF') -parser.add_argument('--dropout', type=float, default=0.0, - help='global dropout rate') -parser.add_argument('--dropatt', type=float, default=0.0, - help='attention probability dropout rate') -parser.add_argument('--init', default='normal', type=str, - help='parameter initializer to use.') -parser.add_argument('--emb_init', default='normal', type=str, - help='parameter initializer to use.') -parser.add_argument('--init_range', type=float, default=0.1, - help='parameters initialized by U(-init_range, init_range)') -parser.add_argument('--emb_init_range', type=float, default=0.01, - help='parameters initialized by U(-init_range, init_range)') -parser.add_argument('--init_std', type=float, default=0.02, - help='parameters initialized by N(0, init_std)') -parser.add_argument('--proj_init_std', type=float, default=0.01, - help='parameters initialized by N(0, init_std)') -parser.add_argument('--optim', default='adam', type=str, - choices=['adam', 'sgd', 'adagrad'], - help='optimizer to use.') -parser.add_argument('--lr', type=float, default=0.00025, - help='initial learning rate (0.00025|5 for adam|sgd)') -parser.add_argument('--mom', type=float, default=0.0, - help='momentum for sgd') -parser.add_argument('--scheduler', default='cosine', type=str, - choices=['cosine', 'inv_sqrt', 'dev_perf', 'constant'], - help='lr scheduler to use.') -parser.add_argument('--warmup_step', type=int, default=0, - help='upper epoch limit') -parser.add_argument('--decay_rate', type=float, default=0.5, - help='decay factor when ReduceLROnPlateau is used') -parser.add_argument('--lr_min', type=float, default=0.0, - help='minimum learning rate during annealing') -parser.add_argument('--clip', type=float, default=0.25, - help='gradient clipping') -parser.add_argument('--clip_nonemb', action='store_true', - help='only clip the gradient of non-embedding params') -parser.add_argument('--max_step', type=int, default=100000, - 
help='upper epoch limit') -parser.add_argument('--batch_size', type=int, default=60, - help='batch size') -parser.add_argument('--batch_chunk', type=int, default=1, - help='split batch into chunks to save memory') -parser.add_argument('--tgt_len', type=int, default=70, - help='number of tokens to predict') -parser.add_argument('--eval_tgt_len', type=int, default=50, - help='number of tokens to predict for evaluation') -parser.add_argument('--ext_len', type=int, default=0, - help='length of the extended context') -parser.add_argument('--mem_len', type=int, default=0, - help='length of the retained previous heads') -parser.add_argument('--not_tied', action='store_true', - help='do not tie the word embedding and softmax weights') -parser.add_argument('--seed', type=int, default=1111, - help='random seed') -parser.add_argument('--cuda', action='store_true', - help='use CUDA') -parser.add_argument('--adaptive', action='store_true', - help='use adaptive softmax') -parser.add_argument('--div_val', type=int, default=1, - help='divident value for adapative input and softmax') -parser.add_argument('--pre_lnorm', action='store_true', - help='apply LayerNorm to the input instead of the output') -parser.add_argument('--varlen', action='store_true', - help='use variable length') -parser.add_argument('--multi_gpu', action='store_true', - help='use multiple GPU') -parser.add_argument('--log-interval', type=int, default=200, - help='report interval') -parser.add_argument('--eval-interval', type=int, default=4000, - help='evaluation interval') -parser.add_argument('--work_dir', default='LM-TFM', type=str, - help='experiment directory.') -parser.add_argument('--restart', action='store_true', - help='restart training from the saved checkpoint') -parser.add_argument('--restart_dir', type=str, default='', - help='restart dir') -parser.add_argument('--debug', action='store_true', - help='run in debug mode (do not create exp dir)') -parser.add_argument('--same_length', action='store_true', - help='use the same attn length for all tokens') -parser.add_argument('--attn_type', type=int, default=0, - help='attention type. 0 for ours, 1 for Shaw et al,' - '2 for Vaswani et al, 3 for Al Rfou et al.') -parser.add_argument('--clamp_len', type=int, default=-1, - help='use the same pos embeddings after clamp_len') -parser.add_argument('--eta_min', type=float, default=0.0, - help='min learning rate for cosine scheduler') -parser.add_argument('--gpu0_bsz', type=int, default=-1, - help='batch size on gpu 0') -parser.add_argument('--max_eval_steps', type=int, default=-1, - help='max eval steps') -parser.add_argument('--sample_softmax', type=int, default=-1, - help='number of samples in sampled softmax') -parser.add_argument('--patience', type=int, default=0, - help='patience') -parser.add_argument('--finetune_v2', action='store_true', - help='finetune v2') -parser.add_argument('--finetune_v3', action='store_true', - help='finetune v3') -parser.add_argument('--fp16', action='store_true', - help='Run in pseudo-fp16 mode (fp16 storage fp32 math).') -parser.add_argument('--static-loss-scale', type=float, default=1, - help='Static loss scale, positive power of 2 values can ' - 'improve fp16 convergence.') -parser.add_argument('--dynamic-loss-scale', action='store_true', - help='Use dynamic loss scaling. 
If supplied, this argument' - ' supersedes --static-loss-scale.') -args = parser.parse_args() -args.tied = not args.not_tied - -if args.d_embed < 0: - args.d_embed = args.d_model - -assert args.ext_len >= 0, 'extended context length must be non-negative' -assert args.batch_size % args.batch_chunk == 0 - -args.work_dir = '{}-{}'.format(args.work_dir, args.dataset) -args.work_dir = os.path.join(args.work_dir, time.strftime('%Y%m%d-%H%M%S')) -logging = create_exp_dir(args.work_dir, - scripts_to_save=['train.py', 'mem_transformer.py'], debug=args.debug) - -# Set the random seed manually for reproducibility. -np.random.seed(args.seed) -torch.manual_seed(args.seed) -if torch.cuda.is_available(): - if not args.cuda: - print('WARNING: You have a CUDA DEVICE, so you should probably run with --cuda') - else: - torch.cuda.manual_seed_all(args.seed) - -# Validate `--fp16` option -if args.fp16: - if not args.cuda: - print('WARNING: --fp16 requires --cuda, ignoring --fp16 option') - args.fp16 = False - else: - try: - from apex.fp16_utils import FP16_Optimizer - except: - print('WARNING: apex not installed, ignoring --fp16 option') - args.fp16 = False - -device = torch.device('cuda' if args.cuda else 'cpu') - -############################################################################### -# Load data -############################################################################### -corpus = get_lm_corpus(args.data, args.dataset) -ntokens = len(corpus.vocab) -args.n_token = ntokens - -eval_batch_size = 10 -tr_iter = corpus.get_iterator('train', args.batch_size, args.tgt_len, - device=device, ext_len=args.ext_len) -va_iter = corpus.get_iterator('valid', eval_batch_size, args.eval_tgt_len, - device=device, ext_len=args.ext_len) -te_iter = corpus.get_iterator('test', eval_batch_size, args.eval_tgt_len, - device=device, ext_len=args.ext_len) - -# adaptive softmax / embedding -cutoffs, tie_projs = [], [False] -if args.adaptive: - assert args.dataset in ['wt103', 'lm1b'] - if args.dataset == 'wt103': - cutoffs = [20000, 40000, 200000] - tie_projs += [True] * len(cutoffs) - elif args.dataset == 'lm1b': - cutoffs = [60000, 100000, 640000] - tie_projs += [False] * len(cutoffs) - -############################################################################### -# Build the model -############################################################################### -def init_weight(weight): - if args.init == 'uniform': - nn.init.uniform_(weight, -args.init_range, args.init_range) - elif args.init == 'normal': - nn.init.normal_(weight, 0.0, args.init_std) - -def init_bias(bias): - nn.init.constant_(bias, 0.0) - -def weights_init(m): - classname = m.__class__.__name__ - if classname.find('Linear') != -1: - if hasattr(m, 'weight') and m.weight is not None: - init_weight(m.weight) - if hasattr(m, 'bias') and m.bias is not None: - init_bias(m.bias) - elif classname.find('AdaptiveEmbedding') != -1: - if hasattr(m, 'emb_projs'): - for i in range(len(m.emb_projs)): - if m.emb_projs[i] is not None: - nn.init.normal_(m.emb_projs[i], 0.0, args.proj_init_std) - elif classname.find('Embedding') != -1: - if hasattr(m, 'weight'): - init_weight(m.weight) - elif classname.find('ProjectedAdaptiveLogSoftmax') != -1: - if hasattr(m, 'cluster_weight') and m.cluster_weight is not None: - init_weight(m.cluster_weight) - if hasattr(m, 'cluster_bias') and m.cluster_bias is not None: - init_bias(m.cluster_bias) - if hasattr(m, 'out_projs'): - for i in range(len(m.out_projs)): - if m.out_projs[i] is not None: - nn.init.normal_(m.out_projs[i], 0.0, 
args.proj_init_std) - elif classname.find('LayerNorm') != -1: - if hasattr(m, 'weight'): - nn.init.normal_(m.weight, 1.0, args.init_std) - if hasattr(m, 'bias') and m.bias is not None: - init_bias(m.bias) - elif classname.find('TransformerLM') != -1: - if hasattr(m, 'r_emb'): - init_weight(m.r_emb) - if hasattr(m, 'r_w_bias'): - init_weight(m.r_w_bias) - if hasattr(m, 'r_r_bias'): - init_weight(m.r_r_bias) - if hasattr(m, 'r_bias'): - init_bias(m.r_bias) - -def update_dropout(m): - classname = m.__class__.__name__ - if classname.find('Dropout') != -1: - if hasattr(m, 'p'): - m.p = args.dropout - -def update_dropatt(m): - if hasattr(m, 'dropatt'): - m.dropatt.p = args.dropatt - -if args.restart: - with open(os.path.join(args.restart_dir, 'model.pt'), 'rb') as f: - model = torch.load(f) - if not args.fp16: - model = model.float() - model.apply(update_dropout) - model.apply(update_dropatt) -else: - model = MemTransformerLM(ntokens, args.n_layer, args.n_head, args.d_model, - args.d_head, args.d_inner, args.dropout, args.dropatt, - tie_weight=args.tied, d_embed=args.d_embed, div_val=args.div_val, - tie_projs=tie_projs, pre_lnorm=args.pre_lnorm, tgt_len=args.tgt_len, - ext_len=args.ext_len, mem_len=args.mem_len, cutoffs=cutoffs, - same_length=args.same_length, attn_type=args.attn_type, - clamp_len=args.clamp_len, sample_softmax=args.sample_softmax) - model.apply(weights_init) - model.word_emb.apply(weights_init) # ensure embedding init is not overridden by out_layer in case of weight sharing -args.n_all_param = sum([p.nelement() for p in model.parameters()]) -args.n_nonemb_param = sum([p.nelement() for p in model.layers.parameters()]) - -if args.fp16: - model = model.half() - -if args.multi_gpu: - model = model.to(device) - if args.gpu0_bsz >= 0: - para_model = BalancedDataParallel(args.gpu0_bsz // args.batch_chunk, - model, dim=1).to(device) - else: - para_model = nn.DataParallel(model, dim=1).to(device) -else: - para_model = model.to(device) - -#### optimizer -if args.optim.lower() == 'sgd': - if args.sample_softmax > 0: - dense_params, sparse_params = [], [] - for param in model.parameters(): - if param.size() == model.word_emb.weight.size(): - sparse_params.append(param) - else: - dense_params.append(param) - optimizer_sparse = optim.SGD(sparse_params, lr=args.lr * 2) - optimizer = optim.SGD(dense_params, lr=args.lr, momentum=args.mom) - else: - optimizer = optim.SGD(model.parameters(), lr=args.lr, - momentum=args.mom) -elif args.optim.lower() == 'adam': - if args.sample_softmax > 0: - dense_params, sparse_params = [], [] - for param in model.parameters(): - if param.size() == model.word_emb.weight.size(): - sparse_params.append(param) - else: - dense_params.append(param) - optimizer_sparse = optim.SparseAdam(sparse_params, lr=args.lr) - optimizer = optim.Adam(dense_params, lr=args.lr) - else: - optimizer = optim.Adam(model.parameters(), lr=args.lr) -elif args.optim.lower() == 'adagrad': - optimizer = optim.Adagrad(model.parameters(), lr=args.lr) - -#### scheduler -if args.scheduler == 'cosine': - # here we do not set eta_min to lr_min to be backward compatible - # because in previous versions eta_min is default to 0 - # rather than the default value of lr_min 1e-6 - scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, - args.max_step, eta_min=args.eta_min) # should use eta_min arg - if args.sample_softmax > 0: - scheduler_sparse = optim.lr_scheduler.CosineAnnealingLR(optimizer_sparse, - args.max_step, eta_min=args.eta_min) # should use eta_min arg -elif args.scheduler == 'inv_sqrt': 
- # originally used for Transformer (in Attention is all you need) - def lr_lambda(step): - # return a multiplier instead of a learning rate - if step == 0 and args.warmup_step == 0: - return 1. - else: - return 1. / (step ** 0.5) if step > args.warmup_step \ - else step / (args.warmup_step ** 1.5) - scheduler = optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lr_lambda) -elif args.scheduler == 'dev_perf': - scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, - factor=args.decay_rate, patience=args.patience, min_lr=args.lr_min) - if args.sample_softmax > 0: - scheduler_sparse = optim.lr_scheduler.ReduceLROnPlateau(optimizer_sparse, - factor=args.decay_rate, patience=args.patience, min_lr=args.lr_min) -elif args.scheduler == 'constant': - pass - -if args.cuda and args.fp16: - # If args.dynamic_loss_scale is False, static_loss_scale will be used. - # If args.dynamic_loss_scale is True, it will take precedence over static_loss_scale. - optimizer = FP16_Optimizer(optimizer, - static_loss_scale = args.static_loss_scale, - dynamic_loss_scale = args.dynamic_loss_scale, - dynamic_loss_args = {'init_scale': 2 ** 16}) - -if args.restart: - if os.path.exists(os.path.join(args.restart_dir, 'optimizer.pt')): - with open(os.path.join(args.restart_dir, 'optimizer.pt'), 'rb') as f: - opt_state_dict = torch.load(f) - optimizer.load_state_dict(opt_state_dict) - else: - print('Optimizer was not saved. Start from scratch.') - -logging('=' * 100) -for k, v in args.__dict__.items(): - logging(' - {} : {}'.format(k, v)) -logging('=' * 100) -logging('#params = {}'.format(args.n_all_param)) -logging('#non emb params = {}'.format(args.n_nonemb_param)) - -############################################################################### -# Training code -############################################################################### - -def evaluate(eval_iter): - # Turn on evaluation mode which disables dropout. - model.eval() - - # If the model does not use memory at all, make the ext_len longer. - # Otherwise, make the mem_len longer and keep the ext_len the same. - if args.mem_len == 0: - model.reset_length(args.eval_tgt_len, - args.ext_len+args.tgt_len-args.eval_tgt_len, args.mem_len) - else: - model.reset_length(args.eval_tgt_len, - args.ext_len, args.mem_len+args.tgt_len-args.eval_tgt_len) - - # Evaluation - total_len, total_loss = 0, 0. - with torch.no_grad(): - mems = tuple() - for i, (data, target, seq_len) in enumerate(eval_iter): - if args.max_eval_steps > 0 and i >= args.max_eval_steps: - break - ret = model(data, target, *mems) - loss, mems = ret[0], ret[1:] - loss = loss.mean() - total_loss += seq_len * loss.float().item() - total_len += seq_len - - # Switch back to the training mode - model.reset_length(args.tgt_len, args.ext_len, args.mem_len) - model.train() - - return total_loss / total_len - - -def train(): - # Turn on training mode which enables dropout. 
- global train_step, train_loss, best_val_loss, eval_start_time, log_start_time - model.train() - if args.batch_chunk > 1: - mems = [tuple() for _ in range(args.batch_chunk)] - else: - mems = tuple() - train_iter = tr_iter.get_varlen_iter() if args.varlen else tr_iter - for batch, (data, target, seq_len) in enumerate(train_iter): - model.zero_grad() - if args.batch_chunk > 1: - data_chunks = torch.chunk(data, args.batch_chunk, 1) - target_chunks = torch.chunk(target, args.batch_chunk, 1) - for i in range(args.batch_chunk): - data_i = data_chunks[i].contiguous() - target_i = target_chunks[i].contiguous() - ret = para_model(data_i, target_i, *mems[i]) - loss, mems[i] = ret[0], ret[1:] - loss = loss.float().mean().type_as(loss) / args.batch_chunk - if args.fp16: - optimizer.backward(loss) - else: - loss.backward() - train_loss += loss.float().item() - else: - ret = para_model(data, target, *mems) - loss, mems = ret[0], ret[1:] - loss = loss.float().mean().type_as(loss) - if args.fp16: - optimizer.backward(loss) - else: - loss.backward() - train_loss += loss.float().item() - - if args.fp16: - optimizer.clip_master_grads(args.clip) - else: - torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip) - - optimizer.step() - if args.sample_softmax > 0: - optimizer_sparse.step() - - # step-wise learning rate annealing - train_step += 1 - if args.scheduler in ['cosine', 'constant', 'dev_perf']: - # linear warmup stage - if train_step < args.warmup_step: - curr_lr = args.lr * train_step / args.warmup_step - optimizer.param_groups[0]['lr'] = curr_lr - if args.sample_softmax > 0: - optimizer_sparse.param_groups[0]['lr'] = curr_lr * 2 - else: - if args.scheduler == 'cosine': - scheduler.step(train_step) - if args.sample_softmax > 0: - scheduler_sparse.step(train_step) - elif args.scheduler == 'inv_sqrt': - scheduler.step(train_step) - - if train_step % args.log_interval == 0: - cur_loss = train_loss / args.log_interval - elapsed = time.time() - log_start_time - log_str = '| epoch {:3d} step {:>8d} | {:>6d} batches | lr {:.3g} ' \ - '| ms/batch {:5.2f} | loss {:5.2f}'.format( - epoch, train_step, batch+1, optimizer.param_groups[0]['lr'], - elapsed * 1000 / args.log_interval, cur_loss) - if args.dataset in ['enwik8', 'text8']: - log_str += ' | bpc {:9.5f}'.format(cur_loss / math.log(2)) - else: - log_str += ' | ppl {:9.3f}'.format(math.exp(cur_loss)) - logging(log_str) - train_loss = 0 - log_start_time = time.time() - - if train_step % args.eval_interval == 0: - val_loss = evaluate(va_iter) - logging('-' * 100) - log_str = '| Eval {:3d} at step {:>8d} | time: {:5.2f}s ' \ - '| valid loss {:5.2f}'.format( - train_step // args.eval_interval, train_step, - (time.time() - eval_start_time), val_loss) - if args.dataset in ['enwik8', 'text8']: - log_str += ' | bpc {:9.5f}'.format(val_loss / math.log(2)) - else: - log_str += ' | valid ppl {:9.3f}'.format(math.exp(val_loss)) - logging(log_str) - logging('-' * 100) - # Save the model if the validation loss is the best we've seen so far. 
- if not best_val_loss or val_loss < best_val_loss: - if not args.debug: - with open(os.path.join(args.work_dir, 'model.pt'), 'wb') as f: - torch.save(model, f) - with open(os.path.join(args.work_dir, 'optimizer.pt'), 'wb') as f: - torch.save(optimizer.state_dict(), f) - best_val_loss = val_loss - - # dev-performance based learning rate annealing - if args.scheduler == 'dev_perf': - scheduler.step(val_loss) - if args.sample_softmax > 0: - scheduler_sparse.step(val_loss) - - eval_start_time = time.time() - - if train_step == args.max_step: - break - -# Loop over epochs. -train_step = 0 -train_loss = 0 -best_val_loss = None - -log_start_time = time.time() -eval_start_time = time.time() - -# At any point you can hit Ctrl + C to break out of training early. -try: - for epoch in itertools.count(start=1): - train() - if train_step == args.max_step: - logging('-' * 100) - logging('End of training') - break -except KeyboardInterrupt: - logging('-' * 100) - logging('Exiting from training early') - -# Load the best saved model. -with open(os.path.join(args.work_dir, 'model.pt'), 'rb') as f: - model = torch.load(f) -para_model = model.to(device) - -# Run on test data. -test_loss = evaluate(te_iter) -logging('=' * 100) -if args.dataset in ['enwik8', 'text8']: - logging('| End of training | test loss {:5.2f} | test bpc {:9.5f}'.format( - test_loss, test_loss / math.log(2))) -else: - logging('| End of training | test loss {:5.2f} | test ppl {:9.3f}'.format( - test_loss, math.exp(test_loss))) -logging('=' * 100) diff --git a/transformer-xl/pytorch/utils/adaptive_softmax.py b/transformer-xl/pytorch/utils/adaptive_softmax.py deleted file mode 100644 index f22da23..0000000 --- a/transformer-xl/pytorch/utils/adaptive_softmax.py +++ /dev/null @@ -1,90 +0,0 @@ -from collections import defaultdict - -import numpy as np - -import torch -import torch.nn as nn -import torch.nn.functional as F - -class AdaptiveLogSoftmax(nn.Module): - def __init__(self, in_features, n_classes, cutoffs, keep_order=False): - super(AdaptiveLogSoftmax, self).__init__() - - cutoffs = list(cutoffs) - - if (cutoffs != sorted(cutoffs)) \ - or (min(cutoffs) <= 0) \ - or (max(cutoffs) >= (n_classes - 1)) \ - or (len(set(cutoffs)) != len(cutoffs)) \ - or any([int(c) != c for c in cutoffs]): - - raise ValueError("cutoffs should be a sequence of unique, positive " - "integers sorted in an increasing order, where " - "each value is between 1 and n_classes-1") - - self.in_features = in_features - self.n_classes = n_classes - self.cutoffs = cutoffs + [n_classes] - - self.shortlist_size = self.cutoffs[0] - self.n_clusters = len(self.cutoffs) - 1 - self.head_size = self.shortlist_size + self.n_clusters - - self.cluster_weight = nn.Parameter(torch.zeros(self.n_clusters, self.in_features)) - self.cluster_bias = nn.Parameter(torch.zeros(self.n_clusters)) - - self.keep_order = keep_order - - - def forward(self, hidden, target, weight, bias, keep_order=False): - if hidden.size(0) != target.size(0): - raise RuntimeError('Input and target should have the same size ' - 'in the batch dimension.') - - head_weight = torch.cat( - [weight[:self.shortlist_size], self.cluster_weight], dim=0) - head_bias = torch.cat( - [bias[:self.shortlist_size], self.cluster_bias], dim=0) - - head_logit = F.linear(hidden, head_weight, bias=head_bias) - head_logprob = F.log_softmax(head_logit, dim=1) - - nll = torch.zeros_like(target, - dtype=hidden.dtype, device=hidden.DEVICE) - - offset = 0 - cutoff_values = [0] + self.cutoffs - for i in range(len(cutoff_values) - 1): - l_idx, 
h_idx = cutoff_values[i], cutoff_values[i + 1] - - mask_i = (target >= l_idx) & (target < h_idx) - indices_i = mask_i.nonzero().squeeze() - - if indices_i.numel() == 0: - continue - - target_i = target.index_select(0, indices_i) - l_idx - head_logprob_i = head_logprob.index_select(0, indices_i) - - if i == 0: - logprob_i = head_logprob_i.gather(1, target_i[:,None]).squeeze(1) - else: - weight_i = weight[l_idx:h_idx] - bias_i = bias[l_idx:h_idx] - - hidden_i = hidden.index_select(0, indices_i) - - tail_logit_i = F.linear(hidden_i, weight_i, bias=bias_i) - tail_logprob_i = F.log_softmax(tail_logit_i, dim=1) - - logprob_i = head_logprob_i[:, -i] \ - + tail_logprob_i.gather(1, target_i[:,None]).squeeze(1) - - if (hasattr(self, 'keep_order') and self.keep_order) or keep_order: - nll.index_copy_(0, indices_i, -logprob_i) - else: - nll[offset:offset+logprob_i.size(0)].copy_(-logprob_i) - - offset += logprob_i.size(0) - - return nll diff --git a/transformer-xl/pytorch/utils/data_parallel.py b/transformer-xl/pytorch/utils/data_parallel.py deleted file mode 100644 index d7e1811..0000000 --- a/transformer-xl/pytorch/utils/data_parallel.py +++ /dev/null @@ -1,91 +0,0 @@ - -from torch.nn.parallel import DataParallel -import torch -from torch.nn.parallel._functions import Scatter -from torch.nn.parallel.parallel_apply import parallel_apply - -def scatter(inputs, target_gpus, chunk_sizes, dim=0): - r""" - Slices tensors into approximately equal chunks and - distributes them across given GPUs. Duplicates - references to objects that are not tensors. - """ - def scatter_map(obj): - if isinstance(obj, torch.Tensor): - try: - return Scatter.apply(target_gpus, chunk_sizes, dim, obj) - except: - print('obj', obj.size()) - print('dim', dim) - print('chunk_sizes', chunk_sizes) - quit() - if isinstance(obj, tuple) and len(obj) > 0: - return list(zip(*map(scatter_map, obj))) - if isinstance(obj, list) and len(obj) > 0: - return list(map(list, zip(*map(scatter_map, obj)))) - if isinstance(obj, dict) and len(obj) > 0: - return list(map(type(obj), zip(*map(scatter_map, obj.items())))) - return [obj for targets in target_gpus] - - # After scatter_map is called, a scatter_map cell will exist. This cell - # has a reference to the actual function scatter_map, which has references - # to a closure that has a reference to the scatter_map cell (because the - # fn is recursive). 
To avoid this reference cycle, we set the function to
-    # None, clearing the cell
-    try:
-        return scatter_map(inputs)
-    finally:
-        scatter_map = None
-
-def scatter_kwargs(inputs, kwargs, target_gpus, chunk_sizes, dim=0):
-    r"""Scatter with support for kwargs dictionary"""
-    inputs = scatter(inputs, target_gpus, chunk_sizes, dim) if inputs else []
-    kwargs = scatter(kwargs, target_gpus, chunk_sizes, dim) if kwargs else []
-    if len(inputs) < len(kwargs):
-        inputs.extend([() for _ in range(len(kwargs) - len(inputs))])
-    elif len(kwargs) < len(inputs):
-        kwargs.extend([{} for _ in range(len(inputs) - len(kwargs))])
-    inputs = tuple(inputs)
-    kwargs = tuple(kwargs)
-    return inputs, kwargs
-
-class BalancedDataParallel(DataParallel):
-    def __init__(self, gpu0_bsz, *args, **kwargs):
-        self.gpu0_bsz = gpu0_bsz
-        super().__init__(*args, **kwargs)
-
-    def forward(self, *inputs, **kwargs):
-        if not self.device_ids:
-            return self.module(*inputs, **kwargs)
-        if self.gpu0_bsz == 0:
-            device_ids = self.device_ids[1:]
-        else:
-            device_ids = self.device_ids
-        inputs, kwargs = self.scatter(inputs, kwargs, device_ids)
-        if len(self.device_ids) == 1:
-            return self.module(*inputs[0], **kwargs[0])
-        replicas = self.replicate(self.module, self.device_ids)
-        if self.gpu0_bsz == 0:
-            replicas = replicas[1:]
-        outputs = self.parallel_apply(replicas, device_ids, inputs, kwargs)
-        return self.gather(outputs, self.output_device)
-
-    def parallel_apply(self, replicas, device_ids, inputs, kwargs):
-        return parallel_apply(replicas, inputs, kwargs, device_ids)
-
-    def scatter(self, inputs, kwargs, device_ids):
-        bsz = inputs[0].size(self.dim)
-        num_dev = len(self.device_ids)
-        gpu0_bsz = self.gpu0_bsz
-        bsz_unit = (bsz - gpu0_bsz) // (num_dev - 1)
-        if gpu0_bsz < bsz_unit:
-            chunk_sizes = [gpu0_bsz] + [bsz_unit] * (num_dev - 1)
-            delta = bsz - sum(chunk_sizes)
-            for i in range(delta):
-                chunk_sizes[i + 1] += 1
-            if gpu0_bsz == 0:
-                chunk_sizes = chunk_sizes[1:]
-        else:
-            return super().scatter(inputs, kwargs, device_ids)
-        return scatter_kwargs(inputs, kwargs, device_ids, chunk_sizes, dim=self.dim)
-
diff --git a/transformer-xl/pytorch/utils/exp_utils.py b/transformer-xl/pytorch/utils/exp_utils.py
deleted file mode 100644
index e44f7c2..0000000
--- a/transformer-xl/pytorch/utils/exp_utils.py
+++ /dev/null
@@ -1,40 +0,0 @@
-import functools
-import os, shutil
-
-import numpy as np
-
-import torch
-
-
-def logging(s, log_path, print_=True, log_=True):
-    if print_:
-        print(s)
-    if log_:
-        with open(log_path, 'a+') as f_log:
-            f_log.write(s + '\n')
-
-def get_logger(log_path, **kwargs):
-    return functools.partial(logging, log_path=log_path, **kwargs)
-
-def create_exp_dir(dir_path, scripts_to_save=None, debug=False):
-    if debug:
-        print('Debug Mode : no experiment dir created')
-        return functools.partial(logging, log_path=None, log_=False)
-
-    if not os.path.exists(dir_path):
-        os.makedirs(dir_path)
-
-    print('Experiment dir : {}'.format(dir_path))
-    if scripts_to_save is not None:
-        script_path = os.path.join(dir_path, 'scripts')
-        if not os.path.exists(script_path):
-            os.makedirs(script_path)
-        for script in scripts_to_save:
-            dst_file = os.path.join(dir_path, 'scripts', os.path.basename(script))
-            shutil.copyfile(script, dst_file)
-
-    return get_logger(log_path=os.path.join(dir_path, 'log.txt'))
-
-def save_checkpoint(model, optimizer, path, epoch):
-    torch.save(model, os.path.join(path, 'model_{}.pt'.format(epoch)))
-    torch.save(optimizer.state_dict(), os.path.join(path, 'optimizer_{}.pt'.format(epoch)))
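The `LogUniformSampler` removed in the next hunk draws negative samples from a Zipf-like proposal; its docstring gives the distribution and the expected-count approximation, which are easy to check numerically. A minimal sketch with NumPy (`range_max` and `n_sample` are illustrative values, not taken from this repo):

```python
import numpy as np

range_max, n_sample = 10000, 64

# P(class = k) = (log(k + 2) - log(k + 1)) / log(range_max + 1), for k = 0..range_max-1
log_indices = np.log(np.arange(1.0, range_max + 2.0))
p = (log_indices[1:] - log_indices[:-1]) / log_indices[-1]
assert np.isclose(p.sum(), 1.0)  # a valid probability distribution

# Expected count of class k after num_tries = 2 * n_sample draws with replacement:
# 1 - (1 - p)^n, computed stably as -expm1(n * log1p(-p)); the sampler's
# log_q is the log of this quantity.
expected_count = -np.expm1(2 * n_sample * np.log1p(-p))
print(p[:3], expected_count[:3])
```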
diff --git a/transformer-xl/pytorch/utils/log_uniform_sampler.py b/transformer-xl/pytorch/utils/log_uniform_sampler.py
deleted file mode 100644
index 857ad52..0000000
--- a/transformer-xl/pytorch/utils/log_uniform_sampler.py
+++ /dev/null
@@ -1,147 +0,0 @@
-import torch
-from torch import nn
-import numpy as np
-
-class LogUniformSampler(object):
-    def __init__(self, range_max, n_sample):
-        """
-        Reference : https://github.com/tensorflow/tensorflow/blob/r1.10/tensorflow/python/ops/candidate_sampling_ops.py
-            `P(class) = (log(class + 2) - log(class + 1)) / log(range_max + 1)`
-
-            expected count can be approximated by 1 - (1 - p)^n
-            and we use a numerically stable version -expm1(num_tries * log1p(-p))
-
-        Our implementation fixes num_tries at 2 * n_sample, and the actual #samples will vary from run to run
-        """
-        with torch.no_grad():
-            self.range_max = range_max
-            log_indices = torch.arange(1., range_max+2., 1.).log_()
-            self.dist = (log_indices[1:] - log_indices[:-1]) / log_indices[-1]
-            # print('P', self.dist.numpy().tolist()[-30:])
-
-            self.log_q = (- (-self.dist.double().log1p_() * 2 * n_sample).expm1_()).log_().float()
-
-        self.n_sample = n_sample
-
-    def sample(self, labels):
-        """
-            labels: [b1, b2]
-        Return
-            true_log_probs: [b1, b2]
-            samp_log_probs: [n_sample]
-            neg_samples: [n_sample]
-        """
-
-        # neg_samples = torch.empty(0).long()
-        n_sample = self.n_sample
-        n_tries = 2 * n_sample
-
-        with torch.no_grad():
-            neg_samples = torch.multinomial(self.dist, n_tries, replacement=True).unique()
-            device = labels.device
-            neg_samples = neg_samples.to(device)
-            true_log_probs = self.log_q[labels].to(device)
-            samp_log_probs = self.log_q[neg_samples].to(device)
-            return true_log_probs, samp_log_probs, neg_samples
-
-def sample_logits(embedding, bias, labels, inputs, sampler):
-    """
-        embedding: an nn.Embedding layer
-        bias: [n_vocab]
-        labels: [b1, b2]
-        inputs: [b1, b2, n_emb]
-        sampler: you may use a LogUniformSampler
-    Return
-        logits: [b1, b2, 1 + n_sample]
-    """
-    true_log_probs, samp_log_probs, neg_samples = sampler.sample(labels)
-    n_sample = neg_samples.size(0)
-    b1, b2 = labels.size(0), labels.size(1)
-    all_ids = torch.cat([labels.view(-1), neg_samples])
-    all_w = embedding(all_ids)
-    true_w = all_w[: -n_sample].view(b1, b2, -1)
-    sample_w = all_w[- n_sample:].view(n_sample, -1)
-
-    all_b = bias[all_ids]
-    true_b = all_b[: -n_sample].view(b1, b2)
-    sample_b = all_b[- n_sample:]
-
-    hit = (labels[:, :, None] == neg_samples).detach()
-
-    true_logits = torch.einsum('ijk,ijk->ij',
-        [true_w, inputs]) + true_b - true_log_probs
-    sample_logits = torch.einsum('lk,ijk->ijl',
-        [sample_w, inputs]) + sample_b - samp_log_probs
-    sample_logits.masked_fill_(hit, -1e30)
-    logits = torch.cat([true_logits[:, :, None], sample_logits], -1)
-
-    return logits
-
-
-# class LogUniformSampler(object):
-#     def __init__(self, range_max, unique=False):
-#         """
-#         Reference : https://github.com/tensorflow/tensorflow/blob/r1.10/tensorflow/python/ops/candidate_sampling_ops.py
-#             `P(class) = (log(class + 2) - log(class + 1)) / log(range_max + 1)`
-#         """
-#         self.range_max = range_max
-#         log_indices = torch.arange(1., range_max+2., 1.).log_()
-#         self.dist = (log_indices[1:] - log_indices[:-1]) / log_indices[-1]
-
-#         self.unique = unique
-
-#         if self.unique:
-#             self.exclude_mask = torch.ByteTensor(range_max).fill_(0)
-
-#     def sample(self, n_sample, labels):
-#         pos_sample, new_labels = labels.unique(return_inverse=True)
-#         n_pos_sample = pos_sample.size(0)
-#         n_neg_sample = n_sample - n_pos_sample
-
-#         if self.unique:
-#
self.exclude_mask.index_fill_(0, pos_sample, 1) -# sample_dist = self.dist.clone().masked_fill_(self.exclude_mask, 0) -# self.exclude_mask.index_fill_(0, pos_sample, 0) -# else: -# sample_dist = self.dist - -# neg_sample = torch.multinomial(sample_dist, n_neg_sample) - -# sample = torch.cat([pos_sample, neg_sample]) -# sample_prob = self.dist[sample] - -# return new_labels, sample, sample_prob - - -if __name__ == '__main__': - S, B = 3, 4 - n_vocab = 10000 - n_sample = 5 - H = 32 - - labels = torch.LongTensor(S, B).random_(0, n_vocab) - - # sampler = LogUniformSampler(n_vocab, unique=False) - # new_labels, sample, sample_prob = sampler.sample(n_sample, labels) - - sampler = LogUniformSampler(n_vocab, unique=True) - # true_probs, samp_probs, neg_samples = sampler.sample(n_sample, labels) - - # print('true_probs', true_probs.numpy().tolist()) - # print('samp_probs', samp_probs.numpy().tolist()) - # print('neg_samples', neg_samples.numpy().tolist()) - - # print('sum', torch.sum(sampler.dist).item()) - - # assert torch.all(torch.sort(sample.unique())[0].eq(torch.sort(sample)[0])).item() - - embedding = nn.Embedding(n_vocab, H) - bias = torch.zeros(n_vocab) - inputs = torch.Tensor(S, B, H).normal_() - - logits, out_labels = sample_logits(embedding, bias, labels, inputs, sampler, n_sample) - print('logits', logits.detach().numpy().tolist()) - print('logits shape', logits.size()) - print('out_labels', out_labels.detach().numpy().tolist()) - print('out_labels shape', out_labels.size()) - diff --git a/transformer-xl/pytorch/utils/proj_adaptive_softmax.py b/transformer-xl/pytorch/utils/proj_adaptive_softmax.py deleted file mode 100644 index c5a0f84..0000000 --- a/transformer-xl/pytorch/utils/proj_adaptive_softmax.py +++ /dev/null @@ -1,151 +0,0 @@ -from collections import defaultdict - -import numpy as np - -import torch -import torch.nn as nn -import torch.nn.functional as F - -CUDA_MAJOR = int(torch.version.cuda.split('.')[0]) -CUDA_MINOR = int(torch.version.cuda.split('.')[1]) - -class ProjectedAdaptiveLogSoftmax(nn.Module): - def __init__(self, n_token, d_embed, d_proj, cutoffs, div_val=1, - keep_order=False): - super(ProjectedAdaptiveLogSoftmax, self).__init__() - - self.n_token = n_token - self.d_embed = d_embed - self.d_proj = d_proj - - self.cutoffs = cutoffs + [n_token] - self.cutoff_ends = [0] + self.cutoffs - self.div_val = div_val - - self.shortlist_size = self.cutoffs[0] - self.n_clusters = len(self.cutoffs) - 1 - self.head_size = self.shortlist_size + self.n_clusters - - if self.n_clusters > 0: - self.cluster_weight = nn.Parameter(torch.zeros(self.n_clusters, self.d_embed)) - self.cluster_bias = nn.Parameter(torch.zeros(self.n_clusters)) - - self.out_layers = nn.ModuleList() - self.out_projs = nn.ParameterList() - - if div_val == 1: - for i in range(len(self.cutoffs)): - if d_proj != d_embed: - self.out_projs.append( - nn.Parameter(torch.Tensor(d_proj, d_embed)) - ) - else: - self.out_projs.append(None) - - self.out_layers.append(nn.Linear(d_embed, n_token)) - else: - for i in range(len(self.cutoffs)): - l_idx, r_idx = self.cutoff_ends[i], self.cutoff_ends[i+1] - d_emb_i = d_embed // (div_val ** i) - - self.out_projs.append( - nn.Parameter(torch.Tensor(d_proj, d_emb_i)) - ) - - self.out_layers.append(nn.Linear(d_emb_i, r_idx-l_idx)) - - self.keep_order = keep_order - - def _compute_logit(self, hidden, weight, bias, proj): - if proj is None: - logit = F.linear(hidden, weight, bias=bias) - else: - # if CUDA_MAJOR <= 9 and CUDA_MINOR <= 1: - proj_hid = F.linear(hidden, 
proj.t().contiguous()) - logit = F.linear(proj_hid, weight, bias=bias) - # else: - # logit = torch.einsum('bd,de,ev->bv', (hidden, proj, weight.t())) - # if bias is not None: - # logit = logit + bias - - return logit - - def forward(self, hidden, target, keep_order=False): - ''' - hidden :: [len*bsz x d_proj] - target :: [len*bsz] - ''' - - if hidden.size(0) != target.size(0): - raise RuntimeError('Input and target should have the same size ' - 'in the batch dimension.') - - if self.n_clusters == 0: - logit = self._compute_logit(hidden, self.out_layers[0].weight, - self.out_layers[0].bias, self.out_projs[0]) - nll = -F.log_softmax(logit, dim=-1) \ - .gather(1, target.unsqueeze(1)).squeeze(1) - else: - # construct weights and biases - weights, biases = [], [] - for i in range(len(self.cutoffs)): - if self.div_val == 1: - l_idx, r_idx = self.cutoff_ends[i], self.cutoff_ends[i + 1] - weight_i = self.out_layers[0].weight[l_idx:r_idx] - bias_i = self.out_layers[0].bias[l_idx:r_idx] - else: - weight_i = self.out_layers[i].weight - bias_i = self.out_layers[i].bias - - if i == 0: - weight_i = torch.cat( - [weight_i, self.cluster_weight], dim=0) - bias_i = torch.cat( - [bias_i, self.cluster_bias], dim=0) - - weights.append(weight_i) - biases.append(bias_i) - - head_weight, head_bias, head_proj = weights[0], biases[0], self.out_projs[0] - - head_logit = self._compute_logit(hidden, head_weight, head_bias, head_proj) - head_logprob = F.log_softmax(head_logit, dim=1) - - nll = torch.zeros_like(target, - dtype=hidden.dtype, device=hidden.DEVICE) - - offset = 0 - cutoff_values = [0] + self.cutoffs - for i in range(len(cutoff_values) - 1): - l_idx, r_idx = cutoff_values[i], cutoff_values[i + 1] - - mask_i = (target >= l_idx) & (target < r_idx) - indices_i = mask_i.nonzero().squeeze() - - if indices_i.numel() == 0: - continue - - target_i = target.index_select(0, indices_i) - l_idx - head_logprob_i = head_logprob.index_select(0, indices_i) - - if i == 0: - logprob_i = head_logprob_i.gather(1, target_i[:,None]).squeeze(1) - else: - weight_i, bias_i, proj_i = weights[i], biases[i], self.out_projs[i] - - hidden_i = hidden.index_select(0, indices_i) - - tail_logit_i = self._compute_logit(hidden_i, weight_i, bias_i, proj_i) - tail_logprob_i = F.log_softmax(tail_logit_i, dim=1) - - logprob_i = head_logprob_i[:, -i] \ - + tail_logprob_i.gather(1, target_i[:,None]).squeeze(1) - - if (hasattr(self, 'keep_order') and self.keep_order) or keep_order: - nll.index_copy_(0, indices_i, -logprob_i) - else: - nll[offset:offset+logprob_i.size(0)].copy_(-logprob_i) - - offset += logprob_i.size(0) - - return nll diff --git a/transformer-xl/pytorch/utils/vocabulary.py b/transformer-xl/pytorch/utils/vocabulary.py deleted file mode 100644 index b6b8249..0000000 --- a/transformer-xl/pytorch/utils/vocabulary.py +++ /dev/null @@ -1,163 +0,0 @@ -import os -from collections import Counter, OrderedDict - -import torch - -class Vocab(object): - def __init__(self, special=[], min_freq=0, max_size=None, lower_case=True, - delimiter=None, vocab_file=None): - self.counter = Counter() - self.special = special - self.min_freq = min_freq - self.max_size = max_size - self.lower_case = lower_case - self.delimiter = delimiter - self.vocab_file = vocab_file - - def tokenize(self, line, add_eos=False, add_double_eos=False): - line = line.strip() - # convert to lower case - if self.lower_case: - line = line.lower() - - # empty delimiter '' will evaluate False - if self.delimiter == '': - symbols = line - else: - symbols = line.split(self.delimiter) 
-
-        if add_double_eos: # lm1b
-            return ['<S>'] + symbols + ['<S>']
-        elif add_eos:
-            return symbols + ['<eos>']
-        else:
-            return symbols
-
-    def count_file(self, path, verbose=False, add_eos=False):
-        if verbose: print('counting file {} ...'.format(path))
-        assert os.path.exists(path)
-
-        sents = []
-        with open(path, 'r', encoding='utf-8') as f:
-            for idx, line in enumerate(f):
-                if verbose and idx > 0 and idx % 500000 == 0:
-                    print('    line {}'.format(idx))
-                symbols = self.tokenize(line, add_eos=add_eos)
-                self.counter.update(symbols)
-                sents.append(symbols)
-
-        return sents
-
-    def count_sents(self, sents, verbose=False):
-        """
-            sents : a list of sentences, each a list of tokenized symbols
-        """
-        if verbose: print('counting {} sents ...'.format(len(sents)))
-        for idx, symbols in enumerate(sents):
-            if verbose and idx > 0 and idx % 500000 == 0:
-                print('    line {}'.format(idx))
-            self.counter.update(symbols)
-
-    def _build_from_file(self, vocab_file):
-        self.idx2sym = []
-        self.sym2idx = OrderedDict()
-
-        with open(vocab_file, 'r', encoding='utf-8') as f:
-            for line in f:
-                symb = line.strip().split()[0]
-                self.add_symbol(symb)
-        self.unk_idx = self.sym2idx['<UNK>']
-
-    def build_vocab(self):
-        if self.vocab_file:
-            print('building vocab from {}'.format(self.vocab_file))
-            self._build_from_file(self.vocab_file)
-            print('final vocab size {}'.format(len(self)))
-        else:
-            print('building vocab with min_freq={}, max_size={}'.format(
-                self.min_freq, self.max_size))
-            self.idx2sym = []
-            self.sym2idx = OrderedDict()
-
-            for sym in self.special:
-                self.add_special(sym)
-
-            for sym, cnt in self.counter.most_common(self.max_size):
-                if cnt < self.min_freq: break
-                self.add_symbol(sym)
-
-            print('final vocab size {} from {} unique tokens'.format(
-                len(self), len(self.counter)))
-
-    def encode_file(self, path, ordered=False, verbose=False, add_eos=True,
-            add_double_eos=False):
-        if verbose: print('encoding file {} ...'.format(path))
-        assert os.path.exists(path)
-        encoded = []
-        with open(path, 'r', encoding='utf-8') as f:
-            for idx, line in enumerate(f):
-                if verbose and idx > 0 and idx % 500000 == 0:
-                    print('    line {}'.format(idx))
-                symbols = self.tokenize(line, add_eos=add_eos,
-                    add_double_eos=add_double_eos)
-                encoded.append(self.convert_to_tensor(symbols))
-
-        if ordered:
-            encoded = torch.cat(encoded)
-
-        return encoded
-
-    def encode_sents(self, sents, ordered=False, verbose=False):
-        if verbose: print('encoding {} sents ...'.format(len(sents)))
-        encoded = []
-        for idx, symbols in enumerate(sents):
-            if verbose and idx > 0 and idx % 500000 == 0:
-                print('    line {}'.format(idx))
-            encoded.append(self.convert_to_tensor(symbols))
-
-        if ordered:
-            encoded = torch.cat(encoded)
-
-        return encoded
-
-    def add_special(self, sym):
-        if sym not in self.sym2idx:
-            self.idx2sym.append(sym)
-            self.sym2idx[sym] = len(self.idx2sym) - 1
-            setattr(self, '{}_idx'.format(sym.strip('<>')), self.sym2idx[sym])
-
-    def add_symbol(self, sym):
-        if sym not in self.sym2idx:
-            self.idx2sym.append(sym)
-            self.sym2idx[sym] = len(self.idx2sym) - 1
-
-    def get_sym(self, idx):
-        assert 0 <= idx < len(self), 'Index {} out of range'.format(idx)
-        return self.idx2sym[idx]
-
-    def get_idx(self, sym):
-        if sym in self.sym2idx:
-            return self.sym2idx[sym]
-        else:
-            # print('encounter unk {}'.format(sym))
-            assert '<eos>' not in sym
-            assert hasattr(self, 'unk_idx')
-            return self.sym2idx.get(sym, self.unk_idx)
-
-    def get_symbols(self, indices):
-        return [self.get_sym(idx) for idx in indices]
-
-    def get_indices(self, symbols):
-        return [self.get_idx(sym) for sym in symbols]
-
-    def convert_to_tensor(self, symbols):
-        return torch.LongTensor(self.get_indices(symbols))
-
-    def convert_to_sent(self, indices, exclude=None):
-        if exclude is None:
-            return ' '.join([self.get_sym(idx) for idx in indices])
-        else:
-            return ' '.join([self.get_sym(idx) for idx in indices if idx not in exclude])
-
-    def __len__(self):
-        return len(self.idx2sym)
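For context on the class removed above: a minimal usage sketch of the deleted `Vocab` API, going from raw text to a flat tensor of symbol indices (the file paths are hypothetical):

```python
from vocabulary import Vocab  # the module deleted above

# Build a word-level vocabulary from a hypothetical training file.
vocab = Vocab(special=['<eos>'], lower_case=False)
vocab.count_file('data/train.txt', add_eos=True)  # populate the frequency Counter
vocab.build_vocab()                               # freeze idx2sym / sym2idx

# Encode the same file into one concatenated LongTensor of indices.
train = vocab.encode_file('data/train.txt', ordered=True, add_eos=True)
print(len(vocab), train.size())
```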
diff --git a/transformer-xl/tf/README.md b/transformer-xl/tf/README.md
deleted file mode 100644
index 1cd82a0..0000000
--- a/transformer-xl/tf/README.md
+++ /dev/null
@@ -1,131 +0,0 @@
-
-## Introduction
-
-This directory contains our TF implementation of Transformer-XL. Note that our state-of-the-art results reported in the paper were obtained by training the model on a large-scale TPU cluster, and our gpu codebase currently does not support distributed training. Here we provide two sets of hyperparameters and scripts:
-- `*large_tpu.sh` are for the SoTA setting on TPUs. These are exactly the commands we used to obtain our best results.
-- `*base_gpu.sh` are for the base models which can be run on a few GPUs.
-
-## Prerequisite
-
-- Python 2.7
-- Tensorflow [1.12.0](https://github.com/tensorflow/tensorflow/releases/tag/v1.12.0)
-
-## Obtain and evaluate pretrained SoTA models
-
-#### 1. Download preprocessed data (vocab) & pretrained models
-
-(a) Set your own `DATA_ROOT` in `sota/download.sh` (default to `./`), which will be the root directory of the downloaded model.
-
-(b) Then, download the model & data by `bash sota/download.sh`. After downloading, the expected directory structure is as follows
-
-```markdown
-pretrained_xl
-    tf_enwik8/
-        data/
-            cache.pkl
-            corpus-info.json
-        model/
-            checkpoint
-            model.ckpt*
-    tf_wt103/
-        ...
-    ...
-```
-
-**Note**: we include preprocessed data in the download files to make sure the **same vocabulary** is used. Please see the code `tf/data_utils.py` to understand the data structure.
-
-#### 2. Run evaluation scripts to replicate SoTA results on GPUs
-
-- **enwik8**: modify the script `sota/enwik8.sh` accordingly (see below)
-  - set `DATA_ROOT` to the same folder used in the download step (default to `./`)
-  - set `TEST_NUM_CORE` (number of GPUs to use): we recommend 2 GPUs => about 60 mins
-  - run the script: `bash sota/enwik8.sh`
-
-- **lm1b**: modify the script `sota/lm1b.sh` accordingly (see below)
-  - set `DATA_ROOT` to the same folder used in the download step (default to `./`)
-  - set `TEST_NUM_CORE` (number of GPUs to use): we recommend 1 GPU => less than 5 mins
-  - run the script: `bash sota/lm1b.sh`
-
-- **wt103**: modify the script `sota/wt103.sh` accordingly (see below)
-  - set `DATA_ROOT` to the same folder used in the download step (default to `./`)
-  - set `TEST_NUM_CORE` (number of GPUs to use): we recommend 1 GPU => less than 5 mins
-  - run the script: `bash sota/wt103.sh`
-
-- **text8**: modify the script `sota/text8.sh` accordingly (see below)
-  - set `DATA_ROOT` to the same folder used in the download step (default to `./`)
-  - set `TEST_NUM_CORE` (number of GPUs to use): we recommend 2 GPUs => about 60 mins
-  - run the script: `bash sota/text8.sh`
-
-#### 3. Resources Needed for SoTA Model Training
-
-We used 32, 32, 64, and 512 TPU cores for training our best models on enwik8, text8, wt103, and lm1b respectively. The training time for each model ranges from 2 to 5 days.
-
-## Train "Transformer-XL" from scratch with GPUs or TPUs
-
-### 1. Download raw data
-
-`bash getdata.sh`
-
-### 2. Preprocess, training and evaluation
-
-For `dataset` in `[enwik8, lm1b, wt103, text8]`:
-
-- check out `scripts/dataset_base_gpu.sh` for GPU training and evaluation
-- check out `scripts/dataset_large_tpu.sh` for TPU training and evaluation
-
-#### (1) Preprocess raw data and create tfrecords
-
-**NOTE**: The preprocessing for GPU and TPU is different, so you have to run it separately for each.
-
-GPU:
-
-- create training and validation data: `bash scripts/dataset_base_gpu.sh train_data`
-- create test data: `bash scripts/dataset_base_gpu.sh test_data`
-
-TPU:
-
-- Set the Google storage URL in `scripts/dataset_large_tpu.sh`:
-  - `GSDATA`: data URL
-  - `GSEXP`: experiment URL
-- create training and validation data: `bash scripts/dataset_large_tpu.sh train_data`
-- create test data: `bash scripts/dataset_large_tpu.sh test_data`
-
-#### (2) Run training
-
-Base models on GPUs:
-
-- Modify the configurations in `scripts/dataset_base_gpu.sh` according to your needs.
-- `bash scripts/dataset_base_gpu.sh train`
-- If enough resources are available, increase the model sizes (e.g., `N_LAYER`, `D_MODEL`, `D_EMBED`, `D_HEAD`, `D_INNER`) so that they are closer to the values defined in `scripts/dataset_large_tpu.sh`. Likewise, when resources are limited, decrease the model sizes. It is recommended to ensure that `D_MODEL == D_EMBED` and `D_MODEL == N_HEAD x D_HEAD`. When the model sizes increase, remember to increase `warmup_steps` accordingly to alleviate optimization difficulties.
-- Adjust the `NUM_CORE` parameter to reflect the number of GPUs to use.
-
-Larger models on TPUs:
-
-- Modify the configurations in `scripts/dataset_large_tpu.sh` according to your needs.
-- `bash scripts/dataset_large_tpu.sh train`
-
-#### (3) Run evaluation
-
-Base models on GPUs:
-
-- `bash scripts/dataset_base_gpu.sh eval --eval_ckpt_path PATH_TO_CKPT`
-
-Larger models on TPUs:
-
-- `bash scripts/dataset_large_tpu.sh eval --eval_ckpt_path PATH_TO_CKPT`
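The `avg_checkpoints.py` utility removed in the next hunk averages variable values across several saved checkpoints before evaluation. The core idea, sketched here with plain NumPy dicts standing in for checkpoint variables (the names and shapes are hypothetical):

```python
import numpy as np

# Each checkpoint maps variable names to arrays; averaging them element-wise
# often gives a slightly better final model than the last checkpoint alone.
checkpoints = [
    {"w": np.array([1.0, 2.0]), "b": np.array([0.1])},
    {"w": np.array([3.0, 4.0]), "b": np.array([0.3])},
]

averaged = {
    name: sum(ckpt[name] for ckpt in checkpoints) / len(checkpoints)
    for name in checkpoints[0]
}
print(averaged)  # {'w': array([2., 3.]), 'b': array([0.2])}
```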
diff --git a/transformer-xl/tf/avg_checkpoints.py b/transformer-xl/tf/avg_checkpoints.py
deleted file mode 100644
index ffa71b6..0000000
--- a/transformer-xl/tf/avg_checkpoints.py
+++ /dev/null
@@ -1,118 +0,0 @@
-# coding=utf-8
-# Copyright 2018 The Tensor2Tensor Authors.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Script to average values of variables in a list of checkpoint files."""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import os
-import numpy as np
-import six
-from six.moves import zip  # pylint: disable=redefined-builtin
-import tensorflow as tf
-
-flags = tf.flags
-FLAGS = flags.FLAGS
-
-flags.DEFINE_string("checkpoints", "",
-                    "Comma-separated list of checkpoints to average.")
-flags.DEFINE_integer("num_last_checkpoints", 0,
-                     "Averages the last N saved checkpoints."
- " If the checkpoints flag is set, this is ignored.") -flags.DEFINE_string("prefix", "", - "Prefix (e.g., directory) to append to each checkpoint.") -flags.DEFINE_string("output_path", "/tmp/averaged.ckpt", - "Path to output the averaged checkpoint to.") - - -def checkpoint_exists(path): - return (tf.gfile.Exists(path) or tf.gfile.Exists(path + ".meta") or - tf.gfile.Exists(path + ".index")) - - -def main(_): - tf.logging.set_verbosity(tf.logging.INFO) - if FLAGS.checkpoints: - # Get the checkpoints list from flags and run some basic checks. - checkpoints = [c.strip() for c in FLAGS.checkpoints.split(",")] - checkpoints = [c for c in checkpoints if c] - if not checkpoints: - raise ValueError("No checkpoints provided for averaging.") - if FLAGS.prefix: - checkpoints = [FLAGS.prefix + c for c in checkpoints] - else: - assert FLAGS.num_last_checkpoints >= 1, "Must average at least one model" - assert FLAGS.prefix, ("Prefix must be provided when averaging last" - " N checkpoints") - checkpoint_state = tf.train.get_checkpoint_state( - os.path.dirname(FLAGS.prefix)) - # Checkpoints are ordered from oldest to newest. - checkpoints = checkpoint_state.all_model_checkpoint_paths[ - -FLAGS.num_last_checkpoints:] - - checkpoints = [c for c in checkpoints if checkpoint_exists(c)] - if not checkpoints: - if FLAGS.checkpoints: - raise ValueError( - "None of the provided checkpoints exist. %s" % FLAGS.checkpoints) - else: - raise ValueError("Could not find checkpoints at %s" % - os.path.dirname(FLAGS.prefix)) - - # Read variables from all checkpoints and average them. - tf.logging.info("Reading variables and averaging checkpoints:") - for c in checkpoints: - tf.logging.info("%s ", c) - var_list = tf.contrib.framework.list_variables(checkpoints[0]) - var_values, var_dtypes = {}, {} - for (name, shape) in var_list: - if not name.startswith("global_step"): - var_values[name] = np.zeros(shape) - for checkpoint in checkpoints: - reader = tf.contrib.framework.load_checkpoint(checkpoint) - for name in var_values: - tensor = reader.get_tensor(name) - var_dtypes[name] = tensor.dtype - var_values[name] += tensor - tf.logging.info("Read from checkpoint %s", checkpoint) - for name in var_values: # Average. - var_values[name] /= len(checkpoints) - - with tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE): - tf_vars = [ - tf.get_variable(v, shape=var_values[v].shape, dtype=var_dtypes[v]) - for v in var_values - ] - placeholders = [tf.placeholder(v.dtype, shape=v.shape) for v in tf_vars] - assign_ops = [tf.assign(v, p) for (v, p) in zip(tf_vars, placeholders)] - global_step = tf.Variable( - 0, name="global_step", trainable=False, dtype=tf.int64) - saver = tf.train.Saver(tf.all_variables()) - - # Build a model consisting only of variables, set them to the average values. - with tf.Session() as sess: - sess.run(tf.initialize_all_variables()) - for p, assign_op, (name, value) in zip(placeholders, assign_ops, - six.iteritems(var_values)): - sess.run(assign_op, {p: value}) - # Use the built saver to save the averaged checkpoint. 
- saver.save(sess, FLAGS.output_path, global_step=global_step) - - tf.logging.info("Averaged checkpoints saved in %s", FLAGS.output_path) - - -if __name__ == "__main__": - tf.app.run() diff --git a/transformer-xl/tf/data_utils.py b/transformer-xl/tf/data_utils.py deleted file mode 100644 index ea2e32b..0000000 --- a/transformer-xl/tf/data_utils.py +++ /dev/null @@ -1,586 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import math -import os -from functools import partial - -from collections import Counter, OrderedDict -import pickle -import json -import multiprocessing as mp - -import numpy as np - -from absl import flags -import tensorflow as tf -from vocabulary import Vocab - -from tensorflow.gfile import Exists as exists -from tensorflow.gfile import MakeDirs as makedirs -from tensorflow.gfile import Glob as glob - - -def _preprocess(shard, train, vocab, save_dir, cutoffs, bin_sizes, bsz, tgt_len, - num_core_per_host, use_tpu, num_shuffle): - file_names = [] - num_batch = 0 - - path = train[shard] - data_shard = vocab.encode_file(path, ordered=False, add_double_eos=True) - - for shuffle in range(num_shuffle): - basename = "train-{:03d}-{:02d}".format(shard, shuffle) - print("Processing shard {} shuffle {}".format(shard, shuffle)) - - np.random.shuffle(data_shard) - file_name, num_batch_shuffle = create_ordered_tfrecords( - save_dir, basename, np.concatenate(data_shard), bsz, tgt_len, - num_core_per_host, cutoffs, bin_sizes, use_tpu=use_tpu) - file_names.append(file_name) - num_batch += num_batch_shuffle - - return file_names, num_batch - - -class Corpus(object): - def __init__(self, path, dataset, *args, **kwargs): - self.dataset = dataset - self.vocab = Vocab(*args, **kwargs) - - if self.dataset in ["ptb", "wt2", "enwik8", "text8"]: - self.vocab.count_file(os.path.join(path, "train.txt")) - self.vocab.count_file(os.path.join(path, "valid.txt")) - self.vocab.count_file(os.path.join(path, "test.txt")) - elif self.dataset == "wt103": - self.vocab.count_file(os.path.join(path, "train.txt")) - elif self.dataset == "lm1b": - train_path_pattern = os.path.join( - path, "1-billion-word-language-modeling-benchmark-r13output", - "training-monolingual.tokenized.shuffled", "news.en-*") - train_paths = glob(train_path_pattern) - - # the vocab will load from file when build_vocab() is called - # for train_path in sorted(train_paths): - # self.vocab.count_file(train_path, verbose=True) - - self.vocab.build_vocab() - - if self.dataset in ["ptb", "wt2", "wt103"]: - self.train = self.vocab.encode_file( - os.path.join(path, "train.txt"), ordered=True) - self.valid = self.vocab.encode_file( - os.path.join(path, "valid.txt"), ordered=True) - self.test = self.vocab.encode_file( - os.path.join(path, "test.txt"), ordered=True) - elif self.dataset in ["enwik8", "text8"]: - self.train = self.vocab.encode_file( - os.path.join(path, "train.txt"), ordered=True, add_eos=False) - self.valid = self.vocab.encode_file( - os.path.join(path, "valid.txt"), ordered=True, add_eos=False) - self.test = self.vocab.encode_file( - os.path.join(path, "test.txt"), ordered=True, add_eos=False) - elif self.dataset == "lm1b": - self.train = train_paths - valid_path = os.path.join(path, "valid.txt") - test_path = valid_path - self.valid = self.vocab.encode_file( - valid_path, ordered=True, add_double_eos=True) - self.test = self.vocab.encode_file( - test_path, ordered=True, add_double_eos=True) - - if self.dataset == "wt103": - self.cutoffs = [0, 20000, 40000, 
200000] + [len(self.vocab)] - elif self.dataset == "lm1b": - self.cutoffs = [0, 60000, 100000, 640000] + [len(self.vocab)] - else: - self.cutoffs = [] - - - def convert_to_tfrecords(self, split, save_dir, bsz, tgt_len, - num_core_per_host, **kwargs): - FLAGS = kwargs.get('FLAGS') - - file_names = [] - use_tpu = FLAGS.use_tpu and not (split == "test" and num_core_per_host == 1) - - if use_tpu: - record_name = "record_info-{}.bsz-{}.tlen-{}.core-{}.json".format( - split, bsz, tgt_len, num_core_per_host) - else: - record_name = "record_info-{}.bsz-{}.tlen-{}.json".format( - split, bsz, tgt_len) - - record_info_path = os.path.join(save_dir, record_name) - - if self.dataset in ["ptb", "wt2", "wt103", "enwik8", "text8"]: - data = getattr(self, split) - bin_sizes = get_bin_sizes( - data, bsz // num_core_per_host, tgt_len, self.cutoffs) - file_name, num_batch = create_ordered_tfrecords( - save_dir, split, data, bsz, tgt_len, num_core_per_host, - self.cutoffs, bin_sizes, - num_passes=FLAGS.num_passes if split == 'train' and use_tpu else 1, - use_tpu=use_tpu) - file_names.append(file_name) - elif self.dataset == "lm1b": - bin_sizes = get_bin_sizes( - self.valid, bsz // num_core_per_host, tgt_len, self.cutoffs) - if split == "train": - np.random.seed(123456) - num_batch = 0 - - if FLAGS.num_procs > 1: - _preprocess_wrapper = partial(_preprocess, - train=self.train, vocab=self.vocab, save_dir=save_dir, - cutoffs=self.cutoffs, bin_sizes=bin_sizes, bsz=bsz, - tgt_len=tgt_len, num_core_per_host=num_core_per_host, - use_tpu=use_tpu, num_shuffle=FLAGS.num_shuffle) - - pool = mp.Pool(processes=FLAGS.num_procs) - results = pool.map(_preprocess_wrapper, range(len(self.train))) - for res in results: - file_names.extend(res[0]) - num_batch += res[1] - else: - for shard, path in enumerate(self.train): - data_shard = self.vocab.encode_file(path, ordered=False, - add_double_eos=True) - - num_shuffle = FLAGS.num_shuffle - - for shuffle in range(num_shuffle): - print("Processing shard {} shuffle {}".format(shard, shuffle)) - basename = "train-{:03d}-{:02d}".format(shard, shuffle) - np.random.shuffle(data_shard) - file_name, num_batch_ = create_ordered_tfrecords( - save_dir, basename, np.concatenate(data_shard), bsz, tgt_len, - num_core_per_host, - self.cutoffs, bin_sizes, use_tpu=use_tpu) - file_names.append(file_name) - num_batch += num_batch_ - - else: - file_name, num_batch = create_ordered_tfrecords( - save_dir, split, getattr(self, split), bsz, tgt_len, - num_core_per_host, - self.cutoffs, bin_sizes, use_tpu=use_tpu) - file_names.append(file_name) - - with open(record_info_path, "w") as fp: - record_info = { - "filenames": file_names, - "bin_sizes": bin_sizes, - "num_batch": num_batch - } - json.dump(record_info, fp) - - -def get_bin_sizes(data, batch_size, tgt_len, cutoffs, std_mult=[2.5, 2.5, 2.5]): - """ - Note: the `batch_size` here should be per-core batch size - """ - bin_sizes = [] - - def _nearest_to_eight(x): # so that it's faster on TPUs - y = x - x % 8 - return y + 8 if x % 8 >= 4 else max(8, y) - - if cutoffs: - num_batch = len(data) // batch_size // tgt_len - - data = data[:batch_size * num_batch * tgt_len] - data = data.reshape(batch_size, num_batch, tgt_len) - - tot = batch_size * tgt_len - for b, (left, right) in enumerate(zip(cutoffs[1:-1], cutoffs[2:])): - mask = (data >= left) * (data < right) - percents = mask.astype(np.float64).sum(2).sum(0) / tot - mean = np.mean(percents) - std = np.std(percents) - - bin_size = int(math.ceil(tgt_len * batch_size * (mean + std_mult[b] * std))) - bin_size = 
_nearest_to_eight(bin_size) - bin_sizes.append(bin_size) - - return bin_sizes - - -def _int64_feature(values): - return tf.train.Feature(int64_list=tf.train.Int64List(value=values)) - -def _float_feature(values): - return tf.train.Feature(float_list=tf.train.FloatList(value=values)) - -def batchify(data, batch_size, num_passes): - """ - if use_tpu = True: num_passes > 1 - - Since TPU training requires entire [bsz x tgt_len] chunks, it can discard - as many as `bsz * tgt_len` tokens in training. When `bsz` and `tgt_len` are - both large, as in the case of TPU training for Transformer-XL, the problem - may lead to detectable performance drop. - - Here, we use multiple randomly shifted copies to deal with this problem. - """ - if num_passes > 1: - data_len = len(data) - double_data = np.concatenate([data, data]) - data_list = [] - for i in range(num_passes): - start = np.random.randint(0, data_len) - data_list.append(double_data[start:start+data_len]) - data = np.concatenate(data_list) - - num_step = len(data) // batch_size - data = data[:batch_size * num_step] - data = data.reshape(batch_size, num_step) - - return data - - -def create_ordered_tfrecords(save_dir, basename, data, batch_size, tgt_len, - num_core_per_host, cutoffs=[], bin_sizes=[], - num_passes=1, use_tpu=False): - - if use_tpu: - file_name = "{}.bsz-{}.tlen-{}.core-{}.tfrecords".format( - basename, batch_size, tgt_len, num_core_per_host) - else: - file_name = "{}.bsz-{}.tlen-{}.tfrecords".format( - basename, batch_size, tgt_len) - - save_path = os.path.join(save_dir, file_name) - record_writer = tf.python_io.TFRecordWriter(save_path) - - batched_data = batchify(data, batch_size, num_passes) - - num_batch = 0 - # for t in range(0, batched_data.shape[1] - tgt_len - 1, tgt_len): - for t in range(0, batched_data.shape[1] - 1, tgt_len): - cur_tgt_len = min(batched_data.shape[1] - 1 - t, tgt_len) - # drop the remainder if use tpu - if use_tpu and cur_tgt_len < tgt_len: - break - if num_batch % 500 == 0: - print(" processing batch {}".format(num_batch)) - for idx in range(batch_size): - inputs = batched_data[idx, t:t + cur_tgt_len] - labels = batched_data[idx, t + 1:t + cur_tgt_len + 1] - - # features dict - feature = { - "inputs": _int64_feature(inputs), - "labels": _int64_feature(labels), - } - - if len(cutoffs) > 0 and use_tpu: - # validate `bin_sizes` and `cutoffs` - assert len(cutoffs) - len(bin_sizes) == 2, \ - "len(cutoffs) - len(bin_sizes) != 2" - - # mask for bin 0 - left, right = cutoffs[:2] - inp_mask = ((inputs >= left) * (inputs < right)).astype(np.float32) - tgt_mask = ((labels >= left) * (labels < right)).astype(np.float32) - - feature["inp_mask"] = _float_feature(inp_mask) - feature["tgt_mask"] = _float_feature(tgt_mask) - - # refresh `inp_cnts` and `tgt_cnts` for each TPU core - if idx % (batch_size // num_core_per_host) == 0: - inp_cnts = [0] * len(bin_sizes) - tgt_cnts = [0] * len(bin_sizes) - - head_labels = np.copy(labels) - inp_pos_per_bin, tgt_pos_per_bin = [], [] - for b, (left, right) in enumerate(zip(cutoffs[1:-1], cutoffs[2:])): - inp_pos = np.where((inputs >= left) * (inputs < right))[0] - tgt_pos = np.where((labels >= left) * (labels < right))[0] - inp_pos_per_bin.append(inp_pos) - tgt_pos_per_bin.append(tgt_pos) - - head_labels[tgt_pos] = cutoffs[1] + b - - feature["head_labels"] = _int64_feature(head_labels) - - # permutation feature - def _add_perm_feature(feature, pos_per_bin, cnts, prefix): - for b, pos in enumerate(pos_per_bin): - idx_tuple = [] - for p in pos: - if cnts[b] < bin_sizes[b]: - 
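
[Editor's note] The deleted `batchify` above addresses a TPU-specific waste problem: fixed `[bsz x tgt_len]` chunking can discard up to `bsz * tgt_len` tokens per epoch, so for `num_passes > 1` it concatenates several randomly shifted copies of the token stream so that a different tail is dropped on each pass. Below is a minimal NumPy sketch of that idea, together with the multiple-of-8 bin rounding used by `get_bin_sizes`; it is a standalone re-expression for reference, not the deleted TF code.

```python
import numpy as np

def batchify(data: np.ndarray, batch_size: int, num_passes: int = 1) -> np.ndarray:
    """Pack a 1-D token stream into a [batch_size, num_step] grid."""
    if num_passes > 1:
        # Multiple randomly shifted copies: each pass starts at a different
        # offset into the doubled stream, so the reshape below drops a
        # different tail of tokens each time.
        data_len = len(data)
        doubled = np.concatenate([data, data])
        starts = np.random.randint(0, data_len, size=num_passes)
        data = np.concatenate([doubled[s:s + data_len] for s in starts])

    num_step = len(data) // batch_size
    return data[:batch_size * num_step].reshape(batch_size, num_step)

def nearest_to_eight(x: int) -> int:
    """Round a bin size to the nearest multiple of 8 (faster on TPUs)."""
    y = x - x % 8
    return y + 8 if x % 8 >= 4 else max(8, y)

# e.g. batchify(np.arange(10), 4).shape == (4, 2); nearest_to_eight(12) == 16
```
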
idx_tuple.append([p, cnts[b]]) - cnts[b] += 1 - else: - break - - n_tup = len(idx_tuple) - tup = np.array(idx_tuple).reshape(n_tup * 2) - - feature["{}_cnt_{}".format(prefix, b)] = _int64_feature([n_tup]) - feature["{}_tup_{}".format(prefix, b)] = _int64_feature(tup) - - _add_perm_feature(feature, inp_pos_per_bin, inp_cnts, "inp") - _add_perm_feature(feature, tgt_pos_per_bin, tgt_cnts, "tgt") - - example = tf.train.Example(features=tf.train.Features(feature=feature)) - record_writer.write(example.SerializeToString()) - - num_batch += 1 - - record_writer.close() - print("Done writing {}. batches: {}".format(file_name, num_batch)) - - return file_name, num_batch - - -def get_lm_corpus(data_dir, dataset): - fn = os.path.join(data_dir, "cache.pkl") - - if exists(fn): - print("Loading cached dataset...") - with open(fn, "rb") as fp: - corpus = pickle.load(fp) - else: - print("Producing dataset...") - kwargs = {} - if dataset in ["wt103", "wt2"]: - kwargs["special"] = [""] - kwargs["lower_case"] = False - elif dataset == "ptb": - kwargs["special"] = [""] - kwargs["lower_case"] = True - elif dataset == "lm1b": - kwargs["special"] = [] - kwargs["lower_case"] = False - kwargs["vocab_file"] = os.path.join(data_dir, "1b_word_vocab.txt") - elif dataset in ["enwik8", "text8"]: - pass - - corpus = Corpus(data_dir, dataset, **kwargs) - - print("Saving dataset...") - with open(fn, "wb") as fp: - pickle.dump(corpus, fp, protocol=2) - - corpus_info = { - "vocab_size" : len(corpus.vocab), - "cutoffs" : corpus.cutoffs, - "dataset" : corpus.dataset - } - with open(os.path.join(data_dir, "corpus-info.json"), "w") as fp: - json.dump(corpus_info, fp) - - return corpus - - -def main(unused_argv): - del unused_argv # Unused - - corpus = get_lm_corpus(FLAGS.data_dir, FLAGS.dataset) - - save_dir = os.path.join(FLAGS.data_dir, "tfrecords") - if not exists(save_dir): - makedirs(save_dir) - - # test mode - if FLAGS.per_host_test_bsz > 0: - corpus.convert_to_tfrecords("test", save_dir, FLAGS.per_host_test_bsz, - FLAGS.tgt_len, FLAGS.num_core_per_host, - FLAGS=FLAGS) - return - - for split, batch_size in zip( - ["train", "valid"], - [FLAGS.per_host_train_bsz, FLAGS.per_host_valid_bsz]): - - if batch_size <= 0: continue - print("Converting {} set...".format(split)) - corpus.convert_to_tfrecords(split, save_dir, batch_size, FLAGS.tgt_len, - FLAGS.num_core_per_host, FLAGS=FLAGS) - - -def load_record_info(record_info_dir, split, per_host_bsz, tgt_len, - num_core_per_host, use_tpu): - if use_tpu: - record_name = "record_info-{}.bsz-{}.tlen-{}.core-{}.json".format( - split, per_host_bsz, tgt_len, num_core_per_host) - else: - record_name = "record_info-{}.bsz-{}.tlen-{}.json".format( - split, per_host_bsz, tgt_len) - - record_info_path = os.path.join(record_info_dir, record_name) - with open(record_info_path, "r") as fp: - record_info = json.load(fp) - - return record_info - -def get_input_fn(record_info_dir, split, per_host_bsz, tgt_len, - num_core_per_host, num_hosts=1, use_tpu=False): - """Creates input function.""" - record_info = load_record_info(record_info_dir, split, per_host_bsz, tgt_len, - num_core_per_host, use_tpu=use_tpu) - - file_names = record_info["filenames"] - bin_sizes = record_info["bin_sizes"] - num_batch = record_info["num_batch"] - - tf.logging.info("[{}] File names {}".format(split, file_names)) - - def input_fn(params): - # per-core batch size - per_core_bsz = params["batch_size"] - - # data_dir could be a remote path, e.g., a google storage url - data_dir = params["data_dir"] - - def parser(record): - # 
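
[Editor's note] The preprocessing and training halves of this deleted pipeline communicate only through the `record_info-*.json` sidecar files written above, whose names encode the split, batch size, target length, and (for TPU runs) core count, so `load_record_info` can find them without rescanning the records. A small sketch of the convention; the helper names here are illustrative, not the deleted API.

```python
import json
import os

def record_info_name(split, bsz, tgt_len, core=None):
    # Mirrors the naming scheme above: the core count appears only for TPU runs.
    if core is not None:
        return f"record_info-{split}.bsz-{bsz}.tlen-{tgt_len}.core-{core}.json"
    return f"record_info-{split}.bsz-{bsz}.tlen-{tgt_len}.json"

def write_record_info(save_dir, split, bsz, tgt_len, filenames, bin_sizes, num_batch, core=None):
    info = {"filenames": filenames, "bin_sizes": bin_sizes, "num_batch": num_batch}
    path = os.path.join(save_dir, record_info_name(split, bsz, tgt_len, core))
    with open(path, "w") as fp:
        json.dump(info, fp)
```
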
preprocess "inp_perm" and "tgt_perm" - def _process_perm_feature(example, prefix): - for b in range(len(bin_sizes)): - cnt = example.pop("{}_cnt_{}".format(prefix, b))[0] - tup = example.pop("{}_tup_{}".format(prefix, b)) - - tup = tf.reshape( - tf.sparse_tensor_to_dense(tup), - shape=[cnt, 2]) - - # tf.float32 - perm = tf.sparse_to_dense( - sparse_indices=tup, - output_shape=[tgt_len, bin_sizes[b]], - sparse_values=1.0, - default_value=0.0) - - example["{}_perm_{}".format(prefix, b)] = perm - - # whether allow the last batch with a potentially shorter length - if use_tpu: - record_spec = { - "inputs": tf.FixedLenFeature([tgt_len], tf.int64), - "labels": tf.FixedLenFeature([tgt_len], tf.int64), - } - else: - record_spec = { - "inputs": tf.VarLenFeature(tf.int64), - "labels": tf.VarLenFeature(tf.int64), - } - - # permutation related features - if bin_sizes and use_tpu: - # tf.float32 - record_spec["inp_mask"] = tf.FixedLenFeature([tgt_len], tf.float32) - record_spec["tgt_mask"] = tf.FixedLenFeature([tgt_len], tf.float32) - - record_spec["head_labels"] = tf.FixedLenFeature([tgt_len], tf.int64) - - for b in range(len(bin_sizes)): - record_spec["inp_cnt_{}".format(b)] = tf.FixedLenFeature([1], tf.int64) - record_spec["inp_tup_{}".format(b)] = tf.VarLenFeature(tf.int64) - record_spec["tgt_cnt_{}".format(b)] = tf.FixedLenFeature([1], tf.int64) - record_spec["tgt_tup_{}".format(b)] = tf.VarLenFeature(tf.int64) - - # retrieve serialized example - example = tf.parse_single_example( - serialized=record, - features=record_spec) - - # transform permutation tuples to permutation matrices - if bin_sizes and use_tpu: - _process_perm_feature(example, "inp") - _process_perm_feature(example, "tgt") - - # cast int64 into int32 - # cast sparse to dense - for key in list(example.keys()): - val = example[key] - if tf.keras.backend.is_sparse(val): - val = tf.sparse.to_dense(val) - if val.dtype == tf.int64: - val = tf.to_int32(val) - example[key] = val - - if use_tpu: - return example - else: - return example["inputs"], example["labels"] - - file_paths = [] - for file_name in file_names: - file_path = os.path.join(data_dir, file_name) - file_paths.append(file_path) - - if split == "train": - dataset = tf.data.Dataset.from_tensor_slices(file_paths) - if len(file_paths) > 1: - dataset = dataset.shuffle(len(file_paths)).repeat() - dataset = tf.data.TFRecordDataset(dataset) - elif num_hosts > 1: - host_id = params["context"].current_host - # drop the remaining batches - num_batch_per_host = num_batch // num_hosts - - my_start_sample_id = (host_id * num_batch_per_host * num_core_per_host * - per_core_bsz) - my_sample_num = num_batch_per_host * num_core_per_host * per_core_bsz - dataset = tf.data.TFRecordDataset(dataset).skip( - my_start_sample_id).take(my_sample_num) - else: - dataset = tf.data.TFRecordDataset(dataset) - - dataset = dataset.map(parser).cache().repeat() - dataset = dataset.batch(per_core_bsz, drop_remainder=True) - dataset = dataset.prefetch(num_core_per_host * per_core_bsz) - else: - # do not shuffle, repeat or cache in evaluation - dataset = tf.data.Dataset.from_tensor_slices(file_paths) - dataset = tf.data.TFRecordDataset(dataset) - dataset = dataset.map(parser) - dataset = dataset.batch(per_core_bsz, drop_remainder=True) - - return dataset - - if split == "train" and num_hosts > 1: - record_info["num_batch"] = num_batch // num_hosts - - return input_fn, record_info - -def get_corpus_info(corpus_info_path): - with open(corpus_info_path, "r") as fp: - corpus_info = json.load(fp) - return corpus_info 
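
[Editor's note] In the deleted multi-host path above, each host reads one contiguous `skip`/`take` slice of the record stream, and the batches that do not divide evenly across hosts are dropped (`num_batch // num_hosts`). A plain-Python sketch of the shard arithmetic; the function name is illustrative.

```python
def host_shard(num_batch, num_hosts, host_id, num_core_per_host, per_core_bsz):
    """Return (start, count) for one host's contiguous sample slice."""
    num_batch_per_host = num_batch // num_hosts          # remainder is dropped
    samples_per_host = num_batch_per_host * num_core_per_host * per_core_bsz
    start = host_id * samples_per_host
    return start, samples_per_host   # i.e. dataset.skip(start).take(count)

# e.g. host_shard(103, 4, 2, 8, 4) -> (1600, 800): 25 batches per host
```
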
- -if __name__ == "__main__": - FLAGS = flags.FLAGS - flags.DEFINE_string("data_dir", None, - help="Location of the data corpus") - flags.DEFINE_enum("dataset", "wt103", - ["ptb", "wt2", "wt103", "lm1b", "enwik8", "text8"], - help="Dataset name.") - flags.DEFINE_integer("per_host_train_bsz", 60, - help="train batch size each host") - flags.DEFINE_integer("per_host_valid_bsz", 60, - help="valid batch size each host") - flags.DEFINE_integer("per_host_test_bsz", 0, - help="If > 0, enter test mode and process test set only." - "Otherwise, process train and dev sets only.") - flags.DEFINE_integer("tgt_len", 70, - help="number of tokens to predict") - flags.DEFINE_integer("max_batch", -1, - help="run in debug mode") - flags.DEFINE_integer("num_core_per_host", 8, - help="8 for TPU v2.") - flags.DEFINE_bool("debug", default=False, - help="Process only the first batch without shuffle for lm1b.") - flags.DEFINE_integer("num_procs", 1, - help="number of processes") - flags.DEFINE_integer("num_passes", 10, - help="number of passes when use_tpu=True") - flags.DEFINE_integer("num_shuffle", 4, - help="number of shuffles for lm1b") - flags.DEFINE_bool("use_tpu", True, - help="use tpu") - - tf.app.run(main) diff --git a/transformer-xl/tf/gpu_utils.py b/transformer-xl/tf/gpu_utils.py deleted file mode 100644 index ea4b1b7..0000000 --- a/transformer-xl/tf/gpu_utils.py +++ /dev/null @@ -1,65 +0,0 @@ -import os -import tensorflow as tf - -def assign_to_gpu(gpu=0, ps_dev="/DEVICE:CPU:0"): - def _assign(op): - node_def = op if isinstance(op, tf.NodeDef) else op.node_def - if node_def.op == "Variable": - return ps_dev - else: - return "/gpu:%d" % gpu - return _assign - - -def average_grads_and_vars(tower_grads_and_vars): - def average_dense(grad_and_vars): - if len(grad_and_vars) == 1: - return grad_and_vars[0][0] - - grad = grad_and_vars[0][0] - for g, _ in grad_and_vars[1:]: - grad += g - return grad / len(grad_and_vars) - - def average_sparse(grad_and_vars): - if len(grad_and_vars) == 1: - return grad_and_vars[0][0] - - indices = [] - values = [] - for g, _ in grad_and_vars: - indices += [g.indices] - values += [g.values] - indices = tf.concat(indices, 0) - values = tf.concat(values, 0) / len(grad_and_vars) - return tf.IndexedSlices(values, indices, grad_and_vars[0][0].dense_shape) - - average_grads_and_vars = [] - for grad_and_vars in zip(*tower_grads_and_vars): - if grad_and_vars[0][0] is None: - grad = None - elif isinstance(grad_and_vars[0][0], tf.IndexedSlices): - grad = average_sparse(grad_and_vars) - else: - grad = average_dense(grad_and_vars) - # Keep in mind that the Variables are redundant because they are shared - # across towers. So .. we will just return the first tower's pointer to - # the Variable. - v = grad_and_vars[0][1] - grad_and_var = (grad, v) - average_grads_and_vars.append(grad_and_var) - return average_grads_and_vars - - -def load_from_checkpoint(saver, logdir): - sess = tf.get_default_session() - ckpt = tf.train.get_checkpoint_state(logdir) - if ckpt and ckpt.model_checkpoint_path: - if os.path.isabs(ckpt.model_checkpoint_path): - # Restores from checkpoint with absolute path. - saver.restore(sess, ckpt.model_checkpoint_path) - else: - # Restores from checkpoint with relative path. 
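
[Editor's note] The deleted `average_grads_and_vars` in gpu_utils.py averages per-tower gradients and treats dense and sparse gradients differently: dense gradients are summed and divided by the tower count, while sparse ones concatenate their index/value pairs with the values pre-divided, so duplicate indices sum to the correct mean when applied. A NumPy sketch of both cases, assuming sparse gradients are `(indices, values)` pairs analogous to `tf.IndexedSlices`.

```python
import numpy as np

def average_dense(grads):
    # Dense case: a plain arithmetic mean over towers.
    return sum(grads) / len(grads)

def average_sparse(grads):
    # Sparse case: concatenate each tower's (indices, values) contribution and
    # pre-divide the values; rows with repeated indices sum to the mean on apply.
    indices = np.concatenate([g[0] for g in grads])
    values = np.concatenate([g[1] for g in grads]) / len(grads)
    return indices, values
```
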
- saver.restore(sess, os.path.join(logdir, ckpt.model_checkpoint_path)) - return True - return False diff --git a/transformer-xl/tf/model.py b/transformer-xl/tf/model.py deleted file mode 100644 index bab7bee..0000000 --- a/transformer-xl/tf/model.py +++ /dev/null @@ -1,546 +0,0 @@ -import tensorflow as tf - - -def positional_embedding(pos_seq, inv_freq, bsz=None): - sinusoid_inp = tf.einsum('i,j->ij', pos_seq, inv_freq) - pos_emb = tf.concat([tf.sin(sinusoid_inp), tf.cos(sinusoid_inp)], -1) - if bsz is not None: - return tf.tile(pos_emb[:, None, :], [1, bsz, 1]) - else: - return pos_emb[:, None, :] - - -def positionwise_FF(inp, d_model, d_inner, dropout, kernel_initializer, - scope='ff', is_training=True): - output = inp - with tf.variable_scope(scope): - output = tf.layers.dense(inp, d_inner, activation=tf.nn.relu, - kernel_initializer=kernel_initializer, - name='layer_1') - output = tf.layers.dropout(output, dropout, training=is_training, - name='drop_1') - output = tf.layers.dense(output, d_model, - kernel_initializer=kernel_initializer, - name='layer_2') - output = tf.layers.dropout(output, dropout, training=is_training, - name='drop_2') - output = tf.contrib.layers.layer_norm(output + inp, begin_norm_axis=-1) - return output - - -def rel_shift(x): - x_size = tf.shape(x) - - x = tf.pad(x, [[0, 0], [1, 0], [0, 0], [0, 0]]) - x = tf.reshape(x, [x_size[1] + 1, x_size[0], x_size[2], x_size[3]]) - x = tf.slice(x, [1, 0, 0, 0], [-1, -1, -1, -1]) - x = tf.reshape(x, x_size) - - return x - - -def rel_multihead_attn(w, r, r_w_bias, r_r_bias, attn_mask, mems, d_model, - n_head, d_head, dropout, dropatt, is_training, - kernel_initializer, scope='rel_attn'): - scale = 1 / (d_head ** 0.5) - with tf.variable_scope(scope): - qlen = tf.shape(w)[0] - rlen = tf.shape(r)[0] - bsz = tf.shape(w)[1] - - cat = tf.concat([mems, w], - 0) if mems is not None and mems.shape.ndims > 1 else w - w_heads = tf.layers.dense(cat, 3 * n_head * d_head, use_bias=False, - kernel_initializer=kernel_initializer, name='qkv') - r_head_k = tf.layers.dense(r, n_head * d_head, use_bias=False, - kernel_initializer=kernel_initializer, name='r') - - w_head_q, w_head_k, w_head_v = tf.split(w_heads, 3, -1) - w_head_q = w_head_q[-qlen:] - - klen = tf.shape(w_head_k)[0] - - w_head_q = tf.reshape(w_head_q, [qlen, bsz, n_head, d_head]) - w_head_k = tf.reshape(w_head_k, [klen, bsz, n_head, d_head]) - w_head_v = tf.reshape(w_head_v, [klen, bsz, n_head, d_head]) - - r_head_k = tf.reshape(r_head_k, [rlen, n_head, d_head]) - - rw_head_q = w_head_q + r_w_bias - rr_head_q = w_head_q + r_r_bias - - AC = tf.einsum('ibnd,jbnd->ijbn', rw_head_q, w_head_k) - BD = tf.einsum('ibnd,jnd->ijbn', rr_head_q, r_head_k) - BD = rel_shift(BD) - - attn_score = (AC + BD) * scale - attn_mask_t = attn_mask[:, :, None, None] - attn_score = attn_score * (1 - attn_mask_t) - 1e30 * attn_mask_t - - attn_prob = tf.nn.softmax(attn_score, 1) - attn_prob = tf.layers.dropout(attn_prob, dropatt, training=is_training) - - attn_vec = tf.einsum('ijbn,jbnd->ibnd', attn_prob, w_head_v) - size_t = tf.shape(attn_vec) - attn_vec = tf.reshape(attn_vec, [size_t[0], size_t[1], n_head * d_head]) - - attn_out = tf.layers.dense(attn_vec, d_model, use_bias=False, - kernel_initializer=kernel_initializer, name='o') - attn_out = tf.layers.dropout(attn_out, dropout, training=is_training) - - output = tf.contrib.layers.layer_norm(attn_out + w, begin_norm_axis=-1) - return output - - -def embedding_lookup(lookup_table, x, use_tpu=True): - if use_tpu: - n_token = tf.shape(lookup_table)[0] - 
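
[Editor's note] `rel_shift` in the deleted model.py is the Transformer-XL trick that realigns attention scores indexed by (query, relative distance) into (query, key) order using only a pad, a reshape, and a slice, with no explicit shift matrix. A 2-D NumPy sketch of the same pad-reshape-slice sequence (the deleted code applies it to a 4-D `[qlen, klen, bsz, n_head]` tensor).

```python
import numpy as np

def rel_shift_2d(x):
    """x: [qlen, klen] scores indexed by (query, relative distance)."""
    qlen, klen = x.shape
    x = np.pad(x, [(0, 0), (1, 0)])      # prepend a zero column
    x = x.reshape(klen + 1, qlen)        # fold so each row shifts by one
    x = x[1:].reshape(qlen, klen)        # drop the padding, restore the shape
    return x
```
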
one_hot_idx = tf.one_hot(x, n_token) - if one_hot_idx.shape.ndims == 2: - return tf.einsum('nd,in->id', lookup_table, one_hot_idx) - else: - return tf.einsum('nd,ibn->ibd', lookup_table, one_hot_idx) - else: - return tf.nn.embedding_lookup(lookup_table, x) - - -def mask_adaptive_embedding_lookup(x, n_token, d_embed, d_proj, cutoffs, initializer, - proj_initializer, div_val=1, - proj_same_dim=True, - scope='adaptive_embed', **kwargs): - emb_scale = d_proj ** 0.5 - with tf.variable_scope(scope): - if div_val == 1: - lookup_table = tf.get_variable('lookup_table', [n_token, d_embed], - initializer=initializer) - y = embedding_lookup(lookup_table, x, use_tpu=False) - if d_proj != d_embed: - proj_W = tf.get_variable('proj_W', [d_embed, d_proj], - initializer=proj_initializer) - y = tf.einsum('ibe,ed->ibd', y, proj_W) - else: - proj_W = None - ret_params = [lookup_table, proj_W] - else: - tables, projs = [], [] - cutoff_ends = [0] + cutoffs + [n_token] - x_size = tf.shape(x) - y = tf.zeros([x_size[0], x_size[1], d_proj]) - for i in range(len(cutoff_ends) - 1): - with tf.variable_scope('cutoff_{}'.format(i)): - l_idx, r_idx = cutoff_ends[i], cutoff_ends[i + 1] - mask = (x >= l_idx) & (x < r_idx) - cur_x = tf.boolean_mask(x, mask) - l_idx - cur_d_embed = d_embed // (div_val ** i) - lookup_table = tf.get_variable('lookup_table', - [r_idx - l_idx, cur_d_embed], - initializer=initializer) - cur_y = embedding_lookup(lookup_table, cur_x, use_tpu=False) - if d_proj == cur_d_embed and not proj_same_dim: - proj_W = None - else: - proj_W = tf.get_variable('proj_W', [cur_d_embed, d_proj], - initializer=proj_initializer) - cur_y = tf.einsum('id,de->ie', cur_y, proj_W) - mask_idx = tf.to_int64(tf.where(mask)) - y += tf.scatter_nd(mask_idx, cur_y, tf.to_int64(tf.shape(y))) - tables.append(lookup_table) - projs.append(proj_W) - ret_params = [tables, projs] - - y *= emb_scale - return y, ret_params - - -def mul_adaptive_embedding_lookup(x, n_token, d_embed, d_proj, cutoffs, initializer, - proj_initializer, div_val=1, perms=None, - proj_same_dim=True, - scope='adaptive_embed'): - """ - perms: If None, first compute W = W1 x W2 (projection for each bin), - and then compute X x W (embedding lookup). If not None, - use bin-based embedding lookup with max_bin_size defined by - the shape of perms. 
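
[Editor's note] `embedding_lookup` above switches between a gather and a one-hot `einsum` because dense matmuls were typically faster than gathers on TPUs. The two are numerically identical, as this NumPy check sketches:

```python
import numpy as np

vocab, d = 16, 4
table = np.random.randn(vocab, d)
ids = np.array([3, 1, 7])

one_hot = np.eye(vocab)[ids]                       # [len, vocab]
via_matmul = np.einsum('nd,in->id', table, one_hot)  # TPU-friendly path
via_gather = table[ids]                              # ordinary lookup
assert np.allclose(via_matmul, via_gather)
```
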
- """ - emb_scale = d_proj ** 0.5 - with tf.variable_scope(scope): - if div_val == 1: - lookup_table = tf.get_variable('lookup_table', [n_token, d_embed], - initializer=initializer) - y = embedding_lookup(lookup_table, x) - if d_proj != d_embed: - proj_W = tf.get_variable('proj_W', [d_embed, d_proj], - initializer=proj_initializer) - y = tf.einsum('ibe,ed->ibd', y, proj_W) - else: - proj_W = None - ret_params = [lookup_table, proj_W] - else: - tables, projs = [], [] - cutoff_ends = [0] + cutoffs + [n_token] - x_size = tf.shape(x) - if perms is None: - cat_lookup = [] - else: - cat_lookup = tf.zeros([x_size[0], x_size[1], d_proj]) - for i in range(len(cutoff_ends) - 1): - with tf.variable_scope('cutoff_{}'.format(i)): - l_idx, r_idx = cutoff_ends[i], cutoff_ends[i + 1] - cur_d_embed = d_embed // (div_val ** i) - lookup_table = tf.get_variable('lookup_table', - [r_idx - l_idx, cur_d_embed], - initializer=initializer) - if cur_d_embed == d_proj and not proj_same_dim: - proj_W = None - else: - proj_W = tf.get_variable('proj_W', [cur_d_embed, d_proj], - initializer=proj_initializer) - if perms is None: - cat_lookup.append(tf.einsum('ie,ed->id', lookup_table, proj_W)) - else: - # speed up the computation of the first bin - # also save some meory - if i == 0: - cur_y = embedding_lookup(lookup_table, tf.minimum(x, r_idx - 1)) - if proj_W is not None: - cur_y = tf.einsum('ibe,ed->ibd', cur_y, proj_W) - cur_y *= perms[i][:, :, None] - cat_lookup += cur_y - else: - cur_x = tf.einsum('ib,ibk->k', tf.to_float(x - l_idx), perms[i]) - cur_x = tf.to_int32(cur_x) - cur_y = embedding_lookup(lookup_table, cur_x) - if proj_W is not None: - cur_y = tf.einsum('ke,ed->kd', cur_y, proj_W) - cat_lookup += tf.einsum('kd,ibk->ibd', cur_y, perms[i]) - tables.append(lookup_table) - projs.append(proj_W) - if perms is None: - cat_lookup = tf.concat(cat_lookup, 0) - y = embedding_lookup(cat_lookup, x) - else: - y = cat_lookup - ret_params = [tables, projs] - - y *= emb_scale - return y, ret_params - - -def mask_adaptive_logsoftmax(hidden, target, n_token, d_embed, d_proj, cutoffs, - params, tie_projs, - initializer=None, proj_initializer=None, - div_val=1, scope='adaptive_softmax', - proj_same_dim=True, - return_mean=True, **kwargs): - def _logit(x, W, b, proj): - y = x - if proj is not None: - y = tf.einsum('ibd,ed->ibe', y, proj) - return tf.einsum('ibd,nd->ibn', y, W) + b - - params_W, params_projs = params[0], params[1] - - def _gather_logprob(logprob, target): - lp_size = tf.shape(logprob) - r = tf.range(lp_size[0]) - idx = tf.stack([r, target], 1) - return tf.gather_nd(logprob, idx) - - with tf.variable_scope(scope): - if len(cutoffs) == 0: - softmax_b = tf.get_variable('bias', [n_token], - initializer=tf.zeros_initializer()) - output = _logit(hidden, params_W, softmax_b, params_projs) - nll = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=target, - logits=output) - else: - cutoff_ends = [0] + cutoffs + [n_token] - nll = tf.zeros_like(target, dtype=tf.float32) - for i in range(len(cutoff_ends) - 1): - with tf.variable_scope('cutoff_{}'.format(i)): - l_idx, r_idx = cutoff_ends[i], cutoff_ends[i + 1] - mask = (target >= l_idx) & (target < r_idx) - mask_idx = tf.where(mask) - cur_target = tf.boolean_mask(target, mask) - l_idx - cur_d_embed = d_embed // (div_val ** i) - - if div_val == 1: - cur_W = params_W[l_idx: r_idx] - else: - cur_W = params_W[i] - cur_b = tf.get_variable('b', [r_idx - l_idx], - initializer=tf.zeros_initializer()) - if tie_projs[i]: - if div_val == 1: - cur_proj = params_projs - else: - 
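
[Editor's note] With `div_val > 1`, each successive frequency bin above gets an embedding narrower by a factor of `div_val` (`d_embed // div_val**i`), followed by a projection back to `d_proj`, so rare tokens cost far fewer parameters. A sketch of the resulting shapes; the function name is illustrative.

```python
def adaptive_embed_shapes(n_token, d_embed, d_proj, cutoffs, div_val):
    """Per-bin (lookup_table, projection) shapes for div_val > 1."""
    ends = [0] + cutoffs + [n_token]
    shapes = []
    for i, (l, r) in enumerate(zip(ends[:-1], ends[1:])):
        d_i = d_embed // (div_val ** i)   # rarer bins get narrower embeddings
        shapes.append(((r - l, d_i), (d_i, d_proj)))
    return shapes

# e.g. with d_embed=1024 and div_val=4, successive bins get widths
# 1024, 256, 64, 16, each projected back up to d_proj.
```
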
cur_proj = params_projs[i] - else: - if (div_val == 1 or not proj_same_dim) and d_proj == cur_d_embed: - cur_proj = None - else: - cur_proj = tf.get_variable('proj', [cur_d_embed, d_proj], - initializer=proj_initializer) - if i == 0: - cluster_W = tf.get_variable('cluster_W', [len(cutoffs), d_embed], - initializer=tf.zeros_initializer()) - cluster_b = tf.get_variable('cluster_b', [len(cutoffs)], - initializer=tf.zeros_initializer()) - cur_W = tf.concat([cur_W, cluster_W], 0) - cur_b = tf.concat([cur_b, cluster_b], 0) - - head_logit = _logit(hidden, cur_W, cur_b, cur_proj) - head_logprob = tf.nn.log_softmax(head_logit) - cur_head_logprob = tf.boolean_mask(head_logprob, mask) - cur_logprob = _gather_logprob(cur_head_logprob, cur_target) - else: - cur_head_logprob = tf.boolean_mask(head_logprob, mask) - cur_hidden = tf.boolean_mask(hidden, mask) - tail_logit = tf.squeeze(_logit( - cur_hidden[None], cur_W, cur_b, cur_proj), 0) - tail_logprob = tf.nn.log_softmax(tail_logit) - cur_logprob = (cur_head_logprob[:, cutoff_ends[1] + i - 1] + - _gather_logprob(tail_logprob, cur_target)) - nll += tf.scatter_nd(mask_idx, -cur_logprob, - tf.to_int64(tf.shape(nll))) - if return_mean: - nll = tf.reduce_mean(nll) - return nll - - -def mul_adaptive_logsoftmax(hidden, target, n_token, d_embed, d_proj, cutoffs, - params, tie_projs, - initializer=None, proj_initializer=None, - div_val=1, perms=None, proj_same_dim=True, - scope='adaptive_softmax', - **kwargs): - def _logit(x, W, b, proj): - y = x - if x.shape.ndims == 3: - if proj is not None: - y = tf.einsum('ibd,ed->ibe', y, proj) - return tf.einsum('ibd,nd->ibn', y, W) + b - else: - if proj is not None: - y = tf.einsum('id,ed->ie', y, proj) - return tf.einsum('id,nd->in', y, W) + b - - params_W, params_projs = params[0], params[1] - - with tf.variable_scope(scope): - if len(cutoffs) == 0: - softmax_b = tf.get_variable('bias', [n_token], - initializer=tf.zeros_initializer()) - output = _logit(hidden, params_W, softmax_b, params_projs) - nll = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=target, - logits=output) - nll = tf.reduce_mean(nll) - else: - total_loss, total_cnt = 0, 0 - cutoff_ends = [0] + cutoffs + [n_token] - for i in range(len(cutoff_ends) - 1): - with tf.variable_scope('cutoff_{}'.format(i)): - l_idx, r_idx = cutoff_ends[i], cutoff_ends[i + 1] - - cur_d_embed = d_embed // (div_val ** i) - - if div_val == 1: - cur_W = params_W[l_idx: r_idx] - else: - cur_W = params_W[i] - cur_b = tf.get_variable('b', [r_idx - l_idx], - initializer=tf.zeros_initializer()) - if tie_projs[i]: - if div_val == 1: - cur_proj = params_projs - else: - cur_proj = params_projs[i] - else: - if (div_val == 1 or not proj_same_dim) and d_proj == cur_d_embed: - cur_proj = None - else: - cur_proj = tf.get_variable('proj', [cur_d_embed, d_proj], - initializer=proj_initializer) - - if i == 0: - cluster_W = tf.get_variable('cluster_W', [len(cutoffs), d_embed], - initializer=tf.zeros_initializer()) - cluster_b = tf.get_variable('cluster_b', [len(cutoffs)], - initializer=tf.zeros_initializer()) - cur_W = tf.concat([cur_W, cluster_W], 0) - cur_b = tf.concat([cur_b, cluster_b], 0) - - head_logit = _logit(hidden, cur_W, cur_b, cur_proj) - - head_target = kwargs.get("head_target") - head_nll = tf.nn.sparse_softmax_cross_entropy_with_logits( - labels=head_target, - logits=head_logit) - - masked_loss = head_nll * perms[i] - total_loss += tf.reduce_sum(masked_loss) - total_cnt += tf.reduce_sum(perms[i]) - - # head_logprob = tf.nn.log_softmax(head_logit) - - # final_logprob = 
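
[Editor's note] The deleted adaptive softmax factors each tail token's probability as P(cluster) x P(token | cluster): the head log-probability of the bin's cluster logit is added to the tail log-probability of the within-bin index, mirroring `cur_head_logprob[:, cutoff_ends[1] + i - 1] + _gather_logprob(tail_logprob, cur_target)` above. A NumPy sketch with made-up sizes:

```python
import numpy as np

def log_softmax(x):
    x = x - x.max(axis=-1, keepdims=True)
    return x - np.log(np.exp(x).sum(axis=-1, keepdims=True))

n_short, n_clusters, n_tail = 8, 2, 100    # illustrative sizes
head_logits = np.random.randn(5, n_short + n_clusters)  # shortlist + cluster logits
tail_logits = np.random.randn(5, n_tail)                # logits within cluster 0

head_lp = log_softmax(head_logits)
tail_lp = log_softmax(tail_logits)

# Target in tail cluster 0 with in-cluster index t:
t = 42
logprob = head_lp[:, n_short + 0] + tail_lp[:, t]  # log P(cluster) + log P(token|cluster)
```
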
head_logprob * perms[i][:, :, None] - # final_target = tf.one_hot(target, tf.shape(head_logprob)[2]) - # total_loss -= tf.einsum('ibn,ibn->', final_logprob, final_target) - # total_cnt += tf.reduce_sum(perms[i]) - else: - cur_head_nll = tf.einsum('ib,ibk->k', head_nll, perms[i]) - - cur_hidden = tf.einsum('ibd,ibk->kd', hidden, perms[i]) - tail_logit = _logit(cur_hidden, cur_W, cur_b, cur_proj) - - tail_target = tf.einsum('ib,ibk->k', tf.to_float(target - l_idx), - perms[i]) - tail_nll = tf.nn.sparse_softmax_cross_entropy_with_logits( - labels=tf.to_int32(tail_target), - logits=tail_logit) - - sum_nll = cur_head_nll + tail_nll - mask = tf.reduce_sum(perms[i], [0, 1]) - - masked_loss = sum_nll * mask - total_loss += tf.reduce_sum(masked_loss) - total_cnt += tf.reduce_sum(mask) - - nll = total_loss / total_cnt - - return nll - - -def _create_mask(qlen, mlen, same_length=False): - attn_mask = tf.ones([qlen, qlen]) - mask_u = tf.matrix_band_part(attn_mask, 0, -1) - mask_dia = tf.matrix_band_part(attn_mask, 0, 0) - attn_mask_pad = tf.zeros([qlen, mlen]) - ret = tf.concat([attn_mask_pad, mask_u - mask_dia], 1) - if same_length: - mask_l = tf.matrix_band_part(attn_mask, -1, 0) - ret = tf.concat([ret[:, :qlen] + mask_l - mask_dia, ret[:, qlen:]], 1) - return ret - -def _cache_mem(curr_out, prev_mem, mem_len=None): - if mem_len is None or prev_mem is None: - new_mem = curr_out - elif mem_len == 0: - return prev_mem - else: - new_mem = tf.concat([prev_mem, curr_out], 0)[- mem_len:] - - return tf.stop_gradient(new_mem) - - -def transformer(dec_inp, target, mems, n_token, n_layer, d_model, d_embed, - n_head, d_head, d_inner, dropout, dropatt, - initializer, is_training, proj_initializer=None, - mem_len=None, cutoffs=[], div_val=1, tie_projs=[], - same_length=False, clamp_len=-1, use_tpu=True, - input_perms=None, target_perms=None, head_target=None, - untie_r=False, proj_same_dim=True, - scope='transformer'): - """ - cutoffs: a list of python int. Cutoffs for adaptive softmax. - tie_projs: a list of python bools. Whether to tie the projections. - use_tpu: if True, use one_hot in embedding lookup and bin-based implementation - of adaptive softmax. - perms: a list of tensors. Each tensor should of size [len, bsz, bin_size]. - Only used in the adaptive setting. 
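
[Editor's note] `_create_mask` and `_cache_mem` above carry the two Transformer-XL invariants: a query may attend to all of the cached memory plus earlier positions in the current segment, and the memory is the last `mem_len` hidden states, held constant (no gradient). A NumPy sketch of both, ignoring the `same_length` variant; `stop_gradient` corresponds to detaching in an autograd framework.

```python
import numpy as np

def create_mask(qlen, mlen):
    """1 = masked. Memory columns are always visible; future positions are not."""
    future = np.triu(np.ones((qlen, qlen)), k=1)          # strictly upper triangle
    return np.concatenate([np.zeros((qlen, mlen)), future], axis=1)

def cache_mem(curr_out, prev_mem, mem_len):
    """Keep the last mem_len states (would be detached from the graph)."""
    if prev_mem is None:
        return curr_out
    if mem_len == 0:
        return prev_mem
    return np.concatenate([prev_mem, curr_out], axis=0)[-mem_len:]
```
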
- """ - new_mems = [] - with tf.variable_scope(scope): - if untie_r: - r_w_bias = tf.get_variable('r_w_bias', [n_layer, n_head, d_head], - initializer=initializer) - r_r_bias = tf.get_variable('r_r_bias', [n_layer, n_head, d_head], - initializer=initializer) - else: - r_w_bias = tf.get_variable('r_w_bias', [n_head, d_head], - initializer=initializer) - r_r_bias = tf.get_variable('r_r_bias', [n_head, d_head], - initializer=initializer) - - qlen = tf.shape(dec_inp)[0] - mlen = tf.shape(mems[0])[0] if mems is not None else 0 - klen = mlen + qlen - - if proj_initializer is None: - proj_initializer = initializer - lookup_fn = (mul_adaptive_embedding_lookup if use_tpu else - mask_adaptive_embedding_lookup) - embeddings, shared_params = lookup_fn( - x=dec_inp, - n_token=n_token, - d_embed=d_embed, - d_proj=d_model, - cutoffs=cutoffs, - initializer=initializer, - proj_initializer=proj_initializer, - div_val= div_val, - perms=input_perms, - proj_same_dim=proj_same_dim) - - attn_mask = _create_mask(qlen, mlen, same_length) - - pos_seq = tf.range(klen - 1, -1, -1.0) - if clamp_len > 0: - pos_seq = tf.minimum(pos_seq, clamp_len) - inv_freq = 1 / (10000 ** (tf.range(0, d_model, 2.0) / d_model)) - pos_emb = positional_embedding(pos_seq, inv_freq) - - output = tf.layers.dropout(embeddings, dropout, training=is_training) - pos_emb = tf.layers.dropout(pos_emb, dropout, training=is_training) - - if mems is None: - mems = [None] * n_layer - - for i in range(n_layer): - # cache new mems - new_mems.append(_cache_mem(output, mems[i], mem_len)) - - with tf.variable_scope('layer_{}'.format(i)): - output = rel_multihead_attn( - w=output, - r=pos_emb, - r_w_bias=r_w_bias if not untie_r else r_w_bias[i], - r_r_bias=r_r_bias if not untie_r else r_r_bias[i], - attn_mask=attn_mask, - mems=mems[i], - d_model=d_model, - n_head=n_head, - d_head=d_head, - dropout=dropout, - dropatt=dropatt, - is_training=is_training, - kernel_initializer=initializer) - output = positionwise_FF( - inp=output, - d_model=d_model, - d_inner=d_inner, - dropout=dropout, - kernel_initializer=initializer, - is_training=is_training) - - output = tf.layers.dropout(output, dropout, training=is_training) - - logsoftmax_fn = (mul_adaptive_logsoftmax if use_tpu else - mask_adaptive_logsoftmax) - loss = logsoftmax_fn( - hidden=output, - target=target, - n_token=n_token, - d_embed=d_embed, - d_proj=d_model, - cutoffs=cutoffs, - params=shared_params, - tie_projs=tie_projs, - initializer=initializer, - proj_initializer=proj_initializer, - div_val=div_val, - perms=target_perms, - head_target=head_target, - proj_same_dim=proj_same_dim) - return loss, new_mems - diff --git a/transformer-xl/tf/scripts/enwik8_base_gpu.sh b/transformer-xl/tf/scripts/enwik8_base_gpu.sh deleted file mode 100644 index 6de09a0..0000000 --- a/transformer-xl/tf/scripts/enwik8_base_gpu.sh +++ /dev/null @@ -1,102 +0,0 @@ -#!/bin/bash - -# Data -DATA_ROOT=../data/enwik8/ - -# Model -N_LAYER=12 -D_MODEL=512 -D_EMBED=512 -N_HEAD=8 -D_HEAD=64 -D_INNER=2048 - -# Training -TGT_LEN=512 -MEM_LEN=512 - -BSZ=24 -NUM_CORE=4 - -# Testing -TEST_TGT_LEN=80 -TEST_MEM_LEN=2100 -TEST_CLAMP_LEN=820 - -TEST_BSZ=10 -TEST_NUM_CORE=1 - -if [[ $1 == 'train_data' ]]; then - python data_utils.py \ - --data_dir=${DATA_ROOT}/ \ - --dataset=enwik8 \ - --tgt_len=${TGT_LEN} \ - --per_host_train_bsz=${BSZ} \ - --per_host_valid_bsz=${BSZ} \ - --num_passes=1 \ - --use_tpu=False \ - ${@:2} -elif [[ $1 == 'test_data' ]]; then - python data_utils.py \ - --data_dir=${DATA_ROOT}/ \ - --dataset=enwik8 \ - 
--tgt_len=${TEST_TGT_LEN} \ - --per_host_test_bsz=${TEST_BSZ} \ - --num_passes=1 \ - --use_tpu=False \ - ${@:2} -elif [[ $1 == 'train' ]]; then - echo 'Run training...' - python train_gpu.py \ - --data_dir=${DATA_ROOT}/tfrecords \ - --record_info_dir=${DATA_ROOT}/tfrecords/ \ - --corpus_info_path=${DATA_ROOT}/corpus-info.json \ - --model_dir=EXP-enwik8 \ - --n_layer=${N_LAYER} \ - --d_model=${D_MODEL} \ - --d_embed=${D_EMBED} \ - --n_head=${N_HEAD} \ - --d_head=${D_HEAD} \ - --d_inner=${D_INNER} \ - --dropout=0.1 \ - --dropatt=0.0 \ - --learning_rate=0.00025 \ - --warmup_steps=0 \ - --train_steps=400000 \ - --tgt_len=${TGT_LEN} \ - --mem_len=${MEM_LEN} \ - --train_batch_size=${BSZ} \ - --num_core_per_host=${NUM_CORE} \ - --iterations=200 \ - --save_steps=4000 \ - --do_train=True \ - --do_eval=False \ - ${@:2} -elif [[ $1 == 'eval' ]]; then - echo 'Run evaluation...' - python train_gpu.py \ - --data_dir=${DATA_ROOT}/tfrecords \ - --record_info_dir=${DATA_ROOT}/tfrecords/ \ - --corpus_info_path=${DATA_ROOT}/corpus-info.json \ - --model_dir=EXP-enwik8 \ - --n_layer=${N_LAYER} \ - --d_model=${D_MODEL} \ - --d_embed=${D_EMBED} \ - --n_head=${N_HEAD} \ - --d_head=${D_HEAD} \ - --d_inner=${D_INNER} \ - --dropout=0.0 \ - --dropatt=0.0 \ - --tgt_len=${TEST_TGT_LEN} \ - --mem_len=${TEST_MEM_LEN} \ - --clamp_len=${TEST_CLAMP_LEN} \ - --same_length=True \ - --eval_batch_size=${TEST_BSZ} \ - --num_core_per_host=${TEST_NUM_CORE} \ - --do_train=False \ - --do_eval=True \ - --eval_split=test \ - ${@:2} -else - echo 'unknown argment 1' -fi \ No newline at end of file diff --git a/transformer-xl/tf/scripts/enwik8_large_tpu.sh b/transformer-xl/tf/scripts/enwik8_large_tpu.sh deleted file mode 100644 index e862fd7..0000000 --- a/transformer-xl/tf/scripts/enwik8_large_tpu.sh +++ /dev/null @@ -1,122 +0,0 @@ -#!/bin/bash - -# Path -LOCAL_DIR=../data/enwik8/ -GSDATA= -GSEXP= - -# TPU setting -NUM_HOST=2 -NUM_CORE=16 # TPUv2 -> 8 | TPUv3 -> 16 - -TEST_NUM_HOST=1 -TEST_NUM_CORE=8 # TPUv2 -> 8 | TPUv3 -> 16 - -# Model -N_LAYER=24 -D_MODEL=1024 -D_EMBED=1024 -N_HEAD=8 -D_HEAD=128 -D_INNER=3072 - -# Training -TGT_LEN=768 -MEM_LEN=768 -TRAIN_BSZ=64 -VALID_BSZ=64 - -# Testing -TEST_TGT_LEN=128 -TEST_MEM_LEN=3800 -TEST_CLAMP_LEN=1000 -TEST_BSZ=16 - -if [[ $1 == 'train_data' ]]; then - python data_utils.py \ - --data_dir=${LOCAL_DIR}/ \ - --dataset=enwik8 \ - --tgt_len=${TGT_LEN} \ - --per_host_train_bsz=${TRAIN_BSZ} \ - --per_host_valid_bsz=${VALID_BSZ} \ - --num_core_per_host=${NUM_CORE} \ - --num_passes=10 \ - --use_tpu=True \ - ${@:2} - - SRC_PATTERN=train.bsz-${TRAIN_BSZ}.tlen-${TGT_LEN}.core-${NUM_CORE}* - gsutil cp ${LOCAL_DIR}/tfrecords/${SRC_PATTERN} ${GSDATA}/enwik8-tfrecords/ - - SRC_PATTERN=valid.bsz-${VALID_BSZ}.tlen-${TGT_LEN}.core-${NUM_CORE}* - gsutil cp ${LOCAL_DIR}/tfrecords/${SRC_PATTERN} ${GSDATA}/enwik8-tfrecords/ - -elif [[ $1 == 'test_data' ]]; then - python data_utils.py \ - --data_dir=${LOCAL_DIR}/ \ - --dataset=enwik8 \ - --tgt_len=${TEST_TGT_LEN} \ - --per_host_test_bsz=${TEST_BSZ} \ - --num_core_per_host=${TEST_NUM_CORE} \ - --num_passes=1 \ - --use_tpu=True \ - ${@:2} - - SRC_PATTERN=test.bsz-${TEST_BSZ}.tlen-${TEST_TGT_LEN}.core-${TEST_NUM_CORE}* - gsutil cp ${LOCAL_DIR}/tfrecords/${SRC_PATTERN} ${GSDATA}/enwik8-tfrecords/ - -elif [[ $1 == 'train' ]]; then - echo 'Run training...' 
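
[Editor's note] Across these scripts, evaluation stretches the attention context far beyond training (here `MEM_LEN=512` for training vs `TEST_MEM_LEN=2100` for testing) while capping relative distances at `TEST_CLAMP_LEN`, matching `pos_seq = tf.minimum(pos_seq, clamp_len)` in the deleted model.py. A sketch of the position sequence the model actually embeds:

```python
import numpy as np

def relative_positions(qlen, mlen, clamp_len=-1):
    """Relative distances seen by attention: klen-1 down to 0, optionally clamped."""
    pos_seq = np.arange(qlen + mlen - 1, -1, -1.0)
    if clamp_len > 0:
        # Distances beyond clamp_len reuse the same positional embedding,
        # which lets evaluation extrapolate past the training context.
        pos_seq = np.minimum(pos_seq, clamp_len)
    return pos_seq
```
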
- python train.py \ - --data_dir=${GSDATA}/enwik8-tfrecords \ - --record_info_dir=${LOCAL_DIR}/tfrecords/ \ - --corpus_info_path=${LOCAL_DIR}/corpus-info.json \ - --model_dir=${GSEXP}/enwik8 \ - --n_layer=${N_LAYER} \ - --d_model=${D_MODEL} \ - --d_embed=${D_EMBED} \ - --n_head=${N_HEAD} \ - --d_head=${D_HEAD} \ - --d_inner=${D_INNER} \ - --dropout=0.15 \ - --dropatt=0.15 \ - --learning_rate=0.00025 \ - --warmup_steps=4000 \ - --train_steps=400000 \ - --tgt_len=${TGT_LEN} \ - --mem_len=${MEM_LEN} \ - --train_batch_size=${TRAIN_BSZ} \ - --use_tpu=True \ - --num_host=${NUM_HOST} \ - --num_core_per_host=${NUM_CORE} \ - --iterations=1000 \ - --save_steps=10000 \ - --do_train=True \ - --do_eval=False \ - ${@:2} - -elif [[ $1 == 'eval' ]]; then - echo 'Run evaluation...' - python train.py \ - --data_dir=${GSDATA}/enwik8-tfrecords \ - --record_info_dir=${LOCAL_DIR}/tfrecords/ \ - --corpus_info_path=${LOCAL_DIR}/corpus-info.json \ - --model_dir=${GSEXP}/enwik8 \ - --n_layer=${N_LAYER} \ - --d_model=${D_MODEL} \ - --d_embed=${D_EMBED} \ - --n_head=${N_HEAD} \ - --d_head=${D_HEAD} \ - --d_inner=${D_INNER} \ - --tgt_len=${TEST_TGT_LEN} \ - --mem_len=${TEST_MEM_LEN} \ - --eval_batch_size=${TEST_BSZ} \ - --num_host=${TEST_NUM_HOST} \ - --num_core_per_host=${TEST_NUM_CORE} \ - --use_tpu=True \ - --do_train=False \ - --do_eval_only=True \ - --eval_split=test \ - ${@:2} -else - echo 'unknown argment 1' -fi diff --git a/transformer-xl/tf/scripts/lm1b_base_gpu.sh b/transformer-xl/tf/scripts/lm1b_base_gpu.sh deleted file mode 100644 index 2dcb252..0000000 --- a/transformer-xl/tf/scripts/lm1b_base_gpu.sh +++ /dev/null @@ -1,110 +0,0 @@ -#!/bin/bash - -# Data -DATA_ROOT=../data/one-billion-words/ - -# Model -DIV_VAL=4 -N_LAYER=18 -D_MODEL=1024 -D_EMBED=1024 -N_HEAD=8 -D_HEAD=128 -D_INNER=4096 - -# Training -TGT_LEN=256 -MEM_LEN=256 - -BSZ=256 -NUM_CORE=4 - -# Testing -TEST_TGT_LEN=32 -TEST_MEM_LEN=128 -TEST_CLAMP_LEN=-1 - -TEST_BSZ=16 -TEST_NUM_CORE=1 - - -if [[ $1 == 'train_data' ]]; then - python data_utils.py \ - --data_dir=${DATA_ROOT}/ \ - --dataset=lm1b \ - --tgt_len=${TGT_LEN} \ - --per_host_train_bsz=${BSZ} \ - --per_host_valid_bsz=${BSZ} \ - --num_passes=1 \ - --use_tpu=False \ - ${@:2} -elif [[ $1 == 'test_data' ]]; then - python data_utils.py \ - --data_dir=${DATA_ROOT}/ \ - --dataset=lm1b \ - --tgt_len=${TEST_TGT_LEN} \ - --per_host_test_bsz=${TEST_BSZ} \ - --num_passes=1 \ - --use_tpu=False \ - ${@:2} -elif [[ $1 == 'train' ]]; then - echo 'Run training...' - python train_gpu.py \ - --data_dir=${DATA_ROOT}/tfrecords \ - --record_info_dir=${DATA_ROOT}/tfrecords/ \ - --corpus_info_path=${DATA_ROOT}/corpus-info.json \ - --model_dir=EXP-lm1b \ - --div_val=${DIV_VAL} \ - --untie_r=True \ - --proj_share_all_but_first=False \ - --proj_same_dim=False \ - --n_layer=${N_LAYER} \ - --d_model=${D_MODEL} \ - --d_embed=${D_EMBED} \ - --n_head=${N_HEAD} \ - --d_head=${D_HEAD} \ - --d_inner=${D_INNER} \ - --dropout=0.1 \ - --dropatt=0.0 \ - --learning_rate=0.00025 \ - --warmup_steps=0 \ - --train_steps=400000 \ - --tgt_len=${TGT_LEN} \ - --mem_len=${MEM_LEN} \ - --train_batch_size=${BSZ} \ - --num_core_per_host=${NUM_CORE} \ - --iterations=200 \ - --save_steps=4000 \ - ${@:2} -elif [[ $1 == 'eval' ]]; then - echo 'Run evaluation...' 
- python train_gpu.py \ - --data_dir=${DATA_ROOT}/tfrecords \ - --record_info_dir=${DATA_ROOT}/tfrecords/ \ - --corpus_info_path=${DATA_ROOT}/corpus-info.json \ - --model_dir=EXP-lm1b \ - --div_val=${DIV_VAL} \ - --untie_r=True \ - --proj_share_all_but_first=False \ - --proj_same_dim=False \ - --n_layer=${N_LAYER} \ - --d_model=${D_MODEL} \ - --d_embed=${D_EMBED} \ - --n_head=${N_HEAD} \ - --d_head=${D_HEAD} \ - --d_inner=${D_INNER} \ - --dropout=0.0 \ - --dropatt=0.0 \ - --tgt_len=${TEST_TGT_LEN} \ - --mem_len=${TEST_MEM_LEN} \ - --clamp_len=${TEST_CLAMP_LEN} \ - --same_length=True \ - --eval_batch_size=${TEST_BSZ} \ - --num_core_per_host=${TEST_NUM_CORE} \ - --do_train=False \ - --do_eval=True \ - --eval_split=test \ - ${@:2} -else - echo 'unknown argment 1' -fi diff --git a/transformer-xl/tf/scripts/lm1b_large_tpu.sh b/transformer-xl/tf/scripts/lm1b_large_tpu.sh deleted file mode 100644 index 076478e..0000000 --- a/transformer-xl/tf/scripts/lm1b_large_tpu.sh +++ /dev/null @@ -1,136 +0,0 @@ -#!/bin/bash - -# Path -LOCAL_DIR=../data/one-billion-words/ -GSDATA= -GSEXP= - -# TPU setting -NUM_HOST=32 -NUM_CORE=16 # TPUv2 -> 8 | TPUv3 -> 16 - -TEST_NUM_HOST=1 -TEST_NUM_CORE=8 # TPUv2 -> 8 | TPUv3 -> 16 - -# Model -DIV_VAL=4 -N_LAYER=24 -D_MODEL=1280 -D_EMBED=1280 -N_HEAD=16 -D_HEAD=80 -D_INNER=8192 - -# Training -TGT_LEN=32 -MEM_LEN=32 -TRAIN_BSZ=512 -VALID_BSZ=512 -TRAIN_BSZ_PER_HOST=$((TRAIN_BSZ / NUM_HOST)) -VALID_BSZ_PER_HOST=$((VALID_BSZ / NUM_HOST)) - -# Testing -TEST_TGT_LEN=32 -TEST_MEM_LEN=128 -TEST_CLAMP_LEN=-1 -TEST_BSZ=8 - -if [[ $1 == 'train_data' ]]; then - python data_utils.py \ - --data_dir=${LOCAL_DIR}/ \ - --dataset=lm1b \ - --tgt_len=${TGT_LEN} \ - --per_host_train_bsz=${TRAIN_BSZ_PER_HOST} \ - --per_host_valid_bsz=${VALID_BSZ_PER_HOST} \ - --num_core_per_host=${NUM_CORE} \ - --num_passes=10 \ - --use_tpu=True \ - ${@:2} - - SRC_PATTERN=train.bsz-${TRAIN_BSZ}.tlen-${TGT_LEN}.core-${NUM_CORE}* - gsutil cp ${LOCAL_DIR}/tfrecords/${SRC_PATTERN} ${GSDATA}/lm1b-tfrecords/ - - SRC_PATTERN=valid.bsz-${VALID_BSZ}.tlen-${TGT_LEN}.core-${NUM_CORE}* - gsutil cp ${LOCAL_DIR}/tfrecords/${SRC_PATTERN} ${GSDATA}/lm1b-tfrecords/ - -elif [[ $1 == 'test_data' ]]; then - python data_utils.py \ - --data_dir=${LOCAL_DIR}/ \ - --dataset=lm1b \ - --tgt_len=${TEST_TGT_LEN} \ - --per_host_test_bsz=${TEST_BSZ} \ - --num_core_per_host=${TEST_NUM_CORE} \ - --num_passes=1 \ - --use_tpu=True \ - ${@:2} - - SRC_PATTERN=test.bsz-${TEST_BSZ}.tlen-${TEST_TGT_LEN}.core-${TEST_NUM_CORE}* - gsutil cp ${LOCAL_DIR}/tfrecords/${SRC_PATTERN} ${GSDATA}/lm1b-tfrecords/ - -elif [[ $1 == 'train' ]]; then - echo 'Run training...' - python train.py \ - --data_dir=${GSDATA}/lm1b-tfrecords \ - --record_info_dir=${LOCAL_DIR}/tfrecords/ \ - --corpus_info_path=${LOCAL_DIR}/corpus-info.json \ - --model_dir=${GSEXP}/lm1b \ - --div_val=${DIV_VAL} \ - --untie_r=True \ - --proj_share_all_but_first=False \ - --proj_same_dim=False \ - --n_layer=${N_LAYER} \ - --d_model=${D_MODEL} \ - --d_embed=${D_EMBED} \ - --n_head=${N_HEAD} \ - --d_head=${D_HEAD} \ - --d_inner=${D_INNER} \ - --dropout=0.05 \ - --dropatt=0.05 \ - --init_std=0.005 \ - --learning_rate=0.0001 \ - --warmup_steps=30000 \ - --train_steps=1200000 \ - --tgt_len=${TGT_LEN} \ - --mem_len=${MEM_LEN} \ - --train_batch_size=${TRAIN_BSZ} \ - --num_hosts=${NUM_HOST} \ - --num_core_per_host=${NUM_CORE} \ - --iterations=1000 \ - --save_steps=10000 \ - --use_tpu=True \ - --do_eval=False \ - ${@:2} - -elif [[ $1 == 'eval' ]]; then - echo 'Run evaluation...' 
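
[Editor's note] The TPU scripts size batches top-down: a global batch is split evenly across hosts (`TRAIN_BSZ_PER_HOST=$((TRAIN_BSZ / NUM_HOST))` above) and then across the cores of each host. A quick arithmetic sketch of the split; the helper name is illustrative.

```python
def batch_split(global_bsz, num_hosts, num_core_per_host):
    per_host = global_bsz // num_hosts
    per_core = per_host // num_core_per_host
    assert per_host * num_hosts == global_bsz, "global batch must divide over hosts"
    assert per_core * num_core_per_host == per_host, "host batch must divide over cores"
    return per_host, per_core

# e.g. batch_split(512, 32, 16) -> (16, 1): one sample per core per step
```
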
- python train.py \ - --data_dir=${GSDATA}/lm1b-tfrecords \ - --record_info_dir=${LOCAL_DIR}/tfrecords/ \ - --corpus_info_path=${LOCAL_DIR}/corpus-info.json \ - --model_dir=${GSEXP}/lm1b \ - --div_val=${DIV_VAL} \ - --untie_r=True \ - --proj_share_all_but_first=False \ - --proj_same_dim=False \ - --n_layer=${N_LAYER} \ - --d_model=${D_MODEL} \ - --d_embed=${D_EMBED} \ - --n_head=${N_HEAD} \ - --d_head=${D_HEAD} \ - --d_inner=${D_INNER} \ - --tgt_len=${TEST_TGT_LEN} \ - --mem_len=${TEST_MEM_LEN} \ - --clamp_len=${TEST_CLAMP_LEN} \ - --same_length=True \ - --eval_batch_size=${TEST_BSZ} \ - --num_host=${TEST_NUM_HOST} \ - --num_core_per_host=${TEST_NUM_CORE} \ - --use_tpu=True \ - --do_train=False \ - --do_eval_only=True \ - --eval_split=test \ - ${@:2} - -else - echo 'unknown argment 1' -fi diff --git a/transformer-xl/tf/scripts/text8_base_gpu.sh b/transformer-xl/tf/scripts/text8_base_gpu.sh deleted file mode 100644 index 1cff08a..0000000 --- a/transformer-xl/tf/scripts/text8_base_gpu.sh +++ /dev/null @@ -1,102 +0,0 @@ -#!/bin/bash - -# Data -DATA_ROOT=../data/text8/ - -# Model -N_LAYER=12 -D_MODEL=512 -D_EMBED=512 -N_HEAD=8 -D_HEAD=64 -D_INNER=2048 - -# Training -TGT_LEN=512 -MEM_LEN=512 - -BSZ=24 -NUM_CORE=4 - -# Testing -TEST_TGT_LEN=80 -TEST_MEM_LEN=2100 -TEST_CLAMP_LEN=820 - -TEST_BSZ=10 -TEST_NUM_CORE=1 - -if [[ $1 == 'train_data' ]]; then - python data_utils.py \ - --data_dir=${DATA_ROOT}/ \ - --dataset=text8 \ - --tgt_len=${TGT_LEN} \ - --per_host_train_bsz=${BSZ} \ - --per_host_valid_bsz=${BSZ} \ - --num_passes=1 \ - --use_tpu=False \ - ${@:2} -elif [[ $1 == 'test_data' ]]; then - python data_utils.py \ - --data_dir=${DATA_ROOT}/ \ - --dataset=text8 \ - --tgt_len=${TEST_TGT_LEN} \ - --per_host_test_bsz=${TEST_BSZ} \ - --num_passes=1 \ - --use_tpu=False \ - ${@:2} -elif [[ $1 == 'train' ]]; then - echo 'Run training...' - python train_gpu.py \ - --data_dir=${DATA_ROOT}/tfrecords \ - --record_info_dir=${DATA_ROOT}/tfrecords/ \ - --corpus_info_path=${DATA_ROOT}/corpus-info.json \ - --model_dir=EXP-text8 \ - --n_layer=${N_LAYER} \ - --d_model=${D_MODEL} \ - --d_embed=${D_EMBED} \ - --n_head=${N_HEAD} \ - --d_head=${D_HEAD} \ - --d_inner=${D_INNER} \ - --dropout=0.1 \ - --dropatt=0.0 \ - --learning_rate=0.00025 \ - --warmup_steps=0 \ - --train_steps=400000 \ - --tgt_len=${TGT_LEN} \ - --mem_len=${MEM_LEN} \ - --train_batch_size=${BSZ} \ - --num_core_per_host=${NUM_CORE} \ - --iterations=200 \ - --save_steps=4000 \ - --do_train=True \ - --do_eval=False \ - ${@:2} -elif [[ $1 == 'eval' ]]; then - echo 'Run evaluation...' 
- python train_gpu.py \ - --data_dir=${DATA_ROOT}/tfrecords \ - --record_info_dir=${DATA_ROOT}/tfrecords/ \ - --corpus_info_path=${DATA_ROOT}/corpus-info.json \ - --model_dir=EXP-text8 \ - --n_layer=${N_LAYER} \ - --d_model=${D_MODEL} \ - --d_embed=${D_EMBED} \ - --n_head=${N_HEAD} \ - --d_head=${D_HEAD} \ - --d_inner=${D_INNER} \ - --dropout=0.0 \ - --dropatt=0.0 \ - --tgt_len=${TEST_TGT_LEN} \ - --mem_len=${TEST_MEM_LEN} \ - --clamp_len=${TEST_CLAMP_LEN} \ - --same_length=True \ - --eval_batch_size=${TEST_BSZ} \ - --num_core_per_host=${TEST_NUM_CORE} \ - --do_train=False \ - --do_eval=True \ - --eval_split=test \ - ${@:2} -else - echo 'unknown argment 1' -fi \ No newline at end of file diff --git a/transformer-xl/tf/scripts/text8_large_tpu.sh b/transformer-xl/tf/scripts/text8_large_tpu.sh deleted file mode 100644 index afcbbf5..0000000 --- a/transformer-xl/tf/scripts/text8_large_tpu.sh +++ /dev/null @@ -1,122 +0,0 @@ -#!/bin/bash - -# Path -LOCAL_DIR=../data/text8/ -GSDATA= -GSEXP= - -# TPU setting -NUM_HOST=2 -NUM_CORE=16 # TPUv2 -> 8 | TPUv3 -> 16 - -TEST_NUM_HOST=1 -TEST_NUM_CORE=8 # TPUv2 -> 8 | TPUv3 -> 16 - -# Model -N_LAYER=24 -D_MODEL=1024 -D_EMBED=1024 -N_HEAD=8 -D_HEAD=128 -D_INNER=3072 - -# Training -TGT_LEN=768 -MEM_LEN=768 -TRAIN_BSZ=64 -VALID_BSZ=64 - -# Testing -TEST_TGT_LEN=128 -TEST_MEM_LEN=3800 -TEST_CLAMP_LEN=1000 -TEST_BSZ=16 - -if [[ $1 == 'train_data' ]]; then - python data_utils.py \ - --data_dir=${LOCAL_DIR}/ \ - --dataset=text8 \ - --tgt_len=${TGT_LEN} \ - --per_host_train_bsz=${TRAIN_BSZ} \ - --per_host_valid_bsz=${VALID_BSZ} \ - --num_core_per_host=${NUM_CORE} \ - --num_passes=10 \ - --use_tpu=True \ - ${@:2} - - SRC_PATTERN=train.bsz-${TRAIN_BSZ}.tlen-${TGT_LEN}.core-${NUM_CORE}* - gsutil cp ${LOCAL_DIR}/tfrecords/${SRC_PATTERN} ${GSDATA}/text8-tfrecords/ - - SRC_PATTERN=valid.bsz-${VALID_BSZ}.tlen-${TGT_LEN}.core-${NUM_CORE}* - gsutil cp ${LOCAL_DIR}/tfrecords/${SRC_PATTERN} ${GSDATA}/text8-tfrecords/ - -elif [[ $1 == 'test_data' ]]; then - python data_utils.py \ - --data_dir=${LOCAL_DIR}/ \ - --dataset=text8 \ - --tgt_len=${TEST_TGT_LEN} \ - --per_host_test_bsz=${TEST_BSZ} \ - --num_core_per_host=${TEST_NUM_CORE} \ - --num_passes=1 \ - --use_tpu=True \ - ${@:2} - - SRC_PATTERN=test.bsz-${TEST_BSZ}.tlen-${TEST_TGT_LEN}.core-${TEST_NUM_CORE}* - gsutil cp ${LOCAL_DIR}/tfrecords/${SRC_PATTERN} ${GSDATA}/text8-tfrecords/ - -elif [[ $1 == 'train' ]]; then - echo 'Run training...' - python train.py \ - --data_dir=${GSDATA}/text8-tfrecords \ - --record_info_dir=${LOCAL_DIR}/tfrecords/ \ - --corpus_info_path=${LOCAL_DIR}/corpus-info.json \ - --model_dir=${GSEXP}/text8 \ - --n_layer=${N_LAYER} \ - --d_model=${D_MODEL} \ - --d_embed=${D_EMBED} \ - --n_head=${N_HEAD} \ - --d_head=${D_HEAD} \ - --d_inner=${D_INNER} \ - --dropout=0.15 \ - --dropatt=0.15 \ - --learning_rate=0.00025 \ - --warmup_steps=4000 \ - --train_steps=400000 \ - --tgt_len=${TGT_LEN} \ - --mem_len=${MEM_LEN} \ - --train_batch_size=${TRAIN_BSZ} \ - --use_tpu=True \ - --num_host=${NUM_HOST} \ - --num_core_per_host=${NUM_CORE} \ - --iterations=1000 \ - --save_steps=10000 \ - --do_train=True \ - --do_eval=False \ - ${@:2} - -elif [[ $1 == 'eval' ]]; then - echo 'Run evaluation...' 
- python train.py \ - --data_dir=${GSDATA}/text8-tfrecords \ - --record_info_dir=${LOCAL_DIR}/tfrecords/ \ - --corpus_info_path=${LOCAL_DIR}/corpus-info.json \ - --model_dir=${GSEXP}/text8 \ - --n_layer=${N_LAYER} \ - --d_model=${D_MODEL} \ - --d_embed=${D_EMBED} \ - --n_head=${N_HEAD} \ - --d_head=${D_HEAD} \ - --d_inner=${D_INNER} \ - --tgt_len=${TEST_TGT_LEN} \ - --mem_len=${TEST_MEM_LEN} \ - --eval_batch_size=${TEST_BSZ} \ - --num_host=${TEST_NUM_HOST} \ - --num_core_per_host=${TEST_NUM_CORE} \ - --use_tpu=True \ - --do_train=False \ - --do_eval_only=True \ - --eval_split=test \ - ${@:2} -else - echo 'unknown argment 1' -fi diff --git a/transformer-xl/tf/scripts/wt103_base_gpu.sh b/transformer-xl/tf/scripts/wt103_base_gpu.sh deleted file mode 100644 index c3bc810..0000000 --- a/transformer-xl/tf/scripts/wt103_base_gpu.sh +++ /dev/null @@ -1,108 +0,0 @@ -#!/bin/bash - -# Data -DATA_ROOT=../data/wikitext-103/ - -# Model -DIV_VAL=1 -N_LAYER=16 -D_MODEL=410 -D_EMBED=410 -N_HEAD=10 -D_HEAD=41 -D_INNER=2100 - -# Training -TGT_LEN=150 -MEM_LEN=150 - -BSZ=60 -NUM_CORE=4 - -# Testing -TEST_TGT_LEN=64 -TEST_MEM_LEN=640 -TEST_CLAMP_LEN=400 - -TEST_BSZ=10 -TEST_NUM_CORE=1 - - -if [[ $1 == 'train_data' ]]; then - python data_utils.py \ - --data_dir=${DATA_ROOT}/ \ - --dataset=wt103 \ - --tgt_len=${TGT_LEN} \ - --per_host_train_bsz=${BSZ} \ - --per_host_valid_bsz=${BSZ} \ - --num_passes=1 \ - --use_tpu=False \ - ${@:2} -elif [[ $1 == 'test_data' ]]; then - python data_utils.py \ - --data_dir=${DATA_ROOT}/ \ - --dataset=enwik8 \ - --tgt_len=${TEST_TGT_LEN} \ - --per_host_test_bsz=${TEST_BSZ} \ - --num_passes=1 \ - --use_tpu=False \ - ${@:2} -elif [[ $1 == 'train' ]]; then - echo 'Run training...' - python train_gpu.py \ - --data_dir=${DATA_ROOT}/tfrecords \ - --record_info_dir=${DATA_ROOT}/tfrecords/ \ - --corpus_info_path=${DATA_ROOT}/corpus-info.json \ - --model_dir=EXP-wt103 \ - --div_val=${DIV_VAL} \ - --untie_r=True \ - --proj_share_all_but_first=True \ - --n_layer=${N_LAYER} \ - --d_model=${D_MODEL} \ - --d_embed=${D_EMBED} \ - --n_head=${N_HEAD} \ - --d_head=${D_HEAD} \ - --d_inner=${D_INNER} \ - --dropout=0.1 \ - --dropatt=0.0 \ - --learning_rate=0.00025 \ - --warmup_steps=0 \ - --train_steps=400000 \ - --tgt_len=${TGT_LEN} \ - --mem_len=${MEM_LEN} \ - --train_batch_size=${BSZ} \ - --num_core_per_host=${NUM_CORE} \ - --iterations=200 \ - --save_steps=4000 \ - ${@:2} -elif [[ $1 == 'eval' ]]; then - echo 'Run evaluation...' 
- python train_gpu.py \ - --data_dir=${DATA_ROOT}/tfrecords \ - --record_info_dir=${DATA_ROOT}/tfrecords/ \ - --corpus_info_path=${DATA_ROOT}/corpus-info.json \ - --model_dir=EXP-wt103 \ - --div_val=${DIV_VAL} \ - --untie_r=True \ - --proj_share_all_but_first=True \ - --n_layer=${N_LAYER} \ - --d_model=${D_MODEL} \ - --d_embed=${D_EMBED} \ - --n_head=${N_HEAD} \ - --d_head=${D_HEAD} \ - --d_inner=${D_INNER} \ - --dropout=0.0 \ - --dropatt=0.0 \ - --tgt_len=${TEST_TGT_LEN} \ - --mem_len=${TEST_MEM_LEN} \ - --clamp_len=${TEST_CLAMP_LEN} \ - --same_length=True \ - --eval_batch_size=${TEST_BSZ} \ - --num_core_per_host=${TEST_NUM_CORE} \ - --do_train=False \ - --do_eval=True \ - --eval_split=test \ - ${@:2} -else - echo 'unknown argment 1' -fi \ No newline at end of file diff --git a/transformer-xl/tf/scripts/wt103_large_tpu.sh b/transformer-xl/tf/scripts/wt103_large_tpu.sh deleted file mode 100644 index c32fbcd..0000000 --- a/transformer-xl/tf/scripts/wt103_large_tpu.sh +++ /dev/null @@ -1,134 +0,0 @@ -#!/bin/bash - -# Path -LOCAL_DIR=../data/wikitext-103/ -GSDATA= -GSEXP= - -# TPU setting -NUM_HOST=4 -NUM_CORE=16 # TPUv2 -> 8 | TPUv3 -> 16 - -TEST_NUM_HOST=1 -TEST_NUM_CORE=8 # TPUv2 -> 8 | TPUv3 -> 16 - -# Model -DIV_VAL=4 -N_LAYER=18 -D_MODEL=1024 -D_EMBED=1024 -N_HEAD=16 -D_HEAD=64 -D_INNER=4096 - -# Training -TGT_LEN=384 -MEM_LEN=384 -TRAIN_BSZ=128 -VALID_BSZ=128 - -# Testing -TEST_TGT_LEN=128 -TEST_MEM_LEN=1600 -TEST_CLAMP_LEN=1000 -TEST_BSZ=8 - -if [[ $1 == 'train_data' ]]; then - python data_utils.py \ - --data_dir=${LOCAL_DIR}/ \ - --dataset=wt103 \ - --tgt_len=${TGT_LEN} \ - --per_host_train_bsz=${TRAIN_BSZ} \ - --per_host_valid_bsz=${VALID_BSZ} \ - --num_core_per_host=${NUM_CORE} \ - --num_passes=10 \ - --use_tpu=True \ - ${@:2} - - SRC_PATTERN=train.bsz-${TRAIN_BSZ}.tlen-${TGT_LEN}.core-${NUM_CORE}* - gsutil cp ${LOCAL_DIR}/tfrecords/${SRC_PATTERN} ${GSDATA}/wt103-tfrecords/ - - SRC_PATTERN=valid.bsz-${VALID_BSZ}.tlen-${TGT_LEN}.core-${NUM_CORE}* - gsutil cp ${LOCAL_DIR}/tfrecords/${SRC_PATTERN} ${GSDATA}/wt103-tfrecords/ - -elif [[ $1 == 'test_data' ]]; then - python data_utils.py \ - --data_dir=${LOCAL_DIR}/ \ - --dataset=wt103 \ - --tgt_len=${TEST_TGT_LEN} \ - --per_host_test_bsz=${TEST_BSZ} \ - --num_core_per_host=${TEST_NUM_CORE} \ - --num_passes=1 \ - --use_tpu=True \ - ${@:2} - - SRC_PATTERN=test.bsz-${TEST_BSZ}.tlen-${TEST_TGT_LEN}.core-${TEST_NUM_CORE}* - gsutil cp ${LOCAL_DIR}/tfrecords/${SRC_PATTERN} ${GSDATA}/wt103-tfrecords/ - -elif [[ $1 == 'train' ]]; then - echo 'Run training...' - python train.py \ - --data_dir=${GSDATA}/wt103-tfrecords \ - --record_info_dir=${LOCAL_DIR}/tfrecords/ \ - --corpus_info_path=${LOCAL_DIR}/corpus-info.json \ - --model_dir=${GSEXP}/wt103 \ - --div_val=${DIV_VAL} \ - --untie_r=True \ - --proj_share_all_but_first=True \ - --proj_same_dim=True \ - --n_layer=${N_LAYER} \ - --d_model=${D_MODEL} \ - --d_embed=${D_EMBED} \ - --n_head=${N_HEAD} \ - --d_head=${D_HEAD} \ - --d_inner=${D_INNER} \ - --dropout=0.2 \ - --dropatt=0.2 \ - --init_std=0.005 \ - --learning_rate=0.00025 \ - --warmup_steps=16000 \ - --train_steps=4000000 \ - --tgt_len=${TGT_LEN} \ - --mem_len=${MEM_LEN} \ - --train_batch_size=${TRAIN_BSZ} \ - --num_hosts=${NUM_HOST} \ - --num_core_per_host=${NUM_CORE} \ - --iterations=1000 \ - --save_steps=10000 \ - --use_tpu=True \ - --do_eval=False \ - ${@:2} - -elif [[ $1 == 'eval' ]]; then - echo 'Run evaluation...' 
- python train.py \ - --data_dir=${GSDATA}/wt103-tfrecords \ - --record_info_dir=${LOCAL_DIR}/tfrecords/ \ - --corpus_info_path=${LOCAL_DIR}/corpus-info.json \ - --model_dir=${GSEXP}/wt103 \ - --div_val=${DIV_VAL} \ - --untie_r=True \ - --proj_share_all_but_first=True \ - --proj_same_dim=True \ - --n_layer=${N_LAYER} \ - --d_model=${D_MODEL} \ - --d_embed=${D_EMBED} \ - --n_head=${N_HEAD} \ - --d_head=${D_HEAD} \ - --d_inner=${D_INNER} \ - --tgt_len=${TEST_TGT_LEN} \ - --mem_len=${TEST_MEM_LEN} \ - --clamp_len=${TEST_CLAMP_LEN} \ - --same_length=True \ - --eval_batch_size=${TEST_BSZ} \ - --num_host=${TEST_NUM_HOST} \ - --num_core_per_host=${TEST_NUM_CORE} \ - --use_tpu=True \ - --do_train=False \ - --do_eval_only=True \ - --eval_split=test \ - ${@:2} - -else - echo 'unknown argment 1' -fi diff --git a/transformer-xl/tf/sota/download.sh b/transformer-xl/tf/sota/download.sh deleted file mode 100644 index 9a8db16..0000000 --- a/transformer-xl/tf/sota/download.sh +++ /dev/null @@ -1,87 +0,0 @@ -#!/bin/bash - -URL=http://curtis.ml.cmu.edu/datasets/pretrained_xl - -DATA_ROOT=./ - -function download () { - fileurl=${1} - filename=${fileurl##*/} - if [ ! -f ${filename} ]; then - echo ">>> Download '${filename}' from '${fileurl}'." - wget --quiet ${fileurl} - else - echo "*** File '${filename}' exists. Skip." - fi -} - -cd $DATA_ROOT -mkdir -p pretrained_xl && cd pretrained_xl - -# enwik8 -mkdir -p tf_enwik8 && cd tf_enwik8 - -mkdir -p data && cd data -download ${URL}/tf_enwiki8/data/cache.pkl -download ${URL}/tf_enwiki8/data/corpus-info.json -cd .. - -mkdir -p model && cd model -download ${URL}/tf_enwiki8/model/checkpoint -download ${URL}/tf_enwiki8/model/model.ckpt-0.data-00000-of-00001 -download ${URL}/tf_enwiki8/model/model.ckpt-0.index -download ${URL}/tf_enwiki8/model/model.ckpt-0.meta -cd .. - -cd .. - -# text8 -mkdir -p tf_text8 && cd tf_text8 - -mkdir -p data && cd data -download ${URL}/tf_text8/data/cache.pkl -download ${URL}/tf_text8/data/corpus-info.json -cd .. - -mkdir -p model && cd model -download ${URL}/tf_text8/model/checkpoint -download ${URL}/tf_text8/model/model.ckpt-0.data-00000-of-00001 -download ${URL}/tf_text8/model/model.ckpt-0.index -download ${URL}/tf_text8/model/model.ckpt-0.meta -cd .. - -cd .. - -# wt103 -mkdir -p tf_wt103 && cd tf_wt103 - -mkdir -p data && cd data -download ${URL}/tf_wt103/data/cache.pkl -download ${URL}/tf_wt103/data/corpus-info.json -cd .. - -mkdir -p model && cd model -download ${URL}/tf_wt103/model/checkpoint -download ${URL}/tf_wt103/model/model.ckpt-0.data-00000-of-00001 -download ${URL}/tf_wt103/model/model.ckpt-0.index -download ${URL}/tf_wt103/model/model.ckpt-0.meta -cd .. - -cd .. - -# lm1b -mkdir -p tf_lm1b && cd tf_lm1b - -mkdir -p data && cd data -download ${URL}/tf_lm1b/data/cache.pkl -download ${URL}/tf_lm1b/data/corpus-info.json -cd .. - -mkdir -p model && cd model -download ${URL}/tf_lm1b/model/checkpoint -download ${URL}/tf_lm1b/model/model.ckpt-1191000.data-00000-of-00001 -download ${URL}/tf_lm1b/model/model.ckpt-1191000.index -download ${URL}/tf_lm1b/model/model.ckpt-1191000.meta -cd .. - -cd .. 
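
[Editor's note] The deleted `sota/download.sh` below wraps `wget` in a skip-if-exists helper before fetching each pretrained checkpoint and cache file. The same pattern in Python using only the standard library; a sketch under the same assumptions as the shell function (plain HTTP fetch, filename taken from the URL).

```python
import os
import urllib.request

def download(fileurl, dest_dir="."):
    """Fetch fileurl into dest_dir unless the file already exists."""
    filename = os.path.join(dest_dir, fileurl.rsplit("/", 1)[-1])
    if os.path.exists(filename):
        print(f"*** File '{filename}' exists. Skip.")
    else:
        print(f">>> Download '{filename}' from '{fileurl}'.")
        urllib.request.urlretrieve(fileurl, filename)
    return filename
```
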
diff --git a/transformer-xl/tf/sota/enwik8.sh b/transformer-xl/tf/sota/enwik8.sh deleted file mode 100644 index 27b45f0..0000000 --- a/transformer-xl/tf/sota/enwik8.sh +++ /dev/null @@ -1,58 +0,0 @@ -#!/bin/bash - -# Data -DATA_ROOT=./ -DATA_DIR=${DATA_ROOT}/pretrained_xl/tf_enwik8/data -MODEL_DIR=${DATA_ROOT}/pretrained_xl/tf_enwik8/model - -# Model -N_LAYER=24 -D_MODEL=1024 -D_EMBED=1024 -N_HEAD=8 -D_HEAD=128 -D_INNER=3072 - -# Testing -TEST_TGT_LEN=128 -TEST_MEM_LEN=3800 -TEST_CLAMP_LEN=1000 - -TEST_CKPT_PATH=${MODEL_DIR}/model.ckpt-0 -TEST_BSZ=16 -TEST_NUM_CORE=2 - - -echo 'Preprocess test set...' -python data_utils.py \ - --data_dir=${DATA_DIR}/ \ - --dataset=enwik8 \ - --tgt_len=${TEST_TGT_LEN} \ - --per_host_test_bsz=${TEST_BSZ} \ - --num_passes=1 \ - --use_tpu=False - -echo 'Run evaluation on test set...' -python train_gpu.py \ - --data_dir=${DATA_DIR}/tfrecords \ - --record_info_dir=${DATA_DIR}/tfrecords/ \ - --corpus_info_path=${DATA_DIR}/corpus-info.json \ - --eval_ckpt_path=${TEST_CKPT_PATH} \ - --model_dir=EXP-enwik8 \ - --n_layer=${N_LAYER} \ - --d_model=${D_MODEL} \ - --d_embed=${D_EMBED} \ - --n_head=${N_HEAD} \ - --d_head=${D_HEAD} \ - --d_inner=${D_INNER} \ - --dropout=0.0 \ - --dropatt=0.0 \ - --tgt_len=${TEST_TGT_LEN} \ - --mem_len=${TEST_MEM_LEN} \ - --clamp_len=${TEST_CLAMP_LEN} \ - --same_length=True \ - --eval_batch_size=${TEST_BSZ} \ - --num_core_per_host=${TEST_NUM_CORE} \ - --do_train=False \ - --do_eval=True \ - --eval_split=test diff --git a/transformer-xl/tf/sota/lm1b.sh b/transformer-xl/tf/sota/lm1b.sh deleted file mode 100644 index bd49918..0000000 --- a/transformer-xl/tf/sota/lm1b.sh +++ /dev/null @@ -1,63 +0,0 @@ -#!/bin/bash - -# Data -DATA_ROOT=./ -DATA_DIR=${DATA_ROOT}/pretrained_xl/tf_lm1b/data -MODEL_DIR=${DATA_ROOT}/pretrained_xl/tf_lm1b/model - -# Model -DIV_VAL=4 -N_LAYER=24 -D_MODEL=1280 -D_EMBED=1280 -N_HEAD=16 -D_HEAD=80 -D_INNER=8192 - -# Testing -TEST_TGT_LEN=32 -TEST_MEM_LEN=128 -TEST_CLAMP_LEN=-1 - -TEST_CKPT_PATH=${MODEL_DIR}/model.ckpt-1191000 -TEST_BSZ=16 -TEST_NUM_CORE=1 - - -echo 'Preprocess test set...' -python data_utils.py \ - --data_dir=${DATA_DIR}/ \ - --dataset=lm1b \ - --tgt_len=${TEST_TGT_LEN} \ - --per_host_test_bsz=${TEST_BSZ} \ - --num_passes=1 \ - --use_tpu=False - -echo 'Run evaluation on test set...' 
-python train_gpu.py \ - --data_dir=${DATA_DIR}/tfrecords \ - --record_info_dir=${DATA_DIR}/tfrecords/ \ - --corpus_info_path=${DATA_DIR}/corpus-info.json \ - --eval_ckpt_path=${TEST_CKPT_PATH} \ - --model_dir=EXP-lm1b \ - --div_val=${DIV_VAL} \ - --untie_r=True \ - --proj_share_all_but_first=False \ - --proj_same_dim=False \ - --n_layer=${N_LAYER} \ - --d_model=${D_MODEL} \ - --d_embed=${D_EMBED} \ - --n_head=${N_HEAD} \ - --d_head=${D_HEAD} \ - --d_inner=${D_INNER} \ - --dropout=0.0 \ - --dropatt=0.0 \ - --tgt_len=${TEST_TGT_LEN} \ - --mem_len=${TEST_MEM_LEN} \ - --clamp_len=${TEST_CLAMP_LEN} \ - --same_length=True \ - --eval_batch_size=${TEST_BSZ} \ - --num_core_per_host=${TEST_NUM_CORE} \ - --do_train=False \ - --do_eval=True \ - --eval_split=test diff --git a/transformer-xl/tf/sota/text8.sh b/transformer-xl/tf/sota/text8.sh deleted file mode 100644 index 5d9d8f5..0000000 --- a/transformer-xl/tf/sota/text8.sh +++ /dev/null @@ -1,58 +0,0 @@ -#!/bin/bash - -# Data -DATA_ROOT=./ -DATA_DIR=${DATA_ROOT}/pretrained_xl/tf_text8/data -MODEL_DIR=${DATA_ROOT}/pretrained_xl/tf_text8/model - -# Model -N_LAYER=24 -D_MODEL=1024 -D_EMBED=1024 -N_HEAD=8 -D_HEAD=128 -D_INNER=3072 - -# Testing -TEST_TGT_LEN=128 -TEST_MEM_LEN=3800 -TEST_CLAMP_LEN=1000 - -TEST_CKPT_PATH=${MODEL_DIR}/model.ckpt-0 -TEST_BSZ=16 -TEST_NUM_CORE=2 - - -echo 'Preprocess test set...' -python data_utils.py \ - --data_dir=${DATA_DIR}/ \ - --dataset=text8 \ - --tgt_len=${TEST_TGT_LEN} \ - --per_host_test_bsz=${TEST_BSZ} \ - --num_passes=1 \ - --use_tpu=False - -echo 'Run evaluation on test set...' -python train_gpu.py \ - --data_dir=${DATA_DIR}/tfrecords \ - --record_info_dir=${DATA_DIR}/tfrecords/ \ - --corpus_info_path=${DATA_DIR}/corpus-info.json \ - --eval_ckpt_path=${TEST_CKPT_PATH} \ - --model_dir=EXP-text8 \ - --n_layer=${N_LAYER} \ - --d_model=${D_MODEL} \ - --d_embed=${D_EMBED} \ - --n_head=${N_HEAD} \ - --d_head=${D_HEAD} \ - --d_inner=${D_INNER} \ - --dropout=0.0 \ - --dropatt=0.0 \ - --tgt_len=${TEST_TGT_LEN} \ - --mem_len=${TEST_MEM_LEN} \ - --clamp_len=${TEST_CLAMP_LEN} \ - --same_length=True \ - --eval_batch_size=${TEST_BSZ} \ - --num_core_per_host=${TEST_NUM_CORE} \ - --do_train=False \ - --do_eval=True \ - --eval_split=test diff --git a/transformer-xl/tf/sota/wt103.sh b/transformer-xl/tf/sota/wt103.sh deleted file mode 100644 index 4b7f626..0000000 --- a/transformer-xl/tf/sota/wt103.sh +++ /dev/null @@ -1,71 +0,0 @@ -#!/bin/bash - -# Data -DATA_ROOT=./ -DATA_DIR=${DATA_ROOT}/pretrained_xl/tf_wt103/data -MODEL_DIR=${DATA_ROOT}/pretrained_xl/tf_wt103/model - -# Model -DIV_VAL=4 -N_LAYER=18 -D_MODEL=1024 -D_EMBED=1024 -N_HEAD=16 -D_HEAD=64 -D_INNER=4096 - -# Training -TGT_LEN=256 -MEM_LEN=256 - -BSZ=16 -NUM_CORE=2 - -# Testing -TEST_TGT_LEN=128 -TEST_MEM_LEN=1600 -TEST_CLAMP_LEN=1000 - -TEST_CKPT_PATH=${MODEL_DIR}/model.ckpt-0 -TEST_BSZ=16 -TEST_NUM_CORE=1 - - -echo 'Preprocess test set...' -python data_utils.py \ - --data_dir=${DATA_DIR}/ \ - --dataset=enwik8 \ - --tgt_len=${TEST_TGT_LEN} \ - --per_host_test_bsz=${TEST_BSZ} \ - --num_passes=1 \ - --use_tpu=False - - -echo 'Run evaluation on test set...' 
-python train_gpu.py \ - --data_dir=${DATA_DIR}/tfrecords \ - --record_info_dir=${DATA_DIR}/tfrecords/ \ - --corpus_info_path=${DATA_DIR}/corpus-info.json \ - --eval_ckpt_path=${TEST_CKPT_PATH} \ - --model_dir=EXP-wt103 \ - --div_val=${DIV_VAL} \ - --untie_r=True \ - --proj_share_all_but_first=True \ - --n_layer=${N_LAYER} \ - --d_model=${D_MODEL} \ - --d_embed=${D_EMBED} \ - --n_head=${N_HEAD} \ - --d_head=${D_HEAD} \ - --d_inner=${D_INNER} \ - --dropout=0.0 \ - --dropatt=0.0 \ - --tgt_len=${TEST_TGT_LEN} \ - --mem_len=${TEST_MEM_LEN} \ - --clamp_len=${TEST_CLAMP_LEN} \ - --same_length=True \ - --eval_batch_size=${TEST_BSZ} \ - --num_core_per_host=${TEST_NUM_CORE} \ - --do_train=False \ - --do_eval=True \ - --eval_split=test - diff --git a/transformer-xl/tf/tpu_estimator.py b/transformer-xl/tf/tpu_estimator.py deleted file mode 100644 index 7bc3598..0000000 --- a/transformer-xl/tf/tpu_estimator.py +++ /dev/null @@ -1,3519 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# =================================================================== -"""TPUEstimator class.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import collections -import copy -import os -import signal -import sys -import threading -import time -import numpy as np -import six -from six.moves import queue as Queue # pylint: disable=redefined-builtin -from six.moves import xrange # pylint: disable=redefined-builtin - -import math - -try: - import google3 - from google3.third_party.tensorflow.contrib.tpu.python.ops import tpu_ops - from google3.third_party.tensorflow.contrib.tpu.python.tpu import error_handling - from google3.third_party.tensorflow.contrib.tpu.python.tpu import session_support - from google3.third_party.tensorflow.contrib.tpu.python.tpu import tpu - from google3.third_party.tensorflow.contrib.tpu.python.tpu import tpu_config - from google3.third_party.tensorflow.contrib.tpu.python.tpu import tpu_context - from google3.third_party.tensorflow.contrib.tpu.python.tpu import tpu_feed - from google3.third_party.tensorflow.contrib.tpu.python.tpu import training_loop - from google3.third_party.tensorflow.contrib.tpu.python.tpu import util as util_lib - from google3.third_party.tensorflow.contrib.training.python.training import hparam - from google3.third_party.tensorflow.core.framework import variable_pb2 - from google3.third_party.tensorflow.core.framework.summary_pb2 import Summary - from google3.third_party.tensorflow.core.protobuf import config_pb2 - from google3.third_party.tensorflow.python.data.ops import dataset_ops - from google3.third_party.tensorflow.python.data.util import nest as data_nest - from google3.third_party.tensorflow.python.estimator import estimator as estimator_lib - from google3.third_party.tensorflow.python.estimator import model_fn as model_fn_lib - from google3.third_party.tensorflow.python.estimator.export import export_output as export_output_lib - 
from google3.third_party.tensorflow.python.framework import constant_op - from google3.third_party.tensorflow.python.framework import dtypes - from google3.third_party.tensorflow.python.framework import errors - from google3.third_party.tensorflow.python.framework import ops - from google3.third_party.tensorflow.python.ops import array_ops - from google3.third_party.tensorflow.python.ops import check_ops - from google3.third_party.tensorflow.python.ops import control_flow_ops - from google3.third_party.tensorflow.python.ops import init_ops - from google3.third_party.tensorflow.python.ops import math_ops - from google3.third_party.tensorflow.python.ops import resource_variable_ops - from google3.third_party.tensorflow.python.ops import state_ops - from google3.third_party.tensorflow.python.ops import summary_ops_v2 as contrib_summary - from google3.third_party.tensorflow.python.ops import variable_scope - from google3.third_party.tensorflow.python.ops import variables - from google3.third_party.tensorflow.python.platform import tf_logging as logging - from google3.third_party.tensorflow.python.saved_model import tag_constants - from google3.third_party.tensorflow.python.summary import summary - from google3.third_party.tensorflow.python.training import basic_session_run_hooks - from google3.third_party.tensorflow.python.training import evaluation - from google3.third_party.tensorflow.python.training import session_run_hook - from google3.third_party.tensorflow.python.training import training - from google3.third_party.tensorflow.python.training import training_util - from google3.third_party.tensorflow.python.util import function_utils - from google3.third_party.tensorflow.python.util import nest - from google3.third_party.tensorflow.python.util import tf_inspect -except: - import tensorflow - from tensorflow.contrib.tpu.python.ops import tpu_ops - from tensorflow.contrib.tpu.python.tpu import error_handling - from tensorflow.contrib.tpu.python.tpu import session_support - from tensorflow.contrib.tpu.python.tpu import tpu - from tensorflow.contrib.tpu.python.tpu import tpu_config - from tensorflow.contrib.tpu.python.tpu import tpu_context - from tensorflow.contrib.tpu.python.tpu import tpu_feed - from tensorflow.contrib.tpu.python.tpu import training_loop - from tensorflow.contrib.tpu.python.tpu import util as util_lib - from tensorflow.contrib.training.python.training import hparam - from tensorflow.core.framework import variable_pb2 - from tensorflow.core.framework.summary_pb2 import Summary - from tensorflow.core.protobuf import config_pb2 - from tensorflow.python.data.ops import dataset_ops - from tensorflow.python.data.util import nest as data_nest - from tensorflow.python.estimator import estimator as estimator_lib - from tensorflow.python.estimator import model_fn as model_fn_lib - from tensorflow.python.estimator import util as estimator_util - from tensorflow.python.estimator.export import export_output as export_output_lib - from tensorflow.python.framework import constant_op - from tensorflow.python.framework import dtypes - from tensorflow.python.framework import errors - from tensorflow.python.framework import ops - from tensorflow.python.ops import array_ops - from tensorflow.python.ops import check_ops - from tensorflow.python.ops import control_flow_ops - from tensorflow.python.ops import init_ops - from tensorflow.python.ops import math_ops - from tensorflow.python.ops import resource_variable_ops - from tensorflow.python.ops import state_ops - from tensorflow.python.ops 
import summary_ops_v2 as contrib_summary - from tensorflow.python.ops import variable_scope - from tensorflow.python.ops import variables - from tensorflow.python.platform import tf_logging as logging - from tensorflow.python.saved_model import tag_constants - from tensorflow.python.summary import summary - from tensorflow.python.training import basic_session_run_hooks - from tensorflow.python.training import evaluation - from tensorflow.python.training import session_run_hook - from tensorflow.python.training import training - from tensorflow.python.training import training_util - from tensorflow.python.util import function_utils - from tensorflow.python.util import nest - from tensorflow.python.util import tf_inspect - - -_INITIAL_LOSS = 1e7 -_ZERO_LOSS = 0. -_TPU_ESTIMATOR = 'custom_tpu_estimator' # CHANGE FOR RECURRENCY -_ITERATIONS_PER_LOOP_VAR = 'iterations_per_loop' -_BATCH_SIZE_KEY = 'batch_size' -_CTX_KEY = 'context' -_USE_TPU_KEY = 'use_tpu' -_CROSS_REPLICA_SUM_OP = 'CrossReplicaSum' -_ONE_GIGABYTE = 1024 * 1024 * 1024 -_TPU_ENQUEUE_OPS = '_tpu_enqueue_ops' -_TPU_TRAIN_OP = '_tpu_train_op' -_REWRITE_FOR_INFERENCE_MODE = '_rewrite_for_inference' - -# Ideally _USE_TPU_KEY should be reserved as well. However there are already -# models that make use of this key, thus it can not be reserved now to prevent -# breakage. In the long run, we would like to mitigate this by migrating models -# off of using _USE_TPU_KEY. -_RESERVED_PARAMS_KEYS = [_BATCH_SIZE_KEY, _CTX_KEY] - - -# TODO(b/65703635): Flip the value and remove all dead code. Currently, this is -# only used for per-core based deployments. For per-host based pipelines, if a -# user returns a Dataset instance it will be automatically wrapped in a -# tf.while_loop (This can be disabled by returning features and labels -# explicitly). -_WRAP_INPUT_FN_INTO_WHILE_LOOP = False - - -ops.register_proto_function( - '{}_{}'.format(_TPU_ESTIMATOR, _ITERATIONS_PER_LOOP_VAR), - proto_type=variable_pb2.VariableDef, - to_proto=resource_variable_ops._to_proto_fn, # pylint: disable=protected-access - from_proto=resource_variable_ops._from_proto_fn) # pylint: disable=protected-access - - -def _create_global_step(graph): - graph = graph or ops.get_default_graph() - if training.get_global_step(graph) is not None: - raise ValueError('"global_step" already exists.') - # Create in proper graph and base name_scope. - with graph.as_default() as g, g.name_scope(None): - return variable_scope.get_variable( - ops.GraphKeys.GLOBAL_STEP, - shape=[], - dtype=dtypes.int64, - initializer=init_ops.zeros_initializer(), - trainable=False, - use_resource=True, - collections=[ops.GraphKeys.GLOBAL_VARIABLES, ops.GraphKeys.GLOBAL_STEP]) - - -def _create_or_get_iterations_per_loop(): - """Creates or gets the iterations_per_loop variable. - - In TPUEstimator, the user provided computation, the model_fn, is wrapped - inside a tf.while_loop for peak performance. The iterations of the loop are - specified by this variable, which adjusts its value on the CPU after each TPU - program execution and before the next TPU execution. - - The purpose of using a variable, rather then a constant, is to allow - TPUEstimator adapt the TPU training iterations according to the final steps - specified by users. For example, if the user sets the iterations_per_loop as 4 - in TPUConfig and steps as 10 in TPUEstimator.train(), the iterations_per_loop - variable will have the following value before each TPU training. 
- - - 1-th TPU execution: iterations_per_loop = 4 - - 2-th TPU execution: iterations_per_loop = 4 - - 3-th TPU execution: iterations_per_loop = 2 - - As model_fn increases the global step once per train_op invocation, the global - step is 10 after all TPU executions, matching the steps=10 inputs passed in by - users. - - Returns: - A TF non-trainable resource variable. - - Raises: - RuntimeError: If multi iterations_per_loop variables were found. - """ - graph = ops.get_default_graph() - collection_name = '{}_{}'.format(_TPU_ESTIMATOR, _ITERATIONS_PER_LOOP_VAR) - iter_vars = graph.get_collection(collection_name) - if len(iter_vars) == 1: - return iter_vars[0] - elif len(iter_vars) > 1: - raise RuntimeError('Multiple iterations_per_loop_var in collection.') - - with ops.colocate_with(training_util.get_global_step()): - with variable_scope.variable_scope( - _TPU_ESTIMATOR, reuse=variable_scope.AUTO_REUSE): - return variable_scope.get_variable( - _ITERATIONS_PER_LOOP_VAR, - initializer=init_ops.zeros_initializer(), - shape=[], - dtype=dtypes.int32, - trainable=False, - collections=[collection_name, ops.GraphKeys.LOCAL_VARIABLES], - use_resource=True) - - -def _sync_variables_ops(): - # Gets the variables back from TPU nodes. This means the variables updated - # by TPU will now be *synced* to host memory. - return [ - array_ops.check_numerics(v.read_value(), - 'Gradient for %s is NaN' % v.name).op - for v in variables.trainable_variables() - ] - - -def _increase_eval_step_op(iterations_per_loop): - """Returns an op to increase the eval step for TPU evaluation. - - Args: - iterations_per_loop: Tensor. The number of eval steps running in TPU - system before returning to CPU host for each `Session.run`. - - Returns: - An operation - """ - eval_step = evaluation._get_or_create_eval_step() # pylint: disable=protected-access - # Estimator evaluate increases 1 by default. So, we increase the difference. - return state_ops.assign_add( - eval_step, - math_ops.cast(iterations_per_loop - 1, dtype=eval_step.dtype), - use_locking=True) - - -def _extract_key_names(tensor_or_dict): - if isinstance(tensor_or_dict, dict): - return sorted(tensor_or_dict.keys()) - return [] - - -class _SIGNAL(object): - """Signal used to control the thread of infeed/outfeed. - - All preserved signals must be negative numbers. Positive numbers are used to - indicate the number of iterations for next training/evaluation loop. - """ - NEXT_BATCH = -1 - STOP = -2 - - -class TPUEstimatorSpec(model_fn_lib._TPUEstimatorSpec): # pylint: disable=protected-access - """Ops and objects returned from a `model_fn` and passed to `TPUEstimator`. - - See `EstimatorSpec` for `mode`, `predictions`, `loss`, `train_op`, and - `export_outputs`. - - For evaluation, `eval_metrics `is a tuple of `metric_fn` and `tensors`, where - `metric_fn` runs on CPU to generate metrics and `tensors` represents the - `Tensor`s transferred from TPU system to CPU host and passed to `metric_fn`. - To be precise, TPU evaluation expects a slightly different signature from the - @{tf.estimator.Estimator}. While `EstimatorSpec.eval_metric_ops` expects a - dict, `TPUEstimatorSpec.eval_metrics` is a tuple of `metric_fn` and `tensors`. - The `tensors` could be a list of `Tensor`s or dict of names to `Tensor`s. The - `tensors` usually specify the model logits, which are transferred back from - TPU system to CPU host. All tensors must have be batch-major, i.e., the batch - size is the first dimension. 
Once all tensors are available at CPU host from - all shards, they are concatenated (on CPU) and passed as positional arguments - to the `metric_fn` if `tensors` is list or keyword arguments if `tensors` is - a dict. `metric_fn` takes the `tensors` and returns a dict from metric string - name to the result of calling a metric function, namely a `(metric_tensor, - update_op)` tuple. See `TPUEstimator` for MNIST example how to specify the - `eval_metrics`. - - `scaffold_fn` is a function running on CPU to generate the `Scaffold`. This - function should not capture any Tensors in `model_fn`. - - `host_call` is a tuple of a `function` and a list or dictionary of `tensors` - to pass to that function and returns a list of Tensors. `host_call` currently - works for train() and evaluate(). The Tensors returned by the function is - executed on the CPU on every step, so there is communication overhead when - sending tensors from TPU to CPU. To reduce the overhead, try reducing the - size of the tensors. The `tensors` are concatenated along their major (batch) - dimension, and so must be >= rank 1. The `host_call` is useful for writing - summaries with @{tf.contrib.summary.create_file_writer}. - """ - - def __new__(cls, - mode, - predictions=None, - loss=None, - train_op=None, - eval_metrics=None, - export_outputs=None, - scaffold_fn=None, - host_call=None, - training_hooks=None, - evaluation_hooks=None, - prediction_hooks=None): - """Creates a validated `TPUEstimatorSpec` instance.""" - host_calls = {} - if eval_metrics is not None: - host_calls['eval_metrics'] = eval_metrics - if host_call is not None: - host_calls['host_call'] = host_call - _OutfeedHostCall.validate(host_calls) - - training_hooks = list(training_hooks or []) - evaluation_hooks = list(evaluation_hooks or []) - prediction_hooks = list(prediction_hooks or []) - - for hook in training_hooks + evaluation_hooks + prediction_hooks: - if not isinstance(hook, session_run_hook.SessionRunHook): - raise TypeError( - 'All hooks must be SessionRunHook instances, given: {}'.format( - hook)) - - return super(TPUEstimatorSpec, cls).__new__( - cls, - mode=mode, - predictions=predictions, - loss=loss, - train_op=train_op, - eval_metrics=eval_metrics, - export_outputs=export_outputs, - scaffold_fn=scaffold_fn, - host_call=host_call, - training_hooks=training_hooks, - evaluation_hooks=evaluation_hooks, - prediction_hooks=prediction_hooks) - - def as_estimator_spec(self): - """Creates an equivalent `EstimatorSpec` used by CPU train/eval.""" - host_calls = {} - if self.eval_metrics is not None: - host_calls['eval_metrics'] = self.eval_metrics - if self.host_call is not None: - host_calls['host_call'] = self.host_call - host_call_ret = _OutfeedHostCall.create_cpu_hostcall(host_calls) - eval_metric_ops = None - if self.eval_metrics is not None: - eval_metric_ops = host_call_ret['eval_metrics'] - hooks = None - if self.host_call is not None: - hooks = [_OutfeedHostCallHook(host_call_ret['host_call'])] - hooks = list(hooks or []) - scaffold = self.scaffold_fn() if self.scaffold_fn else None - return model_fn_lib.EstimatorSpec( - mode=self.mode, - predictions=self.predictions, - loss=self.loss, - train_op=self.train_op, - eval_metric_ops=eval_metric_ops, - export_outputs=self.export_outputs, - scaffold=scaffold, - training_hooks=self.training_hooks + hooks, - evaluation_hooks=self.evaluation_hooks + hooks, - prediction_hooks=self.prediction_hooks + hooks) - - -class _OpQueueContext(object): - """Manages work queue and thread for a infeed/outfeed thread.""" - 
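The `eval_metrics` contract described in the `TPUEstimatorSpec` docstring above is easier to see in a sketch. Assuming a hypothetical classification `model_fn` in which `loss`, `labels`, and `logits` are already defined, the tuple below is the shape of what `eval_metrics` expects: a `metric_fn` that runs on the CPU host, plus the batch-major tensors to ship back from the TPU (TF 1.x API, illustrative only, not the document's own code):

```python
import tensorflow as tf

def metric_fn(labels, logits):
    # Runs on the CPU host after the per-shard tensors have been
    # concatenated along the batch (major) dimension.
    predictions = tf.argmax(logits, axis=-1)
    return {
        'accuracy': tf.metrics.accuracy(labels=labels, predictions=predictions),
    }

# Inside a model_fn, for mode == EVAL (`loss`, `labels`, and `logits` are
# assumed to come from the surrounding model code):
#   return TPUEstimatorSpec(
#       mode=tf.estimator.ModeKeys.EVAL,
#       loss=loss,
#       eval_metrics=(metric_fn, [labels, logits]))
```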
- def __init__(self, name, target, args): - self._name = name - self._queue = Queue.Queue() - args = (self,) + args - self._thread = threading.Thread(name=name, target=target, args=args) - self._thread.daemon = True - self._thread.start() - - def stop(self): - self._queue.put(_SIGNAL.STOP) - - def send_next_batch_signal(self, iterations): - self._queue.put(iterations) - - def read_iteration_counts(self): - while True: - iterations = self._queue.get(block=True) - logging.debug('%s read iterations %s', self._name, iterations) - if iterations == _SIGNAL.STOP: - logging.info('%s received shutdown signal, stopping.', self._name) - return - yield iterations - - def join(self): - logging.info('Shutting down %s thread.' % self._name) - self.stop() - self._thread.join() - - -class _OpSignalOnceQueueContext(_OpQueueContext): - """Manages work queue and thread for a infeed/outfeed thread. - - This subclass only signals once. - """ - - def __init__(self, name, target, args): - super(_OpSignalOnceQueueContext, self).__init__(name, target, args) - self._has_signaled = False - - def send_next_batch_signal(self, iterations): - if not self._has_signaled: - self._queue.put(iterations) - self._has_signaled = True - - -class TPUInfeedOutfeedSessionHook(session_run_hook.SessionRunHook): - """A Session hook setting up the TPU initialization, infeed, and outfeed. - - This hook does two major things: - 1. initialize and shutdown TPU system. - 2. launch and join the threads for infeed enqueue and (optional) outfeed - dequeue. - """ - - def __init__(self, - ctx, - enqueue_ops, - dequeue_ops, - run_infeed_loop_on_coordinator=True, - rendezvous=None): - self._master_job = ctx.master_job - self._enqueue_ops = enqueue_ops - self._dequeue_ops = dequeue_ops - self._rendezvous = rendezvous - - self._run_infeed_loop_on_coordinator = run_infeed_loop_on_coordinator - self._initial_infeed_sleep_secs = ( - ctx.config.tpu_config.initial_infeed_sleep_secs) - - self._feed_error = None - self._finished = False - - def begin(self): - logging.info('TPU job name %s', self._master_job) - self._iterations_per_loop_var = _create_or_get_iterations_per_loop() - self._init_ops = [tpu.initialize_system(job=self._master_job)] - self._finalize_ops = [tpu.shutdown_system(job=self._master_job)] - - summary_writer_init_ops = contrib_summary.summary_writer_initializer_op() - self._init_ops.extend(summary_writer_init_ops) - # Get all the writer resources from the initializer, so we know what to - # flush. 
- for op in summary_writer_init_ops: - self._finalize_ops.append(contrib_summary.flush(writer=op.inputs[0])) - - def _run_infeed(self, queue_ctx, session): - logging.info('Starting infeed thread controller.') - if self._initial_infeed_sleep_secs: - logging.info('%s thread sleeping for %d seconds.', self._name, - self._initial_infeed_sleep_secs) - time.sleep(self._initial_infeed_sleep_secs) - logging.info('%s thread starting after sleep', self._name) - - with self._rendezvous.catch_errors(source='infeed', session=session): - if self._run_infeed_loop_on_coordinator: - for count, steps in enumerate(queue_ctx.read_iteration_counts()): - for i in xrange(steps): - logging.debug('Infeed enqueue for iteration (%d, %d)', count, i) - session.run(self._enqueue_ops) - else: - for _ in queue_ctx.read_iteration_counts(): - session.run(self._enqueue_ops) - logging.info('Infeed thread finished, shutting down.') - - def _run_outfeed(self, queue_ctx, session): - logging.info('Starting outfeed thread controller.') - with self._rendezvous.catch_errors(source='outfeed', session=session): - for count, steps in enumerate(queue_ctx.read_iteration_counts()): - for i in xrange(steps): - logging.debug('Outfeed dequeue for iteration (%d, %d)', count, i) - session.run(self._dequeue_ops) - logging.info('Outfeed thread finished, shutting down.') - - def _create_infeed_controller(self, name, target, args): - return _OpQueueContext(name=name, target=target, args=args) - - def after_create_session(self, session, coord): - logging.info('Init TPU system') - session.run(self._init_ops, - options=config_pb2.RunOptions(timeout_in_ms=5 * 60 * 1000)) - - self._infeed_controller = self._create_infeed_controller( - name='InfeedController', target=self._run_infeed, args=(session,)) - - self._outfeed_controller = _OpQueueContext( - name='OutfeedController', target=self._run_outfeed, args=(session,)) - - def before_run(self, run_context): - self._feed_error = None - - iterations = run_context.session.run(self._iterations_per_loop_var) - - logging.info('Enqueue next (%d) batch(es) of data to infeed.', iterations) - self._infeed_controller.send_next_batch_signal(iterations) - - logging.info('Dequeue next (%d) batch(es) of data from outfeed.', - iterations) - self._outfeed_controller.send_next_batch_signal(iterations) - - def end(self, session): - self._finished = True - logging.info('Stop infeed thread controller') - self._infeed_controller.join() - self._rendezvous.record_done('infeed') - - logging.info('Stop output thread controller') - self._outfeed_controller.join() - self._rendezvous.record_done('outfeed') - - logging.info('Shutdown TPU system.') - session.run(self._finalize_ops) - - -class TPUInfeedOutfeedSessionHookForPrediction(TPUInfeedOutfeedSessionHook): - - def __init__(self, ctx, enqueue_ops, dequeue_ops, rendezvous=None): - super(TPUInfeedOutfeedSessionHookForPrediction, self).__init__( - ctx, enqueue_ops, dequeue_ops, run_infeed_loop_on_coordinator=False, - rendezvous=rendezvous) - - def _create_infeed_controller(self, name, target, args): - return _OpSignalOnceQueueContext(name=name, target=target, args=args) - - -class _TPUStopAtStepHook(session_run_hook.SessionRunHook): - """Hook that requests stop at a specified step. - - This hook is similar to the `session_run_hook._StopAfterNEvalsHook` with - following differences for TPU training: - - 1. This hook sets the variable for iterations_per_loop, which is used by - `TPUInfeedOutfeedSessionHook` to control the iterations for infeed/outfeed. 
- As the hook execution order is not guaranteed, the variable update is - handled in `after_create_session` and `after_run` as - `TPUInfeedOutfeedSessionHook` reads the variable value in `before_run`. - - 2. For each training loop (session.run), the global step could be increased - multiple times on TPU. The global step tensor value will be explicitly read - again in `after_run` to ensure the latest value is retrieved to avoid race - condition. - """ - - def __init__(self, iterations, num_steps=None, last_step=None): - """Initializes a `StopAtStepHook`. - - Args: - iterations: The number of iterations to run optimizer per training loop. - num_steps: Number of steps to execute. - last_step: Step after which to stop. - - Raises: - ValueError: If one of the arguments is invalid. - """ - if num_steps is None and last_step is None: - raise ValueError('One of num_steps or last_step must be specified.') - if num_steps is not None and last_step is not None: - raise ValueError('Only one of num_steps or last_step can be specified.') - self._num_steps = num_steps - self._last_step = last_step - self._iterations = iterations - - def _next_iterations(self, global_step, last_step): - gap = last_step - global_step - return min(gap, self._iterations) - - def begin(self): - self._global_step_tensor = training_util.get_global_step() - if self._global_step_tensor is None: - raise RuntimeError('Global step should be created.') - - self._iterations_per_loop_var = _create_or_get_iterations_per_loop() - - def after_create_session(self, session, coord): - global_step = session.run(self._global_step_tensor) - if self._last_step is None: - self._last_step = global_step + self._num_steps - - iterations = self._next_iterations(global_step, self._last_step) - - self._iterations_per_loop_var.load(iterations, session=session) - - def after_run(self, run_context, run_values): - # Global step cannot be retrieved via SessionRunArgs and before_run due to - # race condition. - global_step = run_context.session.run(self._global_step_tensor) - if global_step >= self._last_step: - run_context.request_stop() - else: - iterations = self._next_iterations(global_step, self._last_step) - self._iterations_per_loop_var.load( - iterations, session=run_context.session) - - -class _SetEvalIterationsHook(session_run_hook.SessionRunHook): - """Hook that requests stop at a specified step.""" - - def __init__(self, num_steps): - """Initializes a `_SetEvalIterationsHook`. - - Args: - num_steps: Number of steps to execute. - """ - self._num_steps = num_steps - - def begin(self): - self._iterations_per_loop_var = _create_or_get_iterations_per_loop() - - def after_create_session(self, session, coord): - self._iterations_per_loop_var.load(self._num_steps, session=session) - - -class _StoppingPredictHook(session_run_hook.SessionRunHook): - """Hook that requests stop according to the stopping signal in prediction.""" - - def __init__(self, scalar_stopping_signal): - self._scalar_stopping_signal = scalar_stopping_signal - - def begin(self): - self._iterations_per_loop_var = _create_or_get_iterations_per_loop() - - def after_create_session(self, session, coord): - # This is not necessary as we do not run infeed enqueue and outfeed dequeue - # in side threads for prediction model. But it makes the - # TPUInfeedOutfeedSessionHook prints nice message. 
- self._iterations_per_loop_var.load(1, session=session) - - def before_run(self, run_context): - return session_run_hook.SessionRunArgs(self._scalar_stopping_signal) - - def after_run(self, run_context, run_values): - _ = run_context - scalar_stopping_signal = run_values.results - if _StopSignals.should_stop(scalar_stopping_signal): - # NOTE(xiejw): In prediction, stopping signals are inserted for each - # batch. And we append one more batch to signal the system it should stop. - # The data flow might look like - # - # batch 0: images, labels, stop = 0 (user provided) - # batch 1: images, labels, stop = 0 (user provided) - # ... - # batch 99: images, labels, stop = 0 (user provided) - # batch 100: images, labels, stop = 1 (TPUEstimator appended) - # - # where the final batch (id = 100) is appended by TPUEstimator, so we - # should drop it before returning the predictions to user. - # To achieve that, we throw the OutOfRangeError in after_run. Once - # Monitored Session sees this error in SessionRunHook.after_run, the - # "current" prediction, i.e., batch with id=100, will be discarded - # immediately - raise errors.OutOfRangeError(None, None, 'Stopped by stopping signal.') - - -def generate_per_core_enqueue_ops_fn_for_host( - ctx, input_fn, inputs_structure_recorder, host_device, host_id): - """Generates infeed enqueue ops for per-core input_fn on a single host.""" - captured_infeed_queue = _CapturedObject() - tpu_ordinal_function_impl = ctx.tpu_ordinal_function(host_id) - - def enqueue_ops_fn(): - """A fn returns enqueue_ops.""" - num_cores_per_host = ctx.num_of_cores_per_host - per_host_sharded_inputs = [] - for core_ordinal in range(num_cores_per_host): - with ops.name_scope('ordinal_%d' % (core_ordinal)): - user_context = tpu_context.TPUContext( - internal_ctx=ctx, - input_device=host_device, - invocation_index=host_id * ctx.num_of_cores_per_host + core_ordinal - ) - inputs = _Inputs.from_input_fn(input_fn(user_context)) - if inputs.is_dataset: - raise TypeError( - '`input_fn` returning `Dataset` is not yet supported in ' - 'per-Core input pipeline deployment yet. 
Please set '
-              'TPUConfig.per_host_input_for_training to True or return '
-              '`features` and `labels` from `input_fn`')
-        features, labels = inputs.features_and_labels()
-
-        inputs_structure_recorder.validate_and_record_structure(
-            features, labels)
-        flattened_inputs = (
-            inputs_structure_recorder.flatten_features_and_labels(
-                features, labels))
-        per_host_sharded_inputs.append(flattened_inputs)
-
-    infeed_queue = tpu_feed.InfeedQueue(
-        number_of_tuple_elements=len(per_host_sharded_inputs[0]))
-    captured_infeed_queue.capture(infeed_queue)
-
-    per_host_enqueue_ops = infeed_queue.generate_enqueue_ops(
-        per_host_sharded_inputs, tpu_ordinal_function=tpu_ordinal_function_impl)
-    return per_host_enqueue_ops
-
-  return enqueue_ops_fn, captured_infeed_queue
-
-
-def generate_per_host_enqueue_ops_fn_for_host(
-    ctx, input_fn, inputs_structure_recorder, batch_axis, device, host_id):
-  """Generates infeed enqueue ops for per-host input_fn on a single host."""
-  captured_infeed_queue = _CapturedObject()
-
-  hooks = []
-
-  with ops.device(device):
-    user_context = tpu_context.TPUContext(
-        internal_ctx=ctx,
-        input_device=device,
-        invocation_index=host_id)
-    inputs = _Inputs.from_input_fn(input_fn(user_context))
-
-  is_dataset = inputs.is_dataset
-  if ctx.mode == model_fn_lib.ModeKeys.PREDICT:
-    if not is_dataset:
-      raise TypeError(
-          'For mode PREDICT, `input_fn` must return `Dataset` instead of '
-          '`features` and `labels`.')
-    if batch_axis is not None:
-      raise TypeError('For mode PREDICT, batch_axis is not supported yet.')
-    inputs = _InputsWithStoppingSignals(
-        dataset=inputs.dataset, batch_size=ctx.batch_size_for_input_fn,
-        add_padding=True)
-
-  if is_dataset:
-    hooks.append(inputs.dataset_initializer_hook())
-
-  tpu_ordinal_function_impl = ctx.tpu_ordinal_function(host_id)
-
-  def enqueue_ops_fn():
-    """A Fn returning the TPU infeed enqueue ops.
-
-    By providing as a Fn, it can be invoked inside the tf.while_loop such that
-    the input pipeline for multiple iterations can be executed by one
-    Session.run call.
-
-    Returns:
-      list of dict of ops.
-    """
-    with ops.device(device):
-      num_of_replicas_per_host = ctx.num_of_replicas_per_host
-      # Convert user input to features and labels. If the user returns a
-      # dataset, it is initialized and the features and labels extracted via
-      # `dataset.iterator.get_next()`
-      features, labels = inputs.features_and_labels()
-      signals = inputs.signals()
-
-      inputs_structure_recorder.validate_and_record_structure(features, labels)
-      unsharded_tensor_list = (
-          inputs_structure_recorder.flatten_features_and_labels(
-              features, labels, signals))
-
-      infeed_queue = tpu_feed.InfeedQueue(
-          tuple_types=[t.dtype for t in unsharded_tensor_list],
-          tuple_shapes=[t.shape for t in unsharded_tensor_list],
-          shard_dimensions=batch_axis)
-      captured_infeed_queue.capture(infeed_queue)
-      infeed_queue.set_number_of_shards(num_of_replicas_per_host)
-      per_host_enqueue_ops = (
-          infeed_queue.split_inputs_and_generate_enqueue_ops(
-              unsharded_tensor_list,
-              placement_function=lambda x: device,
-              tpu_ordinal_function=tpu_ordinal_function_impl))
-      if signals is None:
-        return per_host_enqueue_ops
-      else:
-        return {
-            'ops': per_host_enqueue_ops,
-            'signals': signals,
-        }
-
-  return enqueue_ops_fn, captured_infeed_queue, hooks, is_dataset
-
-
-def generate_per_host_v2_enqueue_ops_fn_for_host(
-    ctx, input_fn, inputs_structure_recorder, device, host_id):
-  """Generates infeed enqueue ops for per-host input_fn on a single host."""
-  captured_infeed_queue = _CapturedObject()
-  hooks = []
-
-  with ops.device(device):
-    user_context = tpu_context.TPUContext(
-        internal_ctx=ctx,
-        input_device=device,
-        invocation_index=host_id)
-    inputs = _Inputs.from_input_fn(input_fn(user_context))
-
-  is_dataset = inputs.is_dataset
-  if not is_dataset:
-    raise TypeError('`input_fn` must return a `Dataset` for the PER_HOST_V2 '
-                    'input pipeline configuration.')
-
-  if ctx.mode == model_fn_lib.ModeKeys.PREDICT:
-    inputs = _InputsWithStoppingSignals(
-        dataset=inputs.dataset,
-        batch_size=ctx.batch_size_for_input_fn,
-        add_padding=True,
-        num_invocations_per_step=ctx.num_of_replicas_per_host)
-
-  hooks.append(inputs.dataset_initializer_hook())
-  tpu_ordinal_function_impl = ctx.tpu_ordinal_function(host_id)
-
-  def enqueue_ops_fn():
-    """Generates the per_host enqueue ops."""
-    control_deps = []
-    per_host_sharded_inputs = []
-    num_replicas_per_host = ctx.num_of_replicas_per_host
-    cached_signals = None
-    with ops.device(device):
-      if not inputs.is_dataset:
-        raise TypeError('`input_fn` must return a `Dataset` for this mode.')
-      for _ in range(num_replicas_per_host):
-        # Use control dependencies to ensure a deterministic ordering.
-        with ops.control_dependencies(control_deps):
-          features, labels = inputs.features_and_labels()  # Calls get_next()
-          signals = inputs.signals()
-
-          # All the replicas share the replica 0's stopping signal.
-          # This avoids inconsistent state among different model replicas.
-          if cached_signals:
-            signals['stopping'] = cached_signals['stopping']
-          else:
-            cached_signals = signals
-
-          inputs_structure_recorder.validate_and_record_structure(
-              features, labels)
-          flattened_inputs = (
-              inputs_structure_recorder.flatten_features_and_labels(
-                  features, labels, signals))
-          control_deps.extend(flattened_inputs)
-          per_host_sharded_inputs.append(flattened_inputs)
-
-      if inputs_structure_recorder.flattened_input_dims:
-        input_partition_dims = inputs_structure_recorder.flattened_input_dims
-        if signals:
-          input_partition_dims += [None] * len(signals)
-        # pylint: disable=protected-access
-        infeed_queue = tpu_feed._PartitionedInfeedQueue(
-            number_of_tuple_elements=len(per_host_sharded_inputs[0]),
-            host_id=host_id,
-            input_partition_dims=input_partition_dims,
-            device_assignment=ctx.device_assignment)
-        per_host_enqueue_ops = infeed_queue.generate_enqueue_ops(
-            per_host_sharded_inputs)
-      else:
-        infeed_queue = tpu_feed.InfeedQueue(
-            number_of_tuple_elements=len(per_host_sharded_inputs[0]))
-        per_host_enqueue_ops = infeed_queue.generate_enqueue_ops(
-            per_host_sharded_inputs,
-            tpu_ordinal_function=tpu_ordinal_function_impl)
-      captured_infeed_queue.capture(infeed_queue)
-
-      if signals is None:
-        return per_host_enqueue_ops
-      else:
-        return {
-            'ops': per_host_enqueue_ops,
-            'signals': signals,
-        }
-
-  return enqueue_ops_fn, captured_infeed_queue, hooks, is_dataset
-
-
-def generate_broadcast_enqueue_ops_fn(ctx, input_fn, inputs_structure_recorder,
-                                      num_hosts):
-  """Generates infeed enqueue ops for one input_fn on all the hosts."""
-  captured_infeed_queue = _CapturedObject()
-  hooks = []
-  device_0 = ctx.tpu_host_placement_function(host_id=0)
-  with ops.device(device_0):
-    user_context = tpu_context.TPUContext(
-        internal_ctx=ctx, input_device=device_0, invocation_index=0)
-    inputs = _Inputs.from_input_fn(input_fn(user_context))
-
-  is_dataset = inputs.is_dataset
-  if ctx.mode == model_fn_lib.ModeKeys.PREDICT:
-    if not is_dataset:
-      raise TypeError(
-          'For mode PREDICT, `input_fn` must return `Dataset` instead of '
-          '`features` and `labels`.')
-
-    inputs = _InputsWithStoppingSignals(
-        dataset=inputs.dataset,
-        batch_size=ctx.batch_size_for_input_fn,
-        add_padding=True)
-
-  if is_dataset:
-    hooks.append(inputs.dataset_initializer_hook())
-  num_replicas_per_host = ctx.num_of_replicas_per_host
-
-  def tpu_ordinal_function_impl(replica_id):
-    if ctx.device_assignment:
-      return ctx.device_assignment.tpu_ordinal(replica=replica_id)
-    else:
-      return replica_id % num_replicas_per_host
-
-  def device_function_impl(replica_id):
-    return ctx.tpu_host_placement_function(replica_id=replica_id)
-
-  def enqueue_ops_fn():
-    """Generates enqueue ops for all the hosts."""
-    broadcasted_inputs = []
-    flattened_inputs = None  # Cache result from input_fn.
-    signals = None
-    for host_id in xrange(num_hosts):
-      with ops.device(ctx.tpu_host_placement_function(host_id=host_id)):
-        for _ in xrange(ctx.num_of_replicas_per_host):
-          # Note: input_fn is only called once at host 0 for the first replica.
-          # The features and labels returned from that invocation are
-          # broadcasted to other replicas (including the replicas on other
-          # hosts).
- if flattened_inputs is None: - features, labels = inputs.features_and_labels() # Calls get_next() - signals = inputs.signals() - - inputs_structure_recorder.validate_and_record_structure( - features, labels) - flattened_inputs = ( - inputs_structure_recorder.flatten_features_and_labels( - features, labels, signals)) - broadcasted_inputs.append(flattened_inputs) - - infeed_queue = tpu_feed.InfeedQueue( - number_of_tuple_elements=len(broadcasted_inputs[0])) - captured_infeed_queue.capture(infeed_queue) - enqueue_ops = infeed_queue.generate_enqueue_ops( - broadcasted_inputs, - tpu_ordinal_function=tpu_ordinal_function_impl, - placement_function=device_function_impl) - - if signals is None: - return enqueue_ops - else: - return { - 'ops': enqueue_ops, - 'signals': signals, - } - - return enqueue_ops_fn, captured_infeed_queue, hooks, is_dataset - - -class _InputPipeline(object): - """`_InputPipeline` handles invoking `input_fn` and piping to infeed queue. - - `_InputPipeline` abstracts the per-core/per-host `input_fn` invocation from - call site. To be precise, based on the configuration in - `_InternalTPUContext`, it invokes `input_fn` for all cores (usually - multi-host TPU training) or for one host (usually for single-host TPU - evaluation), and sends all `features` and `labels` returned by `input_fn` to - TPU infeed. For per-core invocation, `features` and `labels` are piped to - infeed directly, one tuple for each core. For per-host invocation, `features` - and `labels` are split at host (with respect to `batch_axis`) and piped to all - cores accordingly. - - In addition, flatten/unflatten are handled by `_InputPipeline` also. Model - inputs returned by the `input_fn` can have one of the following forms: - 1. features - 2. (features, labels) - 3. ((arbitrarily nested structure of features), labels) - - Internally, form 1 is reformed to `(features, None)` as features and labels - are passed separately to underlying methods. For TPU training, TPUEstimator - may expect multiple `features` and `labels` tuples one for each core. - - TPUEstimator allows various different structures for inputs (namely `features` - and `labels`). `features` can be `Tensor`, dict of string name to `Tensor`, - or nested tuples and `labels` could be `None`, `Tensor`, or dict of string - name to `Tensor`. TPU infeed/outfeed library expects flattened tensor list. - So, `features` and `labels` need to be flattened, before infeed enqueue, and - the structure of them needs to be recorded, in order to restore them after - infeed dequeue. - """ - - class InputsStructureRecorder(object): - """The recorder to record inputs structure.""" - - def __init__(self, input_partition_dims=None): - # Holds the structure of inputs - self._feature_structure = {} - self._flattened_input_dims = None - - if input_partition_dims: - # This should have been validated in TPUConfig. - assert len(input_partition_dims) <= 2, 'must have 1 or 2 elements.' - if len(input_partition_dims) == 2: - self._feature_dims, self._label_dims = input_partition_dims - else: - self._feature_dims = input_partition_dims[0] - self._label_dims = None - - assert self._feature_dims is not None, ('input_partition_dims[0] must ' - 'not be None') - else: - self._feature_dims = None - self._label_dims = None - - # Internal state. - self._initialized = False - - @property - def flattened_input_dims(self): - assert self._initialized, 'InputsStructureRecorder is not initialized.' 
- return self._flattened_input_dims - - def has_labels(self): - return 'labels' in self._feature_structure - - def _flatten_input_dims(self, feature_dims, feature_dims_names, label_dims, - label_dims_names, label_names, has_labels): - """Flatten input dims with the same order as flattened input tensors.""" - flattened_input_dims = [] - if feature_dims_names: - # We need a fixed ordering for matching the tensors in features. - flattened_input_dims.extend( - [feature_dims[name] for name in feature_dims_names]) - else: - flattened_input_dims.append(feature_dims) - - if label_dims_names: - # We need a fixed ordering for matching the tensors in labels. - flattened_input_dims.extend( - [label_dims[name] for name in label_dims_names]) - else: - if label_names: - num_tensors_in_label = len(label_names) - else: - num_tensors_in_label = int(has_labels) - # Setting `None` in input_partition_dims[1] will apply `None` to - # all the tensors in labels, regardless of internal structure. - flattened_input_dims.extend([label_dims] * num_tensors_in_label) - - return flattened_input_dims - - def validate_and_record_structure(self, features, labels): - """Validates and records the structure of `features` and `labels`.""" - # Extract structure. - has_labels = labels is not None - feature_names = _extract_key_names(features) - label_names = _extract_key_names(labels) - - if not self._initialized: - # Record structure. - self._initialized = True - if self._feature_dims is not None: - feature_dims_names = _extract_key_names(self._feature_dims) - if feature_dims_names != feature_names: - raise ValueError( - 'TPUConfig.input_partition_dims[0] mismatched feature' - ' keys. Expected {}, got {}'.format(feature_names, - feature_dims_names)) - - label_dims_names = _extract_key_names(self._label_dims) - if self._label_dims is not None and label_dims_names != label_names: - raise ValueError( - 'TPUConfig.input_partition_dims[1] mismatched label' - ' keys. Expected {}, got {}'.format(label_names, - label_dims_names)) - - self._flattened_input_dims = self._flatten_input_dims( - self._feature_dims, feature_dims_names, self._label_dims, - label_dims_names, label_names, has_labels) - - def flatten_features_and_labels(self, features, labels, signals=None): - """Flattens the `features` and `labels` to a single tensor list.""" - self._feature_structure['features'] = features - if labels is not None: - self._feature_structure['labels'] = labels - if signals is not None: - self._feature_structure['signals'] = signals - return data_nest.flatten(self._feature_structure) - - def unflatten_features_and_labels(self, flattened_inputs): - """Restores the flattened inputs to original features and labels form. - - Args: - flattened_inputs: Flattened inputs for each shard. - - Returns: - A tuple of (`features`, `labels`), where `labels` could be None. - Each one, if present, should have identical structure (single tensor vs - dict) as the one returned by input_fn. - - Raises: - ValueError: If the number of expected tensors from `flattened_inputs` - mismatches the recorded structure. - """ - - unflattened_inputs = data_nest.pack_sequence_as(self._feature_structure, - flattened_inputs) - return _Inputs( - unflattened_inputs['features'], - unflattened_inputs.get('labels'), - signals=unflattened_inputs.get('signals')) - - def __init__(self, input_fn, batch_axis, ctx): - """Constructor. - - Args: - input_fn: input fn for train or eval. 
-      batch_axis: A python tuple of int values describing how each tensor
-        produced by the Estimator `input_fn` should be split across the TPU
-        compute shards.
-      ctx: A `_InternalTPUContext` instance with mode.
-
-    Raises:
-      ValueError: If both `sharded_features` and `num_cores` are `None`.
-    """
-    self._inputs_structure_recorder = _InputPipeline.InputsStructureRecorder(
-        ctx.input_partition_dims)
-
-    self._sharded_per_core = ctx.is_input_sharded_per_core()
-    self._input_fn = input_fn
-    self._infeed_queue = None
-    self._ctx = ctx
-    self._batch_axis = batch_axis
-
-  def generate_infeed_enqueue_ops_and_dequeue_fn(self):
-    """Generates infeed enqueue ops and dequeue_fn."""
-    # While tf.while_loop is called, the body function, which invokes
-    # `enqueue_fn` passed in, is called to construct the graph. So, input_fn
-    # structure is recorded.
-    enqueue_ops, all_hooks, run_infeed_loop_on_coordinator = (
-        self._invoke_input_fn_and_record_structure())
-
-    self._validate_input_pipeline()
-
-    def dequeue_fn():
-      """dequeue_fn is used by TPU to retrieve the tensors."""
-      # In the model-parallel case, both the host-side and device-side
-      # computations must agree on the core on which infeed takes place. We
-      # choose to perform infeed on logical core 0 of each replica.
-      values = self._infeed_queue.generate_dequeue_op(tpu_device=0)
-      # The unflatten process uses the structure information recorded above.
-      return self._inputs_structure_recorder.unflatten_features_and_labels(
-          values)
-
-    return (enqueue_ops, dequeue_fn, all_hooks, run_infeed_loop_on_coordinator)
-
-  def _invoke_input_fn_and_record_structure(self):
-    """Deploys the input pipeline and record input structure."""
-    enqueue_ops = []
-    infeed_queues = []
-    all_hooks = []
-    num_hosts = self._ctx.num_hosts
-    tpu_host_placement_fn = self._ctx.tpu_host_placement_function
-
-    run_infeed_loop_on_coordinator = True
-
-    if self._sharded_per_core:
-      # Per-Core input pipeline deployment.
-      # Invoke input pipeline for each core and placed on the corresponding
-      # host.
-      for host_id in range(num_hosts):
-        host_device = tpu_host_placement_fn(host_id=host_id)
-        with ops.device(host_device):
-          with ops.name_scope('input_pipeline_task%d' % (host_id)):
-            enqueue_ops_fn, captured_infeed_queue = (
-                generate_per_core_enqueue_ops_fn_for_host(
-                    self._ctx, self._input_fn, self._inputs_structure_recorder,
-                    host_device, host_id))
-
-            if _WRAP_INPUT_FN_INTO_WHILE_LOOP:
-              run_infeed_loop_on_coordinator = False
-              enqueue_ops.append(
-                  _wrap_computation_in_while_loop(
-                      device=host_device, op_fn=enqueue_ops_fn))
-            else:
-              enqueue_ops.append(enqueue_ops_fn())
-            # Infeed_queue_getter must be called after enqueue_ops_fn is called.
-            infeed_queues.append(captured_infeed_queue.get())
-
-    elif self._ctx.is_input_broadcast_with_iterators():
-      # Only calls input_fn in host 0.
-      host_device = tpu_host_placement_fn(host_id=0)
-      enqueue_ops_fn, captured_infeed_queue, hooks, is_dataset = (
-          generate_broadcast_enqueue_ops_fn(self._ctx, self._input_fn,
-                                            self._inputs_structure_recorder,
-                                            num_hosts))
-      all_hooks.extend(hooks)
-      if is_dataset:
-        run_infeed_loop_on_coordinator = False
-        wrap_fn = (
-            _wrap_computation_in_while_loop
-            if self._ctx.mode != model_fn_lib.ModeKeys.PREDICT else
-            _wrap_computation_in_while_loop_with_stopping_signals)
-        enqueue_ops.append(wrap_fn(device=host_device, op_fn=enqueue_ops_fn))
-      else:
-        enqueue_ops.append(enqueue_ops_fn())
-      infeed_queues.append(captured_infeed_queue.get())
-    else:
-      for host_id in range(num_hosts):
-        host_device = tpu_host_placement_fn(host_id=host_id)
-        with ops.device(host_device):
-          with ops.name_scope('input_pipeline_task%d' % (host_id)):
-            if self._ctx.is_input_per_host_with_iterators():
-              enqueue_ops_fn, captured_infeed_queue, hooks, is_dataset = (
-                  generate_per_host_v2_enqueue_ops_fn_for_host(
-                      self._ctx, self._input_fn,
-                      self._inputs_structure_recorder, host_device, host_id))
-            else:
-              enqueue_ops_fn, captured_infeed_queue, hooks, is_dataset = (
-                  generate_per_host_enqueue_ops_fn_for_host(
-                      self._ctx, self._input_fn,
-                      self._inputs_structure_recorder, self._batch_axis,
-                      host_device, host_id))
-            all_hooks.extend(hooks)
-
-            # NOTE(xiejw): We dispatch here based on the return type of the
-            # users `input_fn`.
-            #
-            # 1. If input_fn returns a Dataset instance, we initialize the
-            #    iterator outside of tf.while_loop, and call the
-            #    iterator.get_next inside tf.while_loop. This should be
-            #    always safe.
-            #
-            # 2. If input_fn returns (features, labels), it is too late to wrap
-            #    them inside tf.while_loop, as resource initialization cannot
-            #    be handled in TF control flow properly. In this case, we will
-            #    use python loop to enqueue the data into TPU system. This may
-            #    be slow compared to the previous case.
-            if is_dataset:
-              run_infeed_loop_on_coordinator = False
-              wrap_fn = (
-                  _wrap_computation_in_while_loop
-                  if self._ctx.mode != model_fn_lib.ModeKeys.PREDICT else
-                  _wrap_computation_in_while_loop_with_stopping_signals)
-              enqueue_ops.append(
-                  wrap_fn(device=host_device, op_fn=enqueue_ops_fn))
-            else:
-              enqueue_ops.append(enqueue_ops_fn())
-            infeed_queues.append(captured_infeed_queue.get())
-    # infeed_queue is used to generate dequeue ops. The only thing it uses for
-    # dequeue is dtypes and types. So, any one can be used. Here, grab the
-    # first one.
-    self._infeed_queue = infeed_queues[0]
-    return enqueue_ops, all_hooks, run_infeed_loop_on_coordinator
-
-  def _validate_input_pipeline(self):
-    """Validates the input pipeline.
-
-    Perform some sanity checks to log user friendly information. We should
-    error out to give users better error message. But, if
-    _WRAP_INPUT_FN_INTO_WHILE_LOOP is False (legacy behavior), we cannot break
-    user code, so, log a warning.
-
-    Raises:
-      RuntimeError: If the validation failed.
-    """
-    if ops.get_default_graph().get_collection(ops.GraphKeys.QUEUE_RUNNERS):
-      err_msg = ('Input pipeline contains one or more QueueRunners. '
-                 'It could be slow and not scalable. Please consider '
-                 'converting your input pipeline to use `tf.data` instead (see '
-                 'https://www.tensorflow.org/guide/datasets for '
-                 'instructions.')
-      if _WRAP_INPUT_FN_INTO_WHILE_LOOP:
-        raise RuntimeError(err_msg)
-      else:
-        logging.warn(err_msg)
-
-
-class _ModelFnWrapper(object):
-  """A `model_fn` wrapper.
- - This makes calling model_fn on CPU and TPU easier and more consistent and - performs necessary check and mutation required by TPU training and evaluation. - - In addition, this wrapper manages converting the `model_fn` to a single TPU - train and eval step. - """ - - def __init__(self, model_fn, train_cache_fn, eval_cache_fn, config, params, ctx): - self._model_fn = model_fn - self._train_cache_fn = train_cache_fn - self._eval_cache_fn = eval_cache_fn - self._config = config - self._params = params - self._ctx = ctx - - def call_without_tpu(self, features, labels, is_export_mode): - return self._call_model_fn(features, labels, is_export_mode=is_export_mode) - - def convert_to_single_tpu_train_step(self, dequeue_fn): - """Converts user provided model_fn` as a single train step on TPU. - - The user provided `model_fn` takes input tuple - (features, labels) and produces the EstimatorSpec with train_op and loss for - train `mode`. This usually represents a single train computation on CPU. - - For TPU training, a train (computation) step is first wrapped in a - tf.while_loop control flow to repeat for many times and then replicated to - all TPU shards. Besides the input should be taken from TPU infeed rather - than input pipeline (input_fn) directly. To fit TPU loop and replicate - pattern, the original train computation should be reformed, which is the - returned `train_step`. - - Args: - dequeue_fn: The function to retrieve inputs, features and labels, from TPU - infeed dequeue channel. - - Returns: - A tuple of train_fn, host_calls, and captured scaffold_fn. The train_fn - representing the train step for TPU. - """ - - host_call = _OutfeedHostCall(self._ctx) - captured_scaffold_fn = _CapturedObject() - captured_training_hooks = _CapturedObject() - - def train_step(loss, *cache): - """Training step function for use inside a while loop.""" - if not self._params.get('track_mean', False): - del loss # unused; required in function signature. - - inputs = dequeue_fn() - features, labels = inputs.features_and_labels() - - # Consume the current cache - estimator_spec = self._verify_estimator_spec( - self._call_model_fn(features, labels, cache=cache)) - - # Retrieve the new returned cache - """ - `cache` consists of a list of tensors, potentially empty (of length 0) - """ - cache = estimator_spec.cache - new_loss, train_op = estimator_spec.loss, estimator_spec.train_op - - if isinstance(estimator_spec, model_fn_lib._TPUEstimatorSpec): # pylint: disable=protected-access - captured_scaffold_fn.capture(estimator_spec.scaffold_fn) - else: - captured_scaffold_fn.capture(None) - - captured_training_hooks.capture(estimator_spec.training_hooks) - - # We must run train_op to update the variables prior to running the - # outfeed. - with ops.control_dependencies([train_op]): - host_call_outfeed_ops = [] - if (isinstance(estimator_spec, model_fn_lib._TPUEstimatorSpec) # pylint: disable=protected-access - and estimator_spec.host_call is not None): - host_call.record({'host_call': estimator_spec.host_call}) - host_call_outfeed_ops = host_call.create_enqueue_op() - with ops.control_dependencies(host_call_outfeed_ops): - if self._params.get('track_mean', False): - loss = tensorflow.stop_gradient(loss) - return [math_ops.add(loss, new_loss)] + cache - else: - return [array_ops.identity(new_loss)] + cache - - return (train_step, host_call, captured_scaffold_fn, - captured_training_hooks) - - def convert_to_single_tpu_eval_step(self, dequeue_fn): - """Converts user provided model_fn` as a single eval step on TPU. 
- - Similar to training, the user provided `model_fn` takes input tuple - (features, labels) and produces the TPUEstimatorSpec with eval_metrics for - eval `mode`. This usually represents a single evaluation computation on CPU. - - For TPU evaluation, a eval (computation) step is first wrapped in a - tf.while_loop control flow to repeat for many times and then replicated to - all TPU shards. Besides the input and output are slightly different. Input, - features and labels, should be taken from TPU infeed rather than input - pipeline (input_fn) directly. Output is managed in two stages. First, the - model outputs as the result of evaluation computation, usually model logits, - should be transferred from TPU system to CPU. Then, all model outputs are - concatenated first on CPU and sent to the metric_fn for metrics computation. - To fit TPU evaluation pattern, the original eval computation should be - reformed, which is the returned `eval_step`. - - Args: - dequeue_fn: The function to retrieve inputs, features and labels, from TPU - infeed dequeue channel. - - Returns: - A tuple of eval_fn, host_calls, and captured scaffold_fn. The eval_fn - representing the eval step for TPU. - """ - host_calls = _OutfeedHostCall(self._ctx) - captured_scaffold_fn = _CapturedObject() - captured_eval_hooks = _CapturedObject() - - def eval_step(total_loss, *cache): - """Evaluation step function for use inside a while loop.""" - inputs = dequeue_fn() - features, labels = inputs.features_and_labels() - - # Consume the current cache - tpu_estimator_spec = self._call_model_fn(features, labels, cache=cache) - if not isinstance(tpu_estimator_spec, model_fn_lib._TPUEstimatorSpec): # pylint: disable=protected-access - raise RuntimeError( - 'estimator_spec used by TPU evaluation must have type' - '`TPUEstimatorSpec`. Got {}'.format(type(tpu_estimator_spec))) - - # Retrieve the new returned cache - cache = tpu_estimator_spec.cache - loss = tpu_estimator_spec.loss - - captured_scaffold_fn.capture(tpu_estimator_spec.scaffold_fn) - captured_eval_hooks.capture(tpu_estimator_spec.evaluation_hooks) - - to_record = {} - if tpu_estimator_spec.eval_metrics: - to_record['eval_metrics'] = tpu_estimator_spec.eval_metrics - if tpu_estimator_spec.host_call is not None: - # We assume that evaluate won't update global step, so we don't wrap - # this host_call. - to_record['host_call'] = tpu_estimator_spec.host_call - host_calls.record(to_record) - - with ops.control_dependencies(host_calls.create_enqueue_op()): - return [math_ops.add(total_loss, loss)] + cache - - return eval_step, host_calls, captured_scaffold_fn, captured_eval_hooks - - def convert_to_single_tpu_predict_step(self, dequeue_fn): - """Converts user provided model_fn` as a single predict step on TPU. - - Args: - dequeue_fn: The function to retrieve inputs, features and labels, from TPU - infeed dequeue channel. - - Returns: - A tuple of predict_fn, host_calls, and captured scaffold_fn. The - predict_fn representing the predict step for TPU. 
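
Note that the device-side eval loop only accumulates `total_loss`; the mean is taken once afterwards (the `math_ops.div` by `iterations_per_loop` in the EVAL branch later in this file). A trivial sketch of that split, with made-up numbers:

```python
# Device side: accumulate only, one addition per eval batch.
total_loss, iterations_per_loop = 0.0, 4
for _ in range(iterations_per_loop):
    total_loss += 2.0  # stand-in for one eval batch's loss
# Host side: divide once, mirroring math_ops.div(total_loss, iterations).
print(total_loss / iterations_per_loop)  # 2.0
```
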
- """ - host_calls = _OutfeedHostCall(self._ctx) - captured_scaffold_fn = _CapturedObject() - captured_predict_hooks = _CapturedObject() - - def predict_step(unused_scalar_stopping_signal): - """Evaluation step function for use inside a while loop.""" - inputs = dequeue_fn() - features, labels = inputs.features_and_labels() - stopping_signals = inputs.signals() - - assert stopping_signals is not None, ( - 'Internal Error: `signals` is missing.') - - tpu_estimator_spec = self._call_model_fn( - features, labels, is_export_mode=False) - if not isinstance(tpu_estimator_spec, model_fn_lib._TPUEstimatorSpec): # pylint: disable=protected-access - raise RuntimeError( - 'estimator_spec used by TPU prediction must have type' - '`TPUEstimatorSpec`. Got {}'.format(type(tpu_estimator_spec))) - - self._verify_tpu_spec_predictions(tpu_estimator_spec.predictions) - - captured_scaffold_fn.capture(tpu_estimator_spec.scaffold_fn) - captured_predict_hooks.capture(tpu_estimator_spec.prediction_hooks) - to_record = {} - identity_fn = lambda **kwargs: kwargs - to_record['predictions'] = [identity_fn, tpu_estimator_spec.predictions] - to_record['signals'] = [identity_fn, stopping_signals] - if tpu_estimator_spec.host_call is not None: - to_record['host_call'] = tpu_estimator_spec.host_call - host_calls.record(to_record) - - with ops.control_dependencies(host_calls.create_enqueue_op()): - return _StopSignals.as_scalar_stopping_signal(stopping_signals) - - return (predict_step, host_calls, captured_scaffold_fn, - captured_predict_hooks) - - def _verify_tpu_spec_predictions(self, predictions): - """Validates TPUEstimatorSpec.predictions dict.""" - # TODO(xiejw): Adds validation for prediction dictionrary. - # TODO(xiejw): Adds support for single tensor as predictions. - if not isinstance(predictions, dict): - raise TypeError('TPUEstimatorSpec.predictions must be dict of Tensors.') - - for (key, tensor) in predictions.items(): - if tensor.shape[0].value is None: - raise ValueError( - 'The tensor with key ({}) in TPUEstimatorSpec.predictions has ' - 'dynamic shape (should be static). Tensor: {}'.format( - key, tensor)) - return predictions - - def _validate_model_features_and_labels(self, - features, - labels, - is_export_mode): - """Validates that the features and labels for the model function are valid. - - A valid features/labels object is the one with: - - Type: Tensor or a dictionary of Tensors - - Static shape if is_export_mode is False. - - Args: - features: the features that would be input to the model function. - labels: the labels that would be input to the model function. - is_export_mode: boolean value specifying if in export mode. - - Raises: - TypeError: If features/labels are not of the correct type. - ValueError: If features/labels have dynamic shape. - """ - - def validate(obj, obj_name): - """Helper validate function.""" - if not isinstance(obj, ops.Tensor) and not isinstance(obj, dict): - raise TypeError( - 'The {} to the model returned by input_fn must be either a Tensor ' - 'or a dictionary of Tensors. {}: {}'.format(obj_name, obj_name, - obj)) - if is_export_mode or self._ctx.is_running_on_cpu(is_export_mode): - return - if isinstance(obj, ops.Tensor): - if not obj.get_shape().is_fully_defined(): - raise ValueError( - 'The {} to the model returned by input_fn must have static shape.' 
- ' Tensor: {}'.format(obj_name, obj)) - else: - for (key, value) in obj.items(): - flattened_tensors = data_nest.flatten(value) - for tensor in flattened_tensors: - if not tensor.get_shape().is_fully_defined(): - raise ValueError( - 'The {} to the model returned by input_fn must have static ' - 'shape. Key: \'{}\', Tensor: {}'.format( - obj_name, key, tensor)) - - validate(features, 'features') - if labels is not None: - validate(labels, 'labels') - - def _call_model_fn(self, features, labels, cache=None, is_export_mode=False): - """Calls the model_fn with required parameters.""" - self._validate_model_features_and_labels(features, labels, is_export_mode) - model_fn_args = function_utils.fn_args(self._model_fn) - kwargs = {} - - # Makes deep copy with `config` and params` in case user mutates them. - config = copy.deepcopy(self._config) - params = copy.deepcopy(self._params) - - if 'labels' in model_fn_args: - kwargs['labels'] = labels - elif labels is not None: - raise ValueError( - 'model_fn does not take labels, but input_fn returns labels.') - if 'mode' in model_fn_args: - kwargs['mode'] = self._ctx.mode - if 'config' in model_fn_args: - kwargs['config'] = config - if 'params' in model_fn_args: - kwargs['params'] = params - - if cache is not None: - params['cache'] = cache - - if 'params' not in model_fn_args: - raise ValueError('model_fn ({}) does not include params argument, ' - 'required by TPUEstimator to pass batch size as ' - 'params[\'batch_size\']'.format(self._model_fn)) - - if is_export_mode: - batch_size_for_model_fn = None - else: - batch_size_for_model_fn = self._ctx.batch_size_for_model_fn - - if batch_size_for_model_fn is not None: - _add_item_to_params(params, _BATCH_SIZE_KEY, batch_size_for_model_fn) - - running_on_cpu = self._ctx.is_running_on_cpu(is_export_mode) - _add_item_to_params(params, _USE_TPU_KEY, not running_on_cpu) - - if not running_on_cpu: - user_context = tpu_context.TPUContext( - internal_ctx=self._ctx, call_from_input_fn=False) - _add_item_to_params(params, _CTX_KEY, user_context) - - estimator_spec = self._model_fn(features=features, **kwargs) - if (running_on_cpu and - isinstance(estimator_spec, model_fn_lib._TPUEstimatorSpec)): # pylint: disable=protected-access - # The estimator_spec will be passed to `Estimator` directly, which expects - # type `EstimatorSpec`. - return estimator_spec.as_estimator_spec() - else: - return estimator_spec - - def _verify_estimator_spec(self, estimator_spec): - """Validates the estimator_spec.""" - if isinstance(estimator_spec, model_fn_lib._TPUEstimatorSpec): # pylint: disable=protected-access - return estimator_spec - - err_msg = '{} returned by EstimatorSpec is not supported in TPUEstimator.' - if estimator_spec.training_chief_hooks: - raise ValueError( - err_msg.format('training_chief_hooks') + 'If you want' + - ' to pass training hooks, please pass via training_hooks.') - - if estimator_spec.scaffold: - logging.warning('EstimatorSpec.Scaffold is ignored by TPU train/eval. ' - 'Please use TPUEstimatorSpec.') - return estimator_spec - - -class _OutfeedHostCall(object): - """Support for `eval_metrics` and `host_call` in TPUEstimatorSpec.""" - - def __init__(self, ctx): - self._ctx = ctx - self._names = [] - # All of these are dictionaries of lists keyed on the name. 
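
`_call_model_fn` above dispatches on the user's `model_fn` signature. A self-contained sketch of that pattern, with `fn_args` standing in for `function_utils.fn_args` and a toy `model_fn`:

```python
import inspect

def fn_args(fn):
    # Stand-in for function_utils.fn_args: the declared parameter names.
    return tuple(inspect.signature(fn).parameters)

def call_model_fn(model_fn, features, labels, mode, config, params):
    args = fn_args(model_fn)
    kwargs = {}
    # Only pass what the user's model_fn actually declares.
    if 'labels' in args:
        kwargs['labels'] = labels
    elif labels is not None:
        raise ValueError('model_fn does not take labels, '
                         'but input_fn returns labels.')
    if 'mode' in args:
        kwargs['mode'] = mode
    if 'config' in args:
        kwargs['config'] = config
    if 'params' in args:
        kwargs['params'] = params
    return model_fn(features=features, **kwargs)

def model_fn(features, labels, mode, params):
    return (features, labels, mode, params)

print(call_model_fn(model_fn, [1, 2], [0, 1], 'train', None,
                    {'batch_size': 2}))
```
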
-    self._host_fns = {}
-    self._tensor_keys = collections.defaultdict(list)
-    self._tensors = collections.defaultdict(list)
-    self._tensor_dtypes = collections.defaultdict(list)
-    self._tensor_shapes = collections.defaultdict(list)
-
-  @staticmethod
-  def validate(host_calls):
-    """Validates the `eval_metrics` and `host_call` in `TPUEstimatorSpec`."""
-
-    for name, host_call in host_calls.items():
-      if not isinstance(host_call, (tuple, list)):
-        raise ValueError('{} should be tuple or list'.format(name))
-      if len(host_call) != 2:
-        raise ValueError('{} should have two elements.'.format(name))
-      if not callable(host_call[0]):
-        raise TypeError('{}[0] should be callable.'.format(name))
-      if not isinstance(host_call[1], (tuple, list, dict)):
-        raise ValueError('{}[1] should be tuple, list, or dict.'.format(name))
-
-      if isinstance(host_call[1], (tuple, list)):
-        fullargspec = tf_inspect.getfullargspec(host_call[0])
-        fn_args = function_utils.fn_args(host_call[0])
-        # wrapped_hostcall_with_global_step uses varargs, so we allow that.
-        if fullargspec.varargs is None and len(host_call[1]) != len(fn_args):
-          raise RuntimeError(
-              'In TPUEstimatorSpec.{}, length of tensors {} does not match '
-              'method args of the function, which takes {}.'.format(
-                  name, len(host_call[1]), len(fn_args)))
-
-  @staticmethod
-  def create_cpu_hostcall(host_calls):
-    """Runs the host_call on CPU instead of TPU when use_tpu=False."""
-
-    _OutfeedHostCall.validate(host_calls)
-    ret = {}
-    for name, host_call in host_calls.items():
-      host_fn, tensors = host_call
-      if isinstance(tensors, (tuple, list)):
-        ret[name] = host_fn(*tensors)
-      else:
-        # Must be dict.
-        try:
-          ret[name] = host_fn(**tensors)
-        except TypeError as e:
-          logging.warning(
-              'Exception while calling %s: %s. It is likely the tensors '
-              '(%s[1]) do not match the '
-              'function\'s arguments', name, e, name)
-          raise e
-    return ret
-
-  def record(self, host_calls):
-    """Records the host_call structure."""
-
-    for name, host_call in host_calls.items():
-      host_fn, tensor_list_or_dict = host_call
-      self._names.append(name)
-      self._host_fns[name] = host_fn
-
-      if isinstance(tensor_list_or_dict, dict):
-        for (key, tensor) in six.iteritems(tensor_list_or_dict):
-          self._tensor_keys[name].append(key)
-          self._tensors[name].append(tensor)
-          self._tensor_dtypes[name].append(tensor.dtype)
-          self._tensor_shapes[name].append(tensor.shape)
-      else:
-        # List or tuple.
-        self._tensor_keys[name] = None
-        for tensor in tensor_list_or_dict:
-          self._tensors[name].append(tensor)
-          self._tensor_dtypes[name].append(tensor.dtype)
-          self._tensor_shapes[name].append(tensor.shape)
-
-  def create_enqueue_op(self):
-    """Creates the op to enqueue the recorded host_calls.
-
-    Returns:
-      A list of enqueue ops, which is empty if there are no host calls.
-    """
-    if not self._names:
-      return []
-
-    tensors = []
-    # TODO(jhseu): Consider deduping tensors.
-    for name in self._names:
-      tensors.extend(self._tensors[name])
-
-    with ops.device(tpu.core(0)):
-      return [tpu_ops.outfeed_enqueue_tuple(tensors)]
-
-  def create_tpu_hostcall(self):
-    """Sends the tensors through outfeed and runs the host_fn on CPU.
-
-    The tensors are concatenated along dimension 0 to form a global tensor
-    across all shards. The concatenated tensors are passed to the host_fn,
-    which is executed on the first host.
-
-    Returns:
-      A dictionary mapping name to the return type of the host_call by that
-      name.
-
-    Raises:
-      RuntimeError: If outfeed tensor is scalar.
- """ - if not self._names: - return {} - - ret = {} - # For each i, dequeue_ops[i] is a list containing the tensors from all - # shards. This list is concatenated later. - dequeue_ops = [] - tensor_dtypes = [] - tensor_shapes = [] - for name in self._names: - for _ in self._tensors[name]: - dequeue_ops.append([]) - for dtype in self._tensor_dtypes[name]: - tensor_dtypes.append(dtype) - for shape in self._tensor_shapes[name]: - tensor_shapes.append(shape) - - # Outfeed ops execute on each replica's first logical core. Note: we must - # constraint it such that we have at most one outfeed dequeue and enqueue - # per replica. - for i in xrange(self._ctx.num_replicas): - host_device, ordinal_id = self._ctx.device_for_replica(i) - with ops.DEVICE(host_device): - outfeed_tensors = tpu_ops.outfeed_dequeue_tuple( - dtypes=tensor_dtypes, - shapes=tensor_shapes, - device_ordinal=ordinal_id) - for j, item in enumerate(outfeed_tensors): - dequeue_ops[j].append(item) - - # Deconstruct dequeue ops. - dequeue_ops_by_name = {} - pos = 0 - for name in self._names: - dequeue_ops_by_name[name] = dequeue_ops[pos:pos+len(self._tensors[name])] - pos += len(self._tensors[name]) - - # It is assumed evaluation always happens on single host TPU system. So, - # place all ops on tpu host if possible. - # - # TODO(jhseu): Evaluate whether this is right for summaries. - with ops.DEVICE(self._ctx.tpu_host_placement_function(replica_id=0)): - for name in self._names: - dequeue_ops = dequeue_ops_by_name[name] - for i, item in enumerate(dequeue_ops): - if dequeue_ops[i][0].shape.ndims == 0: - raise RuntimeError( - 'All tensors outfed from TPU should preserve batch size ' - 'dimension, but got scalar {}'.format(dequeue_ops[i][0])) - # TODO(xiejw): Allow users to specify the axis for batch size - # dimension. - dequeue_ops[i] = array_ops.concat(dequeue_ops[i], axis=0) - - if self._tensor_keys[name] is not None: - # The user-provided eval_metrics[1] is a dict. - dequeue_ops = dict(zip(self._tensor_keys[name], dequeue_ops)) - try: - ret[name] = self._host_fns[name](**dequeue_ops) - except TypeError as e: - logging.warning( - 'Exception while calling %s: %s. It is likely the tensors ' - '(%s[1]) do not match the ' - 'function\'s arguments', name, e, name) - raise e - else: - ret[name] = self._host_fns[name](*dequeue_ops) - - return ret - - -class _OutfeedHostCallHook(session_run_hook.SessionRunHook): - """Hook to run host calls when use_tpu=False.""" - - def __init__(self, tensors): - self._tensors = tensors - - def begin(self): - # We duplicate this code from the TPUInfeedOutfeedSessionHook rather than - # create a separate hook to guarantee execution order, because summaries - # need to be initialized before the outfeed thread starts. - # TODO(jhseu): Make a wrapper hook instead? - self._init_ops = contrib_summary.summary_writer_initializer_op() - # Get all the writer resources from the initializer, so we know what to - # flush. 
- self._finalize_ops = [] - for op in self._init_ops: - self._finalize_ops.append(contrib_summary.flush(writer=op.inputs[0])) - - def after_create_session(self, session, coord): - session.run(self._init_ops) - - def before_run(self, run_context): - return basic_session_run_hooks.SessionRunArgs(self._tensors) - - def end(self, session): - session.run(self._finalize_ops) - - -class ExamplesPerSecondHook(basic_session_run_hooks.StepCounterHook): - """Calculate and report global_step/sec and examples/sec during runtime.""" - - def __init__(self, - batch_size, - every_n_steps=100, - every_n_secs=None, - output_dir=None, - summary_writer=None): - self._batch_size = batch_size - super(ExamplesPerSecondHook, self).__init__( - every_n_steps=every_n_steps, - every_n_secs=every_n_secs, - output_dir=output_dir, - summary_writer=summary_writer) - - def _log_and_record(self, elapsed_steps, elapsed_time, global_step): - global_step_per_sec = elapsed_steps / elapsed_time - examples_per_sec = self._batch_size * global_step_per_sec - if self._summary_writer is not None: - global_step_summary = Summary(value=[ - Summary.Value(tag='global_step/sec', simple_value=global_step_per_sec) - ]) - example_summary = Summary(value=[ - Summary.Value(tag='examples/sec', simple_value=examples_per_sec) - ]) - self._summary_writer.add_summary(global_step_summary, global_step) - self._summary_writer.add_summary(example_summary, global_step) - logging.info('global_step/sec: %g', global_step_per_sec) - logging.info('examples/sec: %g', examples_per_sec) - - -class InstallSignalHandlerHook(session_run_hook.SessionRunHook): - """Change SIGINT (CTRL^C) handler to force quit the process. - - The default behavior often results in hanging processes. - The original handler is restored after training/evaluation. - """ - - def __init__(self): - self._signal_fn = signal.getsignal(signal.SIGINT) - - def before_run(self, run_context): - signal.signal(signal.SIGINT, signal.SIG_DFL) - - def end(self, session): - signal.signal(signal.SIGINT, self._signal_fn) - - -class TPUEstimator(estimator_lib.Estimator): - """Estimator with TPU support. - - TPUEstimator also supports training on CPU and GPU. You don't need to define - a separate `tf.estimator.Estimator`. - - TPUEstimator handles many of the details of running on TPU devices, such as - replicating inputs and models for each core, and returning to host - periodically to run hooks. - - TPUEstimator transforms a global batch size in params to a per-shard batch - size when calling the `input_fn` and `model_fn`. Users should specify - global batch size in constructor, and then get the batch size for each shard - in `input_fn` and `model_fn` by `params['batch_size']`. - - - For training, `model_fn` gets per-core batch size; `input_fn` may get - per-core or per-host batch size depending on `per_host_input_for_training` - in `TPUConfig` (See docstring for TPUConfig for details). - - - For evaluation and prediction, `model_fn` gets per-core batch size and - `input_fn` get per-host batch size. - - Evaluation - ========== - - `model_fn` should return `TPUEstimatorSpec`, which expects the `eval_metrics` - for TPU evaluation. However, if eval_on_tpu is False, `model_fn` must return - `EstimatorSpec` and the evaluation will execute on CPU or GPU; in this case - the following discussion on TPU evaluation does not apply. - - `TPUEstimatorSpec.eval_metrics` is a tuple of `metric_fn` and `tensors`, where - `tensors` could be a list of `Tensor`s or dict of names to `Tensor`s. 
(See
-  `TPUEstimatorSpec` for details). `metric_fn` takes the `tensors` and returns
-  a dict from metric string name to the result of calling a metric function,
-  namely a `(metric_tensor, update_op)` tuple.
-
-  One can set `use_tpu` to `False` for testing. All training, evaluation, and
-  predict will be executed on CPU. `input_fn` and `model_fn` will receive
-  `train_batch_size` or `eval_batch_size` unmodified as `params['batch_size']`.
-
-  Current limitations:
-  --------------------
-
-  1. TPU evaluation only works on a single host (one TPU worker) except
-     BROADCAST mode.
-
-  2. `input_fn` for evaluation should **NOT** raise an end-of-input exception
-     (`OutOfRangeError` or `StopIteration`). And all evaluation steps and all
-     batches should have the same size.
-
-  Example (MNIST):
-  ----------------
-
-  ```
-  # The metric fn which runs on CPU.
-  def metric_fn(labels, logits):
-    predictions = tf.argmax(logits, 1)
-    return {
-      'accuracy': tf.metrics.accuracy(
-          labels=labels, predictions=predictions),
-    }
-
-  # Your model fn which runs on TPU (eval_metrics is a list in this example).
-  def model_fn(features, labels, mode, config, params):
-    ...
-    logits = ...
-
-    if mode == tf.estimator.ModeKeys.EVAL:
-      return tpu_estimator.TPUEstimatorSpec(
-          mode=mode,
-          loss=loss,
-          eval_metrics=(metric_fn, [labels, logits]))
-
-  # or specify the eval_metrics tensors as a dict.
-  def model_fn(features, labels, mode, config, params):
-    ...
-    final_layer_output = ...
-
-    if mode == tf.estimator.ModeKeys.EVAL:
-      return tpu_estimator.TPUEstimatorSpec(
-          mode=mode,
-          loss=loss,
-          eval_metrics=(metric_fn, {
-              'labels': labels,
-              'logits': final_layer_output,
-          }))
-  ```
-
-  Prediction
-  ==========
-
-  Prediction on TPU is an experimental feature to support large batch
-  inference. It is not designed for latency-critical systems. In addition, due
-  to some usability issues, for prediction with a small dataset, CPU
-  `.predict`, i.e., creating a new `TPUEstimator` instance with
-  `use_tpu=False`, might be more convenient.
-
-  Note: In contrast to TPU training/evaluation, the `input_fn` for prediction
-  *should* raise an end-of-input exception (`OutOfRangeError` or
-  `StopIteration`), which serves as the stopping signal to `TPUEstimator`. To
-  be precise, the ops created by `input_fn` produce one batch of the data.
-  The `predict()` API processes one batch at a time. When reaching the end of
-  the data source, an end-of-input exception should be raised by one of these
-  operations. The user usually does not need to do this manually. As long as
-  the dataset is not repeated forever, the `tf.data` API will raise an
-  end-of-input exception automatically after the last batch has been produced.
-
-  Note: Estimator.predict returns a Python generator. Please consume all the
-  data from the generator so that TPUEstimator can shut down the TPU system
-  properly for the user.
-
-  Current limitations:
-  --------------------
-  1. TPU prediction only works on a single host (one TPU worker).
-
-  2. `input_fn` must return a `Dataset` instance rather than `features`. In
-     fact, .train() and .evaluate() also support Dataset as return value.
- - Example (MNIST): - ---------------- - ``` - height = 32 - width = 32 - total_examples = 100 - - def predict_input_fn(params): - batch_size = params['batch_size'] - - images = tf.random_uniform( - [total_examples, height, width, 3], minval=-1, maxval=1) - - dataset = tf.data.Dataset.from_tensor_slices(images) - dataset = dataset.map(lambda images: {'image': images}) - - dataset = dataset.batch(batch_size) - return dataset - - def model_fn(features, labels, params, mode): - # Generate predictions, called 'output', from features['image'] - - if mode == tf.estimator.ModeKeys.PREDICT: - return tf.contrib.tpu.TPUEstimatorSpec( - mode=mode, - predictions={ - 'predictions': output, - 'is_padding': features['is_padding'] - }) - - tpu_est = TPUEstimator( - model_fn=model_fn, - ..., - predict_batch_size=16) - - # Fully consume the generator so that TPUEstimator can shutdown the TPU - # system. - for item in tpu_est.predict(input_fn=input_fn): - # Filter out item if the `is_padding` is 1. - # Process the 'predictions' - ``` - - Exporting - ========= - - `export_savedmodel` exports 2 metagraphs, one with `tag_constants.SERVING`, - and another with `tag_constants.SERVING` and `tag_constants.TPU`. - At serving time, these tags are used to select metagraph to load. - - Before running the graph on TPU, TPU system needs to be initialized. If - TensorFlow Serving model-server is used, this is done automatically. If - not, please call `session.run(tpu.initialize_system())`. - - `tpu.outside_compilation` can be used to wrap TPU incompatible ops in - `model_fn`. - - Example: - ---------------- - - ``` - def model_fn(features, labels, mode, config, params): - ... - logits = ... - export_outputs = { - 'logits': export_output_lib.PredictOutput( - {'logits': logits}) - } - - def host_call(logits): - class_ids = math_ops.argmax(logits) - classes = string_ops.as_string(class_ids) - export_outputs['classes'] = - export_output_lib.ClassificationOutput(classes=classes) - - tpu.outside_compilation(host_call, logits) - - ... - ``` - - """ - - def __init__(self, - model_fn=None, - train_cache_fn=None, - eval_cache_fn=None, - model_dir=None, - config=None, - params=None, - use_tpu=True, - train_batch_size=None, - eval_batch_size=None, - predict_batch_size=None, - batch_axis=None, - eval_on_tpu=True, - export_to_tpu=True, - warm_start_from=None): - """Constructs an `TPUEstimator` instance. - - Args: - model_fn: Model function as required by `Estimator` which returns - EstimatorSpec or TPUEstimatorSpec. `training_hooks`, 'evaluation_hooks', - and `prediction_hooks` must not capure any TPU Tensor inside the model_fn. - model_dir: Directory to save model parameters, graph and etc. This can - also be used to load checkpoints from the directory into a estimator to - continue training a previously saved model. If `None`, the model_dir in - `config` will be used if set. If both are set, they must be same. If - both are `None`, a temporary directory will be used. - config: An `tpu_config.RunConfig` configuration object. Cannot be `None`. - params: An optional `dict` of hyper parameters that will be passed into - `input_fn` and `model_fn`. Keys are names of parameters, values are - basic python types. There are reserved keys for `TPUEstimator`, - including 'batch_size'. - use_tpu: A bool indicating whether TPU support is enabled. Currently, - - TPU training and evaluation respect this bit, but eval_on_tpu can - override execution of eval. See below. - - Predict still happens on CPU. 
- train_batch_size: An int representing the global training batch size. - TPUEstimator transforms this global batch size to a per-shard batch - size, as params['batch_size'], when calling `input_fn` and `model_fn`. - Cannot be `None` if `use_tpu` is `True`. - Must be divisible by total number of replicas. - eval_batch_size: An int representing evaluation batch size. - Must be divisible by total number of replicas. - predict_batch_size: An int representing the prediction batch size. - Must be divisible by total number of replicas. - batch_axis: A python tuple of int values describing how each tensor - produced by the Estimator `input_fn` should be split across the TPU - compute shards. For example, if your input_fn produced (images, labels) - where the images tensor is in `HWCN` format, your shard dimensions would - be [3, 0], where 3 corresponds to the `N` dimension of your images - Tensor, and 0 corresponds to the dimension along which to split the - labels to match up with the corresponding images. If None is supplied, - and per_host_input_for_training is True, batches will be sharded based - on the major dimension. If tpu_config.per_host_input_for_training is - False or `PER_HOST_V2`, batch_axis is ignored. - eval_on_tpu: If False, evaluation runs on CPU or GPU. In this case, the - model_fn must return `EstimatorSpec` when called with `mode` as `EVAL`. - export_to_tpu: If True, `export_savedmodel()` exports a metagraph for - serving on TPU besides the one on CPU. - warm_start_from: Optional string filepath to a checkpoint or SavedModel to - warm-start from, or a `tf.estimator.WarmStartSettings` - object to fully configure warm-starting. If the string - filepath is provided instead of a `WarmStartSettings`, - then all variables are warm-started, and it is assumed - that vocabularies and Tensor names are unchanged. - - Raises: - ValueError: `params` has reserved keys already. - """ - if config is None or not isinstance(config, tpu_config.RunConfig): - raise ValueError( - '`config` must be provided with type `tpu_config.RunConfig`') - - if params is not None and any(k in params for k in _RESERVED_PARAMS_KEYS): - raise ValueError('{} are reserved keys but existed in params {}.'.format( - _RESERVED_PARAMS_KEYS, params)) - - if use_tpu: - # Perform some very basic validations. More validations will be found in - # _InternalTPUContext. - if train_batch_size is None: - raise ValueError('`train_batch_size` cannot be `None`') - util_lib.check_positive_integer(train_batch_size, 'train_batch_size') - - if (config.tpu_config.per_host_input_for_training is - tpu_config.InputPipelineConfig.PER_SHARD_V1 and - config.tpu_config.num_cores_per_replica): - raise ValueError( - 'Model parallelism only supports per host input for training. ' - 'Please adjust TPURunconfig.per_host_input_for_training.') - - if eval_batch_size is not None: - util_lib.check_positive_integer(eval_batch_size, 'eval_batch_size') - - if predict_batch_size is not None: - util_lib.check_positive_integer(predict_batch_size, - 'predict_batch_size') - - # Verifies the model_fn signature according to Estimator framework. - estimator_lib._verify_model_fn_args(model_fn, params) # pylint: disable=protected-access - # We cannot store config and params in this constructor as parent - # constructor might change them, such as assigning a temp dir for - # config.model_dir. 
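
A small worked example of the batch-size bookkeeping the constructor docstring describes; the numbers are illustrative:

```python
global_batch_size = 1024   # what the user passes as train_batch_size
num_shards = 8             # replicas in the TPU system

assert global_batch_size % num_shards == 0  # must divide evenly
params = {'batch_size': global_batch_size // num_shards}
print(params)  # {'batch_size': 128}: what model_fn sees on each shard
```
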
- model_function = self._augment_model_fn( - model_fn, - train_cache_fn, - eval_cache_fn, - batch_axis) - - # Overwrite log_step_count_steps to disable TensorLoggingHook and - # StepCounterHook from being created in Estimator. TPUEstimator already - # added equivalent hooks in _augment_model_fn above. - self._log_every_n_steps = config.log_step_count_steps - config = config.replace(log_step_count_steps=None) - - # Passing non-None params as wrapped model_fn has it. - params = params or {} - super(TPUEstimator, self).__init__( - model_fn=model_function, - model_dir=model_dir, - config=config, - params=params, - warm_start_from=warm_start_from) - self._iterations_per_training_loop = ( - self._config.tpu_config.iterations_per_loop) - - # All properties passed to _InternalTPUContext are immutable. - # pylint: disable=protected-access - self._ctx = tpu_context._get_tpu_context( - self._config, train_batch_size, - eval_batch_size, predict_batch_size, - use_tpu, - eval_on_tpu) - - self._export_to_tpu = export_to_tpu - - self._is_input_fn_invoked = None - self._rendezvous = {} - - def _add_meta_graph_for_mode(self, - builder, - input_receiver_fn_map, - checkpoint_path, - strip_default_attrs, - save_variables=True, - mode=model_fn_lib.ModeKeys.PREDICT, - export_tags=None, - check_variables=True): - if self._export_to_tpu and mode != model_fn_lib.ModeKeys.PREDICT: - raise NotImplementedError( - 'TPUEstimator only handles mode PREDICT for exporting ' - 'when `export_to_tpu` is `True`; ' - 'got {}.'.format(mode)) - - (super(TPUEstimator, self). - _add_meta_graph_for_mode(builder, - input_receiver_fn_map, - checkpoint_path, - strip_default_attrs, - save_variables, - mode=mode, - export_tags=export_tags, - check_variables=check_variables)) - - if self._export_to_tpu: - input_receiver_fn_map = {_REWRITE_FOR_INFERENCE_MODE: - input_receiver_fn_map[mode]} - export_tags = [tag_constants.SERVING, tag_constants.TPU] - mode = _REWRITE_FOR_INFERENCE_MODE - # See b/110052256 for why `check_variables` is `False`. - (super(TPUEstimator, self). - _add_meta_graph_for_mode(builder, - input_receiver_fn_map, - checkpoint_path, - strip_default_attrs, - save_variables=False, - mode=mode, - export_tags=export_tags, - check_variables=False)) - - def _call_model_fn(self, features, labels, mode, config): - if mode == _REWRITE_FOR_INFERENCE_MODE: - return self._call_model_fn_for_inference(features, labels, mode, config) - else: - return super(TPUEstimator, self)._call_model_fn( - features, labels, mode, config) - - def _call_model_fn_for_inference(self, features, labels, mode, config): - """Wraps `_call_model_fn` for `export_savedmodel`.""" - if mode != _REWRITE_FOR_INFERENCE_MODE: - raise ValueError('mode must be {}; ' - 'got {}.'.format(_REWRITE_FOR_INFERENCE_MODE, mode)) - - capture = _CapturedObject() - - def computation(): - """Compute tpu tensors used in export_outputs. - - Passed to rewrite_for_inference so that model_fn will be called under - the rewriting contexts. Only tpu tensors are returned, but export_outputs - and scaffold are captured. - - Returns: - A list of Tensors used in export_outputs and not marked for - outside_compilation. - """ - # We should only call model fn once and it should be inside `computation` - # so that building the graph will happen under `rewrite_for_inference`. - mode = model_fn_lib.ModeKeys.PREDICT - estimator_spec = self._call_model_fn(features, labels, mode, config) - - # We pick the TPU tensors out from `export_output` and later return them - # from `computation` for rewriting. 
- tensors_dict = collections.OrderedDict( - (k, _export_output_to_tensors(v)) - for k, v in six.iteritems(estimator_spec.export_outputs) - ) - tensors = nest.flatten(tensors_dict) - tpu_tensors = [t for t in tensors if _is_tpu_tensor(t)] - - # We cannot return anything other than `tpu_tensors` here so we capture - # the rest for later use. - capture.capture((estimator_spec, tensors_dict, tensors)) - return tpu_tensors - - tpu_tensors_on_cpu = tpu.rewrite_for_inference(computation) - estimator_spec, tensors_dict, tensors = capture.get() - - # Reconstruct `tensors`, but with `tpu_tensors` replaced with - # `tpu_tensors_on_cpu`. - new_tensors = [] - for t in tensors: - if _is_tpu_tensor(t): - new_tensors.append(tpu_tensors_on_cpu.pop(0)) - elif t is None: - new_tensors.append(None) - else: - # Only fetching `tpu_tensors_on_cpu` does not trigger - # TPU computation and blocks, so we add the control dependency here. - control_inputs = (tpu_tensors_on_cpu - if isinstance(tpu_tensors_on_cpu, (list, tuple)) - else (tpu_tensors_on_cpu,)) - with ops.control_dependencies(control_inputs): - new_tensors.append(array_ops.identity(t)) - - # Reconstruct `tensors_dict`. - new_tensors_dict = nest.pack_sequence_as(tensors_dict, new_tensors) - # Reconstruct `export_outputs`. - export_outputs = estimator_spec.export_outputs - new_export_outputs = collections.OrderedDict( - (k, _clone_export_output_with_tensors(export_outputs[k], v)) - for k, v in six.iteritems(new_tensors_dict) - ) - - return estimator_spec._replace(export_outputs=new_export_outputs) - - def _create_global_step(self, graph): - """Creates a global step suitable for TPUs. - - Args: - graph: The graph in which to create the global step. - - Returns: - A global step `Tensor`. - - Raises: - ValueError: if the global step tensor is already defined. - """ - return _create_global_step(graph) - - def _convert_train_steps_to_hooks(self, steps, max_steps): - with self._ctx.with_mode(model_fn_lib.ModeKeys.TRAIN) as ctx: - if ctx.is_running_on_cpu(): - return super(TPUEstimator, self)._convert_train_steps_to_hooks( - steps, max_steps) - - # On TPU. - if steps is None and max_steps is None: - raise ValueError( - 'For TPU training, one of `steps` or `max_steps` must be set. ' - 'Cannot be both `None`.') - - # Estimator.train has explicit positiveness check. - if steps is not None: - util_lib.check_positive_integer(steps, 'Train steps') - if max_steps is not None: - util_lib.check_positive_integer(max_steps, 'Train max_steps') - - return [ - _TPUStopAtStepHook(self._iterations_per_training_loop, steps, max_steps) - ] - - def _convert_eval_steps_to_hooks(self, steps): - with self._ctx.with_mode(model_fn_lib.ModeKeys.EVAL) as ctx: - if ctx.is_running_on_cpu(): - return super(TPUEstimator, self)._convert_eval_steps_to_hooks(steps) - - if steps is None: - raise ValueError('Evaluate `steps` must be set on TPU. Cannot be `None`.') - - util_lib.check_positive_integer(steps, 'Eval steps') - - return [ - evaluation._StopAfterNEvalsHook( # pylint: disable=protected-access - num_evals=steps), - _SetEvalIterationsHook(steps) - ] - - def _call_input_fn(self, input_fn, mode): - """Calls the input function. - - Args: - input_fn: The input function. - mode: ModeKeys - - Returns: - Either features or (features, labels) where features and labels are: - features - `Tensor` or dictionary of string feature name to `Tensor`. - labels - `Tensor` or dictionary of `Tensor` with labels. - - Raises: - ValueError: if input_fn takes invalid arguments or does not have `params`. 
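
The flatten/swap/repack step above reduces to an ordered substitution. A pure-Python sketch, with `is_tpu` standing in for `_is_tpu_tensor` and string tags standing in for tensors:

```python
def swap_tpu_tensors(tensors, tpu_results, is_tpu):
    # Replace TPU-computed entries with their CPU-fetched counterparts,
    # consuming tpu_results in order, exactly like the pop(0) loop above.
    tpu_results = list(tpu_results)
    return [tpu_results.pop(0) if is_tpu(t) else t for t in tensors]

tensors = ['tpu:logits', 'cpu:step', 'tpu:probs']
tpu_results = ['logits_on_cpu', 'probs_on_cpu']
print(swap_tpu_tensors(tensors, tpu_results, lambda t: t.startswith('tpu:')))
# ['logits_on_cpu', 'cpu:step', 'probs_on_cpu']
```
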
- """ - input_fn_args = function_utils.fn_args(input_fn) - config = self.config # a deep copy. - kwargs = {} - if 'params' in input_fn_args: - kwargs['params'] = self.params # a deep copy. - else: - raise ValueError('input_fn ({}) does not include params argument, ' - 'required by TPUEstimator to pass batch size as ' - 'params["batch_size"]'.format(input_fn)) - if 'config' in input_fn_args: - kwargs['config'] = config - - if 'mode' in input_fn_args: - kwargs['mode'] = mode - - # Records the fact input_fn has been invoked. - self._is_input_fn_invoked = True - - with self._ctx.with_mode(mode) as ctx: - # Setting the batch size in params first. This helps user to have same - # input_fn for use_tpu=True/False. - batch_size_for_input_fn = ctx.batch_size_for_input_fn - if batch_size_for_input_fn is not None: - _add_item_to_params(kwargs['params'], - _BATCH_SIZE_KEY, batch_size_for_input_fn) - - # For export_savedmodel, input_fn is never passed to Estimator. So, - # `is_export_mode` must be False. - if ctx.is_running_on_cpu(is_export_mode=False): - with ops.DEVICE('/DEVICE:CPU:0'): - return input_fn(**kwargs) - - # For TPU computation, input_fn should be invoked in a tf.while_loop for - # performance. While constructing the tf.while_loop, the structure of - # inputs returned by the `input_fn` needs to be recorded. The structure - # includes whether features or labels is dict or single Tensor, dict keys, - # tensor shapes, and dtypes. The recorded structure is used to create the - # infeed dequeue ops, which must be wrapped and passed as a Fn, called - # inside the TPU computation, as the TPU computation is wrapped inside a - # tf.while_loop also. So, we either pass input_fn to model_fn or pass - # dequeue_fn to model_fn. Here, `input_fn` is passed directly as - # `features` in `model_fn` signature. - def _input_fn(ctx): - _add_item_to_params(kwargs['params'], _CTX_KEY, ctx) - return input_fn(**kwargs) - - return _input_fn - - def _validate_features_in_predict_input(self, result): - """Skip the validation. - - For TPUEstimator, we do not need to check the result type. `_InputPipeline` - has stronger check. Parent class's check generates confusing warning msg. - - Args: - result: `features` returned by input_fn. 
- """ - pass - - def train(self, - input_fn, - hooks=None, - steps=None, - max_steps=None, - saving_listeners=None): - rendezvous = error_handling.ErrorRendezvous(num_sources=3) - self._rendezvous[model_fn_lib.ModeKeys.TRAIN] = rendezvous - try: - return super(TPUEstimator, self).train( - input_fn=input_fn, hooks=hooks, steps=steps, max_steps=max_steps, - saving_listeners=saving_listeners - ) - except Exception: # pylint: disable=broad-except - rendezvous.record_error('training_loop', sys.exc_info()) - finally: - rendezvous.record_done('training_loop') - rendezvous.raise_errors() - - def evaluate(self, input_fn, steps=None, hooks=None, checkpoint_path=None, - name=None): - rendezvous = error_handling.ErrorRendezvous(num_sources=3) - self._rendezvous[model_fn_lib.ModeKeys.EVAL] = rendezvous - try: - return super(TPUEstimator, self).evaluate( - input_fn, steps=steps, hooks=hooks, checkpoint_path=checkpoint_path, - name=name - ) - except Exception: # pylint: disable=broad-except - rendezvous.record_error('evaluation_loop', sys.exc_info()) - finally: - rendezvous.record_done('evaluation_loop') - rendezvous.raise_errors() - - def predict(self, - input_fn, - predict_keys=None, - hooks=None, - checkpoint_path=None, - yield_single_examples=True): - rendezvous = error_handling.ErrorRendezvous(num_sources=3) - self._rendezvous[model_fn_lib.ModeKeys.PREDICT] = rendezvous - try: - for result in super(TPUEstimator, self).predict( - input_fn=input_fn, - predict_keys=predict_keys, - hooks=hooks, - checkpoint_path=checkpoint_path, - yield_single_examples=yield_single_examples): - yield result - except Exception: # pylint: disable=broad-except - rendezvous.record_error('prediction_loop', sys.exc_info()) - finally: - rendezvous.record_done('prediction_loop') - rendezvous.raise_errors() - - rendezvous.record_done('prediction_loop') - rendezvous.raise_errors() - - def _augment_model_fn(self, model_fn, train_cache_fn, eval_cache_fn, batch_axis): - """Returns a new model_fn, which wraps the TPU support.""" - - def _model_fn(features, labels, mode, config, params): - """A Estimator `model_fn` for TPUEstimator.""" - with self._ctx.with_mode(mode) as ctx: - model_fn_wrapper = _ModelFnWrapper(model_fn, train_cache_fn, - eval_cache_fn, config, params, ctx) - - # `input_fn` is called in `train()`, `evaluate()`, and `predict()`, - # but not in `export_savedmodel()`. - if self._is_input_fn_invoked: - is_export_mode = False - else: - is_export_mode = True - - # Clear the bit. - self._is_input_fn_invoked = None - - # examples_hook is added to training_hooks for both CPU and TPU - # execution. - examples_hook = ExamplesPerSecondHook( - ctx.global_batch_size, - output_dir=self.model_dir, - every_n_steps=self._log_every_n_steps) - - if ctx.is_running_on_cpu(is_export_mode=is_export_mode): - logging.info('Running %s on CPU', mode) - estimator_spec = model_fn_wrapper.call_without_tpu( - features, labels, is_export_mode=is_export_mode) - estimator_spec = estimator_spec._replace( - training_hooks=estimator_spec.training_hooks + (examples_hook,)) - return estimator_spec - - assert labels is None, '`labels` passed to `model_fn` must be `None`.' - # TPUEstimator._call_input_fn passes `input_fn` as features to here. - assert callable(features), '`input_fn` is not callable.' 
- input_fn = features - - input_holders = _InputPipeline(input_fn, batch_axis, ctx) - enqueue_ops, dequeue_fn, input_hooks, run_infeed_loop_on_coordinator = ( - input_holders.generate_infeed_enqueue_ops_and_dequeue_fn()) - - graph = ops.get_default_graph() - for enqueue_op in enqueue_ops: - if isinstance(enqueue_op, list): - graph.get_collection_ref(_TPU_ENQUEUE_OPS).extend(enqueue_op) - else: - graph.add_to_collection(_TPU_ENQUEUE_OPS, enqueue_op) - - if mode == model_fn_lib.ModeKeys.TRAIN: - loss, host_call, scaffold, training_hooks = ( - _train_on_tpu_system(ctx, model_fn_wrapper, dequeue_fn)) - - if model_fn_wrapper._params.get('track_mean', False): - iterations_per_loop_var = _create_or_get_iterations_per_loop() - loss = math_ops.div(loss, - math_ops.cast( - iterations_per_loop_var, - dtype=loss.dtype)) - - host_ops = host_call.create_tpu_hostcall() - if host_ops is None: - host_ops = [] - - shutdown_hooks = [] - shutdown_mode = os.environ.get('TF_TPU_GRACEFUL_SHUTDOWN_MODE', - 'shutdown_worker') - if shutdown_mode: - if shutdown_mode == 'shutdown_worker': - finalizer_hooks = [ - session_support.ShutdownLameWorkers(timeout_ms=60*1000), - ] - elif shutdown_mode == 'shutdown_computation': - finalizer_hooks = [ - session_support.RestartComputation(timeout_ms=60*1000), - ] - else: - raise ValueError('Unknown TF_TPU_GRACEFUL_SHUTDOWN_MODE "%s"' % - shutdown_mode) - - shutdown_hooks.append(session_support.GracefulShutdownHook( - checkpoint_prefix=self.model_dir + '/model.ckpt', - on_shutdown_hooks=finalizer_hooks - )) - - with ops.control_dependencies([loss]): - global_step = array_ops.identity(training.get_global_step()) - hooks = input_hooks + shutdown_hooks - logging_hook_frequency = ( # Divide and round up - (self._log_every_n_steps + - self._config.tpu_config.iterations_per_loop - 1) // - self._config.tpu_config.iterations_per_loop) - - iterations_per_loop = array_ops.identity( - _create_or_get_iterations_per_loop()) - - hooks.extend([ - TPUInfeedOutfeedSessionHook( - ctx, - enqueue_ops, - host_ops, - run_infeed_loop_on_coordinator=( - run_infeed_loop_on_coordinator), - rendezvous=self._rendezvous[mode], - ), - InstallSignalHandlerHook(), - training.LoggingTensorHook( - { - 'loss': array_ops.identity(loss), - 'ppl': tensorflow.exp(loss), - 'bpc': loss / tensorflow.constant(math.log(2)), - '#iter/loop': iterations_per_loop, - 'global step': global_step, - }, - every_n_iter=logging_hook_frequency) - ]) - examples_hook._set_steps_per_run( # pylint: disable=protected-access - self._config.tpu_config.iterations_per_loop) - hooks.append(examples_hook) - - if training_hooks: - hooks.extend(training_hooks) - - chief_hooks = [] - if (self._config.save_checkpoints_secs or - self._config.save_checkpoints_steps): - checkpoint_hook = training.CheckpointSaverHook( - self.model_dir, - save_secs=self._config.save_checkpoints_secs, - save_steps=self._config.save_checkpoints_steps, - scaffold=scaffold) - checkpoint_hook._set_steps_per_run( # pylint: disable=protected-access - self._config.tpu_config.iterations_per_loop) - chief_hooks.append(checkpoint_hook) - - summary.scalar(model_fn_lib.LOSS_METRIC_KEY, loss) - with ops.control_dependencies([loss]): - update_ops = _sync_variables_ops() - - # Validate the TPU training graph to catch basic errors - _validate_tpu_training_graph() - - train_op = control_flow_ops.group(*update_ops) - graph.add_to_collection(_TPU_TRAIN_OP, train_op) - - return model_fn_lib.EstimatorSpec( - mode, - loss=loss, - training_chief_hooks=chief_hooks, - training_hooks=hooks, - 
train_op=train_op, - scaffold=scaffold) - - if mode == model_fn_lib.ModeKeys.EVAL: - total_loss, host_calls, scaffold, eval_hooks = _eval_on_tpu_system( - ctx, model_fn_wrapper, dequeue_fn) - iterations_per_loop_var = _create_or_get_iterations_per_loop() - mean_loss = math_ops.div(total_loss, - math_ops.cast( - iterations_per_loop_var, - dtype=total_loss.dtype)) - - # Creates a dummy metric update_op for all metrics. Estimator expects - # all metrics in eval_metric_ops have update_op and calls them one by - # one. The real metric update_ops are invoked in a separated thread. - # So, here give Estimator the dummy op for all metrics. - with ops.control_dependencies([mean_loss]): - # After TPU evaluation computation is done (the mean_loss tensor), - # reads all variables back from TPU and updates the eval step - # counter properly - internal_ops_to_run = _sync_variables_ops() - internal_ops_to_run.append( - _increase_eval_step_op(iterations_per_loop_var)) - with ops.control_dependencies(internal_ops_to_run): - dummy_update_op = control_flow_ops.no_op() - - host_call_ret = host_calls.create_tpu_hostcall() - eval_metric_ops = {} - eval_update_ops = [] - - for k, v in host_call_ret.get('eval_metrics', {}).items(): - eval_metric_ops[k] = (v[0], dummy_update_op) - eval_update_ops.append(v[1]) - - if 'host_call' not in host_call_ret: - host_ops = [] - else: - host_ops = host_call_ret['host_call'] - hooks = [ - TPUInfeedOutfeedSessionHook( - ctx, - enqueue_ops, - eval_update_ops + host_ops, - run_infeed_loop_on_coordinator=( - run_infeed_loop_on_coordinator), - rendezvous=self._rendezvous[mode]), - ] + input_hooks - - if eval_hooks: - hooks.extend(eval_hooks) - - return model_fn_lib.EstimatorSpec( - mode, - loss=mean_loss, - evaluation_hooks=hooks, - eval_metric_ops=eval_metric_ops, - scaffold=scaffold) - - # Predict - assert mode == model_fn_lib.ModeKeys.PREDICT - - (dummy_predict_op, host_calls, - scaffold, prediction_hooks) = _predict_on_tpu_system( - ctx, model_fn_wrapper, dequeue_fn) - with ops.control_dependencies([dummy_predict_op]): - internal_ops_to_run = _sync_variables_ops() - with ops.control_dependencies(internal_ops_to_run): - dummy_predict_op = control_flow_ops.no_op() - - # In train and evaluation, the main TPU program is passed to monitored - # training session to run. Infeed enqueue and outfeed dequeue are - # executed in side threads. This is not the configuration for - # prediction mode. - # - # For prediction, the Estimator executes the EstimatorSpec.predictions - # directly and yield the element (via generator) to call site. So, the - # outfeed based prediction must be passed to MonitoredSession directly. - # Other parts of the TPU execution are organized as follows. - # - # 1. All outfeed based Tensors must be grouped with predictions Tensors - # to form a single invocation. This avoid the issue we might trigger - # multiple outfeeds incorrectly. To achieve this, `host_call` is - # placed in control_dependencies of `stopping_signals`, and - # `stopping_signals` is passed into _StoppingPredictHook, which sets - # the `stopping_signals` as SessionRunArgs. MonitoredSession merges - # all SessionRunArgs with the fetch in session.run together. - # - # 2. The TPU program (dummy_predict_op) and enqueue_ops (infeed Enqueue) - # are grouped together. They will be launched once and only once in - # side threads and they quit naturally according to the SAME stopping - # condition. 
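
A pure-Python sketch of the stopping protocol described in the comment above: the loop keeps dequeuing until it sees the sentinel batch appended after the real data (batch contents illustrative):

```python
batches = [{'data': b, 'stop': False} for b in ('a1', 'a2', 'a3')]
batches.append({'data': None, 'stop': True})  # sentinel carries stop=True

def predict_loop(dequeue):
    results = []
    while True:
        batch = dequeue()
        if batch['stop']:          # scalar stopping signal flips here
            break
        results.append(batch['data'].upper())  # stand-in for the TPU step
    return results

it = iter(batches)
print(predict_loop(lambda: next(it)))  # ['A1', 'A2', 'A3']
```
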
- enqueue_ops.append(dummy_predict_op) - - host_call_ret = host_calls.create_tpu_hostcall() - if 'host_call' not in host_call_ret: - host_ops = [] - else: - host_ops = host_call_ret['host_call'] - - predictions = host_call_ret['predictions'] - _verify_cross_hosts_transfer_size( - predictions, message=( - 'The estimated size for TPUEstimatorSpec.predictions is too ' - 'large.')) - signals = host_call_ret['signals'] - - with ops.control_dependencies(host_ops): - host_ops = [] # Empty, we do do not need it anymore. - scalar_stopping_signal = _StopSignals.as_scalar_stopping_signal( - signals) - predictions = _PaddingSignals.slice_tensor_or_dict( - predictions, signals) - - hooks = [ - _StoppingPredictHook(scalar_stopping_signal), - TPUInfeedOutfeedSessionHookForPrediction( - ctx, enqueue_ops, host_ops, rendezvous=self._rendezvous[mode]), - ] + input_hooks - - if prediction_hooks: - hooks.extend(prediction_hooks) - - return model_fn_lib.EstimatorSpec( - mode, - prediction_hooks=hooks, - predictions=predictions, - scaffold=scaffold) - - return _model_fn - - -def _is_tpu_tensor(tensor): - if not isinstance(tensor, ops.Tensor): - return False - try: - tensor.op.get_attr(tpu._OUTSIDE_COMPILATION_ATTR) # pylint: disable=protected-access - except ValueError: - return True - else: - return False - - -def _export_output_to_tensors(export_output): - """Get a list of `Tensors` used in `export_output`. - - Args: - export_output: an `ExportOutput` object such as `ClassificationOutput`, - `RegressionOutput`, or `PredictOutput`. - Returns: - a list of tensors used in export_output. - - Raises: - ValueError: if `export_output` is not one of `ClassificationOutput`, - `RegressionOutput`, or `PredictOutput`. - """ - if isinstance(export_output, export_output_lib.ClassificationOutput): - return [export_output.scores, export_output.classes] - elif isinstance(export_output, export_output_lib.RegressionOutput): - return [export_output.value] - elif isinstance(export_output, export_output_lib.PredictOutput): - return export_output.outputs.values() - else: - raise ValueError( - '`export_output` must be have type `ClassificationOutput`, ' - '`RegressionOutput`, or `PredictOutput`; got {}.'.format(export_output)) - - -def _clone_export_output_with_tensors(export_output, tensors): - """Clones `export_output` but with new `tensors`. - - Args: - export_output: an `ExportOutput` object such as `ClassificationOutput`, - `RegressionOutput`, or `PredictOutput`. - tensors: a list of `Tensors` used to construct a new `export_output`. - - Returns: - A dict similar to `export_output` but with `tensors`. - - Raises: - ValueError: if `export_output` is not one of `ClassificationOutput`, - `RegressionOutput`, or `PredictOutput`. 
- """ - if isinstance(export_output, export_output_lib.ClassificationOutput): - if len(tensors) != 2: - raise ValueError('tensors must be of length 2; ' - 'got {}.'.format(len(tensors))) - return export_output_lib.ClassificationOutput(*tensors) - elif isinstance(export_output, export_output_lib.RegressionOutput): - if len(tensors) != 1: - raise ValueError('tensors must be of length 1; ' - 'got {}'.format(len(tensors))) - return export_output_lib.RegressionOutput(*tensors) - elif isinstance(export_output, export_output_lib.PredictOutput): - return export_output_lib.PredictOutput( - dict(zip(export_output.outputs.keys(), tensors))) - else: - raise ValueError( - '`export_output` must be have type `ClassificationOutput`, ' - '`RegressionOutput`, or `PredictOutput`; got {}.'.format(export_output)) - - -def _eval_on_tpu_system(ctx, model_fn_wrapper, dequeue_fn): - """Executes `model_fn_wrapper` multiple times on all TPU shards.""" - iterations_per_loop_var = _create_or_get_iterations_per_loop() - - (single_tpu_eval_step, host_calls, captured_scaffold_fn, captured_eval_hooks - ) = model_fn_wrapper.convert_to_single_tpu_eval_step(dequeue_fn) - - def multi_tpu_eval_steps_on_single_shard(): - loop_vars = [_ZERO_LOSS] - if model_fn_wrapper._eval_cache_fn is not None: - batch_size = ctx.global_batch_size - num_shards = ctx._config._tpu_config.num_shards - loop_vars += model_fn_wrapper._eval_cache_fn(batch_size // num_shards) - - return training_loop.repeat( - iterations_per_loop_var, - single_tpu_eval_step, - loop_vars) - - ret = tpu.shard( - multi_tpu_eval_steps_on_single_shard, - inputs=[], - num_shards=ctx.num_replicas, - outputs_from_all_shards=False, - device_assignment=ctx.device_assignment) - loss = ret[0] - - scaffold = _get_scaffold(captured_scaffold_fn) - return loss, host_calls, scaffold, captured_eval_hooks.get() - - -def _train_on_tpu_system(ctx, model_fn_wrapper, dequeue_fn): - """Executes `model_fn_wrapper` multiple times on all TPU shards.""" - iterations_per_loop_var = _create_or_get_iterations_per_loop() - - (single_tpu_train_step, host_call, captured_scaffold_fn, - captured_training_hooks) = ( - model_fn_wrapper.convert_to_single_tpu_train_step(dequeue_fn)) - - def multi_tpu_train_steps_on_single_shard(): - if model_fn_wrapper._params.get('track_mean', False): - loop_vars = [_ZERO_LOSS] - else: - loop_vars = [_INITIAL_LOSS] - if model_fn_wrapper._train_cache_fn is not None: - batch_size = ctx.global_batch_size - num_shards = ctx._config._tpu_config.num_shards - loop_vars += model_fn_wrapper._train_cache_fn(batch_size // num_shards) - - return training_loop.repeat( - iterations_per_loop_var, - single_tpu_train_step, - loop_vars) - - ret = tpu.shard( - multi_tpu_train_steps_on_single_shard, - inputs=[], - num_shards=ctx.num_replicas, - outputs_from_all_shards=False, - device_assignment=ctx.device_assignment) - loss = ret[0] - - scaffold = _get_scaffold(captured_scaffold_fn) - return loss, host_call, scaffold, captured_training_hooks.get() - - -def _predict_on_tpu_system(ctx, model_fn_wrapper, dequeue_fn): - """Executes `model_fn_wrapper` multiple times on all TPU shards.""" - (single_tpu_predict_step, host_calls, captured_scaffold_fn, - captured_predict_hooks - ) = model_fn_wrapper.convert_to_single_tpu_predict_step(dequeue_fn) - - def multi_tpu_predict_steps_on_single_shard(): - - def cond(scalar_stopping_signal): - return math_ops.logical_not( - _StopSignals.should_stop(scalar_stopping_signal)) - - inputs = [_StopSignals.NON_STOPPING_SIGNAL] - outputs = training_loop.while_loop( - 
cond, single_tpu_predict_step, inputs=inputs, name=b'loop')
-    return outputs
-
-  (dummy_predict_op,) = tpu.shard(
-      multi_tpu_predict_steps_on_single_shard,
-      inputs=[],
-      num_shards=ctx.num_replicas,
-      outputs_from_all_shards=False,
-      device_assignment=ctx.device_assignment)
-
-  scaffold = _get_scaffold(captured_scaffold_fn)
-  return dummy_predict_op, host_calls, scaffold, captured_predict_hooks.get()
-
-
-def _wrap_computation_in_while_loop(device, op_fn):
-  """Wraps the ops generated by `op_fn` in tf.while_loop."""
-
-  def computation(i):
-    with ops.control_dependencies(op_fn()):
-      return i + 1
-
-  iterations_per_loop_var = _create_or_get_iterations_per_loop()
-  # By setting parallel_iterations=1, the parallel execution in while_loop is
-  # basically turned off.
-  with ops.device(device):
-    iterations = array_ops.identity(iterations_per_loop_var)
-    return control_flow_ops.while_loop(
-        lambda i: i < iterations,
-        computation, [constant_op.constant(0)],
-        parallel_iterations=1)
-
-
-def _wrap_computation_in_while_loop_with_stopping_signals(device, op_fn):
-  """Wraps the ops generated by `op_fn` in tf.while_loop."""
-
-  def cond(scalar_stopping_signal):
-    return math_ops.logical_not(
-        _StopSignals.should_stop(scalar_stopping_signal))
-
-  def computation(unused_scalar_stopping_signal):
-    return_value = op_fn()
-    execute_ops = return_value['ops']
-    signals = return_value['signals']
-    with ops.control_dependencies(execute_ops):
-      return _StopSignals.as_scalar_stopping_signal(signals)
-
-  # By setting parallel_iterations=1, the parallel execution in while_loop is
-  # basically turned off.
-  with ops.device(device):
-    return control_flow_ops.while_loop(
-        cond,
-        computation, [_StopSignals.NON_STOPPING_SIGNAL],
-        parallel_iterations=1)
-
-
-def _validate_tpu_training_graph():
-  """Validates the graph before running distributed training.
-
-  Raises:
-    ValueError: If the graph seems invalid for running on TPU.
-  """
-  operations = ops.get_default_graph().get_operations()
-
-  # Check if there is at least one CrossReplicaSum operation in the graph.
-  # This should be introduced by using the CrossShardOptimizer wrapper.
-  cross_replica_sum_ops = [
-      o for o in operations if o.type == _CROSS_REPLICA_SUM_OP
-  ]
-  if not cross_replica_sum_ops:
-    raise ValueError(
-        'CrossShardOptimizer must be used for model training on TPUs.')
-
-
-class _CapturedObject(object):
-  """A placeholder to capture an object.
-
-  This is useful when we need to capture a Python object in the TensorFlow
-  control flow body function and use it outside the control flow.
-  """
-
-  def __init__(self):
-    self._object = None
-    self._captured = False
-
-  def capture(self, o):
-    if self._captured:
-      raise RuntimeError(
-          'InternalError: Object can be captured only once. Please file a '
-          'bug.')
-
-    self._captured = True
-    self._object = o
-
-  def get(self):
-    if not self._captured:
-      raise RuntimeError(
-          'InternalError: Object is not captured properly before `get`. 
' - 'Please file bug.') - return self._object - - -def _get_scaffold(captured_scaffold_fn): - """Retrieves the Scaffold from `captured_scaffold_fn`.""" - with _CapturingContext(message='Inside scaffold_fn'): - scaffold_fn = captured_scaffold_fn.get() - if scaffold_fn: - scaffold = scaffold_fn() - if scaffold is None: - raise ValueError( - 'TPUEstimatorSpec.scaffold_fn returns None, which is not allowed') - else: - scaffold = None - - if scaffold: - wrapped_finalize = scaffold.finalize - - def _finalize(): - with _CapturingContext('Inside Scaffold.finalize'): - wrapped_finalize() - - scaffold.finalize = _finalize - return scaffold - - -class _CapturingContext(control_flow_ops.ControlFlowContext): - """Tracks references to Tensors defined in TPU replication.""" - - def __init__(self, message): - control_flow_ops.ControlFlowContext.__init__(self) - self._message = message - - def AddOp(self, op): # pylint: disable=invalid-name - for c in op.inputs: - if tpu._TPU_REPLICATE_ATTR in c.op.node_def.attr: # pylint: disable=protected-access - raise ValueError('{}: Op {} depends on TPU computation {}, ' - 'which is not allowed.'.format(self._message, op, c)) - - def to_control_flow_context_def(self, context_def, export_scope=None): - # pylint: disable=useless-super-delegation - # NOTE(slebedev): the method is required by `ControlFlowContext`. - super(_CapturingContext, self).to_control_flow_context_def( - context_def, export_scope) - - def __enter__(self): - # pylint: disable=protected-access - self._g = ops.get_default_graph() - self._old = self._g._get_control_flow_context() - self._g._set_control_flow_context(self) - # pylint: enable=protected-access - - def __exit__(self, _, __, ___): # pylint: disable=invalid-name - self._g._set_control_flow_context(self._old) # pylint: disable=protected-access - - -class _Inputs(object): - """A data structure representing the input_fn returned values. - - This also supports the returned value from input_fn as `Dataset`. - """ - - def __init__(self, features=None, labels=None, dataset=None, signals=None): - if dataset is not None and (features is not None or labels is not None or - signals is not None): - raise RuntimeError('Internal Error: Either (features and labels) or ' - 'dataset should be provided, not both. Please file ' - 'bug') - - self._features = features - self._labels = labels - self._signals = signals - - self._dataset = dataset - self._iterator = None - - @staticmethod - def from_input_fn(return_values): - """Returns an `_Inputs` instance according to `input_fn` return value.""" - if isinstance(return_values, dataset_ops.Dataset): - dataset = return_values - return _Inputs(dataset=dataset) - - features, labels = _Inputs._parse_inputs(return_values) - return _Inputs(features, labels) - - @staticmethod - def _parse_inputs(return_values): - if isinstance(return_values, tuple): - features, labels = return_values - else: - features, labels = return_values, None - return features, labels - - @property - def is_dataset(self): - """Returns True if the return value from input_fn is Dataset.""" - return self._dataset is not None - - def dataset_initializer_hook(self): - """Returns a `SessionRunHook` to initialize this dataset. - - This must be called before `features_and_labels`. 
- """ - iterator = self._dataset.make_initializable_iterator() - # pylint: disable=protected-access - hook = estimator_util._DatasetInitializerHook(iterator) - # pylint: enable=protected-access - self._iterator = iterator - return hook - - def features_and_labels(self): - """Gets `features` and `labels`.""" - if self.is_dataset: - if self._iterator is None: - raise RuntimeError('Internal error: Must call dataset_initializer_hook ' - 'before calling features_and_labels(). Please file ' - 'a bug!') - return _Inputs._parse_inputs(self._iterator.get_next()) - - return (self._features, self._labels) - - def signals(self): - return self._signals - - @property - def dataset(self): - return self._dataset - - -class _InputsWithStoppingSignals(_Inputs): - """Inputs with `_StopSignals` inserted into the dataset.""" - - def __init__(self, - dataset, - batch_size, - add_padding=False, - num_invocations_per_step=1): - - assert dataset is not None - user_provided_dataset = dataset.map( - _InputsWithStoppingSignals.insert_stopping_signal( - stop=False, batch_size=batch_size, add_padding=add_padding)) - if num_invocations_per_step == 1: - final_batch_dataset = dataset.take(1).map( - _InputsWithStoppingSignals.insert_stopping_signal( - stop=True, batch_size=batch_size, add_padding=add_padding)) - else: - # We append (2 * num_invocations_per_step - 1) batches for exhausting the - # user_provided_dataset and stop properly. - # For example, if num_invocations_per_step is 2, we append 3 additional - # padding batches: b1, b2, b3. - # If user_provided_dataset contains two batches: a1, a2 - # Step 1: [a1, a2] - # Step 2: [b1, b2] -> STOP - # If user_provided_dataset contains three batches: a1, a2, a3. - # The training loops: - # Step 1: [a1, a2] - # Step 2: [a3, b1] - # Step 3: [b2, b3] -> STOP. - final_batch_dataset = dataset.take(1).map( - _InputsWithStoppingSignals.insert_stopping_signal( - stop=True, batch_size=batch_size, add_padding=add_padding)) - final_batch_dataset = final_batch_dataset.repeat( - 2 * num_invocations_per_step - 1) - - def _set_mask(data_dict): - signals = data_dict['signals'] - signals['padding_mask'] = array_ops.ones_like(signals['padding_mask']) - data_dict['signals'] = signals - return data_dict - - # Mask out the extra batch. - final_batch_dataset = final_batch_dataset.map(_set_mask) - - dataset = user_provided_dataset.concatenate(final_batch_dataset).prefetch(2) - - super(_InputsWithStoppingSignals, self).__init__(dataset=dataset) - self._current_inputs = None - - def features_and_labels(self): - if self._current_inputs is not None: - raise RuntimeError( - 'Internal Error: The previous inputs have not been properly ' - 'consumed. First call features_and_labels, then call signals.') - - inputs_with_signals = self._iterator.get_next() - features = inputs_with_signals['features'] - labels = inputs_with_signals.get('labels') - - self._current_inputs = inputs_with_signals - return features, labels - - def signals(self): - """Returns the `Signals` from `_Inputs`.""" - if self._current_inputs is None: - raise RuntimeError( - 'Internal Error: The current inputs have not been properly ' - 'generated. First call features_and_labels, then call signals.') - signals = self._current_inputs['signals'] - self._current_inputs = None - return signals - - @staticmethod - def insert_stopping_signal(stop, batch_size, add_padding=False): - """Inserts stopping_signal into dataset via _map_fn. 
- - Here we change the data structure in the dataset, such that the return value - is a dictionary now and `features`, `labels`, and `signals` are three - distinguished keys in that dict. This provides a better structure, which - eases the process to decompose the inputs (see `features_and_labels`). - - Args: - stop: bool, state of current stopping signals. - batch_size: int, batch size. - add_padding: bool, whether to pad the tensor to full batch size. - - Returns: - A map_fn passed to dataset.map API. - """ - - def _map_fn(*args): - """The map fn to insert signals.""" - if len(args) == 1: - # Unpack the single Tensor/dict argument as features. This is required - # for the input_fn returns no labels. - args = args[0] - features, labels = _Inputs._parse_inputs(args) - new_input_dict = {} - - if add_padding: - padding_mask, features, labels = ( - _PaddingSignals.pad_features_and_labels( - features, labels, batch_size)) - - new_input_dict['features'] = features - if labels is not None: - new_input_dict['labels'] = labels - - else: - new_input_dict['features'] = features - if labels is not None: - new_input_dict['labels'] = labels - padding_mask = None - - new_input_dict['signals'] = _StopSignals( - stop=stop, batch_size=batch_size, padding_mask=padding_mask).as_dict() - - return new_input_dict - - return _map_fn - - -class _StopSignals(object): - """Signals class holding all logic to handle TPU stopping condition.""" - - NON_STOPPING_SIGNAL = False - STOPPING_SIGNAL = True - - def __init__(self, stop, batch_size, padding_mask=None): - self._stop = stop - self._batch_size = batch_size - self._padding_mask = padding_mask - - def as_dict(self): - """Returns the signals as Python dict.""" - shape = [self._batch_size, 1] - dtype = dtypes.bool - - if self._stop: - stopping = array_ops.ones(shape=shape, dtype=dtype) - else: - stopping = array_ops.zeros(shape=shape, dtype=dtype) - - signals = {'stopping': stopping} - if self._padding_mask is not None: - signals['padding_mask'] = self._padding_mask - return signals - - @staticmethod - def as_scalar_stopping_signal(signals): - return array_ops.identity(signals['stopping'][0][0]) - - @staticmethod - def should_stop(scalar_stopping_signal): - """Detects whether scalar_stopping_signal indicates stopping.""" - if isinstance(scalar_stopping_signal, ops.Tensor): - # STOPPING_SIGNAL is a constant True. Here, the logical_and is just the TF - # way to express the bool check whether scalar_stopping_signal is True. - return math_ops.logical_and( - scalar_stopping_signal, _StopSignals.STOPPING_SIGNAL) - else: - # For non Tensor case, it is used in SessionRunHook. So, we cannot modify - # the graph anymore. Here, we use pure Python. 
- return bool(scalar_stopping_signal) - - -class _PaddingSignals(object): - """Signals class holding all logic to handle padding.""" - - @staticmethod - def pad_features_and_labels(features, labels, batch_size): - """Pads out the batch dimension of features and labels.""" - real_batch_size = array_ops.shape( - _PaddingSignals._find_any_tensor(features))[0] - - batch_size_tensor = constant_op.constant(batch_size, dtypes.int32) - - check_greater = check_ops.assert_greater_equal( - batch_size_tensor, real_batch_size, - data=(batch_size_tensor, real_batch_size), - message='The real batch size should not be greater than batch_size.') - - with ops.control_dependencies([check_greater]): - missing_count = batch_size_tensor - real_batch_size - - def pad_single_tensor(tensor): - """Pads out the batch dimension of a tensor to the complete batch_size.""" - rank = len(tensor.shape) - assert rank > 0 - padding = array_ops.stack([[0, missing_count]] + [[0, 0]] * (rank - 1)) - padded_shape = (batch_size,) + tuple(tensor.shape[1:]) - padded_tensor = array_ops.pad(tensor, padding) - padded_tensor.set_shape(padded_shape) - return padded_tensor - - def nest_pad(tensor_or_dict): - return nest.map_structure(pad_single_tensor, tensor_or_dict) - - features = nest_pad(features) - if labels is not None: - labels = nest_pad(labels) - - padding_mask = _PaddingSignals._padding_mask( - real_batch_size, missing_count, batch_size) - - return padding_mask, features, labels - - @staticmethod - def slice_tensor_or_dict(tensor_or_dict, signals): - """Slice the real Tensors according to padding mask in signals.""" - - padding_mask = signals['padding_mask'] - batch_size = array_ops.shape(padding_mask)[0] - - def verify_batch_size(tensor): - check_batch_size = math_ops.equal(batch_size, tensor.shape[0]) - with ops.control_dependencies([check_batch_size]): - return array_ops.identity(tensor) - - def slice_single_tensor(tensor): - rank = len(tensor.shape) - assert rank > 0 - real_batch_size = batch_size - math_ops.reduce_sum(padding_mask) - return verify_batch_size(tensor)[0:real_batch_size] - - # As we split the Tensors to all TPU cores and concat them back, it is - # important to ensure the real data is placed before padded ones, i.e., - # order is preserved. By that, the sliced padding mask should have all 0's. - # If this assertion failed, # the slice logic here would not hold. - sliced_padding_mask = slice_single_tensor(padding_mask) - assert_padding_mask = math_ops.equal( - math_ops.reduce_sum(sliced_padding_mask), 0) - - with ops.control_dependencies([assert_padding_mask]): - should_stop = _StopSignals.should_stop( - _StopSignals.as_scalar_stopping_signal(signals)) - - is_full_batch = math_ops.equal(math_ops.reduce_sum(padding_mask), 0) - - def slice_fn(tensor): - # If the current batch is full batch or part of stopping signals, we do - # not need to slice to save performance. 
- return control_flow_ops.cond( - math_ops.logical_or(should_stop, is_full_batch), - (lambda: verify_batch_size(tensor)), - (lambda: slice_single_tensor(tensor))) - - return nest.map_structure(slice_fn, tensor_or_dict) - - @staticmethod - def _find_any_tensor(batch_features): - tensors = [x for x in nest.flatten(batch_features) - if isinstance(x, ops.Tensor)] - if not tensors: - raise ValueError('Cannot find any Tensor in features dict.') - return tensors[0] - - @staticmethod - def _padding_mask(real_batch_size, missing_count, batch_size): - padding_mask = array_ops.concat( - [ - array_ops.zeros((real_batch_size,), dtype=dtypes.int32), - array_ops.ones((missing_count,), dtype=dtypes.int32) - ], - axis=0) - padding_mask.set_shape((batch_size,)) - return padding_mask - - -def _verify_cross_hosts_transfer_size(tensor_dict, message): - total_size = 0 - tensor_structure = {} - for key, tensor in tensor_dict.items(): - shape = tensor.shape - size = np.product(shape) * tensor.dtype.size - tensor_structure[key] = shape - total_size += size - if total_size >= _ONE_GIGABYTE: - raise ValueError( - '{} The transfer size is larger than the protobuf limit. Please ' - 'consider to use Tensors with smaller shapes or reduce batch ' - 'size. Given:\n' - '{}'.format(message, '\n'.join([ - ' -- Key: {}, Shape: {}'.format(k, v) - for k, v in tensor_structure.items()]))) - - -def _add_item_to_params(params, key, value): - """Adds a new item into `params`.""" - if isinstance(params, hparam.HParams): - # For HParams, we need to use special API. - if key in params: - params.set_hparam(key, value) - else: - params.add_hparam(key, value) - else: - # Now params is Python dict. - params[key] = value - - -def export_estimator_savedmodel(estimator, - export_dir_base, - serving_input_receiver_fn, - assets_extra=None, - as_text=False, - checkpoint_path=None, - strip_default_attrs=False): - """Export `Estimator` trained model for TPU inference. - - Args: - estimator: `Estimator` with which model has been trained. - export_dir_base: A string containing a directory in which to create - timestamped subdirectories containing exported SavedModels. - serving_input_receiver_fn: A function that takes no argument and - returns a `ServingInputReceiver` or `TensorServingInputReceiver`. - assets_extra: A dict specifying how to populate the assets.extra directory - within the exported SavedModel, or `None` if no extra assets are needed. - as_text: whether to write the SavedModel proto in text format. - checkpoint_path: The checkpoint path to export. If `None` (the default), - the most recent checkpoint found within the model directory is chosen. - strip_default_attrs: Boolean. If `True`, default-valued attributes will be - removed from the NodeDefs. - - Returns: - The string path to the exported directory. - """ - # `TPUEstimator` requires `tpu_config.RunConfig`, so we cannot use - # `estimator.config`. - config = tpu_config.RunConfig(model_dir=estimator.model_dir) - est = TPUEstimator( - estimator._model_fn, # pylint: disable=protected-access - config=config, - params=estimator.params, - use_tpu=True, - train_batch_size=2048, # Does not matter. - eval_batch_size=2048, # Does not matter. 
- ) - return est.export_savedmodel(export_dir_base, serving_input_receiver_fn, - assets_extra, - as_text, - checkpoint_path, - strip_default_attrs) diff --git a/transformer-xl/tf/train.py b/transformer-xl/tf/train.py deleted file mode 100644 index 5ad7449..0000000 --- a/transformer-xl/tf/train.py +++ /dev/null @@ -1,462 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import math -import time - -from absl import flags -import absl.logging as _logging # pylint: disable=unused-import - -from six.moves import xrange # pylint: disable=redefined-builtin - -import tensorflow as tf -from tensorflow.gfile import Exists as exists -import model -import data_utils -import tpu_estimator - -import numpy as np -from time import sleep - - -# TPU parameters -flags.DEFINE_string("master", default=None, - help="master") -flags.DEFINE_string("tpu", default=None, - help="The Cloud TPU to use for training. This should be either the name " - "used when creating the Cloud TPU, or a grpc://ip.address.of.tpu:8470 url.") -flags.DEFINE_string("gcp_project", default=None, - help="Project name for the Cloud TPU-enabled project. If not specified, " - "we will attempt to automatically detect the GCE project from metadata.") -flags.DEFINE_string("tpu_zone",default=None, - help="GCE zone where the Cloud TPU is located in. If not specified, we " - "will attempt to automatically detect the GCE project from metadata.") -flags.DEFINE_bool("use_tpu", default=True, - help="Use TPUs rather than plain CPUs.") -flags.DEFINE_integer("num_hosts", default=1, - help="number of TPU hosts") -flags.DEFINE_integer("num_core_per_host", default=8, - help="number of cores per host") - -# Experiment (data/checkpoint/directory) parameters -flags.DEFINE_string("data_dir", default="", - help="Path to tf-records directory.") -flags.DEFINE_string("record_info_dir", default="", - help="Path to local directory containing filenames.txt.") -flags.DEFINE_string("corpus_info_path", default="", - help="Path to corpus-info.json file.") -flags.DEFINE_string("model_dir", default=None, - help="Estimator model_dir.") -flags.DEFINE_bool("do_eval", default=False, - help="Whether to run eval on the dev set.") -flags.DEFINE_bool("track_mean", default=True, - help="Trace mean loss during training.") -flags.DEFINE_string("eval_ckpt_path", None, - help="Checkpoint path for evaluation." - "If set, model_dir will be ignored." - "If unset, will use the latest ckpt in model_dir.") -flags.DEFINE_string("warm_start_path", None, - help="Checkpoint path for warm start." - "If set, will clear Adam states." 
- "Note that the new model_dir should be different" - " from warm_start_path.") - -# Optimization paramenters -flags.DEFINE_float("learning_rate", default=2.5e-4, - help="Maximum learning rate.") -flags.DEFINE_float("clip", default=0.25, - help="Gradient clipping value.") -# for cosine decay -flags.DEFINE_float("min_lr_ratio", default=0.01, - help="Minimum ratio learning rate.") -flags.DEFINE_integer("warmup_steps", default=0, - help="Number of steps for linear lr warmup.") - -# Training parameters -flags.DEFINE_integer("train_batch_size", default=60, - help="Size of train batch.") -flags.DEFINE_integer("eval_batch_size", default=60, - help="Size of valid batch.") -flags.DEFINE_integer("train_steps", default=100000, - help="Total number of training steps.") -flags.DEFINE_integer("iterations", default=500, - help="Number of iterations per repeat loop.") -flags.DEFINE_integer("save_steps", default=10000, - help="number of steps for model checkpointing.") - -# Evaluation parameters -flags.DEFINE_integer("max_eval_batch", default=-1, - help="Set -1 to turn off. Only used in test mode.") -flags.DEFINE_bool("do_eval_only", default=False, - help="Run evaluation only.") -flags.DEFINE_integer("start_eval_steps", default=10000, - help="Which checkpoint to start with in `do_eval_only` mode.") -flags.DEFINE_string("eval_split", "valid", - help="Which data split to evaluate.") - -# Model paramenters -flags.DEFINE_integer("tgt_len", default=70, - help="Number of steps to predict") -flags.DEFINE_integer("mem_len", default=70, - help="Number of steps to cache") -flags.DEFINE_bool("same_length", default=False, - help="Same length attention") -flags.DEFINE_integer("clamp_len", default=-1, - help="Clamp length") - -flags.DEFINE_integer("n_layer", default=6, - help="Number of layers.") -flags.DEFINE_integer("d_model", default=500, - help="Dimension of the model.") -flags.DEFINE_integer("d_embed", default=500, - help="Dimension of the embeddings.") -flags.DEFINE_integer("n_head", default=10, - help="Number of attention heads.") -flags.DEFINE_integer("d_head", default=50, - help="Dimension of each attention head.") -flags.DEFINE_integer("d_inner", default=1000, - help="Dimension of inner hidden size in positionwise feed-forward.") -flags.DEFINE_float("dropout", default=0.1, - help="Dropout rate.") -flags.DEFINE_float("dropatt", default=0.1, - help="Attention dropout rate.") -flags.DEFINE_bool("untie_r", default=False, - help="untie r_w_bias and r_r_bias") - -# Adaptive Softmax / Embedding -flags.DEFINE_bool("tie_weight", default=True, - help="Tie embedding and softmax weight.") -flags.DEFINE_integer("div_val", default=1, - help="Divide the embedding size by this val for each bin") -flags.DEFINE_bool("proj_share_all_but_first", default=False, - help="True to share all but first projs, False not to share.") -flags.DEFINE_bool("proj_same_dim", default=True, - help="Project the bin with the same dimension.") - -# Parameter initialization -flags.DEFINE_enum("init", default="normal", - enum_values=["normal", "uniform"], - help="Initialization method.") -flags.DEFINE_float("init_std", default=0.02, - help="Initialization std when init is normal.") -flags.DEFINE_float("proj_init_std", default=0.01, - help="Initialization std for embedding projection.") -flags.DEFINE_float("init_range", default=0.1, - help="Initialization std when init is uniform.") - - -FLAGS = flags.FLAGS - -def metric_fn(loss): - """Evaluation metric Fn which runs on CPU.""" - perplexity = tf.exp(tf.reduce_mean(loss)) - bpc = tf.reduce_mean(loss) / 
tf.constant(math.log(2)) - return { - "perplexity": tf.metrics.mean(perplexity), - "bpc": tf.metrics.mean(bpc), - } - - -def get_model_fn(n_token, cutoffs, train_bin_sizes, eval_bin_sizes): - def model_fn(features, labels, mode, params): - is_training = (mode == tf.estimator.ModeKeys.TRAIN) - - - batch_size = params["batch_size"] - - mems = params["cache"] - inp = tf.transpose(features["inputs"], [1, 0]) - tgt = tf.transpose(features["labels"], [1, 0]) - - bin_sizes = train_bin_sizes if is_training else eval_bin_sizes - if bin_sizes: - inp_perms = [tf.transpose(features["inp_mask"], [1, 0])] - tgt_perms = [tf.transpose(features["tgt_mask"], [1, 0])] - - head_tgt = tf.transpose(features["head_labels"], [1, 0]) - - for b in range(len(bin_sizes)): - inp_perm = tf.transpose(features["inp_perm_{}".format(b)], [1, 0, 2]) - tgt_perm = tf.transpose(features["tgt_perm_{}".format(b)], [1, 0, 2]) - - inp_perms.append(inp_perm) - tgt_perms.append(tgt_perm) - else: - inp_perms, tgt_perms, head_tgt = None, None, None - - if FLAGS.init == "uniform": - initializer = tf.initializers.random_uniform( - minval=-FLAGS.init_range, - maxval=FLAGS.init_range, - seed=None) - elif FLAGS.init == "normal": - initializer = tf.initializers.random_normal( - stddev=FLAGS.init_std, - seed=None) - proj_initializer = tf.initializers.random_normal( - stddev=FLAGS.proj_init_std, - seed=None) - - tie_projs = [False for _ in range(len(cutoffs) + 1)] - if FLAGS.proj_share_all_but_first: - for i in range(1, len(tie_projs)): - tie_projs[i] = True - - tf.logging.info("Vocab size : {}".format(n_token)) - tf.logging.info("Batch size : {}".format(batch_size)) - - loss, new_mems = model.transformer( - dec_inp=inp, - target=tgt, - mems=mems, - n_token=n_token, - n_layer=FLAGS.n_layer, - d_model=FLAGS.d_model, - d_embed=FLAGS.d_embed, - n_head=FLAGS.n_head, - d_head=FLAGS.d_head, - d_inner=FLAGS.d_inner, - dropout=FLAGS.dropout, - dropatt=FLAGS.dropatt, - initializer=initializer, - is_training=is_training, - mem_len=FLAGS.mem_len, - cutoffs=cutoffs, - div_val=FLAGS.div_val, - tie_projs=tie_projs, - input_perms=inp_perms, - target_perms=tgt_perms, - head_target=head_tgt, - same_length=FLAGS.same_length, - clamp_len=FLAGS.clamp_len, - use_tpu=FLAGS.use_tpu, - untie_r=FLAGS.untie_r, - proj_same_dim=FLAGS.proj_same_dim) - - total_loss = tf.reduce_mean(loss) - - if mode == tf.estimator.ModeKeys.EVAL: - if FLAGS.use_tpu: - with tf.colocate_with(total_loss): - total_loss = tf.contrib.tpu.cross_replica_sum(total_loss) \ - / FLAGS.num_hosts / FLAGS.num_core_per_host - metric_loss = tf.tile(tf.reshape(total_loss, [1, 1]), [batch_size, 1]) - eval_spec = tf.contrib.tpu.TPUEstimatorSpec( - mode=mode, - loss=total_loss, - eval_metrics=(metric_fn, [metric_loss])) - - eval_spec.cache = new_mems - - return eval_spec - - # Configuring the optimization step. 
- global_step = tf.train.get_global_step() - - # increase the learning rate linearly - if FLAGS.warmup_steps > 0: - warmup_lr = tf.to_float(global_step) / tf.to_float(FLAGS.warmup_steps) \ - * FLAGS.learning_rate - else: - warmup_lr = 0.0 - - # number of parameters - num_params = np.sum([np.prod(v.shape) for v in tf.trainable_variables()]) - tf.logging.info("#params: {}".format(num_params)) - - # format_str = '{{:<{0}s}}\t{{}}'.format( - # max([len(v.name) for v in tf.trainable_variables()])) - # for v in tf.trainable_variables(): - # tf.logging.info(format_str.format(v.name, v.get_shape())) - - - # decay the learning rate using the cosine schedule - decay_lr = tf.train.cosine_decay( - FLAGS.learning_rate, - global_step=global_step-FLAGS.warmup_steps, - decay_steps=FLAGS.train_steps-FLAGS.warmup_steps, - alpha=FLAGS.min_lr_ratio) - - learning_rate = tf.where(global_step < FLAGS.warmup_steps, - warmup_lr, decay_lr) - - if FLAGS.use_tpu: - optimizer = tf.contrib.tpu.CrossShardOptimizer( - tf.train.AdamOptimizer(learning_rate=learning_rate)) - #GradientDescentOptimizer - else: - optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) - - grads_and_vars = optimizer.compute_gradients(total_loss) - gradients, variables = zip(*grads_and_vars) - clipped, _ = tf.clip_by_global_norm(gradients, FLAGS.clip) - train_op = optimizer.apply_gradients( - zip(clipped, variables), global_step=tf.train.get_global_step()) - - # Constucting TPUEstimatorSpec with cache. - train_spec = tf.contrib.tpu.TPUEstimatorSpec( - mode=mode, loss=total_loss, train_op=train_op) - - if FLAGS.mem_len < FLAGS.tgt_len: - new_mems = [new_mems[: FLAGS.mem_len] for mem_t in new_mems] - train_spec.cache = new_mems - - return train_spec - - return model_fn - - -def get_cache_fn(mem_len): - - def cache_fn(batch_size): - mems = [] - for l in xrange(FLAGS.n_layer): - if mem_len > 0: - mems.append( - tf.zeros([mem_len, batch_size, FLAGS.d_model], dtype=tf.float32)) - else: - mems.append(tf.zeros([mem_len], dtype=tf.float32)) - - return mems - - return cache_fn - - -def main(unused_argv): - del unused_argv # Unused - - tf.logging.set_verbosity(tf.logging.INFO) - - # Get corpus info - corpus_info = data_utils.get_corpus_info(FLAGS.corpus_info_path) - n_token = corpus_info["vocab_size"] - cutoffs = corpus_info["cutoffs"][1:-1] - - if FLAGS.save_steps == 0: - FLAGS.save_steps = None - - if not FLAGS.do_eval_only: - # Get train input function - train_input_fn, train_record_info = data_utils.get_input_fn( - record_info_dir=FLAGS.record_info_dir, - split="train", - per_host_bsz=FLAGS.train_batch_size // FLAGS.num_hosts, - tgt_len=FLAGS.tgt_len, - num_core_per_host=FLAGS.num_core_per_host, - num_hosts=FLAGS.num_hosts, - use_tpu=FLAGS.use_tpu) - train_bin_sizes = train_record_info["bin_sizes"] - num_train_batch = train_record_info["num_batch"] - - # Get train cache function - train_cache_fn = get_cache_fn(FLAGS.mem_len) - else: - train_bin_sizes = [] - num_train_batch = None - train_cache_fn = None - - if FLAGS.do_eval or FLAGS.do_eval_only: - assert FLAGS.num_hosts == 1 - # Get eval input function - eval_input_fn, eval_record_info = data_utils.get_input_fn( - record_info_dir=FLAGS.record_info_dir, - split=FLAGS.eval_split, - per_host_bsz=FLAGS.eval_batch_size // FLAGS.num_hosts, - tgt_len=FLAGS.tgt_len, - num_core_per_host=FLAGS.num_core_per_host, - num_hosts=FLAGS.num_hosts, - use_tpu=FLAGS.use_tpu) - eval_bin_sizes = eval_record_info["bin_sizes"] - num_eval_batch = eval_record_info["num_batch"] - - if FLAGS.max_eval_batch > 0: - 
num_eval_batch = min(FLAGS.max_eval_batch, num_eval_batch) - - # Get eval cache function - eval_cache_fn = get_cache_fn(FLAGS.mem_len) - model_fn = get_model_fn(n_token, cutoffs, train_bin_sizes, eval_bin_sizes) - else: - eval_cache_fn = None - model_fn = get_model_fn(n_token, cutoffs, train_bin_sizes, []) - - ##### Create estimator - # TPU Configuration - tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver( - FLAGS.tpu, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project) - - per_host_input = tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2 - run_config = tf.contrib.tpu.RunConfig( - cluster=tpu_cluster_resolver, - model_dir=FLAGS.model_dir, - session_config=tf.ConfigProto( - allow_soft_placement=True, log_device_placement=True), - tpu_config=tf.contrib.tpu.TPUConfig( - iterations_per_loop=FLAGS.iterations, - num_shards=FLAGS.num_core_per_host * FLAGS.num_hosts, - per_host_input_for_training=per_host_input), - keep_checkpoint_max=100000, # effectively save all checkpoints - save_checkpoints_secs=None, - save_checkpoints_steps=FLAGS.save_steps - ) - - # warm start - warm_start_from = None - if FLAGS.warm_start_path is not None: - warm_start_from = tf.estimator.WarmStartSettings( - ckpt_to_initialize_from=FLAGS.warm_start_path) - - # TPU Estimator - estimator = tpu_estimator.TPUEstimator( - model_fn=model_fn, - train_cache_fn=train_cache_fn, - eval_cache_fn=eval_cache_fn, - use_tpu=FLAGS.use_tpu, - config=run_config, - params={"data_dir":FLAGS.data_dir, "track_mean":FLAGS.track_mean}, - train_batch_size=FLAGS.train_batch_size, - eval_batch_size=FLAGS.eval_batch_size, - warm_start_from=warm_start_from) - - if FLAGS.do_eval_only: - if FLAGS.eval_ckpt_path is not None: - ret = estimator.evaluate(input_fn=eval_input_fn, steps=num_eval_batch, - checkpoint_path=FLAGS.eval_ckpt_path) - tf.logging.info("=" * 200) - log_str = "Eval results | " - for key, val in ret.items(): - log_str += "{} {} | ".format(key, val) - tf.logging.info(log_str) - tf.logging.info("=" * 200) - else: - ckpt_state = tf.train.get_checkpoint_state(FLAGS.model_dir) - eval_results = [] - for eval_checkpoint in ckpt_state.all_model_checkpoint_paths: - if not exists(eval_checkpoint + ".index"): continue - global_step = int(eval_checkpoint.split("-")[-1]) - if global_step < FLAGS.start_eval_steps or global_step > FLAGS.train_steps: - continue - ret = estimator.evaluate(input_fn=eval_input_fn, steps=num_eval_batch, - checkpoint_path=eval_checkpoint) - eval_results.append(ret) - - eval_results.sort(key = lambda x: x["perplexity"]) - - tf.logging.info("=" * 200) - log_str = "Best results | " - for key, val in eval_results[0].items(): - log_str += "{} {} | ".format(key, val) - tf.logging.info(log_str) - tf.logging.info("=" * 200) - else: - if not FLAGS.do_eval: - estimator.train(input_fn=train_input_fn, steps=FLAGS.train_steps) - else: - for step in range(0, FLAGS.train_steps, num_train_batch): - train_steps = min(FLAGS.train_steps - step, num_train_batch) - estimator.train(input_fn=train_input_fn, steps=train_steps) - estimator.evaluate(input_fn=eval_input_fn, steps=num_eval_batch) - - -if __name__ == "__main__": - tf.app.run() diff --git a/transformer-xl/tf/train_gpu.py b/transformer-xl/tf/train_gpu.py deleted file mode 100644 index bf83b79..0000000 --- a/transformer-xl/tf/train_gpu.py +++ /dev/null @@ -1,475 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import math -import time - -from absl import flags -import absl.logging as _logging 
# pylint: disable=unused-import - -import tensorflow as tf -import model -import data_utils - -from gpu_utils import assign_to_gpu, average_grads_and_vars - -import numpy as np - -# GPU config -flags.DEFINE_integer("num_hosts", default=1, - help="Number of TPU hosts") -flags.DEFINE_integer("num_core_per_host", default=8, - help="Number of cores per host") - -# Experiment (data/checkpoint/directory) config -flags.DEFINE_string("data_dir", default="", - help="Path to tf-records directory.") -flags.DEFINE_string("record_info_dir", default="", - help="Path to local directory containing filenames.txt.") -flags.DEFINE_string("corpus_info_path", default="", - help="Path to corpus-info.json file.") -flags.DEFINE_string("model_dir", default=None, - help="Estimator model_dir.") -flags.DEFINE_bool("do_train", default=True, - help="Whether to run training.") -flags.DEFINE_bool("do_eval", default=False, - help="Whether to run eval on the dev set.") -flags.DEFINE_string("eval_ckpt_path", None, - help="Checkpoint path for do_test evaluation." - "If set, model_dir will be ignored." - "If unset, will use the latest ckpt in model_dir.") -flags.DEFINE_string("warm_start_path", None, - help="Checkpoint path for warm start." - "If set, will clear Adam states." - "Note that the new model_dir should be different" - " from warm_start_path.") - -# Optimization config -flags.DEFINE_float("learning_rate", default=2.5e-4, - help="Maximum learning rate.") -flags.DEFINE_float("clip", default=0.25, - help="Gradient clipping value.") -# for cosine decay -flags.DEFINE_float("min_lr_ratio", default=0.004, - help="Minimum ratio learning rate.") -flags.DEFINE_integer("warmup_steps", default=0, - help="Number of steps for linear lr warmup.") - -# Training config -flags.DEFINE_integer("train_batch_size", default=60, - help="Size of train batch.") -flags.DEFINE_integer("eval_batch_size", default=60, - help="Size of valid batch.") -flags.DEFINE_integer("train_steps", default=100000, - help="Total number of training steps.") -flags.DEFINE_integer("iterations", default=500, - help="Number of iterations per repeat loop.") -flags.DEFINE_integer("save_steps", default=10000, - help="number of steps for model checkpointing.") - -# Evaluation config -flags.DEFINE_bool("do_test", default=False, - help="Run on the test set.") -flags.DEFINE_integer("max_eval_batch", default=-1, - help="Set -1 to turn off. 
Only used in test mode.") -flags.DEFINE_bool("do_eval_only", default=False, - help="Run evaluation only.") -flags.DEFINE_integer("start_eval_steps", default=10000, - help="Which checkpoint to start with in `do_eval_only` mode.") -flags.DEFINE_string("eval_split", "valid", - help="Which data split to evaluate.") - -# Model config -flags.DEFINE_integer("tgt_len", default=70, - help="Number of steps to predict") -flags.DEFINE_integer("mem_len", default=70, - help="Number of steps to cache") -flags.DEFINE_bool("same_length", default=False, - help="Same length attention") -flags.DEFINE_integer("clamp_len", default=-1, - help="Clamp length") - -flags.DEFINE_integer("n_layer", default=6, - help="Number of layers.") -flags.DEFINE_integer("d_model", default=500, - help="Dimension of the model.") -flags.DEFINE_integer("d_embed", default=500, - help="Dimension of the embeddings.") -flags.DEFINE_integer("n_head", default=10, - help="Number of attention heads.") -flags.DEFINE_integer("d_head", default=50, - help="Dimension of each attention head.") -flags.DEFINE_integer("d_inner", default=1000, - help="Dimension of inner hidden size in positionwise feed-forward.") -flags.DEFINE_float("dropout", default=0.1, - help="Dropout rate.") -flags.DEFINE_float("dropatt", default=0.1, - help="Attention dropout rate.") -flags.DEFINE_bool("untie_r", default=False, - help="untie r_w_bias and r_r_bias") - -# Adaptive Softmax / Embedding -flags.DEFINE_bool("tie_weight", default=True, - help="Tie embedding and softmax weight.") -flags.DEFINE_integer("div_val", default=1, - help="Divide the embedding size by this val for each bin") -flags.DEFINE_bool("proj_share_all_but_first", default=False, - help="True to share all but first projs, False not to share.") -flags.DEFINE_bool("proj_same_dim", default=True, - help="Project the bin with the same dimension.") - -# Parameter initialization -flags.DEFINE_enum("init", default="normal", - enum_values=["normal", "uniform"], - help="Initialization method.") -flags.DEFINE_float("init_std", default=0.02, - help="Initialization std when init is normal.") -flags.DEFINE_float("proj_init_std", default=0.01, - help="Initialization std for embedding projection.") -flags.DEFINE_float("init_range", default=0.1, - help="Initialization std when init is uniform.") - -FLAGS = flags.FLAGS - -def get_model_fn(n_token, cutoffs): - def model_fn(inp, tgt, mems, is_training): - inp = tf.transpose(inp, [1, 0]) - tgt = tf.transpose(tgt, [1, 0]) - - if FLAGS.init == "uniform": - initializer = tf.initializers.random_uniform( - minval=-FLAGS.init_range, - maxval=FLAGS.init_range, - seed=None) - elif FLAGS.init == "normal": - initializer = tf.initializers.random_normal( - stddev=FLAGS.init_std, - seed=None) - proj_initializer = tf.initializers.random_normal( - stddev=FLAGS.proj_init_std, - seed=None) - - tie_projs = [False for _ in range(len(cutoffs) + 1)] - if FLAGS.proj_share_all_but_first: - for i in range(1, len(tie_projs)): - tie_projs[i] = True - - loss, new_mems = model.transformer( - dec_inp=inp, - target=tgt, - mems=mems, - n_token=n_token, - n_layer=FLAGS.n_layer, - d_model=FLAGS.d_model, - d_embed=FLAGS.d_embed, - n_head=FLAGS.n_head, - d_head=FLAGS.d_head, - d_inner=FLAGS.d_inner, - dropout=FLAGS.dropout, - dropatt=FLAGS.dropatt, - initializer=initializer, - proj_initializer=proj_initializer, - is_training=is_training, - mem_len=FLAGS.mem_len, - cutoffs=cutoffs, - div_val=FLAGS.div_val, - tie_projs=tie_projs, - input_perms=None, - target_perms=None, - head_target=None, - 
same_length=FLAGS.same_length,
-        clamp_len=FLAGS.clamp_len,
-        use_tpu=False,
-        untie_r=FLAGS.untie_r,
-        proj_same_dim=FLAGS.proj_same_dim)
-
-    # number of parameters
-    num_params = sum([np.prod(v.shape) for v in tf.trainable_variables()])
-    tf.logging.info('#params: {}'.format(num_params))
-
-    # format_str = '{{:<{0}s}}\t{{}}'.format(
-    #     max([len(v.name) for v in tf.trainable_variables()]))
-    # for v in tf.trainable_variables():
-    #   tf.logging.info(format_str.format(v.name, v.get_shape()))
-
-    if is_training:
-      all_vars = tf.trainable_variables()
-      grads = tf.gradients(loss, all_vars)
-      grads_and_vars = list(zip(grads, all_vars))
-
-      return loss, new_mems, grads_and_vars
-    else:
-      return loss, new_mems
-
-  return model_fn
-
-
-def single_core_graph(n_token, cutoffs, is_training, inp, tgt, mems):
-  model_fn = get_model_fn(
-      n_token=n_token,
-      cutoffs=cutoffs)
-
-  model_ret = model_fn(
-      inp=inp,
-      tgt=tgt,
-      mems=mems,
-      is_training=is_training)
-
-  return model_ret
-
-
-def train(n_token, cutoffs, ps_device):
-  ##### Get input function and model function
-  train_input_fn, train_record_info = data_utils.get_input_fn(
-      record_info_dir=FLAGS.record_info_dir,
-      split="train",
-      per_host_bsz=FLAGS.train_batch_size,
-      tgt_len=FLAGS.tgt_len,
-      num_core_per_host=FLAGS.num_core_per_host,
-      num_hosts=1,
-      use_tpu=False)
-
-  tf.logging.info("num of batches {}".format(train_record_info["num_batch"]))
-
-  ##### Create computational graph
-  train_set = train_input_fn({
-      "batch_size": FLAGS.train_batch_size,
-      "data_dir": FLAGS.data_dir})
-
-  input_feed, label_feed = train_set.make_one_shot_iterator().get_next()
-
-  inputs = tf.split(input_feed, FLAGS.num_core_per_host, 0)
-  labels = tf.split(label_feed, FLAGS.num_core_per_host, 0)
-
-  per_core_bsz = FLAGS.train_batch_size // FLAGS.num_core_per_host
-
-  tower_mems, tower_losses, tower_new_mems, tower_grads_and_vars = [], [], [], []
-
-  for i in range(FLAGS.num_core_per_host):
-    reuse = True if i > 0 else None
-    with tf.device(assign_to_gpu(i, ps_device)), \
-        tf.variable_scope(tf.get_variable_scope(), reuse=reuse):
-
-      mems_i = [tf.placeholder(tf.float32,
-                               [FLAGS.mem_len, per_core_bsz, FLAGS.d_model])
-                for _ in range(FLAGS.n_layer)]
-
-      loss_i, new_mems_i, grads_and_vars_i = single_core_graph(
-          n_token=n_token,
-          cutoffs=cutoffs,
-          is_training=True,
-          inp=inputs[i],
-          tgt=labels[i],
-          mems=mems_i)
-
-      tower_mems.append(mems_i)
-      tower_losses.append(loss_i)
-      tower_new_mems.append(new_mems_i)
-      tower_grads_and_vars.append(grads_and_vars_i)
-
-  ## average losses and gradients across towers
-  if len(tower_losses) > 1:
-    loss = tf.add_n(tower_losses) / len(tower_losses)
-    grads_and_vars = average_grads_and_vars(tower_grads_and_vars)
-  else:
-    loss = tower_losses[0]
-    grads_and_vars = tower_grads_and_vars[0]
-  grads, all_vars = zip(*grads_and_vars)
-
-  ## clip gradient
-  clipped, gnorm = tf.clip_by_global_norm(grads, FLAGS.clip)
-  grads_and_vars = list(zip(clipped, all_vars))
-
-  ## configure the optimizer
-  global_step = tf.train.get_or_create_global_step()
-
-  # warmup stage: increase the learning rate linearly
-  if FLAGS.warmup_steps > 0:
-    warmup_lr = tf.to_float(global_step) / tf.to_float(FLAGS.warmup_steps) \
-        * FLAGS.learning_rate
-  else:
-    warmup_lr = 0.0
-
-  # decay stage: decay the learning rate using the cosine schedule
-  decay_lr = tf.train.cosine_decay(
-      FLAGS.learning_rate,
-      global_step=global_step-FLAGS.warmup_steps,
-      decay_steps=FLAGS.train_steps-FLAGS.warmup_steps,
-      alpha=FLAGS.min_lr_ratio)
-
-  # choose warmup or decay
-  learning_rate = tf.where(global_step < FLAGS.warmup_steps,
-                           warmup_lr, decay_lr)
-
-  # get the train op
-  optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
-  train_op = optimizer.apply_gradients(grads_and_vars, global_step)
-
-  ##### Training loop
-  tower_mems_np = [
-      [np.zeros([FLAGS.mem_len, per_core_bsz, FLAGS.d_model], dtype=np.float32)
-       for layer in range(FLAGS.n_layer)]
-      for core in range(FLAGS.num_core_per_host)
-  ]
-
-  saver = tf.train.Saver()
-
-  with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
-    sess.run(tf.global_variables_initializer())
-
-    if FLAGS.warm_start_path is not None:
-      tf.logging.info("warm start from {}".format(FLAGS.warm_start_path))
-      saver.restore(sess, FLAGS.warm_start_path)
-
-    fetches = [loss, tower_new_mems, global_step, gnorm, learning_rate, train_op]
-
-    total_loss, prev_step = 0., -1
-    while True:
-      feed_dict = {}
-      for i in range(FLAGS.num_core_per_host):
-        for m, m_np in zip(tower_mems[i], tower_mems_np[i]):
-          feed_dict[m] = m_np
-
-      fetched = sess.run(fetches, feed_dict=feed_dict)
-
-      loss_np, tower_mems_np, curr_step = fetched[:3]
-      total_loss += loss_np
-
-      if curr_step > 0 and curr_step % FLAGS.iterations == 0:
-        curr_loss = total_loss / (curr_step - prev_step)
-        tf.logging.info("[{}] | gnorm {:.2f} lr {:8.6f} "
-            "| loss {:.2f} | pplx {:>7.2f}, bpc {:>7.4f}".format(
-            curr_step, fetched[-3], fetched[-2],
-            curr_loss, math.exp(curr_loss), curr_loss / math.log(2)))
-        total_loss, prev_step = 0., curr_step
-
-      if curr_step > 0 and curr_step % FLAGS.save_steps == 0:
-        save_path = os.path.join(FLAGS.model_dir, "model.ckpt")
-        saver.save(sess, save_path)
-        tf.logging.info("Model saved in path: {}".format(save_path))
-
-      if curr_step == FLAGS.train_steps:
-        break
-
-
-def evaluate(n_token, cutoffs, ps_device):
-  ##### Get input function and model function
-  eval_input_fn, eval_record_info = data_utils.get_input_fn(
-      record_info_dir=FLAGS.record_info_dir,
-      split=FLAGS.eval_split,
-      per_host_bsz=FLAGS.eval_batch_size,
-      tgt_len=FLAGS.tgt_len,
-      num_core_per_host=FLAGS.num_core_per_host,
-      num_hosts=1,
-      use_tpu=False)
-
-  num_batch = eval_record_info["num_batch"]
-  if FLAGS.max_eval_batch > 0:
-    num_batch = FLAGS.max_eval_batch
-  tf.logging.info("num of batches {}".format(num_batch))
-
-  ##### Create computational graph
-  eval_set = eval_input_fn({
-      "batch_size": FLAGS.eval_batch_size,
-      "data_dir": FLAGS.data_dir})
-
-  input_feed, label_feed = eval_set.make_one_shot_iterator().get_next()
-
-  inputs = tf.split(input_feed, FLAGS.num_core_per_host, 0)
-  labels = tf.split(label_feed, FLAGS.num_core_per_host, 0)
-
-  per_core_bsz = FLAGS.eval_batch_size // FLAGS.num_core_per_host
-  tower_mems, tower_losses, tower_new_mems = [], [], []
-
-  for i in range(FLAGS.num_core_per_host):
-    with tf.device(assign_to_gpu(i, ps_device)), \
-        tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE):
-
-      mems_i = [tf.placeholder(tf.float32,
-                               [FLAGS.mem_len, per_core_bsz, FLAGS.d_model])
-                for _ in range(FLAGS.n_layer)]
-
-      loss_i, new_mems_i = single_core_graph(
-          n_token=n_token,
-          cutoffs=cutoffs,
-          is_training=False,
-          inp=inputs[i],
-          tgt=labels[i],
-          mems=mems_i)
-
-      tower_mems.append(mems_i)
-      tower_losses.append(loss_i)
-      tower_new_mems.append(new_mems_i)
-
-  ## sum losses across towers
-  if len(tower_losses) > 1:
-    loss = tf.add_n(tower_losses) / len(tower_losses)
-  else:
-    loss = tower_losses[0]
-
-  ##### Evaluation loop
-  tower_mems_np = [
-      [np.zeros([FLAGS.mem_len, per_core_bsz, FLAGS.d_model], dtype=np.float32)
-       for layer in range(FLAGS.n_layer)]
-      for core in range(FLAGS.num_core_per_host)
-  ]
-
-  saver = tf.train.Saver()
-
-  with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
-    sess.run(tf.global_variables_initializer())
-
-    if FLAGS.eval_ckpt_path is None:
-      eval_ckpt_path = tf.train.latest_checkpoint(FLAGS.model_dir)
-    else:
-      eval_ckpt_path = FLAGS.eval_ckpt_path
-    tf.logging.info("Evaluate {}".format(eval_ckpt_path))
-    saver.restore(sess, eval_ckpt_path)
-
-    fetches = [loss, tower_new_mems, tf.size(label_feed)]
-
-    format_str = "  >> processing batch {{:{0}d}}/{{:{0}d}} ..".format(
-        len(str(num_batch)))
-
-    total_loss, total_cnt = 0, 0
-    for step in range(num_batch):
-      if step % (num_batch // 10) == 0:
-        tf.logging.info(format_str.format(step, num_batch))
-
-      feed_dict = {}
-      for i in range(FLAGS.num_core_per_host):
-        for m, m_np in zip(tower_mems[i], tower_mems_np[i]):
-          feed_dict[m] = m_np
-
-      fetched = sess.run(fetches, feed_dict=feed_dict)
-
-      loss_np, tower_mems_np, cnt_np = fetched[:3]
-      total_loss += loss_np * cnt_np
-      total_cnt += cnt_np
-
-    avg_loss = total_loss / total_cnt
-    tf.logging.info("| loss {:.2f} | pplx {:>7.2f}, bpc {:>7.4f}".format(
-        avg_loss, math.exp(avg_loss), avg_loss / math.log(2)))
-
-
-def main(unused_argv):
-  del unused_argv  # Unused
-
-  tf.logging.set_verbosity(tf.logging.INFO)
-
-  # Get corpus info
-  corpus_info = data_utils.get_corpus_info(FLAGS.corpus_info_path)
-  n_token = corpus_info["vocab_size"]
-  cutoffs = corpus_info["cutoffs"][1:-1]
-  tf.logging.info("n_token {}".format(n_token))
-
-  if FLAGS.do_train:
-    train(n_token, cutoffs, "/gpu:0")
-  if FLAGS.do_eval:
-    evaluate(n_token, cutoffs, "/gpu:0")
-
-
-if __name__ == "__main__":
-  tf.app.run()
diff --git a/transformer-xl/tf/vocabulary.py b/transformer-xl/tf/vocabulary.py
deleted file mode 100644
index 20c728f..0000000
--- a/transformer-xl/tf/vocabulary.py
+++ /dev/null
@@ -1,170 +0,0 @@
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from collections import Counter, OrderedDict
-
-import numpy as np
-
-import tensorflow as tf
-
-from tensorflow.gfile import Open as open
-from tensorflow.gfile import Exists as exists
-
-class Vocab(object):
-  def __init__(self, special=[], min_freq=0, max_size=None, lower_case=True,
-               delimiter=None, vocab_file=None):
-    self.counter = Counter()
-    self.special = special
-    self.min_freq = min_freq
-    self.max_size = max_size
-    self.lower_case = lower_case
-    self.delimiter = delimiter
-    self.vocab_file = vocab_file
-
-  def tokenize(self, line, add_eos=False, add_double_eos=False):
-    line = line.strip()
-    # convert to lower case
-    if self.lower_case:
-      line = line.lower()
-
-    # empty delimiter '' will evaluate False
-    if self.delimiter == '':
-      symbols = line
-    else:
-      symbols = line.split(self.delimiter)
-
-    if add_double_eos: # lm1b
-      return ['<S>'] + symbols + ['<S>']
-    elif add_eos:
-      return symbols + ['<eos>']
-    else:
-      return symbols
-
-  def count_file(self, path, verbose=False, add_eos=False):
-    if verbose: print('counting file {} ...'.format(path))
-    assert exists(path)
-
-    sents = []
-    with open(path, 'r') as f:
-      for idx, line in enumerate(f):
-        if verbose and idx > 0 and idx % 500000 == 0:
-          print('  line {}'.format(idx))
-        symbols = self.tokenize(line, add_eos=add_eos)
-        self.counter.update(symbols)
-        sents.append(symbols)
-
-    return sents
-
-  def count_sents(self, sents, verbose=False):
-    """
-      sents : a list of sentences, each a list of tokenized symbols
-    """
-    if verbose: print('counting {} sents ...'.format(len(sents)))
-    for idx, symbols in enumerate(sents):
-      if verbose and idx > 0 and idx % 500000 == 0:
-        print('  line {}'.format(idx))
-      self.counter.update(symbols)
-
-  def _build_from_file(self, vocab_file):
-    self.idx2sym = []
-    self.sym2idx = OrderedDict()
-
-    with open(vocab_file, 'r') as f:
-      for line in f:
-        symb = line.strip().split()[0]
-        self.add_symbol(symb)
-    self.unk_idx = self.sym2idx['<UNK>']
-
-  def build_vocab(self):
-    if self.vocab_file:
-      print('building vocab from {}'.format(self.vocab_file))
-      self._build_from_file(self.vocab_file)
-      print('final vocab size {}'.format(len(self)))
-    else:
-      print('building vocab with min_freq={}, max_size={}'.format(
-          self.min_freq, self.max_size))
-      self.idx2sym = []
-      self.sym2idx = OrderedDict()
-
-      for sym in self.special:
-        self.add_special(sym)
-
-      for sym, cnt in self.counter.most_common(self.max_size):
-        if cnt < self.min_freq: break
-        self.add_symbol(sym)
-
-      print('final vocab size {} from {} unique tokens'.format(
-          len(self), len(self.counter)))
-
-  def encode_file(self, path, ordered=False, verbose=False, add_eos=True,
-                  add_double_eos=False):
-    if verbose: print('encoding file {} ...'.format(path))
-    assert exists(path)
-    encoded = []
-    with open(path, 'r') as f:
-      for idx, line in enumerate(f):
-        if verbose and idx > 0 and idx % 500000 == 0:
-          print('  line {}'.format(idx))
-        symbols = self.tokenize(line, add_eos=add_eos,
-                                add_double_eos=add_double_eos)
-        encoded.append(self.convert_to_nparray(symbols))
-
-    if ordered:
-      encoded = np.concatenate(encoded)
-
-    return encoded
-
-  def encode_sents(self, sents, ordered=False, verbose=False):
-    if verbose: print('encoding {} sents ...'.format(len(sents)))
-    encoded = []
-    for idx, symbols in enumerate(sents):
-      if verbose and idx > 0 and idx % 500000 == 0:
-        print('  line {}'.format(idx))
-      encoded.append(self.convert_to_nparray(symbols))
-
-    if ordered:
-      encoded = np.concatenate(encoded)
-
-    return encoded
-
-  def add_special(self, sym):
-    if sym not in self.sym2idx:
-      self.idx2sym.append(sym)
-      self.sym2idx[sym] = len(self.idx2sym) - 1
-      setattr(self, '{}_idx'.format(sym.strip('<>')), self.sym2idx[sym])
-
-  def add_symbol(self, sym):
-    if sym not in self.sym2idx:
-      self.idx2sym.append(sym)
-      self.sym2idx[sym] = len(self.idx2sym) - 1
-
-  def get_sym(self, idx):
-    assert 0 <= idx < len(self), 'Index {} out of range'.format(idx)
-    return self.idx2sym[idx]
-
-  def get_idx(self, sym):
-    if sym in self.sym2idx:
-      return self.sym2idx[sym]
-    else:
-      assert hasattr(self, 'unk_idx')
-      return self.sym2idx.get(sym, self.unk_idx)
-
-  def get_symbols(self, indices):
-    return [self.get_sym(idx) for idx in indices]
-
-  def get_indices(self, symbols):
-    return [self.get_idx(sym) for sym in symbols]
-
-  def convert_to_nparray(self, symbols):
-    nparray = np.array(self.get_indices(symbols), dtype=np.int64)
-    return nparray
-
-  def convert_to_sent(self, indices, exclude=None):
-    if exclude is None:
-      return ' '.join([self.get_sym(idx) for idx in indices])
-    else:
-      return ' '.join([self.get_sym(idx) for idx in indices if idx not in exclude])
-
-  def __len__(self):
-    return len(self.idx2sym)
diff --git a/uv.lock b/uv.lock
index 988c22e..bf27f7f 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1,6 +1,10 @@
 version = 1
 revision = 3
-requires-python = ">=3.12"
+requires-python = ">=3.11"
+resolution-markers = [
+    "python_full_version >= '3.12'",
+    "python_full_version < '3.12'",
+]
 
 [[package]]
 name = "aiohappyeyeballs"
@@ -26,6 +30,23 @@ dependencies = [
 ]
 sdist = { url = 
"https://files.pythonhosted.org/packages/1c/ce/3b83ebba6b3207a7135e5fcaba49706f8a4b6008153b4e30540c982fae26/aiohttp-3.13.2.tar.gz", hash = "sha256:40176a52c186aefef6eb3cad2cdd30cd06e3afbe88fe8ab2af9c0b90f228daca", size = 7837994, upload-time = "2025-10-28T20:59:39.937Z" } wheels = [ + { url = "https://files.pythonhosted.org/packages/35/74/b321e7d7ca762638cdf8cdeceb39755d9c745aff7a64c8789be96ddf6e96/aiohttp-3.13.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:4647d02df098f6434bafd7f32ad14942f05a9caa06c7016fdcc816f343997dd0", size = 743409, upload-time = "2025-10-28T20:56:00.354Z" }, + { url = "https://files.pythonhosted.org/packages/99/3d/91524b905ec473beaf35158d17f82ef5a38033e5809fe8742e3657cdbb97/aiohttp-3.13.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:e3403f24bcb9c3b29113611c3c16a2a447c3953ecf86b79775e7be06f7ae7ccb", size = 497006, upload-time = "2025-10-28T20:56:01.85Z" }, + { url = "https://files.pythonhosted.org/packages/eb/d3/7f68bc02a67716fe80f063e19adbd80a642e30682ce74071269e17d2dba1/aiohttp-3.13.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:43dff14e35aba17e3d6d5ba628858fb8cb51e30f44724a2d2f0c75be492c55e9", size = 493195, upload-time = "2025-10-28T20:56:03.314Z" }, + { url = "https://files.pythonhosted.org/packages/98/31/913f774a4708775433b7375c4f867d58ba58ead833af96c8af3621a0d243/aiohttp-3.13.2-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e2a9ea08e8c58bb17655630198833109227dea914cd20be660f52215f6de5613", size = 1747759, upload-time = "2025-10-28T20:56:04.904Z" }, + { url = "https://files.pythonhosted.org/packages/e8/63/04efe156f4326f31c7c4a97144f82132c3bb21859b7bb84748d452ccc17c/aiohttp-3.13.2-cp311-cp311-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:53b07472f235eb80e826ad038c9d106c2f653584753f3ddab907c83f49eedead", size = 1704456, upload-time = "2025-10-28T20:56:06.986Z" }, + { url = "https://files.pythonhosted.org/packages/8e/02/4e16154d8e0a9cf4ae76f692941fd52543bbb148f02f098ca73cab9b1c1b/aiohttp-3.13.2-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:e736c93e9c274fce6419af4aac199984d866e55f8a4cec9114671d0ea9688780", size = 1807572, upload-time = "2025-10-28T20:56:08.558Z" }, + { url = "https://files.pythonhosted.org/packages/34/58/b0583defb38689e7f06798f0285b1ffb3a6fb371f38363ce5fd772112724/aiohttp-3.13.2-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:ff5e771f5dcbc81c64898c597a434f7682f2259e0cd666932a913d53d1341d1a", size = 1895954, upload-time = "2025-10-28T20:56:10.545Z" }, + { url = "https://files.pythonhosted.org/packages/6b/f3/083907ee3437425b4e376aa58b2c915eb1a33703ec0dc30040f7ae3368c6/aiohttp-3.13.2-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a3b6fb0c207cc661fa0bf8c66d8d9b657331ccc814f4719468af61034b478592", size = 1747092, upload-time = "2025-10-28T20:56:12.118Z" }, + { url = "https://files.pythonhosted.org/packages/ac/61/98a47319b4e425cc134e05e5f3fc512bf9a04bf65aafd9fdcda5d57ec693/aiohttp-3.13.2-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:97a0895a8e840ab3520e2288db7cace3a1981300d48babeb50e7425609e2e0ab", size = 1606815, upload-time = "2025-10-28T20:56:14.191Z" }, + { url = "https://files.pythonhosted.org/packages/97/4b/e78b854d82f66bb974189135d31fce265dee0f5344f64dd0d345158a5973/aiohttp-3.13.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = 
"sha256:9e8f8afb552297aca127c90cb840e9a1d4bfd6a10d7d8f2d9176e1acc69bad30", size = 1723789, upload-time = "2025-10-28T20:56:16.101Z" }, + { url = "https://files.pythonhosted.org/packages/ed/fc/9d2ccc794fc9b9acd1379d625c3a8c64a45508b5091c546dea273a41929e/aiohttp-3.13.2-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:ed2f9c7216e53c3df02264f25d824b079cc5914f9e2deba94155190ef648ee40", size = 1718104, upload-time = "2025-10-28T20:56:17.655Z" }, + { url = "https://files.pythonhosted.org/packages/66/65/34564b8765ea5c7d79d23c9113135d1dd3609173da13084830f1507d56cf/aiohttp-3.13.2-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:99c5280a329d5fa18ef30fd10c793a190d996567667908bef8a7f81f8202b948", size = 1785584, upload-time = "2025-10-28T20:56:19.238Z" }, + { url = "https://files.pythonhosted.org/packages/30/be/f6a7a426e02fc82781afd62016417b3948e2207426d90a0e478790d1c8a4/aiohttp-3.13.2-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:2ca6ffef405fc9c09a746cb5d019c1672cd7f402542e379afc66b370833170cf", size = 1595126, upload-time = "2025-10-28T20:56:20.836Z" }, + { url = "https://files.pythonhosted.org/packages/e5/c7/8e22d5d28f94f67d2af496f14a83b3c155d915d1fe53d94b66d425ec5b42/aiohttp-3.13.2-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:47f438b1a28e926c37632bff3c44df7d27c9b57aaf4e34b1def3c07111fdb782", size = 1800665, upload-time = "2025-10-28T20:56:22.922Z" }, + { url = "https://files.pythonhosted.org/packages/d1/11/91133c8b68b1da9fc16555706aa7276fdf781ae2bb0876c838dd86b8116e/aiohttp-3.13.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:9acda8604a57bb60544e4646a4615c1866ee6c04a8edef9b8ee6fd1d8fa2ddc8", size = 1739532, upload-time = "2025-10-28T20:56:25.924Z" }, + { url = "https://files.pythonhosted.org/packages/17/6b/3747644d26a998774b21a616016620293ddefa4d63af6286f389aedac844/aiohttp-3.13.2-cp311-cp311-win32.whl", hash = "sha256:868e195e39b24aaa930b063c08bb0c17924899c16c672a28a65afded9c46c6ec", size = 431876, upload-time = "2025-10-28T20:56:27.524Z" }, + { url = "https://files.pythonhosted.org/packages/c3/63/688462108c1a00eb9f05765331c107f95ae86f6b197b865d29e930b7e462/aiohttp-3.13.2-cp311-cp311-win_amd64.whl", hash = "sha256:7fd19df530c292542636c2a9a85854fab93474396a52f1695e799186bbd7f24c", size = 456205, upload-time = "2025-10-28T20:56:29.062Z" }, { url = "https://files.pythonhosted.org/packages/29/9b/01f00e9856d0a73260e86dd8ed0c2234a466c5c1712ce1c281548df39777/aiohttp-3.13.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:b1e56bab2e12b2b9ed300218c351ee2a3d8c8fdab5b1ec6193e11a817767e47b", size = 737623, upload-time = "2025-10-28T20:56:30.797Z" }, { url = "https://files.pythonhosted.org/packages/5a/1b/4be39c445e2b2bd0aab4ba736deb649fabf14f6757f405f0c9685019b9e9/aiohttp-3.13.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:364e25edaabd3d37b1db1f0cbcee8c73c9a3727bfa262b83e5e4cf3489a2a9dc", size = 492664, upload-time = "2025-10-28T20:56:32.708Z" }, { url = "https://files.pythonhosted.org/packages/28/66/d35dcfea8050e131cdd731dff36434390479b4045a8d0b9d7111b0a968f1/aiohttp-3.13.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:c5c94825f744694c4b8db20b71dba9a257cd2ba8e010a803042123f3a25d50d7", size = 491808, upload-time = "2025-10-28T20:56:34.57Z" }, @@ -111,30 +132,35 @@ wheels = [ [[package]] name = "alembic" -version = "1.17.1" +version = "1.17.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "mako" }, { name = "sqlalchemy" }, { name = "typing-extensions" }, ] -sdist = { url = 
"https://files.pythonhosted.org/packages/6e/b6/2a81d7724c0c124edc5ec7a167e85858b6fd31b9611c6fb8ecf617b7e2d3/alembic-1.17.1.tar.gz", hash = "sha256:8a289f6778262df31571d29cca4c7fbacd2f0f582ea0816f4c399b6da7528486", size = 1981285, upload-time = "2025-10-29T00:23:16.667Z" } +sdist = { url = "https://files.pythonhosted.org/packages/02/a6/74c8cadc2882977d80ad756a13857857dbcf9bd405bc80b662eb10651282/alembic-1.17.2.tar.gz", hash = "sha256:bbe9751705c5e0f14877f02d46c53d10885e377e3d90eda810a016f9baa19e8e", size = 1988064, upload-time = "2025-11-14T20:35:04.057Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/a5/32/7df1d81ec2e50fb661944a35183d87e62d3f6c6d9f8aff64a4f245226d55/alembic-1.17.1-py3-none-any.whl", hash = "sha256:cbc2386e60f89608bb63f30d2d6cc66c7aaed1fe105bd862828600e5ad167023", size = 247848, upload-time = "2025-10-29T00:23:18.79Z" }, + { url = "https://files.pythonhosted.org/packages/ba/88/6237e97e3385b57b5f1528647addea5cc03d4d65d5979ab24327d41fb00d/alembic-1.17.2-py3-none-any.whl", hash = "sha256:f483dd1fe93f6c5d49217055e4d15b905b425b6af906746abb35b69c1996c4e6", size = 248554, upload-time = "2025-11-14T20:35:05.699Z" }, ] +[[package]] +name = "antlr4-python3-runtime" +version = "4.9.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/3e/38/7859ff46355f76f8d19459005ca000b6e7012f2f1ca597746cbcd1fbfe5e/antlr4-python3-runtime-4.9.3.tar.gz", hash = "sha256:f224469b4168294902bb1efa80a8bf7855f24c99aef99cbefc1bcd3cce77881b", size = 117034, upload-time = "2021-11-06T17:52:23.524Z" } + [[package]] name = "anyio" -version = "4.11.0" +version = "4.12.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "idna" }, - { name = "sniffio" }, { name = "typing-extensions", marker = "python_full_version < '3.13'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/c6/78/7d432127c41b50bccba979505f272c16cbcadcc33645d5fa3a738110ae75/anyio-4.11.0.tar.gz", hash = "sha256:82a8d0b81e318cc5ce71a5f1f8b5c4e63619620b63141ef8c995fa0db95a57c4", size = 219094, upload-time = "2025-09-23T09:19:12.58Z" } +sdist = { url = "https://files.pythonhosted.org/packages/16/ce/8a777047513153587e5434fd752e89334ac33e379aa3497db860eeb60377/anyio-4.12.0.tar.gz", hash = "sha256:73c693b567b0c55130c104d0b43a9baf3aa6a31fc6110116509f27bf75e21ec0", size = 228266, upload-time = "2025-11-28T23:37:38.911Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/15/b3/9b1a8074496371342ec1e796a96f99c82c945a339cd81a8e73de28b4cf9e/anyio-4.11.0-py3-none-any.whl", hash = "sha256:0287e96f4d26d4149305414d4e3bc32f0dcd0862365a4bddea19d7a1ec38c4fc", size = 109097, upload-time = "2025-09-23T09:19:10.601Z" }, + { url = "https://files.pythonhosted.org/packages/7f/9c/36c5c37947ebfb8c7f22e0eb6e4d188ee2d53aa3880f3f2744fb894f0cb1/anyio-4.12.0-py3-none-any.whl", hash = "sha256:dad2376a628f98eeca4881fc56cd06affd18f659b17a747d3ff0307ced94b1bb", size = 113362, upload-time = "2025-11-28T23:36:57.897Z" }, ] [[package]] @@ -161,6 +187,22 @@ version = "3.4.4" source = { registry = "https://pypi.org/simple" } sdist = { url = "https://files.pythonhosted.org/packages/13/69/33ddede1939fdd074bce5434295f38fae7136463422fe4fd3e0e89b98062/charset_normalizer-3.4.4.tar.gz", hash = "sha256:94537985111c35f28720e43603b8e7b43a6ecfb2ce1d3058bbe955b73404e21a", size = 129418, upload-time = "2025-10-14T04:42:32.879Z" } wheels = [ + { url = 
"https://files.pythonhosted.org/packages/ed/27/c6491ff4954e58a10f69ad90aca8a1b6fe9c5d3c6f380907af3c37435b59/charset_normalizer-3.4.4-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:6e1fcf0720908f200cd21aa4e6750a48ff6ce4afe7ff5a79a90d5ed8a08296f8", size = 206988, upload-time = "2025-10-14T04:40:33.79Z" }, + { url = "https://files.pythonhosted.org/packages/94/59/2e87300fe67ab820b5428580a53cad894272dbb97f38a7a814a2a1ac1011/charset_normalizer-3.4.4-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5f819d5fe9234f9f82d75bdfa9aef3a3d72c4d24a6e57aeaebba32a704553aa0", size = 147324, upload-time = "2025-10-14T04:40:34.961Z" }, + { url = "https://files.pythonhosted.org/packages/07/fb/0cf61dc84b2b088391830f6274cb57c82e4da8bbc2efeac8c025edb88772/charset_normalizer-3.4.4-cp311-cp311-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:a59cb51917aa591b1c4e6a43c132f0cdc3c76dbad6155df4e28ee626cc77a0a3", size = 142742, upload-time = "2025-10-14T04:40:36.105Z" }, + { url = "https://files.pythonhosted.org/packages/62/8b/171935adf2312cd745d290ed93cf16cf0dfe320863ab7cbeeae1dcd6535f/charset_normalizer-3.4.4-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:8ef3c867360f88ac904fd3f5e1f902f13307af9052646963ee08ff4f131adafc", size = 160863, upload-time = "2025-10-14T04:40:37.188Z" }, + { url = "https://files.pythonhosted.org/packages/09/73/ad875b192bda14f2173bfc1bc9a55e009808484a4b256748d931b6948442/charset_normalizer-3.4.4-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d9e45d7faa48ee908174d8fe84854479ef838fc6a705c9315372eacbc2f02897", size = 157837, upload-time = "2025-10-14T04:40:38.435Z" }, + { url = "https://files.pythonhosted.org/packages/6d/fc/de9cce525b2c5b94b47c70a4b4fb19f871b24995c728e957ee68ab1671ea/charset_normalizer-3.4.4-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:840c25fb618a231545cbab0564a799f101b63b9901f2569faecd6b222ac72381", size = 151550, upload-time = "2025-10-14T04:40:40.053Z" }, + { url = "https://files.pythonhosted.org/packages/55/c2/43edd615fdfba8c6f2dfbd459b25a6b3b551f24ea21981e23fb768503ce1/charset_normalizer-3.4.4-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:ca5862d5b3928c4940729dacc329aa9102900382fea192fc5e52eb69d6093815", size = 149162, upload-time = "2025-10-14T04:40:41.163Z" }, + { url = "https://files.pythonhosted.org/packages/03/86/bde4ad8b4d0e9429a4e82c1e8f5c659993a9a863ad62c7df05cf7b678d75/charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d9c7f57c3d666a53421049053eaacdd14bbd0a528e2186fcb2e672effd053bb0", size = 150019, upload-time = "2025-10-14T04:40:42.276Z" }, + { url = "https://files.pythonhosted.org/packages/1f/86/a151eb2af293a7e7bac3a739b81072585ce36ccfb4493039f49f1d3cae8c/charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:277e970e750505ed74c832b4bf75dac7476262ee2a013f5574dd49075879e161", size = 143310, upload-time = "2025-10-14T04:40:43.439Z" }, + { url = "https://files.pythonhosted.org/packages/b5/fe/43dae6144a7e07b87478fdfc4dbe9efd5defb0e7ec29f5f58a55aeef7bf7/charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:31fd66405eaf47bb62e8cd575dc621c56c668f27d46a61d975a249930dd5e2a4", size = 162022, upload-time = "2025-10-14T04:40:44.547Z" }, + { url = 
"https://files.pythonhosted.org/packages/80/e6/7aab83774f5d2bca81f42ac58d04caf44f0cc2b65fc6db2b3b2e8a05f3b3/charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:0d3d8f15c07f86e9ff82319b3d9ef6f4bf907608f53fe9d92b28ea9ae3d1fd89", size = 149383, upload-time = "2025-10-14T04:40:46.018Z" }, + { url = "https://files.pythonhosted.org/packages/4f/e8/b289173b4edae05c0dde07f69f8db476a0b511eac556dfe0d6bda3c43384/charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:9f7fcd74d410a36883701fafa2482a6af2ff5ba96b9a620e9e0721e28ead5569", size = 159098, upload-time = "2025-10-14T04:40:47.081Z" }, + { url = "https://files.pythonhosted.org/packages/d8/df/fe699727754cae3f8478493c7f45f777b17c3ef0600e28abfec8619eb49c/charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:ebf3e58c7ec8a8bed6d66a75d7fb37b55e5015b03ceae72a8e7c74495551e224", size = 152991, upload-time = "2025-10-14T04:40:48.246Z" }, + { url = "https://files.pythonhosted.org/packages/1a/86/584869fe4ddb6ffa3bd9f491b87a01568797fb9bd8933f557dba9771beaf/charset_normalizer-3.4.4-cp311-cp311-win32.whl", hash = "sha256:eecbc200c7fd5ddb9a7f16c7decb07b566c29fa2161a16cf67b8d068bd21690a", size = 99456, upload-time = "2025-10-14T04:40:49.376Z" }, + { url = "https://files.pythonhosted.org/packages/65/f6/62fdd5feb60530f50f7e38b4f6a1d5203f4d16ff4f9f0952962c044e919a/charset_normalizer-3.4.4-cp311-cp311-win_amd64.whl", hash = "sha256:5ae497466c7901d54b639cf42d5b8c1b6a4fead55215500d2f486d34db48d016", size = 106978, upload-time = "2025-10-14T04:40:50.844Z" }, + { url = "https://files.pythonhosted.org/packages/7a/9d/0710916e6c82948b3be62d9d398cb4fcf4e97b56d6a6aeccd66c4b2f2bd5/charset_normalizer-3.4.4-cp311-cp311-win_arm64.whl", hash = "sha256:65e2befcd84bc6f37095f5961e68a6f077bf44946771354a28ad434c2cce0ae1", size = 99969, upload-time = "2025-10-14T04:40:52.272Z" }, { url = "https://files.pythonhosted.org/packages/f3/85/1637cd4af66fa687396e757dec650f28025f2a2f5a5531a3208dc0ec43f2/charset_normalizer-3.4.4-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:0a98e6759f854bd25a58a73fa88833fba3b7c491169f86ce1180c948ab3fd394", size = 208425, upload-time = "2025-10-14T04:40:53.353Z" }, { url = "https://files.pythonhosted.org/packages/9d/6a/04130023fef2a0d9c62d0bae2649b69f7b7d8d24ea5536feef50551029df/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b5b290ccc2a263e8d185130284f8501e3e36c5e02750fc6b6bdeb2e9e96f1e25", size = 148162, upload-time = "2025-10-14T04:40:54.558Z" }, { url = "https://files.pythonhosted.org/packages/78/29/62328d79aa60da22c9e0b9a66539feae06ca0f5a4171ac4f7dc285b83688/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:74bb723680f9f7a6234dcf67aea57e708ec1fbdf5699fb91dfd6f511b0a320ef", size = 144558, upload-time = "2025-10-14T04:40:55.677Z" }, @@ -212,18 +254,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/0a/4c/925909008ed5a988ccbb72dcc897407e5d6d3bd72410d69e051fc0c14647/charset_normalizer-3.4.4-py3-none-any.whl", hash = "sha256:7a32c560861a02ff789ad905a2fe94e3f840803362c84fecf1851cb4cf3dc37f", size = 53402, upload-time = "2025-10-14T04:42:31.76Z" }, ] -[[package]] -name = "click" -version = "8.3.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "colorama", marker = "sys_platform == 'win32'" }, -] -sdist = { url = 
"https://files.pythonhosted.org/packages/3d/fa/656b739db8587d7b5dfa22e22ed02566950fbfbcdc20311993483657a5c0/click-8.3.1.tar.gz", hash = "sha256:12ff4785d337a1bb490bb7e9c2b1ee5da3112e94a8622f26a6c77f5d2fc6842a", size = 295065, upload-time = "2025-11-15T20:45:42.706Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/98/78/01c019cdb5d6498122777c1a43056ebb3ebfeef2076d9d026bfe15583b2b/click-8.3.1-py3-none-any.whl", hash = "sha256:981153a64e25f12d547d3426c367a4857371575ee7ad18df2a6183ab0545b2a6", size = 108274, upload-time = "2025-11-15T20:45:41.139Z" }, -] - [[package]] name = "colorama" version = "0.4.6" @@ -254,6 +284,17 @@ dependencies = [ ] sdist = { url = "https://files.pythonhosted.org/packages/58/01/1253e6698a07380cd31a736d248a3f2a50a7c88779a1813da27503cadc2a/contourpy-1.3.3.tar.gz", hash = "sha256:083e12155b210502d0bca491432bb04d56dc3432f95a979b429f2848c3dbe880", size = 13466174, upload-time = "2025-07-26T12:03:12.549Z" } wheels = [ + { url = "https://files.pythonhosted.org/packages/91/2e/c4390a31919d8a78b90e8ecf87cd4b4c4f05a5b48d05ec17db8e5404c6f4/contourpy-1.3.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:709a48ef9a690e1343202916450bc48b9e51c049b089c7f79a267b46cffcdaa1", size = 288773, upload-time = "2025-07-26T12:01:02.277Z" }, + { url = "https://files.pythonhosted.org/packages/0d/44/c4b0b6095fef4dc9c420e041799591e3b63e9619e3044f7f4f6c21c0ab24/contourpy-1.3.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:23416f38bfd74d5d28ab8429cc4d63fa67d5068bd711a85edb1c3fb0c3e2f381", size = 270149, upload-time = "2025-07-26T12:01:04.072Z" }, + { url = "https://files.pythonhosted.org/packages/30/2e/dd4ced42fefac8470661d7cb7e264808425e6c5d56d175291e93890cce09/contourpy-1.3.3-cp311-cp311-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:929ddf8c4c7f348e4c0a5a3a714b5c8542ffaa8c22954862a46ca1813b667ee7", size = 329222, upload-time = "2025-07-26T12:01:05.688Z" }, + { url = "https://files.pythonhosted.org/packages/f2/74/cc6ec2548e3d276c71389ea4802a774b7aa3558223b7bade3f25787fafc2/contourpy-1.3.3-cp311-cp311-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:9e999574eddae35f1312c2b4b717b7885d4edd6cb46700e04f7f02db454e67c1", size = 377234, upload-time = "2025-07-26T12:01:07.054Z" }, + { url = "https://files.pythonhosted.org/packages/03/b3/64ef723029f917410f75c09da54254c5f9ea90ef89b143ccadb09df14c15/contourpy-1.3.3-cp311-cp311-manylinux_2_26_s390x.manylinux_2_28_s390x.whl", hash = "sha256:0bf67e0e3f482cb69779dd3061b534eb35ac9b17f163d851e2a547d56dba0a3a", size = 380555, upload-time = "2025-07-26T12:01:08.801Z" }, + { url = "https://files.pythonhosted.org/packages/5f/4b/6157f24ca425b89fe2eb7e7be642375711ab671135be21e6faa100f7448c/contourpy-1.3.3-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:51e79c1f7470158e838808d4a996fa9bac72c498e93d8ebe5119bc1e6becb0db", size = 355238, upload-time = "2025-07-26T12:01:10.319Z" }, + { url = "https://files.pythonhosted.org/packages/98/56/f914f0dd678480708a04cfd2206e7c382533249bc5001eb9f58aa693e200/contourpy-1.3.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:598c3aaece21c503615fd59c92a3598b428b2f01bfb4b8ca9c4edeecc2438620", size = 1326218, upload-time = "2025-07-26T12:01:12.659Z" }, + { url = "https://files.pythonhosted.org/packages/fb/d7/4a972334a0c971acd5172389671113ae82aa7527073980c38d5868ff1161/contourpy-1.3.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:322ab1c99b008dad206d406bb61d014cf0174df491ae9d9d0fac6a6fda4f977f", size = 1392867, upload-time = 
"2025-07-26T12:01:15.533Z" }, + { url = "https://files.pythonhosted.org/packages/75/3e/f2cc6cd56dc8cff46b1a56232eabc6feea52720083ea71ab15523daab796/contourpy-1.3.3-cp311-cp311-win32.whl", hash = "sha256:fd907ae12cd483cd83e414b12941c632a969171bf90fc937d0c9f268a31cafff", size = 183677, upload-time = "2025-07-26T12:01:17.088Z" }, + { url = "https://files.pythonhosted.org/packages/98/4b/9bd370b004b5c9d8045c6c33cf65bae018b27aca550a3f657cdc99acdbd8/contourpy-1.3.3-cp311-cp311-win_amd64.whl", hash = "sha256:3519428f6be58431c56581f1694ba8e50626f2dd550af225f82fb5f5814d2a42", size = 225234, upload-time = "2025-07-26T12:01:18.256Z" }, + { url = "https://files.pythonhosted.org/packages/d9/b6/71771e02c2e004450c12b1120a5f488cad2e4d5b590b1af8bad060360fe4/contourpy-1.3.3-cp311-cp311-win_arm64.whl", hash = "sha256:15ff10bfada4bf92ec8b31c62bf7c1834c244019b4a33095a68000d7075df470", size = 193123, upload-time = "2025-07-26T12:01:19.848Z" }, { url = "https://files.pythonhosted.org/packages/be/45/adfee365d9ea3d853550b2e735f9d66366701c65db7855cd07621732ccfc/contourpy-1.3.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:b08a32ea2f8e42cf1d4be3169a98dd4be32bafe4f22b6c4cb4ba810fa9e5d2cb", size = 293419, upload-time = "2025-07-26T12:01:21.16Z" }, { url = "https://files.pythonhosted.org/packages/53/3e/405b59cfa13021a56bba395a6b3aca8cec012b45bf177b0eaf7a202cde2c/contourpy-1.3.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:556dba8fb6f5d8742f2923fe9457dbdd51e1049c4a43fd3986a0b14a1d815fc6", size = 273979, upload-time = "2025-07-26T12:01:22.448Z" }, { url = "https://files.pythonhosted.org/packages/d4/1c/a12359b9b2ca3a845e8f7f9ac08bdf776114eb931392fcad91743e2ea17b/contourpy-1.3.3-cp312-cp312-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:92d9abc807cf7d0e047b95ca5d957cf4792fcd04e920ca70d48add15c1a90ea7", size = 332653, upload-time = "2025-07-26T12:01:24.155Z" }, @@ -309,6 +350,11 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/93/8a/68a4ec5c55a2971213d29a9374913f7e9f18581945a7a31d1a39b5d2dfe5/contourpy-1.3.3-cp314-cp314t-win32.whl", hash = "sha256:e74a9a0f5e3fff48fb5a7f2fd2b9b70a3fe014a67522f79b7cca4c0c7e43c9ae", size = 202428, upload-time = "2025-07-26T12:02:48.691Z" }, { url = "https://files.pythonhosted.org/packages/fa/96/fd9f641ffedc4fa3ace923af73b9d07e869496c9cc7a459103e6e978992f/contourpy-1.3.3-cp314-cp314t-win_amd64.whl", hash = "sha256:13b68d6a62db8eafaebb8039218921399baf6e47bf85006fd8529f2a08ef33fc", size = 250331, upload-time = "2025-07-26T12:02:50.137Z" }, { url = "https://files.pythonhosted.org/packages/ae/8c/469afb6465b853afff216f9528ffda78a915ff880ed58813ba4faf4ba0b6/contourpy-1.3.3-cp314-cp314t-win_arm64.whl", hash = "sha256:b7448cb5a725bb1e35ce88771b86fba35ef418952474492cf7c764059933ff8b", size = 203831, upload-time = "2025-07-26T12:02:51.449Z" }, + { url = "https://files.pythonhosted.org/packages/a5/29/8dcfe16f0107943fa92388c23f6e05cff0ba58058c4c95b00280d4c75a14/contourpy-1.3.3-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:cd5dfcaeb10f7b7f9dc8941717c6c2ade08f587be2226222c12b25f0483ed497", size = 278809, upload-time = "2025-07-26T12:02:52.74Z" }, + { url = "https://files.pythonhosted.org/packages/85/a9/8b37ef4f7dafeb335daee3c8254645ef5725be4d9c6aa70b50ec46ef2f7e/contourpy-1.3.3-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:0c1fc238306b35f246d61a1d416a627348b5cf0648648a031e14bb8705fcdfe8", size = 261593, upload-time = "2025-07-26T12:02:54.037Z" }, + { url = 
"https://files.pythonhosted.org/packages/0a/59/ebfb8c677c75605cc27f7122c90313fd2f375ff3c8d19a1694bda74aaa63/contourpy-1.3.3-pp311-pypy311_pp73-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:70f9aad7de812d6541d29d2bbf8feb22ff7e1c299523db288004e3157ff4674e", size = 302202, upload-time = "2025-07-26T12:02:55.947Z" }, + { url = "https://files.pythonhosted.org/packages/3c/37/21972a15834d90bfbfb009b9d004779bd5a07a0ec0234e5ba8f64d5736f4/contourpy-1.3.3-pp311-pypy311_pp73-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5ed3657edf08512fc3fe81b510e35c2012fbd3081d2e26160f27ca28affec989", size = 329207, upload-time = "2025-07-26T12:02:57.468Z" }, + { url = "https://files.pythonhosted.org/packages/0c/58/bd257695f39d05594ca4ad60df5bcb7e32247f9951fd09a9b8edb82d1daa/contourpy-1.3.3-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:3d1a3799d62d45c18bafd41c5fa05120b96a28079f2393af559b843d1a966a77", size = 225315, upload-time = "2025-07-26T12:02:58.801Z" }, ] [[package]] @@ -365,43 +411,51 @@ wheels = [ [[package]] name = "fonttools" -version = "4.60.1" +version = "4.61.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/4b/42/97a13e47a1e51a5a7142475bbcf5107fe3a68fc34aef331c897d5fb98ad0/fonttools-4.60.1.tar.gz", hash = "sha256:ef00af0439ebfee806b25f24c8f92109157ff3fac5731dc7867957812e87b8d9", size = 3559823, upload-time = "2025-09-29T21:13:27.129Z" } +sdist = { url = "https://files.pythonhosted.org/packages/33/f9/0e84d593c0e12244150280a630999835a64f2852276161b62a0f98318de0/fonttools-4.61.0.tar.gz", hash = "sha256:ec520a1f0c7758d7a858a00f090c1745f6cde6a7c5e76fb70ea4044a15f712e7", size = 3561884, upload-time = "2025-11-28T17:05:49.491Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/e3/f7/a10b101b7a6f8836a5adb47f2791f2075d044a6ca123f35985c42edc82d8/fonttools-4.60.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:7b0c6d57ab00dae9529f3faf187f2254ea0aa1e04215cf2f1a8ec277c96661bc", size = 2832953, upload-time = "2025-09-29T21:11:39.616Z" }, - { url = "https://files.pythonhosted.org/packages/ed/fe/7bd094b59c926acf2304d2151354ddbeb74b94812f3dc943c231db09cb41/fonttools-4.60.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:839565cbf14645952d933853e8ade66a463684ed6ed6c9345d0faf1f0e868877", size = 2352706, upload-time = "2025-09-29T21:11:41.826Z" }, - { url = "https://files.pythonhosted.org/packages/c0/ca/4bb48a26ed95a1e7eba175535fe5805887682140ee0a0d10a88e1de84208/fonttools-4.60.1-cp312-cp312-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:8177ec9676ea6e1793c8a084a90b65a9f778771998eb919d05db6d4b1c0b114c", size = 4923716, upload-time = "2025-09-29T21:11:43.893Z" }, - { url = "https://files.pythonhosted.org/packages/b8/9f/2cb82999f686c1d1ddf06f6ae1a9117a880adbec113611cc9d22b2fdd465/fonttools-4.60.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:996a4d1834524adbb423385d5a629b868ef9d774670856c63c9a0408a3063401", size = 4968175, upload-time = "2025-09-29T21:11:46.439Z" }, - { url = "https://files.pythonhosted.org/packages/18/79/be569699e37d166b78e6218f2cde8c550204f2505038cdd83b42edc469b9/fonttools-4.60.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a46b2f450bc79e06ef3b6394f0c68660529ed51692606ad7f953fc2e448bc903", size = 4911031, upload-time = "2025-09-29T21:11:48.977Z" }, - { url = 
"https://files.pythonhosted.org/packages/cc/9f/89411cc116effaec5260ad519162f64f9c150e5522a27cbb05eb62d0c05b/fonttools-4.60.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:6ec722ee589e89a89f5b7574f5c45604030aa6ae24cb2c751e2707193b466fed", size = 5062966, upload-time = "2025-09-29T21:11:54.344Z" }, - { url = "https://files.pythonhosted.org/packages/62/a1/f888221934b5731d46cb9991c7a71f30cb1f97c0ef5fcf37f8da8fce6c8e/fonttools-4.60.1-cp312-cp312-win32.whl", hash = "sha256:b2cf105cee600d2de04ca3cfa1f74f1127f8455b71dbad02b9da6ec266e116d6", size = 2218750, upload-time = "2025-09-29T21:11:56.601Z" }, - { url = "https://files.pythonhosted.org/packages/88/8f/a55b5550cd33cd1028601df41acd057d4be20efa5c958f417b0c0613924d/fonttools-4.60.1-cp312-cp312-win_amd64.whl", hash = "sha256:992775c9fbe2cf794786fa0ffca7f09f564ba3499b8fe9f2f80bd7197db60383", size = 2267026, upload-time = "2025-09-29T21:11:58.852Z" }, - { url = "https://files.pythonhosted.org/packages/7c/5b/cdd2c612277b7ac7ec8c0c9bc41812c43dc7b2d5f2b0897e15fdf5a1f915/fonttools-4.60.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:6f68576bb4bbf6060c7ab047b1574a1ebe5c50a17de62830079967b211059ebb", size = 2825777, upload-time = "2025-09-29T21:12:01.22Z" }, - { url = "https://files.pythonhosted.org/packages/d6/8a/de9cc0540f542963ba5e8f3a1f6ad48fa211badc3177783b9d5cadf79b5d/fonttools-4.60.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:eedacb5c5d22b7097482fa834bda0dafa3d914a4e829ec83cdea2a01f8c813c4", size = 2348080, upload-time = "2025-09-29T21:12:03.785Z" }, - { url = "https://files.pythonhosted.org/packages/2d/8b/371ab3cec97ee3fe1126b3406b7abd60c8fec8975fd79a3c75cdea0c3d83/fonttools-4.60.1-cp313-cp313-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:b33a7884fabd72bdf5f910d0cf46be50dce86a0362a65cfc746a4168c67eb96c", size = 4903082, upload-time = "2025-09-29T21:12:06.382Z" }, - { url = "https://files.pythonhosted.org/packages/04/05/06b1455e4bc653fcb2117ac3ef5fa3a8a14919b93c60742d04440605d058/fonttools-4.60.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2409d5fb7b55fd70f715e6d34e7a6e4f7511b8ad29a49d6df225ee76da76dd77", size = 4960125, upload-time = "2025-09-29T21:12:09.314Z" }, - { url = "https://files.pythonhosted.org/packages/8e/37/f3b840fcb2666f6cb97038793606bdd83488dca2d0b0fc542ccc20afa668/fonttools-4.60.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:c8651e0d4b3bdeda6602b85fdc2abbefc1b41e573ecb37b6779c4ca50753a199", size = 4901454, upload-time = "2025-09-29T21:12:11.931Z" }, - { url = "https://files.pythonhosted.org/packages/fd/9e/eb76f77e82f8d4a46420aadff12cec6237751b0fb9ef1de373186dcffb5f/fonttools-4.60.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:145daa14bf24824b677b9357c5e44fd8895c2a8f53596e1b9ea3496081dc692c", size = 5044495, upload-time = "2025-09-29T21:12:15.241Z" }, - { url = "https://files.pythonhosted.org/packages/f8/b3/cede8f8235d42ff7ae891bae8d619d02c8ac9fd0cfc450c5927a6200c70d/fonttools-4.60.1-cp313-cp313-win32.whl", hash = "sha256:2299df884c11162617a66b7c316957d74a18e3758c0274762d2cc87df7bc0272", size = 2217028, upload-time = "2025-09-29T21:12:17.96Z" }, - { url = "https://files.pythonhosted.org/packages/75/4d/b022c1577807ce8b31ffe055306ec13a866f2337ecee96e75b24b9b753ea/fonttools-4.60.1-cp313-cp313-win_amd64.whl", hash = "sha256:a3db56f153bd4c5c2b619ab02c5db5192e222150ce5a1bc10f16164714bc39ac", size = 2266200, upload-time = "2025-09-29T21:12:20.14Z" }, - { url = 
"https://files.pythonhosted.org/packages/9a/83/752ca11c1aa9a899b793a130f2e466b79ea0cf7279c8d79c178fc954a07b/fonttools-4.60.1-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:a884aef09d45ba1206712c7dbda5829562d3fea7726935d3289d343232ecb0d3", size = 2822830, upload-time = "2025-09-29T21:12:24.406Z" }, - { url = "https://files.pythonhosted.org/packages/57/17/bbeab391100331950a96ce55cfbbff27d781c1b85ebafb4167eae50d9fe3/fonttools-4.60.1-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:8a44788d9d91df72d1a5eac49b31aeb887a5f4aab761b4cffc4196c74907ea85", size = 2345524, upload-time = "2025-09-29T21:12:26.819Z" }, - { url = "https://files.pythonhosted.org/packages/3d/2e/d4831caa96d85a84dd0da1d9f90d81cec081f551e0ea216df684092c6c97/fonttools-4.60.1-cp314-cp314-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:e852d9dda9f93ad3651ae1e3bb770eac544ec93c3807888798eccddf84596537", size = 4843490, upload-time = "2025-09-29T21:12:29.123Z" }, - { url = "https://files.pythonhosted.org/packages/49/13/5e2ea7c7a101b6fc3941be65307ef8df92cbbfa6ec4804032baf1893b434/fonttools-4.60.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:154cb6ee417e417bf5f7c42fe25858c9140c26f647c7347c06f0cc2d47eff003", size = 4944184, upload-time = "2025-09-29T21:12:31.414Z" }, - { url = "https://files.pythonhosted.org/packages/0c/2b/cf9603551c525b73fc47c52ee0b82a891579a93d9651ed694e4e2cd08bb8/fonttools-4.60.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:5664fd1a9ea7f244487ac8f10340c4e37664675e8667d6fee420766e0fb3cf08", size = 4890218, upload-time = "2025-09-29T21:12:33.936Z" }, - { url = "https://files.pythonhosted.org/packages/fd/2f/933d2352422e25f2376aae74f79eaa882a50fb3bfef3c0d4f50501267101/fonttools-4.60.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:583b7f8e3c49486e4d489ad1deacfb8d5be54a8ef34d6df824f6a171f8511d99", size = 4999324, upload-time = "2025-09-29T21:12:36.637Z" }, - { url = "https://files.pythonhosted.org/packages/38/99/234594c0391221f66216bc2c886923513b3399a148defaccf81dc3be6560/fonttools-4.60.1-cp314-cp314-win32.whl", hash = "sha256:66929e2ea2810c6533a5184f938502cfdaea4bc3efb7130d8cc02e1c1b4108d6", size = 2220861, upload-time = "2025-09-29T21:12:39.108Z" }, - { url = "https://files.pythonhosted.org/packages/3e/1d/edb5b23726dde50fc4068e1493e4fc7658eeefcaf75d4c5ffce067d07ae5/fonttools-4.60.1-cp314-cp314-win_amd64.whl", hash = "sha256:f3d5be054c461d6a2268831f04091dc82753176f6ea06dc6047a5e168265a987", size = 2270934, upload-time = "2025-09-29T21:12:41.339Z" }, - { url = "https://files.pythonhosted.org/packages/fb/da/1392aaa2170adc7071fe7f9cfd181a5684a7afcde605aebddf1fb4d76df5/fonttools-4.60.1-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:b6379e7546ba4ae4b18f8ae2b9bc5960936007a1c0e30b342f662577e8bc3299", size = 2894340, upload-time = "2025-09-29T21:12:43.774Z" }, - { url = "https://files.pythonhosted.org/packages/bf/a7/3b9f16e010d536ce567058b931a20b590d8f3177b2eda09edd92e392375d/fonttools-4.60.1-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:9d0ced62b59e0430b3690dbc5373df1c2aa7585e9a8ce38eff87f0fd993c5b01", size = 2375073, upload-time = "2025-09-29T21:12:46.437Z" }, - { url = "https://files.pythonhosted.org/packages/9b/b5/e9bcf51980f98e59bb5bb7c382a63c6f6cac0eec5f67de6d8f2322382065/fonttools-4.60.1-cp314-cp314t-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = 
"sha256:875cb7764708b3132637f6c5fb385b16eeba0f7ac9fa45a69d35e09b47045801", size = 4849758, upload-time = "2025-09-29T21:12:48.694Z" }, - { url = "https://files.pythonhosted.org/packages/e3/dc/1d2cf7d1cba82264b2f8385db3f5960e3d8ce756b4dc65b700d2c496f7e9/fonttools-4.60.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a184b2ea57b13680ab6d5fbde99ccef152c95c06746cb7718c583abd8f945ccc", size = 5085598, upload-time = "2025-09-29T21:12:51.081Z" }, - { url = "https://files.pythonhosted.org/packages/5d/4d/279e28ba87fb20e0c69baf72b60bbf1c4d873af1476806a7b5f2b7fac1ff/fonttools-4.60.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:026290e4ec76583881763fac284aca67365e0be9f13a7fb137257096114cb3bc", size = 4957603, upload-time = "2025-09-29T21:12:53.423Z" }, - { url = "https://files.pythonhosted.org/packages/78/d4/ff19976305e0c05aa3340c805475abb00224c954d3c65e82c0a69633d55d/fonttools-4.60.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:f0e8817c7d1a0c2eedebf57ef9a9896f3ea23324769a9a2061a80fe8852705ed", size = 4974184, upload-time = "2025-09-29T21:12:55.962Z" }, - { url = "https://files.pythonhosted.org/packages/63/22/8553ff6166f5cd21cfaa115aaacaa0dc73b91c079a8cfd54a482cbc0f4f5/fonttools-4.60.1-cp314-cp314t-win32.whl", hash = "sha256:1410155d0e764a4615774e5c2c6fc516259fe3eca5882f034eb9bfdbee056259", size = 2282241, upload-time = "2025-09-29T21:12:58.179Z" }, - { url = "https://files.pythonhosted.org/packages/8a/cb/fa7b4d148e11d5a72761a22e595344133e83a9507a4c231df972e657579b/fonttools-4.60.1-cp314-cp314t-win_amd64.whl", hash = "sha256:022beaea4b73a70295b688f817ddc24ed3e3418b5036ffcd5658141184ef0d0c", size = 2345760, upload-time = "2025-09-29T21:13:00.375Z" }, - { url = "https://files.pythonhosted.org/packages/c7/93/0dd45cd283c32dea1545151d8c3637b4b8c53cdb3a625aeb2885b184d74d/fonttools-4.60.1-py3-none-any.whl", hash = "sha256:906306ac7afe2156fcf0042173d6ebbb05416af70f6b370967b47f8f00103bbb", size = 1143175, upload-time = "2025-09-29T21:13:24.134Z" }, + { url = "https://files.pythonhosted.org/packages/fd/be/5aa89cdddf2863d8afbdc19eb8ec5d8d35d40eeeb8e6cf52c5ff1c2dbd33/fonttools-4.61.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:a32a16951cbf113d38f1dd8551b277b6e06e0f6f776fece0f99f746d739e1be3", size = 2847553, upload-time = "2025-11-28T17:04:30.539Z" }, + { url = "https://files.pythonhosted.org/packages/0d/3e/6ff643b07cead1236a534f51291ae2981721cf419135af5b740c002a66dd/fonttools-4.61.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:328a9c227984bebaf69f3ac9062265f8f6acc7ddf2e4e344c63358579af0aa3d", size = 2388298, upload-time = "2025-11-28T17:04:32.161Z" }, + { url = "https://files.pythonhosted.org/packages/c3/15/fca8dfbe7b482e6f240b1aad0ed7c6e2e75e7a28efa3d3a03b570617b5e5/fonttools-4.61.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2f0bafc8a3b3749c69cc610e5aa3da832d39c2a37a68f03d18ec9a02ecaac04a", size = 5054133, upload-time = "2025-11-28T17:04:34.035Z" }, + { url = "https://files.pythonhosted.org/packages/6a/a2/821c61c691b21fd09e07528a9a499cc2b075ac83ddb644aa16c9875a64bc/fonttools-4.61.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:b5ca59b7417d149cf24e4c1933c9f44b2957424fc03536f132346d5242e0ebe5", size = 5031410, upload-time = "2025-11-28T17:04:36.141Z" }, + { url = "https://files.pythonhosted.org/packages/e8/f6/8b16339e93d03c732c8a23edefe3061b17a5f9107ddc47a3215ecd054cac/fonttools-4.61.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = 
"sha256:df8cbce85cf482eb01f4551edca978c719f099c623277bda8332e5dbe7dba09d", size = 5030005, upload-time = "2025-11-28T17:04:38.314Z" }, + { url = "https://files.pythonhosted.org/packages/ac/eb/d4e150427bdaa147755239c931bbce829a88149ade5bfd8a327afe565567/fonttools-4.61.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:7fb5b84f48a6a733ca3d7f41aa9551908ccabe8669ffe79586560abcc00a9cfd", size = 5154026, upload-time = "2025-11-28T17:04:40.34Z" }, + { url = "https://files.pythonhosted.org/packages/7f/5f/3dd00ce0dba6759943c707b1830af8c0bcf6f8f1a9fe46cb82e7ac2aaa74/fonttools-4.61.0-cp311-cp311-win32.whl", hash = "sha256:787ef9dfd1ea9fe49573c272412ae5f479d78e671981819538143bec65863865", size = 2276035, upload-time = "2025-11-28T17:04:42.59Z" }, + { url = "https://files.pythonhosted.org/packages/4e/44/798c472f096ddf12955eddb98f4f7c906e7497695d04ce073ddf7161d134/fonttools-4.61.0-cp311-cp311-win_amd64.whl", hash = "sha256:14fafda386377b6131d9e448af42d0926bad47e038de0e5ba1d58c25d621f028", size = 2327290, upload-time = "2025-11-28T17:04:44.57Z" }, + { url = "https://files.pythonhosted.org/packages/00/5d/19e5939f773c7cb05480fe2e881d63870b63ee2b4bdb9a77d55b1d36c7b9/fonttools-4.61.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:e24a1565c4e57111ec7f4915f8981ecbb61adf66a55f378fdc00e206059fcfef", size = 2846930, upload-time = "2025-11-28T17:04:46.639Z" }, + { url = "https://files.pythonhosted.org/packages/25/b2/0658faf66f705293bd7e739a4f038302d188d424926be9c59bdad945664b/fonttools-4.61.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:e2bfacb5351303cae9f072ccf3fc6ecb437a6f359c0606bae4b1ab6715201d87", size = 2383016, upload-time = "2025-11-28T17:04:48.525Z" }, + { url = "https://files.pythonhosted.org/packages/29/a3/1fa90b95b690f0d7541f48850adc40e9019374d896c1b8148d15012b2458/fonttools-4.61.0-cp312-cp312-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:0bdcf2e29d65c26299cc3d502f4612365e8b90a939f46cd92d037b6cb7bb544a", size = 4949425, upload-time = "2025-11-28T17:04:50.482Z" }, + { url = "https://files.pythonhosted.org/packages/af/00/acf18c00f6c501bd6e05ee930f926186f8a8e268265407065688820f1c94/fonttools-4.61.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e6cd0d9051b8ddaf7385f99dd82ec2a058e2b46cf1f1961e68e1ff20fcbb61af", size = 4999632, upload-time = "2025-11-28T17:04:52.508Z" }, + { url = "https://files.pythonhosted.org/packages/5f/e0/19a2b86e54109b1d2ee8743c96a1d297238ae03243897bc5345c0365f34d/fonttools-4.61.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:e074bc07c31406f45c418e17c1722e83560f181d122c412fa9e815df0ff74810", size = 4939438, upload-time = "2025-11-28T17:04:54.437Z" }, + { url = "https://files.pythonhosted.org/packages/04/35/7b57a5f57d46286360355eff8d6b88c64ab6331107f37a273a71c803798d/fonttools-4.61.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:5a9b78da5d5faa17e63b2404b77feeae105c1b7e75f26020ab7a27b76e02039f", size = 5088960, upload-time = "2025-11-28T17:04:56.348Z" }, + { url = "https://files.pythonhosted.org/packages/3e/0e/6c5023eb2e0fe5d1ababc7e221e44acd3ff668781489cc1937a6f83d620a/fonttools-4.61.0-cp312-cp312-win32.whl", hash = "sha256:9821ed77bb676736b88fa87a737c97b6af06e8109667e625a4f00158540ce044", size = 2264404, upload-time = "2025-11-28T17:04:58.149Z" }, + { url = "https://files.pythonhosted.org/packages/36/0b/63273128c7c5df19b1e4cd92e0a1e6ea5bb74a400c4905054c96ad60a675/fonttools-4.61.0-cp312-cp312-win_amd64.whl", hash = 
"sha256:0011d640afa61053bc6590f9a3394bd222de7cfde19346588beabac374e9d8ac", size = 2314427, upload-time = "2025-11-28T17:04:59.812Z" }, + { url = "https://files.pythonhosted.org/packages/17/45/334f0d7f181e5473cfb757e1b60f4e60e7fc64f28d406e5d364a952718c0/fonttools-4.61.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:ba774b8cbd8754f54b8eb58124e8bd45f736b2743325ab1a5229698942b9b433", size = 2841801, upload-time = "2025-11-28T17:05:01.621Z" }, + { url = "https://files.pythonhosted.org/packages/cc/63/97b9c78e1f79bc741d4efe6e51f13872d8edb2b36e1b9fb2bab0d4491bb7/fonttools-4.61.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:c84b430616ed73ce46e9cafd0bf0800e366a3e02fb7e1ad7c1e214dbe3862b1f", size = 2379024, upload-time = "2025-11-28T17:05:03.668Z" }, + { url = "https://files.pythonhosted.org/packages/4e/80/c87bc524a90dbeb2a390eea23eae448286983da59b7e02c67fa0ca96a8c5/fonttools-4.61.0-cp313-cp313-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:b2b734d8391afe3c682320840c8191de9bd24e7eb85768dd4dc06ed1b63dbb1b", size = 4923706, upload-time = "2025-11-28T17:05:05.494Z" }, + { url = "https://files.pythonhosted.org/packages/6d/f6/a3b0374811a1de8c3f9207ec88f61ad1bb96f938ed89babae26c065c2e46/fonttools-4.61.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a5c5fff72bf31b0e558ed085e4fd7ed96eb85881404ecc39ed2a779e7cf724eb", size = 4979751, upload-time = "2025-11-28T17:05:07.665Z" }, + { url = "https://files.pythonhosted.org/packages/a5/3b/30f63b4308b449091573285f9d27619563a84f399946bca3eadc9554afbe/fonttools-4.61.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:14a290c5c93fcab76b7f451e6a4b7721b712d90b3b5ed6908f1abcf794e90d6d", size = 4921113, upload-time = "2025-11-28T17:05:09.551Z" }, + { url = "https://files.pythonhosted.org/packages/41/6c/58e6e9b7d9d8bf2d7010bd7bb493060b39b02a12d1cda64a8bfb116ce760/fonttools-4.61.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:13e3e20a5463bfeb77b3557d04b30bd6a96a6bb5c15c7b2e7908903e69d437a0", size = 5063183, upload-time = "2025-11-28T17:05:11.677Z" }, + { url = "https://files.pythonhosted.org/packages/3f/e3/52c790ab2b07492df059947a1fd7778e105aac5848c0473029a4d20481a2/fonttools-4.61.0-cp313-cp313-win32.whl", hash = "sha256:6781e7a4bb010be1cd69a29927b0305c86b843395f2613bdabe115f7d6ea7f34", size = 2263159, upload-time = "2025-11-28T17:05:13.292Z" }, + { url = "https://files.pythonhosted.org/packages/e9/1f/116013b200fbeba871046554d5d2a45fefa69a05c40e9cdfd0d4fff53edc/fonttools-4.61.0-cp313-cp313-win_amd64.whl", hash = "sha256:c53b47834ae41e8e4829171cc44fec0fdf125545a15f6da41776b926b9645a9a", size = 2313530, upload-time = "2025-11-28T17:05:14.848Z" }, + { url = "https://files.pythonhosted.org/packages/d3/99/59b1e25987787cb714aa9457cee4c9301b7c2153f0b673e2b8679d37669d/fonttools-4.61.0-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:96dfc9bc1f2302224e48e6ee37e656eddbab810b724b52e9d9c13a57a6abad01", size = 2841429, upload-time = "2025-11-28T17:05:16.671Z" }, + { url = "https://files.pythonhosted.org/packages/2b/b2/4c1911d4332c8a144bb3b44416e274ccca0e297157c971ea1b3fbb855590/fonttools-4.61.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:3b2065d94e5d63aafc2591c8b6ccbdb511001d9619f1bca8ad39b745ebeb5efa", size = 2378987, upload-time = "2025-11-28T17:05:18.69Z" }, + { url = 
"https://files.pythonhosted.org/packages/24/b0/f442e90fde5d2af2ae0cb54008ab6411edc557ee33b824e13e1d04925ac9/fonttools-4.61.0-cp314-cp314-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:e0d87e81e4d869549585ba0beb3f033718501c1095004f5e6aef598d13ebc216", size = 4873270, upload-time = "2025-11-28T17:05:20.625Z" }, + { url = "https://files.pythonhosted.org/packages/bb/04/f5d5990e33053c8a59b90b1d7e10ad9b97a73f42c745304da0e709635fab/fonttools-4.61.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1cfa2eb9bae650e58f0e8ad53c49d19a844d6034d6b259f30f197238abc1ccee", size = 4968270, upload-time = "2025-11-28T17:05:22.515Z" }, + { url = "https://files.pythonhosted.org/packages/94/9f/2091402e0d27c9c8c4bab5de0e5cd146d9609a2d7d1c666bbb75c0011c1a/fonttools-4.61.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:4238120002e68296d55e091411c09eab94e111c8ce64716d17df53fd0eb3bb3d", size = 4919799, upload-time = "2025-11-28T17:05:24.437Z" }, + { url = "https://files.pythonhosted.org/packages/a8/72/86adab22fde710b829f8ffbc8f264df01928e5b7a8f6177fa29979ebf256/fonttools-4.61.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:b6ceac262cc62bec01b3bb59abccf41b24ef6580869e306a4e88b7e56bb4bdda", size = 5030966, upload-time = "2025-11-28T17:05:26.115Z" }, + { url = "https://files.pythonhosted.org/packages/e8/a7/7c8e31b003349e845b853f5e0a67b95ff6b052fa4f5224f8b72624f5ac69/fonttools-4.61.0-cp314-cp314-win32.whl", hash = "sha256:adbb4ecee1a779469a77377bbe490565effe8fce6fb2e6f95f064de58f8bac85", size = 2267243, upload-time = "2025-11-28T17:05:27.807Z" }, + { url = "https://files.pythonhosted.org/packages/20/ee/f434fe7749360497c52b7dcbcfdbccdaab0a71c59f19d572576066717122/fonttools-4.61.0-cp314-cp314-win_amd64.whl", hash = "sha256:02bdf8e04d1a70476564b8640380f04bb4ac74edc1fc71f1bacb840b3e398ee9", size = 2318822, upload-time = "2025-11-28T17:05:29.882Z" }, + { url = "https://files.pythonhosted.org/packages/33/b3/c16255320255e5c1863ca2b2599bb61a46e2f566db0bbb9948615a8fe692/fonttools-4.61.0-cp314-cp314t-macosx_10_15_universal2.whl", hash = "sha256:627216062d90ab0d98215176d8b9562c4dd5b61271d35f130bcd30f6a8aaa33a", size = 2924917, upload-time = "2025-11-28T17:05:31.46Z" }, + { url = "https://files.pythonhosted.org/packages/e2/b8/08067ae21de705a817777c02ef36ab0b953cbe91d8adf134f9c2da75ed6d/fonttools-4.61.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:7b446623c9cd5f14a59493818eaa80255eec2468c27d2c01b56e05357c263195", size = 2413576, upload-time = "2025-11-28T17:05:33.343Z" }, + { url = "https://files.pythonhosted.org/packages/42/f1/96ff43f92addce2356780fdc203f2966206f3d22ea20e242c27826fd7442/fonttools-4.61.0-cp314-cp314t-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:70e2a0c0182ee75e493ef33061bfebf140ea57e035481d2f95aa03b66c7a0e05", size = 4877447, upload-time = "2025-11-28T17:05:35.278Z" }, + { url = "https://files.pythonhosted.org/packages/d0/1e/a3d8e51ed9ccfd7385e239ae374b78d258a0fb82d82cab99160a014a45d1/fonttools-4.61.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9064b0f55b947e929ac669af5311ab1f26f750214db6dd9a0c97e091e918f486", size = 5095681, upload-time = "2025-11-28T17:05:37.142Z" }, + { url = "https://files.pythonhosted.org/packages/eb/f6/d256bd6c1065c146a0bdddf1c62f542e08ae5b3405dbf3fcc52be272f674/fonttools-4.61.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = 
"sha256:2cb5e45a824ce14b90510024d0d39dae51bd4fbb54c42a9334ea8c8cf4d95cbe", size = 4974140, upload-time = "2025-11-28T17:05:39.5Z" }, + { url = "https://files.pythonhosted.org/packages/5d/0c/96633eb4b26f138cc48561c6e0c44b4ea48acea56b20b507d6b14f8e80ce/fonttools-4.61.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:6e5ca8c62efdec7972dfdfd454415c4db49b89aeaefaaacada432f3b7eea9866", size = 5001741, upload-time = "2025-11-28T17:05:41.424Z" }, + { url = "https://files.pythonhosted.org/packages/6f/9a/3b536bad3be4f26186f296e749ff17bad3e6d57232c104d752d24b2e265b/fonttools-4.61.0-cp314-cp314t-win32.whl", hash = "sha256:63c7125d31abe3e61d7bb917329b5543c5b3448db95f24081a13aaf064360fc8", size = 2330707, upload-time = "2025-11-28T17:05:43.548Z" }, + { url = "https://files.pythonhosted.org/packages/18/ea/e6b9ac610451ee9f04477c311ad126de971f6112cb579fa391d2a8edb00b/fonttools-4.61.0-cp314-cp314t-win_amd64.whl", hash = "sha256:67d841aa272be5500de7f447c40d1d8452783af33b4c3599899319f6ef9ad3c1", size = 2395950, upload-time = "2025-11-28T17:05:45.638Z" }, + { url = "https://files.pythonhosted.org/packages/0c/14/634f7daea5ffe6a5f7a0322ba8e1a0e23c9257b80aa91458107896d1dfc7/fonttools-4.61.0-py3-none-any.whl", hash = "sha256:276f14c560e6f98d24ef7f5f44438e55ff5a67f78fa85236b218462c9f5d0635", size = 1144485, upload-time = "2025-11-28T17:05:47.573Z" }, ] [[package]] @@ -410,6 +464,22 @@ version = "1.8.0" source = { registry = "https://pypi.org/simple" } sdist = { url = "https://files.pythonhosted.org/packages/2d/f5/c831fac6cc817d26fd54c7eaccd04ef7e0288806943f7cc5bbf69f3ac1f0/frozenlist-1.8.0.tar.gz", hash = "sha256:3ede829ed8d842f6cd48fc7081d7a41001a56f1f38603f9d49bf3020d59a31ad", size = 45875, upload-time = "2025-10-06T05:38:17.865Z" } wheels = [ + { url = "https://files.pythonhosted.org/packages/bc/03/077f869d540370db12165c0aa51640a873fb661d8b315d1d4d67b284d7ac/frozenlist-1.8.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:09474e9831bc2b2199fad6da3c14c7b0fbdd377cce9d3d77131be28906cb7d84", size = 86912, upload-time = "2025-10-06T05:35:45.98Z" }, + { url = "https://files.pythonhosted.org/packages/df/b5/7610b6bd13e4ae77b96ba85abea1c8cb249683217ef09ac9e0ae93f25a91/frozenlist-1.8.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:17c883ab0ab67200b5f964d2b9ed6b00971917d5d8a92df149dc2c9779208ee9", size = 50046, upload-time = "2025-10-06T05:35:47.009Z" }, + { url = "https://files.pythonhosted.org/packages/6e/ef/0e8f1fe32f8a53dd26bdd1f9347efe0778b0fddf62789ea683f4cc7d787d/frozenlist-1.8.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:fa47e444b8ba08fffd1c18e8cdb9a75db1b6a27f17507522834ad13ed5922b93", size = 50119, upload-time = "2025-10-06T05:35:48.38Z" }, + { url = "https://files.pythonhosted.org/packages/11/b1/71a477adc7c36e5fb628245dfbdea2166feae310757dea848d02bd0689fd/frozenlist-1.8.0-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:2552f44204b744fba866e573be4c1f9048d6a324dfe14475103fd51613eb1d1f", size = 231067, upload-time = "2025-10-06T05:35:49.97Z" }, + { url = "https://files.pythonhosted.org/packages/45/7e/afe40eca3a2dc19b9904c0f5d7edfe82b5304cb831391edec0ac04af94c2/frozenlist-1.8.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:957e7c38f250991e48a9a73e6423db1bb9dd14e722a10f6b8bb8e16a0f55f695", size = 233160, upload-time = "2025-10-06T05:35:51.729Z" }, + { url = 
"https://files.pythonhosted.org/packages/a6/aa/7416eac95603ce428679d273255ffc7c998d4132cfae200103f164b108aa/frozenlist-1.8.0-cp311-cp311-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:8585e3bb2cdea02fc88ffa245069c36555557ad3609e83be0ec71f54fd4abb52", size = 228544, upload-time = "2025-10-06T05:35:53.246Z" }, + { url = "https://files.pythonhosted.org/packages/8b/3d/2a2d1f683d55ac7e3875e4263d28410063e738384d3adc294f5ff3d7105e/frozenlist-1.8.0-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:edee74874ce20a373d62dc28b0b18b93f645633c2943fd90ee9d898550770581", size = 243797, upload-time = "2025-10-06T05:35:54.497Z" }, + { url = "https://files.pythonhosted.org/packages/78/1e/2d5565b589e580c296d3bb54da08d206e797d941a83a6fdea42af23be79c/frozenlist-1.8.0-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:c9a63152fe95756b85f31186bddf42e4c02c6321207fd6601a1c89ebac4fe567", size = 247923, upload-time = "2025-10-06T05:35:55.861Z" }, + { url = "https://files.pythonhosted.org/packages/aa/c3/65872fcf1d326a7f101ad4d86285c403c87be7d832b7470b77f6d2ed5ddc/frozenlist-1.8.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:b6db2185db9be0a04fecf2f241c70b63b1a242e2805be291855078f2b404dd6b", size = 230886, upload-time = "2025-10-06T05:35:57.399Z" }, + { url = "https://files.pythonhosted.org/packages/a0/76/ac9ced601d62f6956f03cc794f9e04c81719509f85255abf96e2510f4265/frozenlist-1.8.0-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:f4be2e3d8bc8aabd566f8d5b8ba7ecc09249d74ba3c9ed52e54dc23a293f0b92", size = 245731, upload-time = "2025-10-06T05:35:58.563Z" }, + { url = "https://files.pythonhosted.org/packages/b9/49/ecccb5f2598daf0b4a1415497eba4c33c1e8ce07495eb07d2860c731b8d5/frozenlist-1.8.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:c8d1634419f39ea6f5c427ea2f90ca85126b54b50837f31497f3bf38266e853d", size = 241544, upload-time = "2025-10-06T05:35:59.719Z" }, + { url = "https://files.pythonhosted.org/packages/53/4b/ddf24113323c0bbcc54cb38c8b8916f1da7165e07b8e24a717b4a12cbf10/frozenlist-1.8.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:1a7fa382a4a223773ed64242dbe1c9c326ec09457e6b8428efb4118c685c3dfd", size = 241806, upload-time = "2025-10-06T05:36:00.959Z" }, + { url = "https://files.pythonhosted.org/packages/a7/fb/9b9a084d73c67175484ba2789a59f8eebebd0827d186a8102005ce41e1ba/frozenlist-1.8.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:11847b53d722050808926e785df837353bd4d75f1d494377e59b23594d834967", size = 229382, upload-time = "2025-10-06T05:36:02.22Z" }, + { url = "https://files.pythonhosted.org/packages/95/a3/c8fb25aac55bf5e12dae5c5aa6a98f85d436c1dc658f21c3ac73f9fa95e5/frozenlist-1.8.0-cp311-cp311-win32.whl", hash = "sha256:27c6e8077956cf73eadd514be8fb04d77fc946a7fe9f7fe167648b0b9085cc25", size = 39647, upload-time = "2025-10-06T05:36:03.409Z" }, + { url = "https://files.pythonhosted.org/packages/0a/f5/603d0d6a02cfd4c8f2a095a54672b3cf967ad688a60fb9faf04fc4887f65/frozenlist-1.8.0-cp311-cp311-win_amd64.whl", hash = "sha256:ac913f8403b36a2c8610bbfd25b8013488533e71e62b4b4adce9c86c8cea905b", size = 44064, upload-time = "2025-10-06T05:36:04.368Z" }, + { url = "https://files.pythonhosted.org/packages/5d/16/c2c9ab44e181f043a86f9a8f84d5124b62dbcb3a02c0977ec72b9ac1d3e0/frozenlist-1.8.0-cp311-cp311-win_arm64.whl", hash = "sha256:d4d3214a0f8394edfa3e303136d0575eece0745ff2b47bd2cb2e66dd92d4351a", size = 39937, upload-time = "2025-10-06T05:36:05.669Z" }, { url = 
"https://files.pythonhosted.org/packages/69/29/948b9aa87e75820a38650af445d2ef2b6b8a6fab1a23b6bb9e4ef0be2d59/frozenlist-1.8.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:78f7b9e5d6f2fdb88cdde9440dc147259b62b9d3b019924def9f6478be254ac1", size = 87782, upload-time = "2025-10-06T05:36:06.649Z" }, { url = "https://files.pythonhosted.org/packages/64/80/4f6e318ee2a7c0750ed724fa33a4bdf1eacdc5a39a7a24e818a773cd91af/frozenlist-1.8.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:229bf37d2e4acdaf808fd3f06e854a4a7a3661e871b10dc1f8f1896a3b05f18b", size = 50594, upload-time = "2025-10-06T05:36:07.69Z" }, { url = "https://files.pythonhosted.org/packages/2b/94/5c8a2b50a496b11dd519f4a24cb5496cf125681dd99e94c604ccdea9419a/frozenlist-1.8.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f833670942247a14eafbb675458b4e61c82e002a148f49e68257b79296e865c4", size = 50448, upload-time = "2025-10-06T05:36:08.78Z" }, @@ -495,11 +565,11 @@ wheels = [ [[package]] name = "fsspec" -version = "2025.10.0" +version = "2024.9.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/24/7f/2747c0d332b9acfa75dc84447a066fdf812b5a6b8d30472b74d309bfe8cb/fsspec-2025.10.0.tar.gz", hash = "sha256:b6789427626f068f9a83ca4e8a3cc050850b6c0f71f99ddb4f542b8266a26a59", size = 309285, upload-time = "2025-10-30T14:58:44.036Z" } +sdist = { url = "https://files.pythonhosted.org/packages/62/7c/12b0943011daaaa9c35c2a2e22e5eb929ac90002f08f1259d69aedad84de/fsspec-2024.9.0.tar.gz", hash = "sha256:4b0afb90c2f21832df142f292649035d80b421f60a9e1c027802e5a0da2b04e8", size = 286206, upload-time = "2024-09-04T15:06:57.91Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/eb/02/a6b21098b1d5d6249b7c5ab69dde30108a71e4e819d4a9778f1de1d5b70d/fsspec-2025.10.0-py3-none-any.whl", hash = "sha256:7c7712353ae7d875407f97715f0e1ffcc21e33d5b24556cb1e090ae9409ec61d", size = 200966, upload-time = "2025-10-30T14:58:42.53Z" }, + { url = "https://files.pythonhosted.org/packages/1d/a0/6aaea0c2fbea2f89bfd5db25fb1e3481896a423002ebe4e55288907a97a3/fsspec-2024.9.0-py3-none-any.whl", hash = "sha256:a0947d552d8a6efa72cc2c730b12c41d043509156966cca4fb157b0f2a0c574b", size = 179253, upload-time = "2024-09-04T15:06:55.908Z" }, ] [package.optional-dependencies] @@ -509,41 +579,49 @@ http = [ [[package]] name = "greenlet" -version = "3.2.4" +version = "3.3.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/03/b8/704d753a5a45507a7aab61f18db9509302ed3d0a27ac7e0359ec2905b1a6/greenlet-3.2.4.tar.gz", hash = "sha256:0dca0d95ff849f9a364385f36ab49f50065d76964944638be9691e1832e9f86d", size = 188260, upload-time = "2025-08-07T13:24:33.51Z" } +sdist = { url = "https://files.pythonhosted.org/packages/c7/e5/40dbda2736893e3e53d25838e0f19a2b417dfc122b9989c91918db30b5d3/greenlet-3.3.0.tar.gz", hash = "sha256:a82bb225a4e9e4d653dd2fb7b8b2d36e4fb25bc0165422a11e48b88e9e6f78fb", size = 190651, upload-time = "2025-12-04T14:49:44.05Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/44/69/9b804adb5fd0671f367781560eb5eb586c4d495277c93bde4307b9e28068/greenlet-3.2.4-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:3b67ca49f54cede0186854a008109d6ee71f66bd57bb36abd6d0a0267b540cdd", size = 274079, upload-time = "2025-08-07T13:15:45.033Z" }, - { url = "https://files.pythonhosted.org/packages/46/e9/d2a80c99f19a153eff70bc451ab78615583b8dac0754cfb942223d2c1a0d/greenlet-3.2.4-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = 
"sha256:ddf9164e7a5b08e9d22511526865780a576f19ddd00d62f8a665949327fde8bb", size = 640997, upload-time = "2025-08-07T13:42:56.234Z" }, - { url = "https://files.pythonhosted.org/packages/3b/16/035dcfcc48715ccd345f3a93183267167cdd162ad123cd93067d86f27ce4/greenlet-3.2.4-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:f28588772bb5fb869a8eb331374ec06f24a83a9c25bfa1f38b6993afe9c1e968", size = 655185, upload-time = "2025-08-07T13:45:27.624Z" }, - { url = "https://files.pythonhosted.org/packages/31/da/0386695eef69ffae1ad726881571dfe28b41970173947e7c558d9998de0f/greenlet-3.2.4-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:5c9320971821a7cb77cfab8d956fa8e39cd07ca44b6070db358ceb7f8797c8c9", size = 649926, upload-time = "2025-08-07T13:53:15.251Z" }, - { url = "https://files.pythonhosted.org/packages/68/88/69bf19fd4dc19981928ceacbc5fd4bb6bc2215d53199e367832e98d1d8fe/greenlet-3.2.4-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c60a6d84229b271d44b70fb6e5fa23781abb5d742af7b808ae3f6efd7c9c60f6", size = 651839, upload-time = "2025-08-07T13:18:30.281Z" }, - { url = "https://files.pythonhosted.org/packages/19/0d/6660d55f7373b2ff8152401a83e02084956da23ae58cddbfb0b330978fe9/greenlet-3.2.4-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3b3812d8d0c9579967815af437d96623f45c0f2ae5f04e366de62a12d83a8fb0", size = 607586, upload-time = "2025-08-07T13:18:28.544Z" }, - { url = "https://files.pythonhosted.org/packages/8e/1a/c953fdedd22d81ee4629afbb38d2f9d71e37d23caace44775a3a969147d4/greenlet-3.2.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:abbf57b5a870d30c4675928c37278493044d7c14378350b3aa5d484fa65575f0", size = 1123281, upload-time = "2025-08-07T13:42:39.858Z" }, - { url = "https://files.pythonhosted.org/packages/3f/c7/12381b18e21aef2c6bd3a636da1088b888b97b7a0362fac2e4de92405f97/greenlet-3.2.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:20fb936b4652b6e307b8f347665e2c615540d4b42b3b4c8a321d8286da7e520f", size = 1151142, upload-time = "2025-08-07T13:18:22.981Z" }, - { url = "https://files.pythonhosted.org/packages/27/45/80935968b53cfd3f33cf99ea5f08227f2646e044568c9b1555b58ffd61c2/greenlet-3.2.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:ee7a6ec486883397d70eec05059353b8e83eca9168b9f3f9a361971e77e0bcd0", size = 1564846, upload-time = "2025-11-04T12:42:15.191Z" }, - { url = "https://files.pythonhosted.org/packages/69/02/b7c30e5e04752cb4db6202a3858b149c0710e5453b71a3b2aec5d78a1aab/greenlet-3.2.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:326d234cbf337c9c3def0676412eb7040a35a768efc92504b947b3e9cfc7543d", size = 1633814, upload-time = "2025-11-04T12:42:17.175Z" }, - { url = "https://files.pythonhosted.org/packages/e9/08/b0814846b79399e585f974bbeebf5580fbe59e258ea7be64d9dfb253c84f/greenlet-3.2.4-cp312-cp312-win_amd64.whl", hash = "sha256:a7d4e128405eea3814a12cc2605e0e6aedb4035bf32697f72deca74de4105e02", size = 299899, upload-time = "2025-08-07T13:38:53.448Z" }, - { url = "https://files.pythonhosted.org/packages/49/e8/58c7f85958bda41dafea50497cbd59738c5c43dbbea5ee83d651234398f4/greenlet-3.2.4-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:1a921e542453fe531144e91e1feedf12e07351b1cf6c9e8a3325ea600a715a31", size = 272814, upload-time = "2025-08-07T13:15:50.011Z" }, - { url = "https://files.pythonhosted.org/packages/62/dd/b9f59862e9e257a16e4e610480cfffd29e3fae018a68c2332090b53aac3d/greenlet-3.2.4-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = 
"sha256:cd3c8e693bff0fff6ba55f140bf390fa92c994083f838fece0f63be121334945", size = 641073, upload-time = "2025-08-07T13:42:57.23Z" }, - { url = "https://files.pythonhosted.org/packages/f7/0b/bc13f787394920b23073ca3b6c4a7a21396301ed75a655bcb47196b50e6e/greenlet-3.2.4-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:710638eb93b1fa52823aa91bf75326f9ecdfd5e0466f00789246a5280f4ba0fc", size = 655191, upload-time = "2025-08-07T13:45:29.752Z" }, - { url = "https://files.pythonhosted.org/packages/f2/d6/6adde57d1345a8d0f14d31e4ab9c23cfe8e2cd39c3baf7674b4b0338d266/greenlet-3.2.4-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:c5111ccdc9c88f423426df3fd1811bfc40ed66264d35aa373420a34377efc98a", size = 649516, upload-time = "2025-08-07T13:53:16.314Z" }, - { url = "https://files.pythonhosted.org/packages/7f/3b/3a3328a788d4a473889a2d403199932be55b1b0060f4ddd96ee7cdfcad10/greenlet-3.2.4-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d76383238584e9711e20ebe14db6c88ddcedc1829a9ad31a584389463b5aa504", size = 652169, upload-time = "2025-08-07T13:18:32.861Z" }, - { url = "https://files.pythonhosted.org/packages/ee/43/3cecdc0349359e1a527cbf2e3e28e5f8f06d3343aaf82ca13437a9aa290f/greenlet-3.2.4-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:23768528f2911bcd7e475210822ffb5254ed10d71f4028387e5a99b4c6699671", size = 610497, upload-time = "2025-08-07T13:18:31.636Z" }, - { url = "https://files.pythonhosted.org/packages/b8/19/06b6cf5d604e2c382a6f31cafafd6f33d5dea706f4db7bdab184bad2b21d/greenlet-3.2.4-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:00fadb3fedccc447f517ee0d3fd8fe49eae949e1cd0f6a611818f4f6fb7dc83b", size = 1121662, upload-time = "2025-08-07T13:42:41.117Z" }, - { url = "https://files.pythonhosted.org/packages/a2/15/0d5e4e1a66fab130d98168fe984c509249c833c1a3c16806b90f253ce7b9/greenlet-3.2.4-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:d25c5091190f2dc0eaa3f950252122edbbadbb682aa7b1ef2f8af0f8c0afefae", size = 1149210, upload-time = "2025-08-07T13:18:24.072Z" }, - { url = "https://files.pythonhosted.org/packages/1c/53/f9c440463b3057485b8594d7a638bed53ba531165ef0ca0e6c364b5cc807/greenlet-3.2.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:6e343822feb58ac4d0a1211bd9399de2b3a04963ddeec21530fc426cc121f19b", size = 1564759, upload-time = "2025-11-04T12:42:19.395Z" }, - { url = "https://files.pythonhosted.org/packages/47/e4/3bb4240abdd0a8d23f4f88adec746a3099f0d86bfedb623f063b2e3b4df0/greenlet-3.2.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:ca7f6f1f2649b89ce02f6f229d7c19f680a6238af656f61e0115b24857917929", size = 1634288, upload-time = "2025-11-04T12:42:21.174Z" }, - { url = "https://files.pythonhosted.org/packages/0b/55/2321e43595e6801e105fcfdee02b34c0f996eb71e6ddffca6b10b7e1d771/greenlet-3.2.4-cp313-cp313-win_amd64.whl", hash = "sha256:554b03b6e73aaabec3745364d6239e9e012d64c68ccd0b8430c64ccc14939a8b", size = 299685, upload-time = "2025-08-07T13:24:38.824Z" }, - { url = "https://files.pythonhosted.org/packages/22/5c/85273fd7cc388285632b0498dbbab97596e04b154933dfe0f3e68156c68c/greenlet-3.2.4-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:49a30d5fda2507ae77be16479bdb62a660fa51b1eb4928b524975b3bde77b3c0", size = 273586, upload-time = "2025-08-07T13:16:08.004Z" }, - { url = "https://files.pythonhosted.org/packages/d1/75/10aeeaa3da9332c2e761e4c50d4c3556c21113ee3f0afa2cf5769946f7a3/greenlet-3.2.4-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = 
"sha256:299fd615cd8fc86267b47597123e3f43ad79c9d8a22bebdce535e53550763e2f", size = 686346, upload-time = "2025-08-07T13:42:59.944Z" }, - { url = "https://files.pythonhosted.org/packages/c0/aa/687d6b12ffb505a4447567d1f3abea23bd20e73a5bed63871178e0831b7a/greenlet-3.2.4-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:c17b6b34111ea72fc5a4e4beec9711d2226285f0386ea83477cbb97c30a3f3a5", size = 699218, upload-time = "2025-08-07T13:45:30.969Z" }, - { url = "https://files.pythonhosted.org/packages/dc/8b/29aae55436521f1d6f8ff4e12fb676f3400de7fcf27fccd1d4d17fd8fecd/greenlet-3.2.4-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:b4a1870c51720687af7fa3e7cda6d08d801dae660f75a76f3845b642b4da6ee1", size = 694659, upload-time = "2025-08-07T13:53:17.759Z" }, - { url = "https://files.pythonhosted.org/packages/92/2e/ea25914b1ebfde93b6fc4ff46d6864564fba59024e928bdc7de475affc25/greenlet-3.2.4-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:061dc4cf2c34852b052a8620d40f36324554bc192be474b9e9770e8c042fd735", size = 695355, upload-time = "2025-08-07T13:18:34.517Z" }, - { url = "https://files.pythonhosted.org/packages/72/60/fc56c62046ec17f6b0d3060564562c64c862948c9d4bc8aa807cf5bd74f4/greenlet-3.2.4-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:44358b9bf66c8576a9f57a590d5f5d6e72fa4228b763d0e43fee6d3b06d3a337", size = 657512, upload-time = "2025-08-07T13:18:33.969Z" }, - { url = "https://files.pythonhosted.org/packages/23/6e/74407aed965a4ab6ddd93a7ded3180b730d281c77b765788419484cdfeef/greenlet-3.2.4-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:2917bdf657f5859fbf3386b12d68ede4cf1f04c90c3a6bc1f013dd68a22e2269", size = 1612508, upload-time = "2025-11-04T12:42:23.427Z" }, - { url = "https://files.pythonhosted.org/packages/0d/da/343cd760ab2f92bac1845ca07ee3faea9fe52bee65f7bcb19f16ad7de08b/greenlet-3.2.4-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:015d48959d4add5d6c9f6c5210ee3803a830dce46356e3bc326d6776bde54681", size = 1680760, upload-time = "2025-11-04T12:42:25.341Z" }, - { url = "https://files.pythonhosted.org/packages/e3/a5/6ddab2b4c112be95601c13428db1d8b6608a8b6039816f2ba09c346c08fc/greenlet-3.2.4-cp314-cp314-win_amd64.whl", hash = "sha256:e37ab26028f12dbb0ff65f29a8d3d44a765c61e729647bf2ddfbbed621726f01", size = 303425, upload-time = "2025-08-07T13:32:27.59Z" }, + { url = "https://files.pythonhosted.org/packages/1f/cb/48e964c452ca2b92175a9b2dca037a553036cb053ba69e284650ce755f13/greenlet-3.3.0-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:e29f3018580e8412d6aaf5641bb7745d38c85228dacf51a73bd4e26ddf2a6a8e", size = 274908, upload-time = "2025-12-04T14:23:26.435Z" }, + { url = "https://files.pythonhosted.org/packages/28/da/38d7bff4d0277b594ec557f479d65272a893f1f2a716cad91efeb8680953/greenlet-3.3.0-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a687205fb22794e838f947e2194c0566d3812966b41c78709554aa883183fb62", size = 577113, upload-time = "2025-12-04T14:50:05.493Z" }, + { url = "https://files.pythonhosted.org/packages/3c/f2/89c5eb0faddc3ff014f1c04467d67dee0d1d334ab81fadbf3744847f8a8a/greenlet-3.3.0-cp311-cp311-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:4243050a88ba61842186cb9e63c7dfa677ec146160b0efd73b855a3d9c7fcf32", size = 590338, upload-time = "2025-12-04T14:57:41.136Z" }, + { url = 
"https://files.pythonhosted.org/packages/80/d7/db0a5085035d05134f8c089643da2b44cc9b80647c39e93129c5ef170d8f/greenlet-3.3.0-cp311-cp311-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:670d0f94cd302d81796e37299bcd04b95d62403883b24225c6b5271466612f45", size = 601098, upload-time = "2025-12-04T15:07:11.898Z" }, + { url = "https://files.pythonhosted.org/packages/dc/a6/e959a127b630a58e23529972dbc868c107f9d583b5a9f878fb858c46bc1a/greenlet-3.3.0-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6cb3a8ec3db4a3b0eb8a3c25436c2d49e3505821802074969db017b87bc6a948", size = 590206, upload-time = "2025-12-04T14:26:01.254Z" }, + { url = "https://files.pythonhosted.org/packages/48/60/29035719feb91798693023608447283b266b12efc576ed013dd9442364bb/greenlet-3.3.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:2de5a0b09eab81fc6a382791b995b1ccf2b172a9fec934747a7a23d2ff291794", size = 1550668, upload-time = "2025-12-04T15:04:22.439Z" }, + { url = "https://files.pythonhosted.org/packages/0a/5f/783a23754b691bfa86bd72c3033aa107490deac9b2ef190837b860996c9f/greenlet-3.3.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:4449a736606bd30f27f8e1ff4678ee193bc47f6ca810d705981cfffd6ce0d8c5", size = 1615483, upload-time = "2025-12-04T14:27:28.083Z" }, + { url = "https://files.pythonhosted.org/packages/1d/d5/c339b3b4bc8198b7caa4f2bd9fd685ac9f29795816d8db112da3d04175bb/greenlet-3.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:7652ee180d16d447a683c04e4c5f6441bae7ba7b17ffd9f6b3aff4605e9e6f71", size = 301164, upload-time = "2025-12-04T14:42:51.577Z" }, + { url = "https://files.pythonhosted.org/packages/f8/0a/a3871375c7b9727edaeeea994bfff7c63ff7804c9829c19309ba2e058807/greenlet-3.3.0-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:b01548f6e0b9e9784a2c99c5651e5dc89ffcbe870bc5fb2e5ef864e9cc6b5dcb", size = 276379, upload-time = "2025-12-04T14:23:30.498Z" }, + { url = "https://files.pythonhosted.org/packages/43/ab/7ebfe34dce8b87be0d11dae91acbf76f7b8246bf9d6b319c741f99fa59c6/greenlet-3.3.0-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:349345b770dc88f81506c6861d22a6ccd422207829d2c854ae2af8025af303e3", size = 597294, upload-time = "2025-12-04T14:50:06.847Z" }, + { url = "https://files.pythonhosted.org/packages/a4/39/f1c8da50024feecd0793dbd5e08f526809b8ab5609224a2da40aad3a7641/greenlet-3.3.0-cp312-cp312-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:e8e18ed6995e9e2c0b4ed264d2cf89260ab3ac7e13555b8032b25a74c6d18655", size = 607742, upload-time = "2025-12-04T14:57:42.349Z" }, + { url = "https://files.pythonhosted.org/packages/77/cb/43692bcd5f7a0da6ec0ec6d58ee7cddb606d055ce94a62ac9b1aa481e969/greenlet-3.3.0-cp312-cp312-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:c024b1e5696626890038e34f76140ed1daf858e37496d33f2af57f06189e70d7", size = 622297, upload-time = "2025-12-04T15:07:13.552Z" }, + { url = "https://files.pythonhosted.org/packages/75/b0/6bde0b1011a60782108c01de5913c588cf51a839174538d266de15e4bf4d/greenlet-3.3.0-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:047ab3df20ede6a57c35c14bf5200fcf04039d50f908270d3f9a7a82064f543b", size = 609885, upload-time = "2025-12-04T14:26:02.368Z" }, + { url = "https://files.pythonhosted.org/packages/49/0e/49b46ac39f931f59f987b7cd9f34bfec8ef81d2a1e6e00682f55be5de9f4/greenlet-3.3.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:2d9ad37fc657b1102ec880e637cccf20191581f75c64087a549e66c57e1ceb53", size = 1567424, upload-time = 
"2025-12-04T15:04:23.757Z" }, + { url = "https://files.pythonhosted.org/packages/05/f5/49a9ac2dff7f10091935def9165c90236d8f175afb27cbed38fb1d61ab6b/greenlet-3.3.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:83cd0e36932e0e7f36a64b732a6f60c2fc2df28c351bae79fbaf4f8092fe7614", size = 1636017, upload-time = "2025-12-04T14:27:29.688Z" }, + { url = "https://files.pythonhosted.org/packages/6c/79/3912a94cf27ec503e51ba493692d6db1e3cd8ac7ac52b0b47c8e33d7f4f9/greenlet-3.3.0-cp312-cp312-win_amd64.whl", hash = "sha256:a7a34b13d43a6b78abf828a6d0e87d3385680eaf830cd60d20d52f249faabf39", size = 301964, upload-time = "2025-12-04T14:36:58.316Z" }, + { url = "https://files.pythonhosted.org/packages/02/2f/28592176381b9ab2cafa12829ba7b472d177f3acc35d8fbcf3673d966fff/greenlet-3.3.0-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:a1e41a81c7e2825822f4e068c48cb2196002362619e2d70b148f20a831c00739", size = 275140, upload-time = "2025-12-04T14:23:01.282Z" }, + { url = "https://files.pythonhosted.org/packages/2c/80/fbe937bf81e9fca98c981fe499e59a3f45df2a04da0baa5c2be0dca0d329/greenlet-3.3.0-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9f515a47d02da4d30caaa85b69474cec77b7929b2e936ff7fb853d42f4bf8808", size = 599219, upload-time = "2025-12-04T14:50:08.309Z" }, + { url = "https://files.pythonhosted.org/packages/c2/ff/7c985128f0514271b8268476af89aee6866df5eec04ac17dcfbc676213df/greenlet-3.3.0-cp313-cp313-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:7d2d9fd66bfadf230b385fdc90426fcd6eb64db54b40c495b72ac0feb5766c54", size = 610211, upload-time = "2025-12-04T14:57:43.968Z" }, + { url = "https://files.pythonhosted.org/packages/79/07/c47a82d881319ec18a4510bb30463ed6891f2ad2c1901ed5ec23d3de351f/greenlet-3.3.0-cp313-cp313-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:30a6e28487a790417d036088b3bcb3f3ac7d8babaa7d0139edbaddebf3af9492", size = 624311, upload-time = "2025-12-04T15:07:14.697Z" }, + { url = "https://files.pythonhosted.org/packages/fd/8e/424b8c6e78bd9837d14ff7df01a9829fc883ba2ab4ea787d4f848435f23f/greenlet-3.3.0-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:087ea5e004437321508a8d6f20efc4cfec5e3c30118e1417ea96ed1d93950527", size = 612833, upload-time = "2025-12-04T14:26:03.669Z" }, + { url = "https://files.pythonhosted.org/packages/b5/ba/56699ff9b7c76ca12f1cdc27a886d0f81f2189c3455ff9f65246780f713d/greenlet-3.3.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:ab97cf74045343f6c60a39913fa59710e4bd26a536ce7ab2397adf8b27e67c39", size = 1567256, upload-time = "2025-12-04T15:04:25.276Z" }, + { url = "https://files.pythonhosted.org/packages/1e/37/f31136132967982d698c71a281a8901daf1a8fbab935dce7c0cf15f942cc/greenlet-3.3.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:5375d2e23184629112ca1ea89a53389dddbffcf417dad40125713d88eb5f96e8", size = 1636483, upload-time = "2025-12-04T14:27:30.804Z" }, + { url = "https://files.pythonhosted.org/packages/7e/71/ba21c3fb8c5dce83b8c01f458a42e99ffdb1963aeec08fff5a18588d8fd7/greenlet-3.3.0-cp313-cp313-win_amd64.whl", hash = "sha256:9ee1942ea19550094033c35d25d20726e4f1c40d59545815e1128ac58d416d38", size = 301833, upload-time = "2025-12-04T14:32:23.929Z" }, + { url = "https://files.pythonhosted.org/packages/d7/7c/f0a6d0ede2c7bf092d00bc83ad5bafb7e6ec9b4aab2fbdfa6f134dc73327/greenlet-3.3.0-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:60c2ef0f578afb3c8d92ea07ad327f9a062547137afe91f38408f08aacab667f", size = 275671, upload-time = "2025-12-04T14:23:05.267Z" }, + 
{ url = "https://files.pythonhosted.org/packages/44/06/dac639ae1a50f5969d82d2e3dd9767d30d6dbdbab0e1a54010c8fe90263c/greenlet-3.3.0-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0a5d554d0712ba1de0a6c94c640f7aeba3f85b3a6e1f2899c11c2c0428da9365", size = 646360, upload-time = "2025-12-04T14:50:10.026Z" }, + { url = "https://files.pythonhosted.org/packages/e0/94/0fb76fe6c5369fba9bf98529ada6f4c3a1adf19e406a47332245ef0eb357/greenlet-3.3.0-cp314-cp314-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:3a898b1e9c5f7307ebbde4102908e6cbfcb9ea16284a3abe15cab996bee8b9b3", size = 658160, upload-time = "2025-12-04T14:57:45.41Z" }, + { url = "https://files.pythonhosted.org/packages/93/79/d2c70cae6e823fac36c3bbc9077962105052b7ef81db2f01ec3b9bf17e2b/greenlet-3.3.0-cp314-cp314-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:dcd2bdbd444ff340e8d6bdf54d2f206ccddbb3ccfdcd3c25bf4afaa7b8f0cf45", size = 671388, upload-time = "2025-12-04T15:07:15.789Z" }, + { url = "https://files.pythonhosted.org/packages/b8/14/bab308fc2c1b5228c3224ec2bf928ce2e4d21d8046c161e44a2012b5203e/greenlet-3.3.0-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5773edda4dc00e173820722711d043799d3adb4f01731f40619e07ea2750b955", size = 660166, upload-time = "2025-12-04T14:26:05.099Z" }, + { url = "https://files.pythonhosted.org/packages/4b/d2/91465d39164eaa0085177f61983d80ffe746c5a1860f009811d498e7259c/greenlet-3.3.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:ac0549373982b36d5fd5d30beb8a7a33ee541ff98d2b502714a09f1169f31b55", size = 1615193, upload-time = "2025-12-04T15:04:27.041Z" }, + { url = "https://files.pythonhosted.org/packages/42/1b/83d110a37044b92423084d52d5d5a3b3a73cafb51b547e6d7366ff62eff1/greenlet-3.3.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:d198d2d977460358c3b3a4dc844f875d1adb33817f0613f663a656f463764ccc", size = 1683653, upload-time = "2025-12-04T14:27:32.366Z" }, + { url = "https://files.pythonhosted.org/packages/7c/9a/9030e6f9aa8fd7808e9c31ba4c38f87c4f8ec324ee67431d181fe396d705/greenlet-3.3.0-cp314-cp314-win_amd64.whl", hash = "sha256:73f51dd0e0bdb596fb0417e475fa3c5e32d4c83638296e560086b8d7da7c4170", size = 305387, upload-time = "2025-12-04T14:26:51.063Z" }, + { url = "https://files.pythonhosted.org/packages/a0/66/bd6317bc5932accf351fc19f177ffba53712a202f9df10587da8df257c7e/greenlet-3.3.0-cp314-cp314t-macosx_11_0_universal2.whl", hash = "sha256:d6ed6f85fae6cdfdb9ce04c9bf7a08d666cfcfb914e7d006f44f840b46741931", size = 282638, upload-time = "2025-12-04T14:25:20.941Z" }, + { url = "https://files.pythonhosted.org/packages/30/cf/cc81cb030b40e738d6e69502ccbd0dd1bced0588e958f9e757945de24404/greenlet-3.3.0-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d9125050fcf24554e69c4cacb086b87b3b55dc395a8b3ebe6487b045b2614388", size = 651145, upload-time = "2025-12-04T14:50:11.039Z" }, + { url = "https://files.pythonhosted.org/packages/9c/ea/1020037b5ecfe95ca7df8d8549959baceb8186031da83d5ecceff8b08cd2/greenlet-3.3.0-cp314-cp314t-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:87e63ccfa13c0a0f6234ed0add552af24cc67dd886731f2261e46e241608bee3", size = 654236, upload-time = "2025-12-04T14:57:47.007Z" }, + { url = "https://files.pythonhosted.org/packages/69/cc/1e4bae2e45ca2fa55299f4e85854606a78ecc37fead20d69322f96000504/greenlet-3.3.0-cp314-cp314t-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:2662433acbca297c9153a4023fe2161c8dcfdcc91f10433171cf7e7d94ba2221", size = 
662506, upload-time = "2025-12-04T15:07:16.906Z" }, + { url = "https://files.pythonhosted.org/packages/57/b9/f8025d71a6085c441a7eaff0fd928bbb275a6633773667023d19179fe815/greenlet-3.3.0-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3c6e9b9c1527a78520357de498b0e709fb9e2f49c3a513afd5a249007261911b", size = 653783, upload-time = "2025-12-04T14:26:06.225Z" }, + { url = "https://files.pythonhosted.org/packages/f6/c7/876a8c7a7485d5d6b5c6821201d542ef28be645aa024cfe1145b35c120c1/greenlet-3.3.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:286d093f95ec98fdd92fcb955003b8a3d054b4e2cab3e2707a5039e7b50520fd", size = 1614857, upload-time = "2025-12-04T15:04:28.484Z" }, + { url = "https://files.pythonhosted.org/packages/4f/dc/041be1dff9f23dac5f48a43323cd0789cb798342011c19a248d9c9335536/greenlet-3.3.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:6c10513330af5b8ae16f023e8ddbfb486ab355d04467c4679c5cfe4659975dd9", size = 1676034, upload-time = "2025-12-04T14:27:33.531Z" }, ] [[package]] @@ -555,35 +633,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515, upload-time = "2025-04-24T03:35:24.344Z" }, ] -[[package]] -name = "hf-xet" -version = "1.2.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/5e/6e/0f11bacf08a67f7fb5ee09740f2ca54163863b07b70d579356e9222ce5d8/hf_xet-1.2.0.tar.gz", hash = "sha256:a8c27070ca547293b6890c4bf389f713f80e8c478631432962bb7f4bc0bd7d7f", size = 506020, upload-time = "2025-10-24T19:04:32.129Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/9e/a5/85ef910a0aa034a2abcfadc360ab5ac6f6bc4e9112349bd40ca97551cff0/hf_xet-1.2.0-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:ceeefcd1b7aed4956ae8499e2199607765fbd1c60510752003b6cc0b8413b649", size = 2861870, upload-time = "2025-10-24T19:04:11.422Z" }, - { url = "https://files.pythonhosted.org/packages/ea/40/e2e0a7eb9a51fe8828ba2d47fe22a7e74914ea8a0db68a18c3aa7449c767/hf_xet-1.2.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:b70218dd548e9840224df5638fdc94bd033552963cfa97f9170829381179c813", size = 2717584, upload-time = "2025-10-24T19:04:09.586Z" }, - { url = "https://files.pythonhosted.org/packages/a5/7d/daf7f8bc4594fdd59a8a596f9e3886133fdc68e675292218a5e4c1b7e834/hf_xet-1.2.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7d40b18769bb9a8bc82a9ede575ce1a44c75eb80e7375a01d76259089529b5dc", size = 3315004, upload-time = "2025-10-24T19:04:00.314Z" }, - { url = "https://files.pythonhosted.org/packages/b1/ba/45ea2f605fbf6d81c8b21e4d970b168b18a53515923010c312c06cd83164/hf_xet-1.2.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:cd3a6027d59cfb60177c12d6424e31f4b5ff13d8e3a1247b3a584bf8977e6df5", size = 3222636, upload-time = "2025-10-24T19:03:58.111Z" }, - { url = "https://files.pythonhosted.org/packages/4a/1d/04513e3cab8f29ab8c109d309ddd21a2705afab9d52f2ba1151e0c14f086/hf_xet-1.2.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:6de1fc44f58f6dd937956c8d304d8c2dea264c80680bcfa61ca4a15e7b76780f", size = 3408448, upload-time = "2025-10-24T19:04:20.951Z" }, - { url = "https://files.pythonhosted.org/packages/f0/7c/60a2756d7feec7387db3a1176c632357632fbe7849fce576c5559d4520c7/hf_xet-1.2.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = 
"sha256:f182f264ed2acd566c514e45da9f2119110e48a87a327ca271027904c70c5832", size = 3503401, upload-time = "2025-10-24T19:04:22.549Z" }, - { url = "https://files.pythonhosted.org/packages/4e/64/48fffbd67fb418ab07451e4ce641a70de1c40c10a13e25325e24858ebe5a/hf_xet-1.2.0-cp313-cp313t-win_amd64.whl", hash = "sha256:293a7a3787e5c95d7be1857358a9130694a9c6021de3f27fa233f37267174382", size = 2900866, upload-time = "2025-10-24T19:04:33.461Z" }, - { url = "https://files.pythonhosted.org/packages/e2/51/f7e2caae42f80af886db414d4e9885fac959330509089f97cccb339c6b87/hf_xet-1.2.0-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:10bfab528b968c70e062607f663e21e34e2bba349e8038db546646875495179e", size = 2861861, upload-time = "2025-10-24T19:04:19.01Z" }, - { url = "https://files.pythonhosted.org/packages/6e/1d/a641a88b69994f9371bd347f1dd35e5d1e2e2460a2e350c8d5165fc62005/hf_xet-1.2.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:2a212e842647b02eb6a911187dc878e79c4aa0aa397e88dd3b26761676e8c1f8", size = 2717699, upload-time = "2025-10-24T19:04:17.306Z" }, - { url = "https://files.pythonhosted.org/packages/df/e0/e5e9bba7d15f0318955f7ec3f4af13f92e773fbb368c0b8008a5acbcb12f/hf_xet-1.2.0-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:30e06daccb3a7d4c065f34fc26c14c74f4653069bb2b194e7f18f17cbe9939c0", size = 3314885, upload-time = "2025-10-24T19:04:07.642Z" }, - { url = "https://files.pythonhosted.org/packages/21/90/b7fe5ff6f2b7b8cbdf1bd56145f863c90a5807d9758a549bf3d916aa4dec/hf_xet-1.2.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:29c8fc913a529ec0a91867ce3d119ac1aac966e098cf49501800c870328cc090", size = 3221550, upload-time = "2025-10-24T19:04:05.55Z" }, - { url = "https://files.pythonhosted.org/packages/6f/cb/73f276f0a7ce46cc6a6ec7d6c7d61cbfe5f2e107123d9bbd0193c355f106/hf_xet-1.2.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:66e159cbfcfbb29f920db2c09ed8b660eb894640d284f102ada929b6e3dc410a", size = 3408010, upload-time = "2025-10-24T19:04:28.598Z" }, - { url = "https://files.pythonhosted.org/packages/b8/1e/d642a12caa78171f4be64f7cd9c40e3ca5279d055d0873188a58c0f5fbb9/hf_xet-1.2.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:9c91d5ae931510107f148874e9e2de8a16052b6f1b3ca3c1b12f15ccb491390f", size = 3503264, upload-time = "2025-10-24T19:04:30.397Z" }, - { url = "https://files.pythonhosted.org/packages/17/b5/33764714923fa1ff922770f7ed18c2daae034d21ae6e10dbf4347c854154/hf_xet-1.2.0-cp314-cp314t-win_amd64.whl", hash = "sha256:210d577732b519ac6ede149d2f2f34049d44e8622bf14eb3d63bbcd2d4b332dc", size = 2901071, upload-time = "2025-10-24T19:04:37.463Z" }, - { url = "https://files.pythonhosted.org/packages/96/2d/22338486473df5923a9ab7107d375dbef9173c338ebef5098ef593d2b560/hf_xet-1.2.0-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:46740d4ac024a7ca9b22bebf77460ff43332868b661186a8e46c227fdae01848", size = 2866099, upload-time = "2025-10-24T19:04:15.366Z" }, - { url = "https://files.pythonhosted.org/packages/7f/8c/c5becfa53234299bc2210ba314eaaae36c2875e0045809b82e40a9544f0c/hf_xet-1.2.0-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:27df617a076420d8845bea087f59303da8be17ed7ec0cd7ee3b9b9f579dff0e4", size = 2722178, upload-time = "2025-10-24T19:04:13.695Z" }, - { url = "https://files.pythonhosted.org/packages/9a/92/cf3ab0b652b082e66876d08da57fcc6fa2f0e6c70dfbbafbd470bb73eb47/hf_xet-1.2.0-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3651fd5bfe0281951b988c0facbe726aa5e347b103a675f49a3fa8144c7968fd", size = 3320214, upload-time = 
"2025-10-24T19:04:03.596Z" }, - { url = "https://files.pythonhosted.org/packages/46/92/3f7ec4a1b6a65bf45b059b6d4a5d38988f63e193056de2f420137e3c3244/hf_xet-1.2.0-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:d06fa97c8562fb3ee7a378dd9b51e343bc5bc8190254202c9771029152f5e08c", size = 3229054, upload-time = "2025-10-24T19:04:01.949Z" }, - { url = "https://files.pythonhosted.org/packages/0b/dd/7ac658d54b9fb7999a0ccb07ad863b413cbaf5cf172f48ebcd9497ec7263/hf_xet-1.2.0-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:4c1428c9ae73ec0939410ec73023c4f842927f39db09b063b9482dac5a3bb737", size = 3413812, upload-time = "2025-10-24T19:04:24.585Z" }, - { url = "https://files.pythonhosted.org/packages/92/68/89ac4e5b12a9ff6286a12174c8538a5930e2ed662091dd2572bbe0a18c8a/hf_xet-1.2.0-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:a55558084c16b09b5ed32ab9ed38421e2d87cf3f1f89815764d1177081b99865", size = 3508920, upload-time = "2025-10-24T19:04:26.927Z" }, - { url = "https://files.pythonhosted.org/packages/cb/44/870d44b30e1dcfb6a65932e3e1506c103a8a5aea9103c337e7a53180322c/hf_xet-1.2.0-cp37-abi3-win_amd64.whl", hash = "sha256:e6584a52253f72c9f52f9e549d5895ca7a471608495c4ecaa6cc73dba2b24d69", size = 2905735, upload-time = "2025-10-24T19:04:35.928Z" }, -] - [[package]] name = "httpcore" version = "1.0.9" @@ -614,23 +663,34 @@ wheels = [ [[package]] name = "huggingface-hub" -version = "1.1.4" +version = "0.27.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "filelock" }, { name = "fsspec" }, - { name = "hf-xet", marker = "platform_machine == 'AMD64' or platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'" }, - { name = "httpx" }, { name = "packaging" }, { name = "pyyaml" }, - { name = "shellingham" }, + { name = "requests" }, { name = "tqdm" }, - { name = "typer-slim" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/44/8a/3cba668d9cd1b4e3eb6c1c3ff7bf0f74a7809bdbb5c327bcdbdbac802d23/huggingface_hub-1.1.4.tar.gz", hash = "sha256:a7424a766fffa1a11e4c1ac2040a1557e2101f86050fdf06627e7b74cc9d2ad6", size = 606842, upload-time = "2025-11-13T10:51:57.602Z" } +sdist = { url = "https://files.pythonhosted.org/packages/36/c6/e3709b61de8e7832dbe19f0d9637e81356cede733d99359fbce125423774/huggingface_hub-0.27.0.tar.gz", hash = "sha256:902cce1a1be5739f5589e560198a65a8edcfd3b830b1666f36e4b961f0454fac", size = 379286, upload-time = "2024-12-16T13:13:35.857Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/33/3f/969137c9d9428ed8bf171d27604243dd950a47cac82414826e2aebbc0a4c/huggingface_hub-1.1.4-py3-none-any.whl", hash = "sha256:867799fbd2ef338b7f8b03d038d9c0e09415dfe45bb2893b48a510d1d746daa5", size = 515580, upload-time = "2025-11-13T10:51:55.742Z" }, + { url = "https://files.pythonhosted.org/packages/61/8c/fbdc0a88a622d9fa54e132d7bf3ee03ec602758658a2db5b339a65be2cfe/huggingface_hub-0.27.0-py3-none-any.whl", hash = "sha256:8f2e834517f1f1ddf1ecc716f91b120d7333011b7485f665a9a412eacb1a2a81", size = 450537, upload-time = "2024-12-16T13:13:32.181Z" }, +] + +[[package]] +name = "hydra-core" +version = "1.3.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "antlr4-python3-runtime" }, + { name = "omegaconf" }, + { name = "packaging" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/6d/8e/07e42bc434a847154083b315779b0a81d567154504624e181caf2c71cd98/hydra-core-1.3.2.tar.gz", hash = 
"sha256:8a878ed67216997c3e9d88a8e72e7b4767e81af37afb4ea3334b269a4390a824", size = 3263494, upload-time = "2023-02-23T18:33:43.03Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c6/50/e0edd38dcd63fb26a8547f13d28f7a008bc4a3fd4eb4ff030673f22ad41a/hydra_core-1.3.2-py3-none-any.whl", hash = "sha256:fa0238a9e31df3373b35b0bfb672c34cc92718d21f81311d8996a16de1141d8b", size = 154547, upload-time = "2023-02-23T18:33:40.801Z" }, ] [[package]] @@ -660,6 +720,19 @@ version = "1.4.9" source = { registry = "https://pypi.org/simple" } sdist = { url = "https://files.pythonhosted.org/packages/5c/3c/85844f1b0feb11ee581ac23fe5fce65cd049a200c1446708cc1b7f922875/kiwisolver-1.4.9.tar.gz", hash = "sha256:c3b22c26c6fd6811b0ae8363b95ca8ce4ea3c202d3d0975b2914310ceb1bcc4d", size = 97564, upload-time = "2025-08-10T21:27:49.279Z" } wheels = [ + { url = "https://files.pythonhosted.org/packages/6f/ab/c80b0d5a9d8a1a65f4f815f2afff9798b12c3b9f31f1d304dd233dd920e2/kiwisolver-1.4.9-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:eb14a5da6dc7642b0f3a18f13654847cd8b7a2550e2645a5bda677862b03ba16", size = 124167, upload-time = "2025-08-10T21:25:53.403Z" }, + { url = "https://files.pythonhosted.org/packages/a0/c0/27fe1a68a39cf62472a300e2879ffc13c0538546c359b86f149cc19f6ac3/kiwisolver-1.4.9-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:39a219e1c81ae3b103643d2aedb90f1ef22650deb266ff12a19e7773f3e5f089", size = 66579, upload-time = "2025-08-10T21:25:54.79Z" }, + { url = "https://files.pythonhosted.org/packages/31/a2/a12a503ac1fd4943c50f9822678e8015a790a13b5490354c68afb8489814/kiwisolver-1.4.9-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2405a7d98604b87f3fc28b1716783534b1b4b8510d8142adca34ee0bc3c87543", size = 65309, upload-time = "2025-08-10T21:25:55.76Z" }, + { url = "https://files.pythonhosted.org/packages/66/e1/e533435c0be77c3f64040d68d7a657771194a63c279f55573188161e81ca/kiwisolver-1.4.9-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:dc1ae486f9abcef254b5618dfb4113dd49f94c68e3e027d03cf0143f3f772b61", size = 1435596, upload-time = "2025-08-10T21:25:56.861Z" }, + { url = "https://files.pythonhosted.org/packages/67/1e/51b73c7347f9aabdc7215aa79e8b15299097dc2f8e67dee2b095faca9cb0/kiwisolver-1.4.9-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8a1f570ce4d62d718dce3f179ee78dac3b545ac16c0c04bb363b7607a949c0d1", size = 1246548, upload-time = "2025-08-10T21:25:58.246Z" }, + { url = "https://files.pythonhosted.org/packages/21/aa/72a1c5d1e430294f2d32adb9542719cfb441b5da368d09d268c7757af46c/kiwisolver-1.4.9-cp311-cp311-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:cb27e7b78d716c591e88e0a09a2139c6577865d7f2e152488c2cc6257f460872", size = 1263618, upload-time = "2025-08-10T21:25:59.857Z" }, + { url = "https://files.pythonhosted.org/packages/a3/af/db1509a9e79dbf4c260ce0cfa3903ea8945f6240e9e59d1e4deb731b1a40/kiwisolver-1.4.9-cp311-cp311-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:15163165efc2f627eb9687ea5f3a28137217d217ac4024893d753f46bce9de26", size = 1317437, upload-time = "2025-08-10T21:26:01.105Z" }, + { url = "https://files.pythonhosted.org/packages/e0/f2/3ea5ee5d52abacdd12013a94130436e19969fa183faa1e7c7fbc89e9a42f/kiwisolver-1.4.9-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:bdee92c56a71d2b24c33a7d4c2856bd6419d017e08caa7802d2963870e315028", size = 2195742, upload-time = "2025-08-10T21:26:02.675Z" }, + { url = 
"https://files.pythonhosted.org/packages/6f/9b/1efdd3013c2d9a2566aa6a337e9923a00590c516add9a1e89a768a3eb2fc/kiwisolver-1.4.9-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:412f287c55a6f54b0650bd9b6dce5aceddb95864a1a90c87af16979d37c89771", size = 2290810, upload-time = "2025-08-10T21:26:04.009Z" }, + { url = "https://files.pythonhosted.org/packages/fb/e5/cfdc36109ae4e67361f9bc5b41323648cb24a01b9ade18784657e022e65f/kiwisolver-1.4.9-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:2c93f00dcba2eea70af2be5f11a830a742fe6b579a1d4e00f47760ef13be247a", size = 2461579, upload-time = "2025-08-10T21:26:05.317Z" }, + { url = "https://files.pythonhosted.org/packages/62/86/b589e5e86c7610842213994cdea5add00960076bef4ae290c5fa68589cac/kiwisolver-1.4.9-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:f117e1a089d9411663a3207ba874f31be9ac8eaa5b533787024dc07aeb74f464", size = 2268071, upload-time = "2025-08-10T21:26:06.686Z" }, + { url = "https://files.pythonhosted.org/packages/3b/c6/f8df8509fd1eee6c622febe54384a96cfaf4d43bf2ccec7a0cc17e4715c9/kiwisolver-1.4.9-cp311-cp311-win_amd64.whl", hash = "sha256:be6a04e6c79819c9a8c2373317d19a96048e5a3f90bec587787e86a1153883c2", size = 73840, upload-time = "2025-08-10T21:26:07.94Z" }, + { url = "https://files.pythonhosted.org/packages/e2/2d/16e0581daafd147bc11ac53f032a2b45eabac897f42a338d0a13c1e5c436/kiwisolver-1.4.9-cp311-cp311-win_arm64.whl", hash = "sha256:0ae37737256ba2de764ddc12aed4956460277f00c4996d51a197e72f62f5eec7", size = 65159, upload-time = "2025-08-10T21:26:09.048Z" }, { url = "https://files.pythonhosted.org/packages/86/c9/13573a747838aeb1c76e3267620daa054f4152444d1f3d1a2324b78255b5/kiwisolver-1.4.9-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:ac5a486ac389dddcc5bef4f365b6ae3ffff2c433324fb38dd35e3fab7c957999", size = 123686, upload-time = "2025-08-10T21:26:10.034Z" }, { url = "https://files.pythonhosted.org/packages/51/ea/2ecf727927f103ffd1739271ca19c424d0e65ea473fbaeea1c014aea93f6/kiwisolver-1.4.9-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:f2ba92255faa7309d06fe44c3a4a97efe1c8d640c2a79a5ef728b685762a6fd2", size = 66460, upload-time = "2025-08-10T21:26:11.083Z" }, { url = "https://files.pythonhosted.org/packages/5b/5a/51f5464373ce2aeb5194508298a508b6f21d3867f499556263c64c621914/kiwisolver-1.4.9-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:4a2899935e724dd1074cb568ce7ac0dce28b2cd6ab539c8e001a8578eb106d14", size = 64952, upload-time = "2025-08-10T21:26:12.058Z" }, @@ -724,6 +797,23 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/99/dd/841e9a66c4715477ea0abc78da039832fbb09dac5c35c58dc4c41a407b8a/kiwisolver-1.4.9-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:aedff62918805fb62d43a4aa2ecd4482c380dc76cd31bd7c8878588a61bd0369", size = 2391835, upload-time = "2025-08-10T21:27:34.23Z" }, { url = "https://files.pythonhosted.org/packages/0c/28/4b2e5c47a0da96896fdfdb006340ade064afa1e63675d01ea5ac222b6d52/kiwisolver-1.4.9-cp314-cp314t-win_amd64.whl", hash = "sha256:1fa333e8b2ce4d9660f2cda9c0e1b6bafcfb2457a9d259faa82289e73ec24891", size = 79988, upload-time = "2025-08-10T21:27:35.587Z" }, { url = "https://files.pythonhosted.org/packages/80/be/3578e8afd18c88cdf9cb4cffde75a96d2be38c5a903f1ed0ceec061bd09e/kiwisolver-1.4.9-cp314-cp314t-win_arm64.whl", hash = "sha256:4a48a2ce79d65d363597ef7b567ce3d14d68783d2b2263d98db3d9477805ba32", size = 70260, upload-time = "2025-08-10T21:27:36.606Z" }, + { url = 
"https://files.pythonhosted.org/packages/a3/0f/36d89194b5a32c054ce93e586d4049b6c2c22887b0eb229c61c68afd3078/kiwisolver-1.4.9-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:720e05574713db64c356e86732c0f3c5252818d05f9df320f0ad8380641acea5", size = 60104, upload-time = "2025-08-10T21:27:43.287Z" }, + { url = "https://files.pythonhosted.org/packages/52/ba/4ed75f59e4658fd21fe7dde1fee0ac397c678ec3befba3fe6482d987af87/kiwisolver-1.4.9-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:17680d737d5335b552994a2008fab4c851bcd7de33094a82067ef3a576ff02fa", size = 58592, upload-time = "2025-08-10T21:27:44.314Z" }, + { url = "https://files.pythonhosted.org/packages/33/01/a8ea7c5ea32a9b45ceeaee051a04c8ed4320f5add3c51bfa20879b765b70/kiwisolver-1.4.9-pp311-pypy311_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:85b5352f94e490c028926ea567fc569c52ec79ce131dadb968d3853e809518c2", size = 80281, upload-time = "2025-08-10T21:27:45.369Z" }, + { url = "https://files.pythonhosted.org/packages/da/e3/dbd2ecdce306f1d07a1aaf324817ee993aab7aee9db47ceac757deabafbe/kiwisolver-1.4.9-pp311-pypy311_pp73-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:464415881e4801295659462c49461a24fb107c140de781d55518c4b80cb6790f", size = 78009, upload-time = "2025-08-10T21:27:46.376Z" }, + { url = "https://files.pythonhosted.org/packages/da/e9/0d4add7873a73e462aeb45c036a2dead2562b825aa46ba326727b3f31016/kiwisolver-1.4.9-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:fb940820c63a9590d31d88b815e7a3aa5915cad3ce735ab45f0c730b39547de1", size = 73929, upload-time = "2025-08-10T21:27:48.236Z" }, +] + +[[package]] +name = "linkify-it-py" +version = "2.0.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "uc-micro-py" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/2a/ae/bb56c6828e4797ba5a4821eec7c43b8bf40f69cda4d4f5f8c8a2810ec96a/linkify-it-py-2.0.3.tar.gz", hash = "sha256:68cda27e162e9215c17d786649d1da0021a451bdc436ef9e0fa0ba5234b9b048", size = 27946, upload-time = "2024-02-04T14:48:04.179Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/04/1e/b832de447dee8b582cac175871d2f6c3d5077cc56d5575cadba1fd1cccfa/linkify_it_py-2.0.3-py3-none-any.whl", hash = "sha256:6bcbc417b0ac14323382aef5c5192c0075bf8a9d6b41820a2b66371eac6b6d79", size = 19820, upload-time = "2024-02-04T14:48:02.496Z" }, ] [[package]] @@ -747,12 +837,40 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/87/fb/99f81ac72ae23375f22b7afdb7642aba97c00a713c217124420147681a2f/mako-1.3.10-py3-none-any.whl", hash = "sha256:baef24a52fc4fc514a0887ac600f9f1cff3d82c61d4d700a1fa84d597b88db59", size = 78509, upload-time = "2025-04-10T12:50:53.297Z" }, ] +[[package]] +name = "markdown-it-py" +version = "4.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "mdurl" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/5b/f5/4ec618ed16cc4f8fb3b701563655a69816155e79e24a17b651541804721d/markdown_it_py-4.0.0.tar.gz", hash = "sha256:cb0a2b4aa34f932c007117b194e945bd74e0ec24133ceb5bac59009cda1cb9f3", size = 73070, upload-time = "2025-08-11T12:57:52.854Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/94/54/e7d793b573f298e1c9013b8c4dade17d481164aa517d1d7148619c2cedbf/markdown_it_py-4.0.0-py3-none-any.whl", hash = "sha256:87327c59b172c5011896038353a81343b6754500a08cd7a4973bb48c6d578147", size = 87321, upload-time = "2025-08-11T12:57:51.923Z" }, +] + +[package.optional-dependencies] +linkify = [ + { name = 
"linkify-it-py" }, +] + [[package]] name = "markupsafe" version = "3.0.3" source = { registry = "https://pypi.org/simple" } sdist = { url = "https://files.pythonhosted.org/packages/7e/99/7690b6d4034fffd95959cbe0c02de8deb3098cc577c67bb6a24fe5d7caa7/markupsafe-3.0.3.tar.gz", hash = "sha256:722695808f4b6457b320fdc131280796bdceb04ab50fe1795cd540799ebe1698", size = 80313, upload-time = "2025-09-27T18:37:40.426Z" } wheels = [ + { url = "https://files.pythonhosted.org/packages/08/db/fefacb2136439fc8dd20e797950e749aa1f4997ed584c62cfb8ef7c2be0e/markupsafe-3.0.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:1cc7ea17a6824959616c525620e387f6dd30fec8cb44f649e31712db02123dad", size = 11631, upload-time = "2025-09-27T18:36:18.185Z" }, + { url = "https://files.pythonhosted.org/packages/e1/2e/5898933336b61975ce9dc04decbc0a7f2fee78c30353c5efba7f2d6ff27a/markupsafe-3.0.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4bd4cd07944443f5a265608cc6aab442e4f74dff8088b0dfc8238647b8f6ae9a", size = 12058, upload-time = "2025-09-27T18:36:19.444Z" }, + { url = "https://files.pythonhosted.org/packages/1d/09/adf2df3699d87d1d8184038df46a9c80d78c0148492323f4693df54e17bb/markupsafe-3.0.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6b5420a1d9450023228968e7e6a9ce57f65d148ab56d2313fcd589eee96a7a50", size = 24287, upload-time = "2025-09-27T18:36:20.768Z" }, + { url = "https://files.pythonhosted.org/packages/30/ac/0273f6fcb5f42e314c6d8cd99effae6a5354604d461b8d392b5ec9530a54/markupsafe-3.0.3-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0bf2a864d67e76e5c9a34dc26ec616a66b9888e25e7b9460e1c76d3293bd9dbf", size = 22940, upload-time = "2025-09-27T18:36:22.249Z" }, + { url = "https://files.pythonhosted.org/packages/19/ae/31c1be199ef767124c042c6c3e904da327a2f7f0cd63a0337e1eca2967a8/markupsafe-3.0.3-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:bc51efed119bc9cfdf792cdeaa4d67e8f6fcccab66ed4bfdd6bde3e59bfcbb2f", size = 21887, upload-time = "2025-09-27T18:36:23.535Z" }, + { url = "https://files.pythonhosted.org/packages/b2/76/7edcab99d5349a4532a459e1fe64f0b0467a3365056ae550d3bcf3f79e1e/markupsafe-3.0.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:068f375c472b3e7acbe2d5318dea141359e6900156b5b2ba06a30b169086b91a", size = 23692, upload-time = "2025-09-27T18:36:24.823Z" }, + { url = "https://files.pythonhosted.org/packages/a4/28/6e74cdd26d7514849143d69f0bf2399f929c37dc2b31e6829fd2045b2765/markupsafe-3.0.3-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:7be7b61bb172e1ed687f1754f8e7484f1c8019780f6f6b0786e76bb01c2ae115", size = 21471, upload-time = "2025-09-27T18:36:25.95Z" }, + { url = "https://files.pythonhosted.org/packages/62/7e/a145f36a5c2945673e590850a6f8014318d5577ed7e5920a4b3448e0865d/markupsafe-3.0.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:f9e130248f4462aaa8e2552d547f36ddadbeaa573879158d721bbd33dfe4743a", size = 22923, upload-time = "2025-09-27T18:36:27.109Z" }, + { url = "https://files.pythonhosted.org/packages/0f/62/d9c46a7f5c9adbeeeda52f5b8d802e1094e9717705a645efc71b0913a0a8/markupsafe-3.0.3-cp311-cp311-win32.whl", hash = "sha256:0db14f5dafddbb6d9208827849fad01f1a2609380add406671a26386cdf15a19", size = 14572, upload-time = "2025-09-27T18:36:28.045Z" }, + { url = "https://files.pythonhosted.org/packages/83/8a/4414c03d3f891739326e1783338e48fb49781cc915b2e0ee052aa490d586/markupsafe-3.0.3-cp311-cp311-win_amd64.whl", hash = 
"sha256:de8a88e63464af587c950061a5e6a67d3632e36df62b986892331d4620a35c01", size = 15077, upload-time = "2025-09-27T18:36:29.025Z" }, + { url = "https://files.pythonhosted.org/packages/35/73/893072b42e6862f319b5207adc9ae06070f095b358655f077f69a35601f0/markupsafe-3.0.3-cp311-cp311-win_arm64.whl", hash = "sha256:3b562dd9e9ea93f13d53989d23a7e775fdfd1066c33494ff43f5418bc8c58a5c", size = 13876, upload-time = "2025-09-27T18:36:29.954Z" }, { url = "https://files.pythonhosted.org/packages/5a/72/147da192e38635ada20e0a2e1a51cf8823d2119ce8883f7053879c2199b5/markupsafe-3.0.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d53197da72cc091b024dd97249dfc7794d6a56530370992a5e1a08983ad9230e", size = 11615, upload-time = "2025-09-27T18:36:30.854Z" }, { url = "https://files.pythonhosted.org/packages/9a/81/7e4e08678a1f98521201c3079f77db69fb552acd56067661f8c2f534a718/markupsafe-3.0.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1872df69a4de6aead3491198eaf13810b565bdbeec3ae2dc8780f14458ec73ce", size = 12020, upload-time = "2025-09-27T18:36:31.971Z" }, { url = "https://files.pythonhosted.org/packages/1e/2c/799f4742efc39633a1b54a92eec4082e4f815314869865d876824c257c1e/markupsafe-3.0.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3a7e8ae81ae39e62a41ec302f972ba6ae23a5c5396c8e60113e9066ef893da0d", size = 24332, upload-time = "2025-09-27T18:36:32.813Z" }, @@ -827,6 +945,13 @@ dependencies = [ ] sdist = { url = "https://files.pythonhosted.org/packages/ae/e2/d2d5295be2f44c678ebaf3544ba32d20c1f9ef08c49fe47f496180e1db15/matplotlib-3.10.7.tar.gz", hash = "sha256:a06ba7e2a2ef9131c79c49e63dad355d2d878413a0376c1727c8b9335ff731c7", size = 34804865, upload-time = "2025-10-09T00:28:00.669Z" } wheels = [ + { url = "https://files.pythonhosted.org/packages/fc/bc/0fb489005669127ec13f51be0c6adc074d7cf191075dab1da9fe3b7a3cfc/matplotlib-3.10.7-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:53b492410a6cd66c7a471de6c924f6ede976e963c0f3097a3b7abfadddc67d0a", size = 8257507, upload-time = "2025-10-09T00:26:19.073Z" }, + { url = "https://files.pythonhosted.org/packages/e2/6a/d42588ad895279ff6708924645b5d2ed54a7fb2dc045c8a804e955aeace1/matplotlib-3.10.7-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d9749313deb729f08207718d29c86246beb2ea3fdba753595b55901dee5d2fd6", size = 8119565, upload-time = "2025-10-09T00:26:21.023Z" }, + { url = "https://files.pythonhosted.org/packages/10/b7/4aa196155b4d846bd749cf82aa5a4c300cf55a8b5e0dfa5b722a63c0f8a0/matplotlib-3.10.7-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:2222c7ba2cbde7fe63032769f6eb7e83ab3227f47d997a8453377709b7fe3a5a", size = 8692668, upload-time = "2025-10-09T00:26:22.967Z" }, + { url = "https://files.pythonhosted.org/packages/e6/e7/664d2b97016f46683a02d854d730cfcf54ff92c1dafa424beebef50f831d/matplotlib-3.10.7-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e91f61a064c92c307c5a9dc8c05dc9f8a68f0a3be199d9a002a0622e13f874a1", size = 9521051, upload-time = "2025-10-09T00:26:25.041Z" }, + { url = "https://files.pythonhosted.org/packages/a8/a3/37aef1404efa615f49b5758a5e0261c16dd88f389bc1861e722620e4a754/matplotlib-3.10.7-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:6f1851eab59ca082c95df5a500106bad73672645625e04538b3ad0f69471ffcc", size = 9576878, upload-time = "2025-10-09T00:26:27.478Z" }, + { url = "https://files.pythonhosted.org/packages/33/cd/b145f9797126f3f809d177ca378de57c45413c5099c5990de2658760594a/matplotlib-3.10.7-cp311-cp311-win_amd64.whl", 
hash = "sha256:6516ce375109c60ceec579e699524e9d504cd7578506f01150f7a6bc174a775e", size = 8115142, upload-time = "2025-10-09T00:26:29.774Z" }, + { url = "https://files.pythonhosted.org/packages/2e/39/63bca9d2b78455ed497fcf51a9c71df200a11048f48249038f06447fa947/matplotlib-3.10.7-cp311-cp311-win_arm64.whl", hash = "sha256:b172db79759f5f9bc13ef1c3ef8b9ee7b37b0247f987fbbbdaa15e4f87fd46a9", size = 7992439, upload-time = "2025-10-09T00:26:40.32Z" }, { url = "https://files.pythonhosted.org/packages/be/b3/09eb0f7796932826ec20c25b517d568627754f6c6462fca19e12c02f2e12/matplotlib-3.10.7-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:7a0edb7209e21840e8361e91ea84ea676658aa93edd5f8762793dec77a4a6748", size = 8272389, upload-time = "2025-10-09T00:26:42.474Z" }, { url = "https://files.pythonhosted.org/packages/11/0b/1ae80ddafb8652fd8046cb5c8460ecc8d4afccb89e2c6d6bec61e04e1eaf/matplotlib-3.10.7-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:c380371d3c23e0eadf8ebff114445b9f970aff2010198d498d4ab4c3b41eea4f", size = 8128247, upload-time = "2025-10-09T00:26:44.77Z" }, { url = "https://files.pythonhosted.org/packages/7d/18/95ae2e242d4a5c98bd6e90e36e128d71cf1c7e39b0874feaed3ef782e789/matplotlib-3.10.7-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d5f256d49fea31f40f166a5e3131235a5d2f4b7f44520b1cf0baf1ce568ccff0", size = 8696996, upload-time = "2025-10-09T00:26:46.792Z" }, @@ -862,6 +987,73 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e6/a5/85e2edf76ea0ad4288d174926d9454ea85f3ce5390cc4e6fab196cbf250b/matplotlib-3.10.7-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:702590829c30aada1e8cef0568ddbffa77ca747b4d6e36c6d173f66e301f89cc", size = 9594066, upload-time = "2025-10-09T00:27:43.694Z" }, { url = "https://files.pythonhosted.org/packages/39/69/9684368a314f6d83fe5c5ad2a4121a3a8e03723d2e5c8ea17b66c1bad0e7/matplotlib-3.10.7-cp314-cp314t-win_amd64.whl", hash = "sha256:f79d5de970fc90cd5591f60053aecfce1fcd736e0303d9f0bf86be649fa68fb8", size = 8342832, upload-time = "2025-10-09T00:27:45.543Z" }, { url = "https://files.pythonhosted.org/packages/04/5f/e22e08da14bc1a0894184640d47819d2338b792732e20d292bf86e5ab785/matplotlib-3.10.7-cp314-cp314t-win_arm64.whl", hash = "sha256:cb783436e47fcf82064baca52ce748af71725d0352e1d31564cbe9c95df92b9c", size = 8172585, upload-time = "2025-10-09T00:27:47.185Z" }, + { url = "https://files.pythonhosted.org/packages/58/8f/76d5dc21ac64a49e5498d7f0472c0781dae442dd266a67458baec38288ec/matplotlib-3.10.7-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:15112bcbaef211bd663fa935ec33313b948e214454d949b723998a43357b17b0", size = 8252283, upload-time = "2025-10-09T00:27:54.739Z" }, + { url = "https://files.pythonhosted.org/packages/27/0d/9c5d4c2317feb31d819e38c9f947c942f42ebd4eb935fc6fd3518a11eaa7/matplotlib-3.10.7-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:d2a959c640cdeecdd2ec3136e8ea0441da59bcaf58d67e9c590740addba2cb68", size = 8116733, upload-time = "2025-10-09T00:27:56.406Z" }, + { url = "https://files.pythonhosted.org/packages/9a/cc/3fe688ff1355010937713164caacf9ed443675ac48a997bab6ed23b3f7c0/matplotlib-3.10.7-pp311-pypy311_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:3886e47f64611046bc1db523a09dd0a0a6bed6081e6f90e13806dd1d1d1b5e91", size = 8693919, upload-time = "2025-10-09T00:27:58.41Z" }, +] + +[[package]] +name = "mdit-py-plugins" +version = "0.5.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "markdown-it-py" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/b2/fd/a756d36c0bfba5f6e39a1cdbdbfdd448dc02692467d83816dff4592a1ebc/mdit_py_plugins-0.5.0.tar.gz", hash = "sha256:f4918cb50119f50446560513a8e311d574ff6aaed72606ddae6d35716fe809c6", size = 44655, upload-time = "2025-08-11T07:25:49.083Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fb/86/dd6e5db36df29e76c7a7699123569a4a18c1623ce68d826ed96c62643cae/mdit_py_plugins-0.5.0-py3-none-any.whl", hash = "sha256:07a08422fc1936a5d26d146759e9155ea466e842f5ab2f7d2266dd084c8dab1f", size = 57205, upload-time = "2025-08-11T07:25:47.597Z" }, +] + +[[package]] +name = "mdurl" +version = "0.1.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d6/54/cfe61301667036ec958cb99bd3efefba235e65cdeb9c84d24a8293ba1d90/mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba", size = 8729, upload-time = "2022-08-14T12:40:10.846Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979, upload-time = "2022-08-14T12:40:09.779Z" }, +] + +[[package]] +name = "memray" +version = "1.19.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "jinja2" }, + { name = "rich" }, + { name = "textual" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/36/18/5df5995a7b142e12ab194f4b2fd1473efd51f4f622dfe47f3c013c3c11f7/memray-1.19.1.tar.gz", hash = "sha256:7fcf306eae2c00144920b01913f42fa7f235af7a80fa3226ab124672a5cb1d8f", size = 2395421, upload-time = "2025-10-16T02:26:51.513Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0f/3b/3b4486ca09a304b5083c211bfc11ef8f982dea8ddfee81bd53d13dcae57d/memray-1.19.1-cp311-cp311-macosx_10_14_x86_64.whl", hash = "sha256:2cb5026aede2805215edc442519d85ecf0604e98bd1d9ef6be060004547f6688", size = 2185258, upload-time = "2025-10-16T02:25:37.4Z" }, + { url = "https://files.pythonhosted.org/packages/eb/d9/3a0765130b889ccc6f79c33835f90e71fd5b9093e4a4ccd00e3136bbd337/memray-1.19.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e2eb057b3e9545a1bc90ca71911834bde019f66d7e5306729abce3478a23855b", size = 2152129, upload-time = "2025-10-16T02:25:38.863Z" }, + { url = "https://files.pythonhosted.org/packages/ad/cc/82c52291e161148a6bff30525d7066d146c8b74507894580884944f2efb1/memray-1.19.1-cp311-cp311-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:004588fbf0ac91fb15d58e09b16c5fc28644ee48893b04dfcd28338ae56e378d", size = 9787551, upload-time = "2025-10-16T02:25:40.285Z" }, + { url = "https://files.pythonhosted.org/packages/2e/c4/630d0ec979c0d2a36edaaf3f8cc9de7eaa19b0319147b5c106abb53f429c/memray-1.19.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:548cb205ef856f546275754abc1c5f7f6aafac427c247bb4581791eaaa47a770", size = 10031985, upload-time = "2025-10-16T02:25:42.576Z" }, + { url = "https://files.pythonhosted.org/packages/04/11/c00f16dca17915657c64cf81892a306afe3fd10fbaa872eeb1cf7b6f3226/memray-1.19.1-cp311-cp311-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b53dfb11f22d390a3d58cfee59eef8c2385bcc3cff5e7f79c80fa5952bc224a4", size = 9415840, upload-time = "2025-10-16T02:25:44.199Z" }, + { url = 
"https://files.pythonhosted.org/packages/3e/41/e2536161e6069b592b6d749f9a31182cb5e9be7bdb373a4ba8ae8ad33089/memray-1.19.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:464a2705c601ab2ff59d26c99345965a33bcd98065877a0a1884d9a17745ccd1", size = 12260092, upload-time = "2025-10-16T02:25:47.048Z" }, + { url = "https://files.pythonhosted.org/packages/49/8d/42030314a7f8721984e3df85186c8432b15f05b2b6d915ed0f322aa7eb45/memray-1.19.1-cp312-cp312-macosx_10_14_x86_64.whl", hash = "sha256:57b0430e4800b8cbc38e1f529ad7af959cc96386e00773c8af57c46eddb15ecd", size = 2187201, upload-time = "2025-10-16T02:25:48.744Z" }, + { url = "https://files.pythonhosted.org/packages/fa/8c/92972176b8079a7ffb367958e118475c7a0d13c3983215fd280f4ea69c6f/memray-1.19.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:595414e753a0152282974b954827aeaf552dc02f47ed16a2743821ed461b6c51", size = 2155387, upload-time = "2025-10-16T02:25:49.802Z" }, + { url = "https://files.pythonhosted.org/packages/7a/ab/47e5beaa5291c2e3e2e695bcbaf8266ed61734ece75fb8bf8f24041ad660/memray-1.19.1-cp312-cp312-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:ead57c4be9ea89b78d8ce2017f8f3e28f552fc2279cf5d24bf75d70bdfe39ca7", size = 9748375, upload-time = "2025-10-16T02:25:51.107Z" }, + { url = "https://files.pythonhosted.org/packages/ef/15/d9331de7f2e7ff88289998d0feb5b14e97902abac1753b6238fdc0b9c395/memray-1.19.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:41d829191287a995eea8b832fe7c8de243cf9e5d32d53169952695c7210e3a6b", size = 10019968, upload-time = "2025-10-16T02:25:53.357Z" }, + { url = "https://files.pythonhosted.org/packages/a6/48/07353c55b8e40d03f125e2fb750cae3845dabed6a3c4e7c78214cfd927f5/memray-1.19.1-cp312-cp312-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5855a9c3f3cfcf8ef01151514332535756b5d7be17bdba84016b0ca57d86f7f8", size = 9397345, upload-time = "2025-10-16T02:25:54.998Z" }, + { url = "https://files.pythonhosted.org/packages/51/05/47706972dc07c50ed7c4098d6e0d19e322dee05769952ff945d5e54dc04d/memray-1.19.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:216918a42abdd3c18c4771862584feda3a24bf7205db6f000a41be9ddc1c98b4", size = 12238354, upload-time = "2025-10-16T02:25:56.684Z" }, + { url = "https://files.pythonhosted.org/packages/0e/5f/a67da1226dc993501253164bd348d553a522e954057fd1042a74d0ee5768/memray-1.19.1-cp313-cp313-macosx_10_14_x86_64.whl", hash = "sha256:56b20156c2754762ccfcfa03fd88ce33ecd712aacd302ef099a871b3197fe4a2", size = 2185883, upload-time = "2025-10-16T02:25:59.046Z" }, + { url = "https://files.pythonhosted.org/packages/50/94/e48e6999910b542254e5354b07cac6cad6dd1c4d4f1dd8b2cb16c6bdffc6/memray-1.19.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:e9d93995a91a8383fda95a1f7a15247aca2abd2f80f7f7c7ff56b3d89a5d7893", size = 2154720, upload-time = "2025-10-16T02:26:00.547Z" }, + { url = "https://files.pythonhosted.org/packages/b0/dd/5d90c042c0afce46b42de271b6157ca32048522f7ba8097a602593b2292a/memray-1.19.1-cp313-cp313-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:500956020d245ad3440cc2fae06c1d781f339e30f8d58654bc5ae9f51f999fab", size = 9745457, upload-time = "2025-10-16T02:26:01.761Z" }, + { url = "https://files.pythonhosted.org/packages/52/40/617b15e62d5de1718e81ee436a1f19d4d40274ead97ac0eda188baebb986/memray-1.19.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:9770b6046a8b7d7bb982c2cefb324a60a0ee7cd7c35c58c0df058355a9a54676", size = 10019011, upload-time = "2025-10-16T02:26:03.75Z" }, + { url = 
"https://files.pythonhosted.org/packages/47/9c/cdd27e52876244a8350ade32460eb18ae4a5f69656d0f02474f9fbdb1f85/memray-1.19.1-cp313-cp313-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:89593bfec1924aff4571e7bb00066b1cd832a828d701b0380009d790139aa814", size = 9397754, upload-time = "2025-10-16T02:26:05.934Z" }, + { url = "https://files.pythonhosted.org/packages/4e/74/17352ebd79117d46064016cce8389d88920da1cb99883cb1838f59221176/memray-1.19.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:5dd00e0b4f5820f7a793691c0faeb15e4fbb5472198184605c29d0a961355741", size = 12244258, upload-time = "2025-10-16T02:26:07.614Z" }, + { url = "https://files.pythonhosted.org/packages/c8/74/29f3f3f07b2e5a208d9f9bf3fee2374a5f155ff82c4b124b69b8ee246cc7/memray-1.19.1-cp314-cp314-macosx_10_14_x86_64.whl", hash = "sha256:b202f1d96211d73712f5db8281c437dcbbd9cc91e520ca44b8406466b9672624", size = 2185812, upload-time = "2025-10-16T02:26:09.582Z" }, + { url = "https://files.pythonhosted.org/packages/f6/81/e96753df56e5369b8e123fa62f291aac1ca7d34b8368c2cf7a11e91e2a0e/memray-1.19.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:f1a99cbb0b413a945e07529e521c7441cb46e4d5e6868dd810cbdaa80af0b74c", size = 2156210, upload-time = "2025-10-16T02:26:10.655Z" }, + { url = "https://files.pythonhosted.org/packages/34/0c/8de8faa01bf8fcf8938d845d93026310b8c3ee4cbb2402382a92898533d5/memray-1.19.1-cp314-cp314-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:c48461e7a8ba0b12ae740316e41564e18db2533ebeb1a093b2c8232d9c7c2653", size = 9745083, upload-time = "2025-10-16T02:26:12.194Z" }, + { url = "https://files.pythonhosted.org/packages/15/77/9a0e205fd77ceabd2ce2359cb6b27e7e7e36cb9fc74ee1361643246b8307/memray-1.19.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:265812e729c90a9240d6a23dfa89d8bea11cb67d37a1411f7a690948584ff024", size = 9999936, upload-time = "2025-10-16T02:26:16.636Z" }, + { url = "https://files.pythonhosted.org/packages/e3/ea/58049103b757f8cdf6bff78eb24317d781f28bc44a5fe38887efe1937ac8/memray-1.19.1-cp314-cp314-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:808eb35fa012fa8e25582e3c9b76d9f0471e87776c7cd86e6c149da34fed22ed", size = 9394455, upload-time = "2025-10-16T02:26:18.423Z" }, + { url = "https://files.pythonhosted.org/packages/1e/fc/93a97315d3d321243b60fe70366db1b7846e4b5911e7ebda7e118522e5c1/memray-1.19.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:e4b33fa1b6e8619e589882b44e6bdce0ad51d8bea2dd24f7afae6efcfcd8ffa8", size = 12233185, upload-time = "2025-10-16T02:26:20.508Z" }, + { url = "https://files.pythonhosted.org/packages/ad/3c/e5154059614a181fd13ed4106b4a69a811da35a2dabf2be1642c640977c9/memray-1.19.1-cp314-cp314t-macosx_10_14_x86_64.whl", hash = "sha256:e905d04e337e1482af988f349b1062ec330408bf1d8e5b0cfe8c0c7b47959692", size = 2202778, upload-time = "2025-10-16T02:26:22.094Z" }, + { url = "https://files.pythonhosted.org/packages/52/3f/473fb7935f4f34834da63a59f659201b917cd1386decbccf7c8e9bfd74cc/memray-1.19.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:9fa0d8d8df8a0cad97e934dcd1cb698af00ffb10cd79277907c2cf97212f0bd9", size = 2172671, upload-time = "2025-10-16T02:26:23.187Z" }, + { url = "https://files.pythonhosted.org/packages/51/f5/13f1ce7a88d4437eb2b4a24c7b7cead1fedf9284e5381b86e4630a3ce044/memray-1.19.1-cp314-cp314t-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:2fe3886eef669017810782ce63b1cdf8a426f07a27ea6a9f73d9dc3e5c448b0b", size = 9703654, upload-time = "2025-10-16T02:26:24.402Z" }, + { url = 
"https://files.pythonhosted.org/packages/bf/0a/a23f7b893915bfb1536963d5923ec048f10c70ec83c2ba31843d357be4c5/memray-1.19.1-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f7238113251d325da2d405b067ec180842b93a7fb10ff06fb3f7c261225b33ae", size = 9962696, upload-time = "2025-10-16T02:26:26.142Z" }, + { url = "https://files.pythonhosted.org/packages/37/28/c1a1fc6bdd2cf3f5241623c54ac66525b30c9c7bb16e2ad70046d9bd8db7/memray-1.19.1-cp314-cp314t-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:334754a0ad5664a703516307772a4555ffdc616586168b28efd31e8862a6cdb1", size = 9379408, upload-time = "2025-10-16T02:26:27.937Z" }, + { url = "https://files.pythonhosted.org/packages/ab/45/4bf72fff0070f1760e0ead5fc78b50e2bef35be702cb7c3cd5b33aa776d5/memray-1.19.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:6a193cc20bbe60eccee8c9b4d9eb78ccd69a3248f0291d5d1a7fdda62aa19b53", size = 12148593, upload-time = "2025-10-16T02:26:29.708Z" }, ] [[package]] @@ -879,6 +1071,24 @@ version = "6.7.0" source = { registry = "https://pypi.org/simple" } sdist = { url = "https://files.pythonhosted.org/packages/80/1e/5492c365f222f907de1039b91f922b93fa4f764c713ee858d235495d8f50/multidict-6.7.0.tar.gz", hash = "sha256:c6e99d9a65ca282e578dfea819cfa9c0a62b2499d8677392e09feaf305e9e6f5", size = 101834, upload-time = "2025-10-06T14:52:30.657Z" } wheels = [ + { url = "https://files.pythonhosted.org/packages/34/9e/5c727587644d67b2ed479041e4b1c58e30afc011e3d45d25bbe35781217c/multidict-6.7.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:4d409aa42a94c0b3fa617708ef5276dfe81012ba6753a0370fcc9d0195d0a1fc", size = 76604, upload-time = "2025-10-06T14:48:54.277Z" }, + { url = "https://files.pythonhosted.org/packages/17/e4/67b5c27bd17c085a5ea8f1ec05b8a3e5cba0ca734bfcad5560fb129e70ca/multidict-6.7.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:14c9e076eede3b54c636f8ce1c9c252b5f057c62131211f0ceeec273810c9721", size = 44715, upload-time = "2025-10-06T14:48:55.445Z" }, + { url = "https://files.pythonhosted.org/packages/4d/e1/866a5d77be6ea435711bef2a4291eed11032679b6b28b56b4776ab06ba3e/multidict-6.7.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4c09703000a9d0fa3c3404b27041e574cc7f4df4c6563873246d0e11812a94b6", size = 44332, upload-time = "2025-10-06T14:48:56.706Z" }, + { url = "https://files.pythonhosted.org/packages/31/61/0c2d50241ada71ff61a79518db85ada85fdabfcf395d5968dae1cbda04e5/multidict-6.7.0-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:a265acbb7bb33a3a2d626afbe756371dce0279e7b17f4f4eda406459c2b5ff1c", size = 245212, upload-time = "2025-10-06T14:48:58.042Z" }, + { url = "https://files.pythonhosted.org/packages/ac/e0/919666a4e4b57fff1b57f279be1c9316e6cdc5de8a8b525d76f6598fefc7/multidict-6.7.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:51cb455de290ae462593e5b1cb1118c5c22ea7f0d3620d9940bf695cea5a4bd7", size = 246671, upload-time = "2025-10-06T14:49:00.004Z" }, + { url = "https://files.pythonhosted.org/packages/a1/cc/d027d9c5a520f3321b65adea289b965e7bcbd2c34402663f482648c716ce/multidict-6.7.0-cp311-cp311-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:db99677b4457c7a5c5a949353e125ba72d62b35f74e26da141530fbb012218a7", size = 225491, upload-time = "2025-10-06T14:49:01.393Z" }, + { url = 
"https://files.pythonhosted.org/packages/75/c4/bbd633980ce6155a28ff04e6a6492dd3335858394d7bb752d8b108708558/multidict-6.7.0-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f470f68adc395e0183b92a2f4689264d1ea4b40504a24d9882c27375e6662bb9", size = 257322, upload-time = "2025-10-06T14:49:02.745Z" }, + { url = "https://files.pythonhosted.org/packages/4c/6d/d622322d344f1f053eae47e033b0b3f965af01212de21b10bcf91be991fb/multidict-6.7.0-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:0db4956f82723cc1c270de9c6e799b4c341d327762ec78ef82bb962f79cc07d8", size = 254694, upload-time = "2025-10-06T14:49:04.15Z" }, + { url = "https://files.pythonhosted.org/packages/a8/9f/78f8761c2705d4c6d7516faed63c0ebdac569f6db1bef95e0d5218fdc146/multidict-6.7.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3e56d780c238f9e1ae66a22d2adf8d16f485381878250db8d496623cd38b22bd", size = 246715, upload-time = "2025-10-06T14:49:05.967Z" }, + { url = "https://files.pythonhosted.org/packages/78/59/950818e04f91b9c2b95aab3d923d9eabd01689d0dcd889563988e9ea0fd8/multidict-6.7.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:9d14baca2ee12c1a64740d4531356ba50b82543017f3ad6de0deb943c5979abb", size = 243189, upload-time = "2025-10-06T14:49:07.37Z" }, + { url = "https://files.pythonhosted.org/packages/7a/3d/77c79e1934cad2ee74991840f8a0110966d9599b3af95964c0cd79bb905b/multidict-6.7.0-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:295a92a76188917c7f99cda95858c822f9e4aae5824246bba9b6b44004ddd0a6", size = 237845, upload-time = "2025-10-06T14:49:08.759Z" }, + { url = "https://files.pythonhosted.org/packages/63/1b/834ce32a0a97a3b70f86437f685f880136677ac00d8bce0027e9fd9c2db7/multidict-6.7.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:39f1719f57adbb767ef592a50ae5ebb794220d1188f9ca93de471336401c34d2", size = 246374, upload-time = "2025-10-06T14:49:10.574Z" }, + { url = "https://files.pythonhosted.org/packages/23/ef/43d1c3ba205b5dec93dc97f3fba179dfa47910fc73aaaea4f7ceb41cec2a/multidict-6.7.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:0a13fb8e748dfc94749f622de065dd5c1def7e0d2216dba72b1d8069a389c6ff", size = 253345, upload-time = "2025-10-06T14:49:12.331Z" }, + { url = "https://files.pythonhosted.org/packages/6b/03/eaf95bcc2d19ead522001f6a650ef32811aa9e3624ff0ad37c445c7a588c/multidict-6.7.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:e3aa16de190d29a0ea1b48253c57d99a68492c8dd8948638073ab9e74dc9410b", size = 246940, upload-time = "2025-10-06T14:49:13.821Z" }, + { url = "https://files.pythonhosted.org/packages/e8/df/ec8a5fd66ea6cd6f525b1fcbb23511b033c3e9bc42b81384834ffa484a62/multidict-6.7.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:a048ce45dcdaaf1defb76b2e684f997fb5abf74437b6cb7b22ddad934a964e34", size = 242229, upload-time = "2025-10-06T14:49:15.603Z" }, + { url = "https://files.pythonhosted.org/packages/8a/a2/59b405d59fd39ec86d1142630e9049243015a5f5291ba49cadf3c090c541/multidict-6.7.0-cp311-cp311-win32.whl", hash = "sha256:a90af66facec4cebe4181b9e62a68be65e45ac9b52b67de9eec118701856e7ff", size = 41308, upload-time = "2025-10-06T14:49:16.871Z" }, + { url = "https://files.pythonhosted.org/packages/32/0f/13228f26f8b882c34da36efa776c3b7348455ec383bab4a66390e42963ae/multidict-6.7.0-cp311-cp311-win_amd64.whl", hash = "sha256:95b5ffa4349df2887518bb839409bcf22caa72d82beec453216802f475b23c81", size = 46037, upload-time = "2025-10-06T14:49:18.457Z" }, + { url = 
"https://files.pythonhosted.org/packages/84/1f/68588e31b000535a3207fd3c909ebeec4fb36b52c442107499c18a896a2a/multidict-6.7.0-cp311-cp311-win_arm64.whl", hash = "sha256:329aa225b085b6f004a4955271a7ba9f1087e39dcb7e65f6284a988264a63912", size = 43023, upload-time = "2025-10-06T14:49:19.648Z" }, { url = "https://files.pythonhosted.org/packages/c2/9e/9f61ac18d9c8b475889f32ccfa91c9f59363480613fc807b6e3023d6f60b/multidict-6.7.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:8a3862568a36d26e650a19bb5cbbba14b71789032aebc0423f8cc5f150730184", size = 76877, upload-time = "2025-10-06T14:49:20.884Z" }, { url = "https://files.pythonhosted.org/packages/38/6f/614f09a04e6184f8824268fce4bc925e9849edfa654ddd59f0b64508c595/multidict-6.7.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:960c60b5849b9b4f9dcc9bea6e3626143c252c74113df2c1540aebce70209b45", size = 45467, upload-time = "2025-10-06T14:49:22.054Z" }, { url = "https://files.pythonhosted.org/packages/b3/93/c4f67a436dd026f2e780c433277fff72be79152894d9fc36f44569cab1a6/multidict-6.7.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2049be98fb57a31b4ccf870bf377af2504d4ae35646a19037ec271e4c07998aa", size = 43834, upload-time = "2025-10-06T14:49:23.566Z" }, @@ -981,6 +1191,9 @@ dependencies = [ ] sdist = { url = "https://files.pythonhosted.org/packages/72/fd/2ae3826f5be24c6ed87266bc4e59c46ea5b059a103f3d7e7eb76a52aeecb/multiprocess-0.70.18.tar.gz", hash = "sha256:f9597128e6b3e67b23956da07cf3d2e5cba79e2f4e0fba8d7903636663ec6d0d", size = 1798503, upload-time = "2025-04-17T03:11:27.742Z" } wheels = [ + { url = "https://files.pythonhosted.org/packages/55/4d/9af0d1279c84618bcd35bf5fd7e371657358c7b0a523e54a9cffb87461f8/multiprocess-0.70.18-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:8b8940ae30139e04b076da6c5b83e9398585ebdf0f2ad3250673fef5b2ff06d6", size = 144695, upload-time = "2025-04-17T03:11:09.161Z" }, + { url = "https://files.pythonhosted.org/packages/17/bf/87323e79dd0562474fad3373c21c66bc6c3c9963b68eb2a209deb4c8575e/multiprocess-0.70.18-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:0929ba95831adb938edbd5fb801ac45e705ecad9d100b3e653946b7716cb6bd3", size = 144742, upload-time = "2025-04-17T03:11:10.072Z" }, + { url = "https://files.pythonhosted.org/packages/dd/74/cb8c831e58dc6d5cf450b17c7db87f14294a1df52eb391da948b5e0a0b94/multiprocess-0.70.18-pp311-pypy311_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:4d77f8e4bfe6c6e2e661925bbf9aed4d5ade9a1c6502d5dfc10129b9d1141797", size = 144745, upload-time = "2025-04-17T03:11:11.453Z" }, { url = "https://files.pythonhosted.org/packages/ba/d8/0cba6cf51a1a31f20471fbc823a716170c73012ddc4fb85d706630ed6e8f/multiprocess-0.70.18-py310-none-any.whl", hash = "sha256:60c194974c31784019c1f459d984e8f33ee48f10fcf42c309ba97b30d9bd53ea", size = 134948, upload-time = "2025-04-17T03:11:20.223Z" }, { url = "https://files.pythonhosted.org/packages/4b/88/9039f2fed1012ef584751d4ceff9ab4a51e5ae264898f0b7cbf44340a859/multiprocess-0.70.18-py311-none-any.whl", hash = "sha256:5aa6eef98e691281b3ad923be2832bf1c55dd2c859acd73e5ec53a66aae06a1d", size = 144462, upload-time = "2025-04-17T03:11:21.657Z" }, { url = "https://files.pythonhosted.org/packages/bf/b6/5f922792be93b82ec6b5f270bbb1ef031fd0622847070bbcf9da816502cc/multiprocess-0.70.18-py312-none-any.whl", hash = "sha256:9b78f8e5024b573730bfb654783a13800c2c0f2dfc0c25e70b40d184d64adaa2", size = 150287, upload-time = "2025-04-17T03:11:22.69Z" }, @@ -991,74 +1204,92 @@ wheels = [ [[package]] name = "networkx" -version = "3.5" +version = "3.6.1" source = { 
registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/6c/4f/ccdb8ad3a38e583f214547fd2f7ff1fc160c43a75af88e6aec213404b96a/networkx-3.5.tar.gz", hash = "sha256:d4c6f9cf81f52d69230866796b82afbccdec3db7ae4fbd1b65ea750feed50037", size = 2471065, upload-time = "2025-05-29T11:35:07.804Z" } +sdist = { url = "https://files.pythonhosted.org/packages/6a/51/63fe664f3908c97be9d2e4f1158eb633317598cfa6e1fc14af5383f17512/networkx-3.6.1.tar.gz", hash = "sha256:26b7c357accc0c8cde558ad486283728b65b6a95d85ee1cd66bafab4c8168509", size = 2517025, upload-time = "2025-12-08T17:02:39.908Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/eb/8d/776adee7bbf76365fdd7f2552710282c79a4ead5d2a46408c9043a2b70ba/networkx-3.5-py3-none-any.whl", hash = "sha256:0030d386a9a06dee3565298b4a734b68589749a544acbb6c412dc9e2489ec6ec", size = 2034406, upload-time = "2025-05-29T11:35:04.961Z" }, + { url = "https://files.pythonhosted.org/packages/9e/c9/b2622292ea83fbb4ec318f5b9ab867d0a28ab43c5717bb85b0a5f6b3b0a4/networkx-3.6.1-py3-none-any.whl", hash = "sha256:d47fbf302e7d9cbbb9e2555a0d267983d2aa476bac30e90dfbe5669bd57f3762", size = 2068504, upload-time = "2025-12-08T17:02:38.159Z" }, ] [[package]] name = "numpy" -version = "2.3.4" +version = "2.3.5" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/b5/f4/098d2270d52b41f1bd7db9fc288aaa0400cb48c2a3e2af6fa365d9720947/numpy-2.3.4.tar.gz", hash = "sha256:a7d018bfedb375a8d979ac758b120ba846a7fe764911a64465fd87b8729f4a6a", size = 20582187, upload-time = "2025-10-15T16:18:11.77Z" } +sdist = { url = "https://files.pythonhosted.org/packages/76/65/21b3bc86aac7b8f2862db1e808f1ea22b028e30a225a34a5ede9bf8678f2/numpy-2.3.5.tar.gz", hash = "sha256:784db1dcdab56bf0517743e746dfb0f885fc68d948aba86eeec2cba234bdf1c0", size = 20584950, upload-time = "2025-11-16T22:52:42.067Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/96/7a/02420400b736f84317e759291b8edaeee9dc921f72b045475a9cbdb26b17/numpy-2.3.4-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:ef1b5a3e808bc40827b5fa2c8196151a4c5abe110e1726949d7abddfe5c7ae11", size = 20957727, upload-time = "2025-10-15T16:15:44.9Z" }, - { url = "https://files.pythonhosted.org/packages/18/90/a014805d627aa5750f6f0e878172afb6454552da929144b3c07fcae1bb13/numpy-2.3.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:c2f91f496a87235c6aaf6d3f3d89b17dba64996abadccb289f48456cff931ca9", size = 14187262, upload-time = "2025-10-15T16:15:47.761Z" }, - { url = "https://files.pythonhosted.org/packages/c7/e4/0a94b09abe89e500dc748e7515f21a13e30c5c3fe3396e6d4ac108c25fca/numpy-2.3.4-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:f77e5b3d3da652b474cc80a14084927a5e86a5eccf54ca8ca5cbd697bf7f2667", size = 5115992, upload-time = "2025-10-15T16:15:50.144Z" }, - { url = "https://files.pythonhosted.org/packages/88/dd/db77c75b055c6157cbd4f9c92c4458daef0dd9cbe6d8d2fe7f803cb64c37/numpy-2.3.4-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:8ab1c5f5ee40d6e01cbe96de5863e39b215a4d24e7d007cad56c7184fdf4aeef", size = 6648672, upload-time = "2025-10-15T16:15:52.442Z" }, - { url = "https://files.pythonhosted.org/packages/e1/e6/e31b0d713719610e406c0ea3ae0d90760465b086da8783e2fd835ad59027/numpy-2.3.4-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:77b84453f3adcb994ddbd0d1c5d11db2d6bda1a2b7fd5ac5bd4649d6f5dc682e", size = 14284156, upload-time = "2025-10-15T16:15:54.351Z" }, - { url = 
"https://files.pythonhosted.org/packages/f9/58/30a85127bfee6f108282107caf8e06a1f0cc997cb6b52cdee699276fcce4/numpy-2.3.4-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4121c5beb58a7f9e6dfdee612cb24f4df5cd4db6e8261d7f4d7450a997a65d6a", size = 16641271, upload-time = "2025-10-15T16:15:56.67Z" }, - { url = "https://files.pythonhosted.org/packages/06/f2/2e06a0f2adf23e3ae29283ad96959267938d0efd20a2e25353b70065bfec/numpy-2.3.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:65611ecbb00ac9846efe04db15cbe6186f562f6bb7e5e05f077e53a599225d16", size = 16059531, upload-time = "2025-10-15T16:15:59.412Z" }, - { url = "https://files.pythonhosted.org/packages/b0/e7/b106253c7c0d5dc352b9c8fab91afd76a93950998167fa3e5afe4ef3a18f/numpy-2.3.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:dabc42f9c6577bcc13001b8810d300fe814b4cfbe8a92c873f269484594f9786", size = 18578983, upload-time = "2025-10-15T16:16:01.804Z" }, - { url = "https://files.pythonhosted.org/packages/73/e3/04ecc41e71462276ee867ccbef26a4448638eadecf1bc56772c9ed6d0255/numpy-2.3.4-cp312-cp312-win32.whl", hash = "sha256:a49d797192a8d950ca59ee2d0337a4d804f713bb5c3c50e8db26d49666e351dc", size = 6291380, upload-time = "2025-10-15T16:16:03.938Z" }, - { url = "https://files.pythonhosted.org/packages/3d/a8/566578b10d8d0e9955b1b6cd5db4e9d4592dd0026a941ff7994cedda030a/numpy-2.3.4-cp312-cp312-win_amd64.whl", hash = "sha256:985f1e46358f06c2a09921e8921e2c98168ed4ae12ccd6e5e87a4f1857923f32", size = 12787999, upload-time = "2025-10-15T16:16:05.801Z" }, - { url = "https://files.pythonhosted.org/packages/58/22/9c903a957d0a8071b607f5b1bff0761d6e608b9a965945411f867d515db1/numpy-2.3.4-cp312-cp312-win_arm64.whl", hash = "sha256:4635239814149e06e2cb9db3dd584b2fa64316c96f10656983b8026a82e6e4db", size = 10197412, upload-time = "2025-10-15T16:16:07.854Z" }, - { url = "https://files.pythonhosted.org/packages/57/7e/b72610cc91edf138bc588df5150957a4937221ca6058b825b4725c27be62/numpy-2.3.4-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:c090d4860032b857d94144d1a9976b8e36709e40386db289aaf6672de2a81966", size = 20950335, upload-time = "2025-10-15T16:16:10.304Z" }, - { url = "https://files.pythonhosted.org/packages/3e/46/bdd3370dcea2f95ef14af79dbf81e6927102ddf1cc54adc0024d61252fd9/numpy-2.3.4-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a13fc473b6db0be619e45f11f9e81260f7302f8d180c49a22b6e6120022596b3", size = 14179878, upload-time = "2025-10-15T16:16:12.595Z" }, - { url = "https://files.pythonhosted.org/packages/ac/01/5a67cb785bda60f45415d09c2bc245433f1c68dd82eef9c9002c508b5a65/numpy-2.3.4-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:3634093d0b428e6c32c3a69b78e554f0cd20ee420dcad5a9f3b2a63762ce4197", size = 5108673, upload-time = "2025-10-15T16:16:14.877Z" }, - { url = "https://files.pythonhosted.org/packages/c2/cd/8428e23a9fcebd33988f4cb61208fda832800ca03781f471f3727a820704/numpy-2.3.4-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:043885b4f7e6e232d7df4f51ffdef8c36320ee9d5f227b380ea636722c7ed12e", size = 6641438, upload-time = "2025-10-15T16:16:16.805Z" }, - { url = "https://files.pythonhosted.org/packages/3e/d1/913fe563820f3c6b079f992458f7331278dcd7ba8427e8e745af37ddb44f/numpy-2.3.4-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4ee6a571d1e4f0ea6d5f22d6e5fbd6ed1dc2b18542848e1e7301bd190500c9d7", size = 14281290, upload-time = "2025-10-15T16:16:18.764Z" }, - { url = 
"https://files.pythonhosted.org/packages/9e/7e/7d306ff7cb143e6d975cfa7eb98a93e73495c4deabb7d1b5ecf09ea0fd69/numpy-2.3.4-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fc8a63918b04b8571789688b2780ab2b4a33ab44bfe8ccea36d3eba51228c953", size = 16636543, upload-time = "2025-10-15T16:16:21.072Z" }, - { url = "https://files.pythonhosted.org/packages/47/6a/8cfc486237e56ccfb0db234945552a557ca266f022d281a2f577b98e955c/numpy-2.3.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:40cc556d5abbc54aabe2b1ae287042d7bdb80c08edede19f0c0afb36ae586f37", size = 16056117, upload-time = "2025-10-15T16:16:23.369Z" }, - { url = "https://files.pythonhosted.org/packages/b1/0e/42cb5e69ea901e06ce24bfcc4b5664a56f950a70efdcf221f30d9615f3f3/numpy-2.3.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:ecb63014bb7f4ce653f8be7f1df8cbc6093a5a2811211770f6606cc92b5a78fd", size = 18577788, upload-time = "2025-10-15T16:16:27.496Z" }, - { url = "https://files.pythonhosted.org/packages/86/92/41c3d5157d3177559ef0a35da50f0cda7fa071f4ba2306dd36818591a5bc/numpy-2.3.4-cp313-cp313-win32.whl", hash = "sha256:e8370eb6925bb8c1c4264fec52b0384b44f675f191df91cbe0140ec9f0955646", size = 6282620, upload-time = "2025-10-15T16:16:29.811Z" }, - { url = "https://files.pythonhosted.org/packages/09/97/fd421e8bc50766665ad35536c2bb4ef916533ba1fdd053a62d96cc7c8b95/numpy-2.3.4-cp313-cp313-win_amd64.whl", hash = "sha256:56209416e81a7893036eea03abcb91c130643eb14233b2515c90dcac963fe99d", size = 12784672, upload-time = "2025-10-15T16:16:31.589Z" }, - { url = "https://files.pythonhosted.org/packages/ad/df/5474fb2f74970ca8eb978093969b125a84cc3d30e47f82191f981f13a8a0/numpy-2.3.4-cp313-cp313-win_arm64.whl", hash = "sha256:a700a4031bc0fd6936e78a752eefb79092cecad2599ea9c8039c548bc097f9bc", size = 10196702, upload-time = "2025-10-15T16:16:33.902Z" }, - { url = "https://files.pythonhosted.org/packages/11/83/66ac031464ec1767ea3ed48ce40f615eb441072945e98693bec0bcd056cc/numpy-2.3.4-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:86966db35c4040fdca64f0816a1c1dd8dbd027d90fca5a57e00e1ca4cd41b879", size = 21049003, upload-time = "2025-10-15T16:16:36.101Z" }, - { url = "https://files.pythonhosted.org/packages/5f/99/5b14e0e686e61371659a1d5bebd04596b1d72227ce36eed121bb0aeab798/numpy-2.3.4-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:838f045478638b26c375ee96ea89464d38428c69170360b23a1a50fa4baa3562", size = 14302980, upload-time = "2025-10-15T16:16:39.124Z" }, - { url = "https://files.pythonhosted.org/packages/2c/44/e9486649cd087d9fc6920e3fc3ac2aba10838d10804b1e179fb7cbc4e634/numpy-2.3.4-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:d7315ed1dab0286adca467377c8381cd748f3dc92235f22a7dfc42745644a96a", size = 5231472, upload-time = "2025-10-15T16:16:41.168Z" }, - { url = "https://files.pythonhosted.org/packages/3e/51/902b24fa8887e5fe2063fd61b1895a476d0bbf46811ab0c7fdf4bd127345/numpy-2.3.4-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:84f01a4d18b2cc4ade1814a08e5f3c907b079c847051d720fad15ce37aa930b6", size = 6739342, upload-time = "2025-10-15T16:16:43.777Z" }, - { url = "https://files.pythonhosted.org/packages/34/f1/4de9586d05b1962acdcdb1dc4af6646361a643f8c864cef7c852bf509740/numpy-2.3.4-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:817e719a868f0dacde4abdfc5c1910b301877970195db9ab6a5e2c4bd5b121f7", size = 14354338, upload-time = "2025-10-15T16:16:46.081Z" }, - { url = 
"https://files.pythonhosted.org/packages/1f/06/1c16103b425de7969d5a76bdf5ada0804b476fed05d5f9e17b777f1cbefd/numpy-2.3.4-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:85e071da78d92a214212cacea81c6da557cab307f2c34b5f85b628e94803f9c0", size = 16702392, upload-time = "2025-10-15T16:16:48.455Z" }, - { url = "https://files.pythonhosted.org/packages/34/b2/65f4dc1b89b5322093572b6e55161bb42e3e0487067af73627f795cc9d47/numpy-2.3.4-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:2ec646892819370cf3558f518797f16597b4e4669894a2ba712caccc9da53f1f", size = 16134998, upload-time = "2025-10-15T16:16:51.114Z" }, - { url = "https://files.pythonhosted.org/packages/d4/11/94ec578896cdb973aaf56425d6c7f2aff4186a5c00fac15ff2ec46998b46/numpy-2.3.4-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:035796aaaddfe2f9664b9a9372f089cfc88bd795a67bd1bfe15e6e770934cf64", size = 18651574, upload-time = "2025-10-15T16:16:53.429Z" }, - { url = "https://files.pythonhosted.org/packages/62/b7/7efa763ab33dbccf56dade36938a77345ce8e8192d6b39e470ca25ff3cd0/numpy-2.3.4-cp313-cp313t-win32.whl", hash = "sha256:fea80f4f4cf83b54c3a051f2f727870ee51e22f0248d3114b8e755d160b38cfb", size = 6413135, upload-time = "2025-10-15T16:16:55.992Z" }, - { url = "https://files.pythonhosted.org/packages/43/70/aba4c38e8400abcc2f345e13d972fb36c26409b3e644366db7649015f291/numpy-2.3.4-cp313-cp313t-win_amd64.whl", hash = "sha256:15eea9f306b98e0be91eb344a94c0e630689ef302e10c2ce5f7e11905c704f9c", size = 12928582, upload-time = "2025-10-15T16:16:57.943Z" }, - { url = "https://files.pythonhosted.org/packages/67/63/871fad5f0073fc00fbbdd7232962ea1ac40eeaae2bba66c76214f7954236/numpy-2.3.4-cp313-cp313t-win_arm64.whl", hash = "sha256:b6c231c9c2fadbae4011ca5e7e83e12dc4a5072f1a1d85a0a7b3ed754d145a40", size = 10266691, upload-time = "2025-10-15T16:17:00.048Z" }, - { url = "https://files.pythonhosted.org/packages/72/71/ae6170143c115732470ae3a2d01512870dd16e0953f8a6dc89525696069b/numpy-2.3.4-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:81c3e6d8c97295a7360d367f9f8553973651b76907988bb6066376bc2252f24e", size = 20955580, upload-time = "2025-10-15T16:17:02.509Z" }, - { url = "https://files.pythonhosted.org/packages/af/39/4be9222ffd6ca8a30eda033d5f753276a9c3426c397bb137d8e19dedd200/numpy-2.3.4-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:7c26b0b2bf58009ed1f38a641f3db4be8d960a417ca96d14e5b06df1506d41ff", size = 14188056, upload-time = "2025-10-15T16:17:04.873Z" }, - { url = "https://files.pythonhosted.org/packages/6c/3d/d85f6700d0a4aa4f9491030e1021c2b2b7421b2b38d01acd16734a2bfdc7/numpy-2.3.4-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:62b2198c438058a20b6704351b35a1d7db881812d8512d67a69c9de1f18ca05f", size = 5116555, upload-time = "2025-10-15T16:17:07.499Z" }, - { url = "https://files.pythonhosted.org/packages/bf/04/82c1467d86f47eee8a19a464c92f90a9bb68ccf14a54c5224d7031241ffb/numpy-2.3.4-cp314-cp314-macosx_14_0_x86_64.whl", hash = "sha256:9d729d60f8d53a7361707f4b68a9663c968882dd4f09e0d58c044c8bf5faee7b", size = 6643581, upload-time = "2025-10-15T16:17:09.774Z" }, - { url = "https://files.pythonhosted.org/packages/0c/d3/c79841741b837e293f48bd7db89d0ac7a4f2503b382b78a790ef1dc778a5/numpy-2.3.4-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:bd0c630cf256b0a7fd9d0a11c9413b42fef5101219ce6ed5a09624f5a65392c7", size = 14299186, upload-time = "2025-10-15T16:17:11.937Z" }, - { url = 
"https://files.pythonhosted.org/packages/e8/7e/4a14a769741fbf237eec5a12a2cbc7a4c4e061852b6533bcb9e9a796c908/numpy-2.3.4-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d5e081bc082825f8b139f9e9fe42942cb4054524598aaeb177ff476cc76d09d2", size = 16638601, upload-time = "2025-10-15T16:17:14.391Z" }, - { url = "https://files.pythonhosted.org/packages/93/87/1c1de269f002ff0a41173fe01dcc925f4ecff59264cd8f96cf3b60d12c9b/numpy-2.3.4-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:15fb27364ed84114438fff8aaf998c9e19adbeba08c0b75409f8c452a8692c52", size = 16074219, upload-time = "2025-10-15T16:17:17.058Z" }, - { url = "https://files.pythonhosted.org/packages/cd/28/18f72ee77408e40a76d691001ae599e712ca2a47ddd2c4f695b16c65f077/numpy-2.3.4-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:85d9fb2d8cd998c84d13a79a09cc0c1091648e848e4e6249b0ccd7f6b487fa26", size = 18576702, upload-time = "2025-10-15T16:17:19.379Z" }, - { url = "https://files.pythonhosted.org/packages/c3/76/95650169b465ececa8cf4b2e8f6df255d4bf662775e797ade2025cc51ae6/numpy-2.3.4-cp314-cp314-win32.whl", hash = "sha256:e73d63fd04e3a9d6bc187f5455d81abfad05660b212c8804bf3b407e984cd2bc", size = 6337136, upload-time = "2025-10-15T16:17:22.886Z" }, - { url = "https://files.pythonhosted.org/packages/dc/89/a231a5c43ede5d6f77ba4a91e915a87dea4aeea76560ba4d2bf185c683f0/numpy-2.3.4-cp314-cp314-win_amd64.whl", hash = "sha256:3da3491cee49cf16157e70f607c03a217ea6647b1cea4819c4f48e53d49139b9", size = 12920542, upload-time = "2025-10-15T16:17:24.783Z" }, - { url = "https://files.pythonhosted.org/packages/0d/0c/ae9434a888f717c5ed2ff2393b3f344f0ff6f1c793519fa0c540461dc530/numpy-2.3.4-cp314-cp314-win_arm64.whl", hash = "sha256:6d9cd732068e8288dbe2717177320723ccec4fb064123f0caf9bbd90ab5be868", size = 10480213, upload-time = "2025-10-15T16:17:26.935Z" }, - { url = "https://files.pythonhosted.org/packages/83/4b/c4a5f0841f92536f6b9592694a5b5f68c9ab37b775ff342649eadf9055d3/numpy-2.3.4-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:22758999b256b595cf0b1d102b133bb61866ba5ceecf15f759623b64c020c9ec", size = 21052280, upload-time = "2025-10-15T16:17:29.638Z" }, - { url = "https://files.pythonhosted.org/packages/3e/80/90308845fc93b984d2cc96d83e2324ce8ad1fd6efea81b324cba4b673854/numpy-2.3.4-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:9cb177bc55b010b19798dc5497d540dea67fd13a8d9e882b2dae71de0cf09eb3", size = 14302930, upload-time = "2025-10-15T16:17:32.384Z" }, - { url = "https://files.pythonhosted.org/packages/3d/4e/07439f22f2a3b247cec4d63a713faae55e1141a36e77fb212881f7cda3fb/numpy-2.3.4-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:0f2bcc76f1e05e5ab58893407c63d90b2029908fa41f9f1cc51eecce936c3365", size = 5231504, upload-time = "2025-10-15T16:17:34.515Z" }, - { url = "https://files.pythonhosted.org/packages/ab/de/1e11f2547e2fe3d00482b19721855348b94ada8359aef5d40dd57bfae9df/numpy-2.3.4-cp314-cp314t-macosx_14_0_x86_64.whl", hash = "sha256:8dc20bde86802df2ed8397a08d793da0ad7a5fd4ea3ac85d757bf5dd4ad7c252", size = 6739405, upload-time = "2025-10-15T16:17:36.128Z" }, - { url = "https://files.pythonhosted.org/packages/3b/40/8cd57393a26cebe2e923005db5134a946c62fa56a1087dc7c478f3e30837/numpy-2.3.4-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5e199c087e2aa71c8f9ce1cb7a8e10677dc12457e7cc1be4798632da37c3e86e", size = 14354866, upload-time = "2025-10-15T16:17:38.884Z" }, - { url = 
"https://files.pythonhosted.org/packages/93/39/5b3510f023f96874ee6fea2e40dfa99313a00bf3ab779f3c92978f34aace/numpy-2.3.4-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:85597b2d25ddf655495e2363fe044b0ae999b75bc4d630dc0d886484b03a5eb0", size = 16703296, upload-time = "2025-10-15T16:17:41.564Z" }, - { url = "https://files.pythonhosted.org/packages/41/0d/19bb163617c8045209c1996c4e427bccbc4bbff1e2c711f39203c8ddbb4a/numpy-2.3.4-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:04a69abe45b49c5955923cf2c407843d1c85013b424ae8a560bba16c92fe44a0", size = 16136046, upload-time = "2025-10-15T16:17:43.901Z" }, - { url = "https://files.pythonhosted.org/packages/e2/c1/6dba12fdf68b02a21ac411c9df19afa66bed2540f467150ca64d246b463d/numpy-2.3.4-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:e1708fac43ef8b419c975926ce1eaf793b0c13b7356cfab6ab0dc34c0a02ac0f", size = 18652691, upload-time = "2025-10-15T16:17:46.247Z" }, - { url = "https://files.pythonhosted.org/packages/f8/73/f85056701dbbbb910c51d846c58d29fd46b30eecd2b6ba760fc8b8a1641b/numpy-2.3.4-cp314-cp314t-win32.whl", hash = "sha256:863e3b5f4d9915aaf1b8ec79ae560ad21f0b8d5e3adc31e73126491bb86dee1d", size = 6485782, upload-time = "2025-10-15T16:17:48.872Z" }, - { url = "https://files.pythonhosted.org/packages/17/90/28fa6f9865181cb817c2471ee65678afa8a7e2a1fb16141473d5fa6bacc3/numpy-2.3.4-cp314-cp314t-win_amd64.whl", hash = "sha256:962064de37b9aef801d33bc579690f8bfe6c5e70e29b61783f60bcba838a14d6", size = 13113301, upload-time = "2025-10-15T16:17:50.938Z" }, - { url = "https://files.pythonhosted.org/packages/54/23/08c002201a8e7e1f9afba93b97deceb813252d9cfd0d3351caed123dcf97/numpy-2.3.4-cp314-cp314t-win_arm64.whl", hash = "sha256:8b5a9a39c45d852b62693d9b3f3e0fe052541f804296ff401a72a1b60edafb29", size = 10547532, upload-time = "2025-10-15T16:17:53.48Z" }, + { url = "https://files.pythonhosted.org/packages/43/77/84dd1d2e34d7e2792a236ba180b5e8fcc1e3e414e761ce0253f63d7f572e/numpy-2.3.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:de5672f4a7b200c15a4127042170a694d4df43c992948f5e1af57f0174beed10", size = 17034641, upload-time = "2025-11-16T22:49:19.336Z" }, + { url = "https://files.pythonhosted.org/packages/2a/ea/25e26fa5837106cde46ae7d0b667e20f69cbbc0efd64cba8221411ab26ae/numpy-2.3.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:acfd89508504a19ed06ef963ad544ec6664518c863436306153e13e94605c218", size = 12528324, upload-time = "2025-11-16T22:49:22.582Z" }, + { url = "https://files.pythonhosted.org/packages/4d/1a/e85f0eea4cf03d6a0228f5c0256b53f2df4bc794706e7df019fc622e47f1/numpy-2.3.5-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:ffe22d2b05504f786c867c8395de703937f934272eb67586817b46188b4ded6d", size = 5356872, upload-time = "2025-11-16T22:49:25.408Z" }, + { url = "https://files.pythonhosted.org/packages/5c/bb/35ef04afd567f4c989c2060cde39211e4ac5357155c1833bcd1166055c61/numpy-2.3.5-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:872a5cf366aec6bb1147336480fef14c9164b154aeb6542327de4970282cd2f5", size = 6893148, upload-time = "2025-11-16T22:49:27.549Z" }, + { url = "https://files.pythonhosted.org/packages/f2/2b/05bbeb06e2dff5eab512dfc678b1cc5ee94d8ac5956a0885c64b6b26252b/numpy-2.3.5-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3095bdb8dd297e5920b010e96134ed91d852d81d490e787beca7e35ae1d89cf7", size = 14557282, upload-time = "2025-11-16T22:49:30.964Z" }, + { url = 
"https://files.pythonhosted.org/packages/65/fb/2b23769462b34398d9326081fad5655198fcf18966fcb1f1e49db44fbf31/numpy-2.3.5-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8cba086a43d54ca804ce711b2a940b16e452807acebe7852ff327f1ecd49b0d4", size = 16897903, upload-time = "2025-11-16T22:49:34.191Z" }, + { url = "https://files.pythonhosted.org/packages/ac/14/085f4cf05fc3f1e8aa95e85404e984ffca9b2275a5dc2b1aae18a67538b8/numpy-2.3.5-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:6cf9b429b21df6b99f4dee7a1218b8b7ffbbe7df8764dc0bd60ce8a0708fed1e", size = 16341672, upload-time = "2025-11-16T22:49:37.2Z" }, + { url = "https://files.pythonhosted.org/packages/6f/3b/1f73994904142b2aa290449b3bb99772477b5fd94d787093e4f24f5af763/numpy-2.3.5-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:396084a36abdb603546b119d96528c2f6263921c50df3c8fd7cb28873a237748", size = 18838896, upload-time = "2025-11-16T22:49:39.727Z" }, + { url = "https://files.pythonhosted.org/packages/cd/b9/cf6649b2124f288309ffc353070792caf42ad69047dcc60da85ee85fea58/numpy-2.3.5-cp311-cp311-win32.whl", hash = "sha256:b0c7088a73aef3d687c4deef8452a3ac7c1be4e29ed8bf3b366c8111128ac60c", size = 6563608, upload-time = "2025-11-16T22:49:42.079Z" }, + { url = "https://files.pythonhosted.org/packages/aa/44/9fe81ae1dcc29c531843852e2874080dc441338574ccc4306b39e2ff6e59/numpy-2.3.5-cp311-cp311-win_amd64.whl", hash = "sha256:a414504bef8945eae5f2d7cb7be2d4af77c5d1cb5e20b296c2c25b61dff2900c", size = 13078442, upload-time = "2025-11-16T22:49:43.99Z" }, + { url = "https://files.pythonhosted.org/packages/6d/a7/f99a41553d2da82a20a2f22e93c94f928e4490bb447c9ff3c4ff230581d3/numpy-2.3.5-cp311-cp311-win_arm64.whl", hash = "sha256:0cd00b7b36e35398fa2d16af7b907b65304ef8bb4817a550e06e5012929830fa", size = 10458555, upload-time = "2025-11-16T22:49:47.092Z" }, + { url = "https://files.pythonhosted.org/packages/44/37/e669fe6cbb2b96c62f6bbedc6a81c0f3b7362f6a59230b23caa673a85721/numpy-2.3.5-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:74ae7b798248fe62021dbf3c914245ad45d1a6b0cb4a29ecb4b31d0bfbc4cc3e", size = 16733873, upload-time = "2025-11-16T22:49:49.84Z" }, + { url = "https://files.pythonhosted.org/packages/c5/65/df0db6c097892c9380851ab9e44b52d4f7ba576b833996e0080181c0c439/numpy-2.3.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ee3888d9ff7c14604052b2ca5535a30216aa0a58e948cdd3eeb8d3415f638769", size = 12259838, upload-time = "2025-11-16T22:49:52.863Z" }, + { url = "https://files.pythonhosted.org/packages/5b/e1/1ee06e70eb2136797abe847d386e7c0e830b67ad1d43f364dd04fa50d338/numpy-2.3.5-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:612a95a17655e213502f60cfb9bf9408efdc9eb1d5f50535cc6eb365d11b42b5", size = 5088378, upload-time = "2025-11-16T22:49:55.055Z" }, + { url = "https://files.pythonhosted.org/packages/6d/9c/1ca85fb86708724275103b81ec4cf1ac1d08f465368acfc8da7ab545bdae/numpy-2.3.5-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:3101e5177d114a593d79dd79658650fe28b5a0d8abeb8ce6f437c0e6df5be1a4", size = 6628559, upload-time = "2025-11-16T22:49:57.371Z" }, + { url = "https://files.pythonhosted.org/packages/74/78/fcd41e5a0ce4f3f7b003da85825acddae6d7ecb60cf25194741b036ca7d6/numpy-2.3.5-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8b973c57ff8e184109db042c842423ff4f60446239bd585a5131cc47f06f789d", size = 14250702, upload-time = "2025-11-16T22:49:59.632Z" }, + { url = 
"https://files.pythonhosted.org/packages/b6/23/2a1b231b8ff672b4c450dac27164a8b2ca7d9b7144f9c02d2396518352eb/numpy-2.3.5-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0d8163f43acde9a73c2a33605353a4f1bc4798745a8b1d73183b28e5b435ae28", size = 16606086, upload-time = "2025-11-16T22:50:02.127Z" }, + { url = "https://files.pythonhosted.org/packages/a0/c5/5ad26fbfbe2012e190cc7d5003e4d874b88bb18861d0829edc140a713021/numpy-2.3.5-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:51c1e14eb1e154ebd80e860722f9e6ed6ec89714ad2db2d3aa33c31d7c12179b", size = 16025985, upload-time = "2025-11-16T22:50:04.536Z" }, + { url = "https://files.pythonhosted.org/packages/d2/fa/dd48e225c46c819288148d9d060b047fd2a6fb1eb37eae25112ee4cb4453/numpy-2.3.5-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:b46b4ec24f7293f23adcd2d146960559aaf8020213de8ad1909dba6c013bf89c", size = 18542976, upload-time = "2025-11-16T22:50:07.557Z" }, + { url = "https://files.pythonhosted.org/packages/05/79/ccbd23a75862d95af03d28b5c6901a1b7da4803181513d52f3b86ed9446e/numpy-2.3.5-cp312-cp312-win32.whl", hash = "sha256:3997b5b3c9a771e157f9aae01dd579ee35ad7109be18db0e85dbdbe1de06e952", size = 6285274, upload-time = "2025-11-16T22:50:10.746Z" }, + { url = "https://files.pythonhosted.org/packages/2d/57/8aeaf160312f7f489dea47ab61e430b5cb051f59a98ae68b7133ce8fa06a/numpy-2.3.5-cp312-cp312-win_amd64.whl", hash = "sha256:86945f2ee6d10cdfd67bcb4069c1662dd711f7e2a4343db5cecec06b87cf31aa", size = 12782922, upload-time = "2025-11-16T22:50:12.811Z" }, + { url = "https://files.pythonhosted.org/packages/78/a6/aae5cc2ca78c45e64b9ef22f089141d661516856cf7c8a54ba434576900d/numpy-2.3.5-cp312-cp312-win_arm64.whl", hash = "sha256:f28620fe26bee16243be2b7b874da327312240a7cdc38b769a697578d2100013", size = 10194667, upload-time = "2025-11-16T22:50:16.16Z" }, + { url = "https://files.pythonhosted.org/packages/db/69/9cde09f36da4b5a505341180a3f2e6fadc352fd4d2b7096ce9778db83f1a/numpy-2.3.5-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:d0f23b44f57077c1ede8c5f26b30f706498b4862d3ff0a7298b8411dd2f043ff", size = 16728251, upload-time = "2025-11-16T22:50:19.013Z" }, + { url = "https://files.pythonhosted.org/packages/79/fb/f505c95ceddd7027347b067689db71ca80bd5ecc926f913f1a23e65cf09b/numpy-2.3.5-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:aa5bc7c5d59d831d9773d1170acac7893ce3a5e130540605770ade83280e7188", size = 12254652, upload-time = "2025-11-16T22:50:21.487Z" }, + { url = "https://files.pythonhosted.org/packages/78/da/8c7738060ca9c31b30e9301ee0cf6c5ffdbf889d9593285a1cead337f9a5/numpy-2.3.5-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:ccc933afd4d20aad3c00bcef049cb40049f7f196e0397f1109dba6fed63267b0", size = 5083172, upload-time = "2025-11-16T22:50:24.562Z" }, + { url = "https://files.pythonhosted.org/packages/a4/b4/ee5bb2537fb9430fd2ef30a616c3672b991a4129bb1c7dcc42aa0abbe5d7/numpy-2.3.5-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:afaffc4393205524af9dfa400fa250143a6c3bc646c08c9f5e25a9f4b4d6a903", size = 6622990, upload-time = "2025-11-16T22:50:26.47Z" }, + { url = "https://files.pythonhosted.org/packages/95/03/dc0723a013c7d7c19de5ef29e932c3081df1c14ba582b8b86b5de9db7f0f/numpy-2.3.5-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9c75442b2209b8470d6d5d8b1c25714270686f14c749028d2199c54e29f20b4d", size = 14248902, upload-time = "2025-11-16T22:50:28.861Z" }, + { url = 
"https://files.pythonhosted.org/packages/f5/10/ca162f45a102738958dcec8023062dad0cbc17d1ab99d68c4e4a6c45fb2b/numpy-2.3.5-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:11e06aa0af8c0f05104d56450d6093ee639e15f24ecf62d417329d06e522e017", size = 16597430, upload-time = "2025-11-16T22:50:31.56Z" }, + { url = "https://files.pythonhosted.org/packages/2a/51/c1e29be863588db58175175f057286900b4b3327a1351e706d5e0f8dd679/numpy-2.3.5-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:ed89927b86296067b4f81f108a2271d8926467a8868e554eaf370fc27fa3ccaf", size = 16024551, upload-time = "2025-11-16T22:50:34.242Z" }, + { url = "https://files.pythonhosted.org/packages/83/68/8236589d4dbb87253d28259d04d9b814ec0ecce7cb1c7fed29729f4c3a78/numpy-2.3.5-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:51c55fe3451421f3a6ef9a9c1439e82101c57a2c9eab9feb196a62b1a10b58ce", size = 18533275, upload-time = "2025-11-16T22:50:37.651Z" }, + { url = "https://files.pythonhosted.org/packages/40/56/2932d75b6f13465239e3b7b7e511be27f1b8161ca2510854f0b6e521c395/numpy-2.3.5-cp313-cp313-win32.whl", hash = "sha256:1978155dd49972084bd6ef388d66ab70f0c323ddee6f693d539376498720fb7e", size = 6277637, upload-time = "2025-11-16T22:50:40.11Z" }, + { url = "https://files.pythonhosted.org/packages/0c/88/e2eaa6cffb115b85ed7c7c87775cb8bcf0816816bc98ca8dbfa2ee33fe6e/numpy-2.3.5-cp313-cp313-win_amd64.whl", hash = "sha256:00dc4e846108a382c5869e77c6ed514394bdeb3403461d25a829711041217d5b", size = 12779090, upload-time = "2025-11-16T22:50:42.503Z" }, + { url = "https://files.pythonhosted.org/packages/8f/88/3f41e13a44ebd4034ee17baa384acac29ba6a4fcc2aca95f6f08ca0447d1/numpy-2.3.5-cp313-cp313-win_arm64.whl", hash = "sha256:0472f11f6ec23a74a906a00b48a4dcf3849209696dff7c189714511268d103ae", size = 10194710, upload-time = "2025-11-16T22:50:44.971Z" }, + { url = "https://files.pythonhosted.org/packages/13/cb/71744144e13389d577f867f745b7df2d8489463654a918eea2eeb166dfc9/numpy-2.3.5-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:414802f3b97f3c1eef41e530aaba3b3c1620649871d8cb38c6eaff034c2e16bd", size = 16827292, upload-time = "2025-11-16T22:50:47.715Z" }, + { url = "https://files.pythonhosted.org/packages/71/80/ba9dc6f2a4398e7f42b708a7fdc841bb638d353be255655498edbf9a15a8/numpy-2.3.5-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:5ee6609ac3604fa7780e30a03e5e241a7956f8e2fcfe547d51e3afa5247ac47f", size = 12378897, upload-time = "2025-11-16T22:50:51.327Z" }, + { url = "https://files.pythonhosted.org/packages/2e/6d/db2151b9f64264bcceccd51741aa39b50150de9b602d98ecfe7e0c4bff39/numpy-2.3.5-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:86d835afea1eaa143012a2d7a3f45a3adce2d7adc8b4961f0b362214d800846a", size = 5207391, upload-time = "2025-11-16T22:50:54.542Z" }, + { url = "https://files.pythonhosted.org/packages/80/ae/429bacace5ccad48a14c4ae5332f6aa8ab9f69524193511d60ccdfdc65fa/numpy-2.3.5-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:30bc11310e8153ca664b14c5f1b73e94bd0503681fcf136a163de856f3a50139", size = 6721275, upload-time = "2025-11-16T22:50:56.794Z" }, + { url = "https://files.pythonhosted.org/packages/74/5b/1919abf32d8722646a38cd527bc3771eb229a32724ee6ba340ead9b92249/numpy-2.3.5-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1062fde1dcf469571705945b0f221b73928f34a20c904ffb45db101907c3454e", size = 14306855, upload-time = "2025-11-16T22:50:59.208Z" }, + { url = 
"https://files.pythonhosted.org/packages/a5/87/6831980559434973bebc30cd9c1f21e541a0f2b0c280d43d3afd909b66d0/numpy-2.3.5-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ce581db493ea1a96c0556360ede6607496e8bf9b3a8efa66e06477267bc831e9", size = 16657359, upload-time = "2025-11-16T22:51:01.991Z" }, + { url = "https://files.pythonhosted.org/packages/dd/91/c797f544491ee99fd00495f12ebb7802c440c1915811d72ac5b4479a3356/numpy-2.3.5-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:cc8920d2ec5fa99875b670bb86ddeb21e295cb07aa331810d9e486e0b969d946", size = 16093374, upload-time = "2025-11-16T22:51:05.291Z" }, + { url = "https://files.pythonhosted.org/packages/74/a6/54da03253afcbe7a72785ec4da9c69fb7a17710141ff9ac5fcb2e32dbe64/numpy-2.3.5-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:9ee2197ef8c4f0dfe405d835f3b6a14f5fee7782b5de51ba06fb65fc9b36e9f1", size = 18594587, upload-time = "2025-11-16T22:51:08.585Z" }, + { url = "https://files.pythonhosted.org/packages/80/e9/aff53abbdd41b0ecca94285f325aff42357c6b5abc482a3fcb4994290b18/numpy-2.3.5-cp313-cp313t-win32.whl", hash = "sha256:70b37199913c1bd300ff6e2693316c6f869c7ee16378faf10e4f5e3275b299c3", size = 6405940, upload-time = "2025-11-16T22:51:11.541Z" }, + { url = "https://files.pythonhosted.org/packages/d5/81/50613fec9d4de5480de18d4f8ef59ad7e344d497edbef3cfd80f24f98461/numpy-2.3.5-cp313-cp313t-win_amd64.whl", hash = "sha256:b501b5fa195cc9e24fe102f21ec0a44dffc231d2af79950b451e0d99cea02234", size = 12920341, upload-time = "2025-11-16T22:51:14.312Z" }, + { url = "https://files.pythonhosted.org/packages/bb/ab/08fd63b9a74303947f34f0bd7c5903b9c5532c2d287bead5bdf4c556c486/numpy-2.3.5-cp313-cp313t-win_arm64.whl", hash = "sha256:a80afd79f45f3c4a7d341f13acbe058d1ca8ac017c165d3fa0d3de6bc1a079d7", size = 10262507, upload-time = "2025-11-16T22:51:16.846Z" }, + { url = "https://files.pythonhosted.org/packages/ba/97/1a914559c19e32d6b2e233cf9a6a114e67c856d35b1d6babca571a3e880f/numpy-2.3.5-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:bf06bc2af43fa8d32d30fae16ad965663e966b1a3202ed407b84c989c3221e82", size = 16735706, upload-time = "2025-11-16T22:51:19.558Z" }, + { url = "https://files.pythonhosted.org/packages/57/d4/51233b1c1b13ecd796311216ae417796b88b0616cfd8a33ae4536330748a/numpy-2.3.5-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:052e8c42e0c49d2575621c158934920524f6c5da05a1d3b9bab5d8e259e045f0", size = 12264507, upload-time = "2025-11-16T22:51:22.492Z" }, + { url = "https://files.pythonhosted.org/packages/45/98/2fe46c5c2675b8306d0b4a3ec3494273e93e1226a490f766e84298576956/numpy-2.3.5-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:1ed1ec893cff7040a02c8aa1c8611b94d395590d553f6b53629a4461dc7f7b63", size = 5093049, upload-time = "2025-11-16T22:51:25.171Z" }, + { url = "https://files.pythonhosted.org/packages/ce/0e/0698378989bb0ac5f1660c81c78ab1fe5476c1a521ca9ee9d0710ce54099/numpy-2.3.5-cp314-cp314-macosx_14_0_x86_64.whl", hash = "sha256:2dcd0808a421a482a080f89859a18beb0b3d1e905b81e617a188bd80422d62e9", size = 6626603, upload-time = "2025-11-16T22:51:27Z" }, + { url = "https://files.pythonhosted.org/packages/5e/a6/9ca0eecc489640615642a6cbc0ca9e10df70df38c4d43f5a928ff18d8827/numpy-2.3.5-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:727fd05b57df37dc0bcf1a27767a3d9a78cbbc92822445f32cc3436ba797337b", size = 14262696, upload-time = "2025-11-16T22:51:29.402Z" }, + { url = 
"https://files.pythonhosted.org/packages/c8/f6/07ec185b90ec9d7217a00eeeed7383b73d7e709dae2a9a021b051542a708/numpy-2.3.5-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fffe29a1ef00883599d1dc2c51aa2e5d80afe49523c261a74933df395c15c520", size = 16597350, upload-time = "2025-11-16T22:51:32.167Z" }, + { url = "https://files.pythonhosted.org/packages/75/37/164071d1dde6a1a84c9b8e5b414fa127981bad47adf3a6b7e23917e52190/numpy-2.3.5-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:8f7f0e05112916223d3f438f293abf0727e1181b5983f413dfa2fefc4098245c", size = 16040190, upload-time = "2025-11-16T22:51:35.403Z" }, + { url = "https://files.pythonhosted.org/packages/08/3c/f18b82a406b04859eb026d204e4e1773eb41c5be58410f41ffa511d114ae/numpy-2.3.5-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:2e2eb32ddb9ccb817d620ac1d8dae7c3f641c1e5f55f531a33e8ab97960a75b8", size = 18536749, upload-time = "2025-11-16T22:51:39.698Z" }, + { url = "https://files.pythonhosted.org/packages/40/79/f82f572bf44cf0023a2fe8588768e23e1592585020d638999f15158609e1/numpy-2.3.5-cp314-cp314-win32.whl", hash = "sha256:66f85ce62c70b843bab1fb14a05d5737741e74e28c7b8b5a064de10142fad248", size = 6335432, upload-time = "2025-11-16T22:51:42.476Z" }, + { url = "https://files.pythonhosted.org/packages/a3/2e/235b4d96619931192c91660805e5e49242389742a7a82c27665021db690c/numpy-2.3.5-cp314-cp314-win_amd64.whl", hash = "sha256:e6a0bc88393d65807d751a614207b7129a310ca4fe76a74e5c7da5fa5671417e", size = 12919388, upload-time = "2025-11-16T22:51:45.275Z" }, + { url = "https://files.pythonhosted.org/packages/07/2b/29fd75ce45d22a39c61aad74f3d718e7ab67ccf839ca8b60866054eb15f8/numpy-2.3.5-cp314-cp314-win_arm64.whl", hash = "sha256:aeffcab3d4b43712bb7a60b65f6044d444e75e563ff6180af8f98dd4b905dfd2", size = 10476651, upload-time = "2025-11-16T22:51:47.749Z" }, + { url = "https://files.pythonhosted.org/packages/17/e1/f6a721234ebd4d87084cfa68d081bcba2f5cfe1974f7de4e0e8b9b2a2ba1/numpy-2.3.5-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:17531366a2e3a9e30762c000f2c43a9aaa05728712e25c11ce1dbe700c53ad41", size = 16834503, upload-time = "2025-11-16T22:51:50.443Z" }, + { url = "https://files.pythonhosted.org/packages/5c/1c/baf7ffdc3af9c356e1c135e57ab7cf8d247931b9554f55c467efe2c69eff/numpy-2.3.5-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:d21644de1b609825ede2f48be98dfde4656aefc713654eeee280e37cadc4e0ad", size = 12381612, upload-time = "2025-11-16T22:51:53.609Z" }, + { url = "https://files.pythonhosted.org/packages/74/91/f7f0295151407ddc9ba34e699013c32c3c91944f9b35fcf9281163dc1468/numpy-2.3.5-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:c804e3a5aba5460c73955c955bdbd5c08c354954e9270a2c1565f62e866bdc39", size = 5210042, upload-time = "2025-11-16T22:51:56.213Z" }, + { url = "https://files.pythonhosted.org/packages/2e/3b/78aebf345104ec50dd50a4d06ddeb46a9ff5261c33bcc58b1c4f12f85ec2/numpy-2.3.5-cp314-cp314t-macosx_14_0_x86_64.whl", hash = "sha256:cc0a57f895b96ec78969c34f682c602bf8da1a0270b09bc65673df2e7638ec20", size = 6724502, upload-time = "2025-11-16T22:51:58.584Z" }, + { url = "https://files.pythonhosted.org/packages/02/c6/7c34b528740512e57ef1b7c8337ab0b4f0bddf34c723b8996c675bc2bc91/numpy-2.3.5-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:900218e456384ea676e24ea6a0417f030a3b07306d29d7ad843957b40a9d8d52", size = 14308962, upload-time = "2025-11-16T22:52:01.698Z" }, + { url = 
"https://files.pythonhosted.org/packages/80/35/09d433c5262bc32d725bafc619e095b6a6651caf94027a03da624146f655/numpy-2.3.5-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:09a1bea522b25109bf8e6f3027bd810f7c1085c64a0c7ce050c1676ad0ba010b", size = 16655054, upload-time = "2025-11-16T22:52:04.267Z" }, + { url = "https://files.pythonhosted.org/packages/7a/ab/6a7b259703c09a88804fa2430b43d6457b692378f6b74b356155283566ac/numpy-2.3.5-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:04822c00b5fd0323c8166d66c701dc31b7fbd252c100acd708c48f763968d6a3", size = 16091613, upload-time = "2025-11-16T22:52:08.651Z" }, + { url = "https://files.pythonhosted.org/packages/c2/88/330da2071e8771e60d1038166ff9d73f29da37b01ec3eb43cb1427464e10/numpy-2.3.5-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:d6889ec4ec662a1a37eb4b4fb26b6100841804dac55bd9df579e326cdc146227", size = 18591147, upload-time = "2025-11-16T22:52:11.453Z" }, + { url = "https://files.pythonhosted.org/packages/51/41/851c4b4082402d9ea860c3626db5d5df47164a712cb23b54be028b184c1c/numpy-2.3.5-cp314-cp314t-win32.whl", hash = "sha256:93eebbcf1aafdf7e2ddd44c2923e2672e1010bddc014138b229e49725b4d6be5", size = 6479806, upload-time = "2025-11-16T22:52:14.641Z" }, + { url = "https://files.pythonhosted.org/packages/90/30/d48bde1dfd93332fa557cff1972fbc039e055a52021fbef4c2c4b1eefd17/numpy-2.3.5-cp314-cp314t-win_amd64.whl", hash = "sha256:c8a9958e88b65c3b27e22ca2a076311636850b612d6bbfb76e8d156aacde2aaf", size = 13105760, upload-time = "2025-11-16T22:52:17.975Z" }, + { url = "https://files.pythonhosted.org/packages/2d/fd/4b5eb0b3e888d86aee4d198c23acec7d214baaf17ea93c1adec94c9518b9/numpy-2.3.5-cp314-cp314t-win_arm64.whl", hash = "sha256:6203fdf9f3dc5bdaed7319ad8698e685c7a3be10819f41d32a0723e611733b42", size = 10545459, upload-time = "2025-11-16T22:52:20.55Z" }, + { url = "https://files.pythonhosted.org/packages/c6/65/f9dea8e109371ade9c782b4e4756a82edf9d3366bca495d84d79859a0b79/numpy-2.3.5-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:f0963b55cdd70fad460fa4c1341f12f976bb26cb66021a5580329bd498988310", size = 16910689, upload-time = "2025-11-16T22:52:23.247Z" }, + { url = "https://files.pythonhosted.org/packages/00/4f/edb00032a8fb92ec0a679d3830368355da91a69cab6f3e9c21b64d0bb986/numpy-2.3.5-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:f4255143f5160d0de972d28c8f9665d882b5f61309d8362fdd3e103cf7bf010c", size = 12457053, upload-time = "2025-11-16T22:52:26.367Z" }, + { url = "https://files.pythonhosted.org/packages/16/a4/e8a53b5abd500a63836a29ebe145fc1ab1f2eefe1cfe59276020373ae0aa/numpy-2.3.5-pp311-pypy311_pp73-macosx_14_0_arm64.whl", hash = "sha256:a4b9159734b326535f4dd01d947f919c6eefd2d9827466a696c44ced82dfbc18", size = 5285635, upload-time = "2025-11-16T22:52:29.266Z" }, + { url = "https://files.pythonhosted.org/packages/a3/2f/37eeb9014d9c8b3e9c55bc599c68263ca44fdbc12a93e45a21d1d56df737/numpy-2.3.5-pp311-pypy311_pp73-macosx_14_0_x86_64.whl", hash = "sha256:2feae0d2c91d46e59fcd62784a3a83b3fb677fead592ce51b5a6fbb4f95965ff", size = 6801770, upload-time = "2025-11-16T22:52:31.421Z" }, + { url = "https://files.pythonhosted.org/packages/7d/e4/68d2f474df2cb671b2b6c2986a02e520671295647dad82484cde80ca427b/numpy-2.3.5-pp311-pypy311_pp73-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ffac52f28a7849ad7576293c0cb7b9f08304e8f7d738a8cb8a90ec4c55a998eb", size = 14391768, upload-time = "2025-11-16T22:52:33.593Z" }, + { url = 
"https://files.pythonhosted.org/packages/b8/50/94ccd8a2b141cb50651fddd4f6a48874acb3c91c8f0842b08a6afc4b0b21/numpy-2.3.5-pp311-pypy311_pp73-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:63c0e9e7eea69588479ebf4a8a270d5ac22763cc5854e9a7eae952a3908103f7", size = 16729263, upload-time = "2025-11-16T22:52:36.369Z" }, + { url = "https://files.pythonhosted.org/packages/2d/ee/346fa473e666fe14c52fcdd19ec2424157290a032d4c41f98127bfb31ac7/numpy-2.3.5-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:f16417ec91f12f814b10bafe79ef77e70113a2f5f7018640e7425ff979253425", size = 12967213, upload-time = "2025-11-16T22:52:39.38Z" }, ] [[package]] @@ -1195,6 +1426,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a2/eb/86626c1bbc2edb86323022371c39aa48df6fd8b0a1647bc274577f72e90b/nvidia_nvtx_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5b17e2001cc0d751a5bc2c6ec6d26ad95913324a4adb86788c944f8ce9ba441f", size = 89954, upload-time = "2025-03-07T01:42:44.131Z" }, ] +[[package]] +name = "omegaconf" +version = "2.3.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "antlr4-python3-runtime" }, + { name = "pyyaml" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/09/48/6388f1bb9da707110532cb70ec4d2822858ddfb44f1cdf1233c20a80ea4b/omegaconf-2.3.0.tar.gz", hash = "sha256:d5d4b6d29955cc50ad50c46dc269bcd92c6e00f5f90d23ab5fee7bfca4ba4cc7", size = 3298120, upload-time = "2022-12-08T20:59:22.753Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e3/94/1843518e420fa3ed6919835845df698c7e27e183cb997394e4a670973a65/omegaconf-2.3.0-py3-none-any.whl", hash = "sha256:7b4df175cdb08ba400f45cae3bdcae7ba8365db4d165fc65fd04b050ab63b46b", size = 79500, upload-time = "2022-12-08T20:59:19.686Z" }, +] + [[package]] name = "optuna" version = "4.5.0" @@ -1234,6 +1478,13 @@ dependencies = [ ] sdist = { url = "https://files.pythonhosted.org/packages/33/01/d40b85317f86cf08d853a4f495195c73815fdf205eef3993821720274518/pandas-2.3.3.tar.gz", hash = "sha256:e05e1af93b977f7eafa636d043f9f94c7ee3ac81af99c13508215942e64c993b", size = 4495223, upload-time = "2025-09-29T23:34:51.853Z" } wheels = [ + { url = "https://files.pythonhosted.org/packages/c1/fa/7ac648108144a095b4fb6aa3de1954689f7af60a14cf25583f4960ecb878/pandas-2.3.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:602b8615ebcc4a0c1751e71840428ddebeb142ec02c786e8ad6b1ce3c8dec523", size = 11578790, upload-time = "2025-09-29T23:18:30.065Z" }, + { url = "https://files.pythonhosted.org/packages/9b/35/74442388c6cf008882d4d4bdfc4109be87e9b8b7ccd097ad1e7f006e2e95/pandas-2.3.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8fe25fc7b623b0ef6b5009149627e34d2a4657e880948ec3c840e9402e5c1b45", size = 10833831, upload-time = "2025-09-29T23:38:56.071Z" }, + { url = "https://files.pythonhosted.org/packages/fe/e4/de154cbfeee13383ad58d23017da99390b91d73f8c11856f2095e813201b/pandas-2.3.3-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b468d3dad6ff947df92dcb32ede5b7bd41a9b3cceef0a30ed925f6d01fb8fa66", size = 12199267, upload-time = "2025-09-29T23:18:41.627Z" }, + { url = "https://files.pythonhosted.org/packages/bf/c9/63f8d545568d9ab91476b1818b4741f521646cbdd151c6efebf40d6de6f7/pandas-2.3.3-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b98560e98cb334799c0b07ca7967ac361a47326e9b4e5a7dfb5ab2b1c9d35a1b", size = 12789281, upload-time = "2025-09-29T23:18:56.834Z" }, + { url = 
"https://files.pythonhosted.org/packages/f2/00/a5ac8c7a0e67fd1a6059e40aa08fa1c52cc00709077d2300e210c3ce0322/pandas-2.3.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:1d37b5848ba49824e5c30bedb9c830ab9b7751fd049bc7914533e01c65f79791", size = 13240453, upload-time = "2025-09-29T23:19:09.247Z" }, + { url = "https://files.pythonhosted.org/packages/27/4d/5c23a5bc7bd209231618dd9e606ce076272c9bc4f12023a70e03a86b4067/pandas-2.3.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:db4301b2d1f926ae677a751eb2bd0e8c5f5319c9cb3f88b0becbbb0b07b34151", size = 13890361, upload-time = "2025-09-29T23:19:25.342Z" }, + { url = "https://files.pythonhosted.org/packages/8e/59/712db1d7040520de7a4965df15b774348980e6df45c129b8c64d0dbe74ef/pandas-2.3.3-cp311-cp311-win_amd64.whl", hash = "sha256:f086f6fe114e19d92014a1966f43a3e62285109afe874f067f5abbdcbb10e59c", size = 11348702, upload-time = "2025-09-29T23:19:38.296Z" }, { url = "https://files.pythonhosted.org/packages/9c/fb/231d89e8637c808b997d172b18e9d4a4bc7bf31296196c260526055d1ea0/pandas-2.3.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:6d21f6d74eb1725c2efaa71a2bfc661a0689579b58e9c0ca58a739ff0b002b53", size = 11597846, upload-time = "2025-09-29T23:19:48.856Z" }, { url = "https://files.pythonhosted.org/packages/5c/bd/bf8064d9cfa214294356c2d6702b716d3cf3bb24be59287a6a21e24cae6b/pandas-2.3.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3fd2f887589c7aa868e02632612ba39acb0b8948faf5cc58f0850e165bd46f35", size = 10729618, upload-time = "2025-09-29T23:39:08.659Z" }, { url = "https://files.pythonhosted.org/packages/57/56/cf2dbe1a3f5271370669475ead12ce77c61726ffd19a35546e31aa8edf4e/pandas-2.3.3-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ecaf1e12bdc03c86ad4a7ea848d66c685cb6851d807a26aa245ca3d2017a1908", size = 11737212, upload-time = "2025-09-29T23:19:59.765Z" }, @@ -1275,6 +1526,17 @@ version = "12.0.0" source = { registry = "https://pypi.org/simple" } sdist = { url = "https://files.pythonhosted.org/packages/5a/b0/cace85a1b0c9775a9f8f5d5423c8261c858760e2466c79b2dd184638b056/pillow-12.0.0.tar.gz", hash = "sha256:87d4f8125c9988bfbed67af47dd7a953e2fc7b0cc1e7800ec6d2080d490bb353", size = 47008828, upload-time = "2025-10-15T18:24:14.008Z" } wheels = [ + { url = "https://files.pythonhosted.org/packages/0e/5a/a2f6773b64edb921a756eb0729068acad9fc5208a53f4a349396e9436721/pillow-12.0.0-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:0fd00cac9c03256c8b2ff58f162ebcd2587ad3e1f2e397eab718c47e24d231cc", size = 5289798, upload-time = "2025-10-15T18:21:47.763Z" }, + { url = "https://files.pythonhosted.org/packages/2e/05/069b1f8a2e4b5a37493da6c5868531c3f77b85e716ad7a590ef87d58730d/pillow-12.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a3475b96f5908b3b16c47533daaa87380c491357d197564e0ba34ae75c0f3257", size = 4650589, upload-time = "2025-10-15T18:21:49.515Z" }, + { url = "https://files.pythonhosted.org/packages/61/e3/2c820d6e9a36432503ead175ae294f96861b07600a7156154a086ba7111a/pillow-12.0.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:110486b79f2d112cf6add83b28b627e369219388f64ef2f960fef9ebaf54c642", size = 6230472, upload-time = "2025-10-15T18:21:51.052Z" }, + { url = "https://files.pythonhosted.org/packages/4f/89/63427f51c64209c5e23d4d52071c8d0f21024d3a8a487737caaf614a5795/pillow-12.0.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5269cc1caeedb67e6f7269a42014f381f45e2e7cd42d834ede3c703a1d915fe3", size = 8033887, upload-time = 
"2025-10-15T18:21:52.604Z" }, + { url = "https://files.pythonhosted.org/packages/f6/1b/c9711318d4901093c15840f268ad649459cd81984c9ec9887756cca049a5/pillow-12.0.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:aa5129de4e174daccbc59d0a3b6d20eaf24417d59851c07ebb37aeb02947987c", size = 6343964, upload-time = "2025-10-15T18:21:54.619Z" }, + { url = "https://files.pythonhosted.org/packages/41/1e/db9470f2d030b4995083044cd8738cdd1bf773106819f6d8ba12597d5352/pillow-12.0.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bee2a6db3a7242ea309aa7ee8e2780726fed67ff4e5b40169f2c940e7eb09227", size = 7034756, upload-time = "2025-10-15T18:21:56.151Z" }, + { url = "https://files.pythonhosted.org/packages/cc/b0/6177a8bdd5ee4ed87cba2de5a3cc1db55ffbbec6176784ce5bb75aa96798/pillow-12.0.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:90387104ee8400a7b4598253b4c406f8958f59fcf983a6cea2b50d59f7d63d0b", size = 6458075, upload-time = "2025-10-15T18:21:57.759Z" }, + { url = "https://files.pythonhosted.org/packages/bc/5e/61537aa6fa977922c6a03253a0e727e6e4a72381a80d63ad8eec350684f2/pillow-12.0.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:bc91a56697869546d1b8f0a3ff35224557ae7f881050e99f615e0119bf934b4e", size = 7125955, upload-time = "2025-10-15T18:21:59.372Z" }, + { url = "https://files.pythonhosted.org/packages/1f/3d/d5033539344ee3cbd9a4d69e12e63ca3a44a739eb2d4c8da350a3d38edd7/pillow-12.0.0-cp311-cp311-win32.whl", hash = "sha256:27f95b12453d165099c84f8a8bfdfd46b9e4bda9e0e4b65f0635430027f55739", size = 6298440, upload-time = "2025-10-15T18:22:00.982Z" }, + { url = "https://files.pythonhosted.org/packages/4d/42/aaca386de5cc8bd8a0254516957c1f265e3521c91515b16e286c662854c4/pillow-12.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:b583dc9070312190192631373c6c8ed277254aa6e6084b74bdd0a6d3b221608e", size = 6999256, upload-time = "2025-10-15T18:22:02.617Z" }, + { url = "https://files.pythonhosted.org/packages/ba/f1/9197c9c2d5708b785f631a6dfbfa8eb3fb9672837cb92ae9af812c13b4ed/pillow-12.0.0-cp311-cp311-win_arm64.whl", hash = "sha256:759de84a33be3b178a64c8ba28ad5c135900359e85fb662bc6e403ad4407791d", size = 2436025, upload-time = "2025-10-15T18:22:04.598Z" }, { url = "https://files.pythonhosted.org/packages/2c/90/4fcce2c22caf044e660a198d740e7fbc14395619e3cb1abad12192c0826c/pillow-12.0.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:53561a4ddc36facb432fae7a9d8afbfaf94795414f5cdc5fc52f28c1dca90371", size = 5249377, upload-time = "2025-10-15T18:22:05.993Z" }, { url = "https://files.pythonhosted.org/packages/fd/e0/ed960067543d080691d47d6938ebccbf3976a931c9567ab2fbfab983a5dd/pillow-12.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:71db6b4c1653045dacc1585c1b0d184004f0d7e694c7b34ac165ca70c0838082", size = 4650343, upload-time = "2025-10-15T18:22:07.718Z" }, { url = "https://files.pythonhosted.org/packages/e7/a1/f81fdeddcb99c044bf7d6faa47e12850f13cee0849537a7d27eeab5534d4/pillow-12.0.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:2fa5f0b6716fc88f11380b88b31fe591a06c6315e955c096c35715788b339e3f", size = 6232981, upload-time = "2025-10-15T18:22:09.287Z" }, @@ -1336,6 +1598,22 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a2/2f/16cabcc6426c32218ace36bf0d55955e813f2958afddbf1d391849fee9d1/pillow-12.0.0-cp314-cp314t-win32.whl", hash = "sha256:3830c769decf88f1289680a59d4f4c46c72573446352e2befec9a8512104fa52", size = 6408045, upload-time = "2025-10-15T18:23:53.177Z" }, { url = 
"https://files.pythonhosted.org/packages/35/73/e29aa0c9c666cf787628d3f0dcf379f4791fba79f4936d02f8b37165bdf8/pillow-12.0.0-cp314-cp314t-win_amd64.whl", hash = "sha256:905b0365b210c73afb0ebe9101a32572152dfd1c144c7e28968a331b9217b94a", size = 7148282, upload-time = "2025-10-15T18:23:55.316Z" }, { url = "https://files.pythonhosted.org/packages/c1/70/6b41bdcddf541b437bbb9f47f94d2db5d9ddef6c37ccab8c9107743748a4/pillow-12.0.0-cp314-cp314t-win_arm64.whl", hash = "sha256:99353a06902c2e43b43e8ff74ee65a7d90307d82370604746738a1e0661ccca7", size = 2525630, upload-time = "2025-10-15T18:23:57.149Z" }, + { url = "https://files.pythonhosted.org/packages/1d/b3/582327e6c9f86d037b63beebe981425d6811104cb443e8193824ef1a2f27/pillow-12.0.0-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:b22bd8c974942477156be55a768f7aa37c46904c175be4e158b6a86e3a6b7ca8", size = 5215068, upload-time = "2025-10-15T18:23:59.594Z" }, + { url = "https://files.pythonhosted.org/packages/fd/d6/67748211d119f3b6540baf90f92fae73ae51d5217b171b0e8b5f7e5d558f/pillow-12.0.0-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:805ebf596939e48dbb2e4922a1d3852cfc25c38160751ce02da93058b48d252a", size = 4614994, upload-time = "2025-10-15T18:24:01.669Z" }, + { url = "https://files.pythonhosted.org/packages/2d/e1/f8281e5d844c41872b273b9f2c34a4bf64ca08905668c8ae730eedc7c9fa/pillow-12.0.0-pp311-pypy311_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cae81479f77420d217def5f54b5b9d279804d17e982e0f2fa19b1d1e14ab5197", size = 5246639, upload-time = "2025-10-15T18:24:03.403Z" }, + { url = "https://files.pythonhosted.org/packages/94/5a/0d8ab8ffe8a102ff5df60d0de5af309015163bf710c7bb3e8311dd3b3ad0/pillow-12.0.0-pp311-pypy311_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:aeaefa96c768fc66818730b952a862235d68825c178f1b3ffd4efd7ad2edcb7c", size = 6986839, upload-time = "2025-10-15T18:24:05.344Z" }, + { url = "https://files.pythonhosted.org/packages/20/2e/3434380e8110b76cd9eb00a363c484b050f949b4bbe84ba770bb8508a02c/pillow-12.0.0-pp311-pypy311_pp73-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:09f2d0abef9e4e2f349305a4f8cc784a8a6c2f58a8c4892eea13b10a943bd26e", size = 5313505, upload-time = "2025-10-15T18:24:07.137Z" }, + { url = "https://files.pythonhosted.org/packages/57/ca/5a9d38900d9d74785141d6580950fe705de68af735ff6e727cb911b64740/pillow-12.0.0-pp311-pypy311_pp73-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bdee52571a343d721fb2eb3b090a82d959ff37fc631e3f70422e0c2e029f3e76", size = 5963654, upload-time = "2025-10-15T18:24:09.579Z" }, + { url = "https://files.pythonhosted.org/packages/95/7e/f896623c3c635a90537ac093c6a618ebe1a90d87206e42309cb5d98a1b9e/pillow-12.0.0-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:b290fd8aa38422444d4b50d579de197557f182ef1068b75f5aa8558638b8d0a5", size = 6997850, upload-time = "2025-10-15T18:24:11.495Z" }, +] + +[[package]] +name = "platformdirs" +version = "4.5.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/cf/86/0248f086a84f01b37aaec0fa567b397df1a119f73c16f6c7a9aac73ea309/platformdirs-4.5.1.tar.gz", hash = "sha256:61d5cdcc6065745cdd94f0f878977f8de9437be93de97c1c12f853c9c0cdcbda", size = 21715, upload-time = "2025-12-05T13:52:58.638Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cb/28/3bfe2fa5a7b9c46fe7e13c97bda14c895fb10fa2ebf1d0abb90e0cea7ee1/platformdirs-4.5.1-py3-none-any.whl", hash = 
"sha256:d03afa3963c806a9bed9d5125c8f4cb2fdaf74a55ab60e5d59b3fde758104d31", size = 18731, upload-time = "2025-12-05T13:52:56.823Z" }, ] [[package]] @@ -1344,9 +1622,16 @@ version = "0.1.0" source = { virtual = "." } dependencies = [ { name = "datasets" }, + { name = "fsspec" }, + { name = "huggingface-hub" }, { name = "lorem" }, +] + +[package.optional-dependencies] +dev = [ + { name = "hydra-core" }, { name = "matplotlib" }, - { name = "numpy" }, + { name = "memray" }, { name = "optuna" }, { name = "torch" }, { name = "torchdata" }, @@ -1355,15 +1640,19 @@ dependencies = [ [package.metadata] requires-dist = [ - { name = "datasets", specifier = ">=4.4.1" }, + { name = "datasets", specifier = ">=3.2.0" }, + { name = "fsspec", specifier = "==2024.9.0" }, + { name = "huggingface-hub", specifier = "==0.27.0" }, + { name = "hydra-core", marker = "extra == 'dev'", specifier = ">=1.3.2" }, { name = "lorem", specifier = ">=0.1.1" }, - { name = "matplotlib", specifier = ">=3.10.7" }, - { name = "numpy", specifier = ">=2.3.4" }, - { name = "optuna", specifier = ">=4.5.0" }, - { name = "torch", specifier = ">=2.9.0" }, - { name = "torchdata", specifier = ">=0.11.0" }, - { name = "torchvision", specifier = ">=0.24.0" }, + { name = "matplotlib", marker = "extra == 'dev'", specifier = ">=3.10.7" }, + { name = "memray", marker = "extra == 'dev'", specifier = ">=1.19.1" }, + { name = "optuna", marker = "extra == 'dev'", specifier = "==4.5.0" }, + { name = "torch", marker = "extra == 'dev'", specifier = "==2.9.0" }, + { name = "torchdata", marker = "extra == 'dev'", specifier = "==0.7.1" }, + { name = "torchvision", marker = "extra == 'dev'", specifier = "==0.24.0" }, ] +provides-extras = ["dev"] [[package]] name = "propcache" @@ -1371,6 +1660,21 @@ version = "0.4.1" source = { registry = "https://pypi.org/simple" } sdist = { url = "https://files.pythonhosted.org/packages/9e/da/e9fc233cf63743258bff22b3dfa7ea5baef7b5bc324af47a0ad89b8ffc6f/propcache-0.4.1.tar.gz", hash = "sha256:f48107a8c637e80362555f37ecf49abe20370e557cc4ab374f04ec4423c97c3d", size = 46442, upload-time = "2025-10-08T19:49:02.291Z" } wheels = [ + { url = "https://files.pythonhosted.org/packages/8c/d4/4e2c9aaf7ac2242b9358f98dccd8f90f2605402f5afeff6c578682c2c491/propcache-0.4.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:60a8fda9644b7dfd5dece8c61d8a85e271cb958075bfc4e01083c148b61a7caf", size = 80208, upload-time = "2025-10-08T19:46:24.597Z" }, + { url = "https://files.pythonhosted.org/packages/c2/21/d7b68e911f9c8e18e4ae43bdbc1e1e9bbd971f8866eb81608947b6f585ff/propcache-0.4.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c30b53e7e6bda1d547cabb47c825f3843a0a1a42b0496087bb58d8fedf9f41b5", size = 45777, upload-time = "2025-10-08T19:46:25.733Z" }, + { url = "https://files.pythonhosted.org/packages/d3/1d/11605e99ac8ea9435651ee71ab4cb4bf03f0949586246476a25aadfec54a/propcache-0.4.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:6918ecbd897443087a3b7cd978d56546a812517dcaaca51b49526720571fa93e", size = 47647, upload-time = "2025-10-08T19:46:27.304Z" }, + { url = "https://files.pythonhosted.org/packages/58/1a/3c62c127a8466c9c843bccb503d40a273e5cc69838805f322e2826509e0d/propcache-0.4.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3d902a36df4e5989763425a8ab9e98cd8ad5c52c823b34ee7ef307fd50582566", size = 214929, upload-time = "2025-10-08T19:46:28.62Z" }, + { url = 
"https://files.pythonhosted.org/packages/56/b9/8fa98f850960b367c4b8fe0592e7fc341daa7a9462e925228f10a60cf74f/propcache-0.4.1-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a9695397f85973bb40427dedddf70d8dc4a44b22f1650dd4af9eedf443d45165", size = 221778, upload-time = "2025-10-08T19:46:30.358Z" }, + { url = "https://files.pythonhosted.org/packages/46/a6/0ab4f660eb59649d14b3d3d65c439421cf2f87fe5dd68591cbe3c1e78a89/propcache-0.4.1-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:2bb07ffd7eaad486576430c89f9b215f9e4be68c4866a96e97db9e97fead85dc", size = 228144, upload-time = "2025-10-08T19:46:32.607Z" }, + { url = "https://files.pythonhosted.org/packages/52/6a/57f43e054fb3d3a56ac9fc532bc684fc6169a26c75c353e65425b3e56eef/propcache-0.4.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fd6f30fdcf9ae2a70abd34da54f18da086160e4d7d9251f81f3da0ff84fc5a48", size = 210030, upload-time = "2025-10-08T19:46:33.969Z" }, + { url = "https://files.pythonhosted.org/packages/40/e2/27e6feebb5f6b8408fa29f5efbb765cd54c153ac77314d27e457a3e993b7/propcache-0.4.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:fc38cba02d1acba4e2869eef1a57a43dfbd3d49a59bf90dda7444ec2be6a5570", size = 208252, upload-time = "2025-10-08T19:46:35.309Z" }, + { url = "https://files.pythonhosted.org/packages/9e/f8/91c27b22ccda1dbc7967f921c42825564fa5336a01ecd72eb78a9f4f53c2/propcache-0.4.1-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:67fad6162281e80e882fb3ec355398cf72864a54069d060321f6cd0ade95fe85", size = 202064, upload-time = "2025-10-08T19:46:36.993Z" }, + { url = "https://files.pythonhosted.org/packages/f2/26/7f00bd6bd1adba5aafe5f4a66390f243acab58eab24ff1a08bebb2ef9d40/propcache-0.4.1-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:f10207adf04d08bec185bae14d9606a1444715bc99180f9331c9c02093e1959e", size = 212429, upload-time = "2025-10-08T19:46:38.398Z" }, + { url = "https://files.pythonhosted.org/packages/84/89/fd108ba7815c1117ddca79c228f3f8a15fc82a73bca8b142eb5de13b2785/propcache-0.4.1-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:e9b0d8d0845bbc4cfcdcbcdbf5086886bc8157aa963c31c777ceff7846c77757", size = 216727, upload-time = "2025-10-08T19:46:39.732Z" }, + { url = "https://files.pythonhosted.org/packages/79/37/3ec3f7e3173e73f1d600495d8b545b53802cbf35506e5732dd8578db3724/propcache-0.4.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:981333cb2f4c1896a12f4ab92a9cc8f09ea664e9b7dbdc4eff74627af3a11c0f", size = 205097, upload-time = "2025-10-08T19:46:41.025Z" }, + { url = "https://files.pythonhosted.org/packages/61/b0/b2631c19793f869d35f47d5a3a56fb19e9160d3c119f15ac7344fc3ccae7/propcache-0.4.1-cp311-cp311-win32.whl", hash = "sha256:f1d2f90aeec838a52f1c1a32fe9a619fefd5e411721a9117fbf82aea638fe8a1", size = 38084, upload-time = "2025-10-08T19:46:42.693Z" }, + { url = "https://files.pythonhosted.org/packages/f4/78/6cce448e2098e9f3bfc91bb877f06aa24b6ccace872e39c53b2f707c4648/propcache-0.4.1-cp311-cp311-win_amd64.whl", hash = "sha256:364426a62660f3f699949ac8c621aad6977be7126c5807ce48c0aeb8e7333ea6", size = 41637, upload-time = "2025-10-08T19:46:43.778Z" }, + { url = "https://files.pythonhosted.org/packages/9c/e9/754f180cccd7f51a39913782c74717c581b9cc8177ad0e949f4d51812383/propcache-0.4.1-cp311-cp311-win_arm64.whl", hash = "sha256:e53f3a38d3510c11953f3e6a33f205c6d1b001129f972805ca9b42fc308bc239", size = 38064, upload-time = "2025-10-08T19:46:44.872Z" }, { url = 
"https://files.pythonhosted.org/packages/a2/0f/f17b1b2b221d5ca28b4b876e8bb046ac40466513960646bda8e1853cdfa2/propcache-0.4.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:e153e9cd40cc8945138822807139367f256f89c6810c2634a4f6902b52d3b4e2", size = 80061, upload-time = "2025-10-08T19:46:46.075Z" }, { url = "https://files.pythonhosted.org/packages/76/47/8ccf75935f51448ba9a16a71b783eb7ef6b9ee60f5d14c7f8a8a79fbeed7/propcache-0.4.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:cd547953428f7abb73c5ad82cbb32109566204260d98e41e5dfdc682eb7f8403", size = 46037, upload-time = "2025-10-08T19:46:47.23Z" }, { url = "https://files.pythonhosted.org/packages/0a/b6/5c9a0e42df4d00bfb4a3cbbe5cf9f54260300c88a0e9af1f47ca5ce17ac0/propcache-0.4.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f048da1b4f243fc44f205dfd320933a951b8d89e0afd4c7cacc762a8b9165207", size = 47324, upload-time = "2025-10-08T19:46:48.384Z" }, @@ -1455,6 +1759,13 @@ version = "22.0.0" source = { registry = "https://pypi.org/simple" } sdist = { url = "https://files.pythonhosted.org/packages/30/53/04a7fdc63e6056116c9ddc8b43bc28c12cdd181b85cbeadb79278475f3ae/pyarrow-22.0.0.tar.gz", hash = "sha256:3d600dc583260d845c7d8a6db540339dd883081925da2bd1c5cb808f720b3cd9", size = 1151151, upload-time = "2025-10-24T12:30:00.762Z" } wheels = [ + { url = "https://files.pythonhosted.org/packages/2e/b7/18f611a8cdc43417f9394a3ccd3eace2f32183c08b9eddc3d17681819f37/pyarrow-22.0.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:3e294c5eadfb93d78b0763e859a0c16d4051fc1c5231ae8956d61cb0b5666f5a", size = 34272022, upload-time = "2025-10-24T10:04:28.973Z" }, + { url = "https://files.pythonhosted.org/packages/26/5c/f259e2526c67eb4b9e511741b19870a02363a47a35edbebc55c3178db22d/pyarrow-22.0.0-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:69763ab2445f632d90b504a815a2a033f74332997052b721002298ed6de40f2e", size = 35995834, upload-time = "2025-10-24T10:04:35.467Z" }, + { url = "https://files.pythonhosted.org/packages/50/8d/281f0f9b9376d4b7f146913b26fac0aa2829cd1ee7e997f53a27411bbb92/pyarrow-22.0.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:b41f37cabfe2463232684de44bad753d6be08a7a072f6a83447eeaf0e4d2a215", size = 45030348, upload-time = "2025-10-24T10:04:43.366Z" }, + { url = "https://files.pythonhosted.org/packages/f5/e5/53c0a1c428f0976bf22f513d79c73000926cb00b9c138d8e02daf2102e18/pyarrow-22.0.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:35ad0f0378c9359b3f297299c3309778bb03b8612f987399a0333a560b43862d", size = 47699480, upload-time = "2025-10-24T10:04:51.486Z" }, + { url = "https://files.pythonhosted.org/packages/95/e1/9dbe4c465c3365959d183e6345d0a8d1dc5b02ca3f8db4760b3bc834cf25/pyarrow-22.0.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:8382ad21458075c2e66a82a29d650f963ce51c7708c7c0ff313a8c206c4fd5e8", size = 48011148, upload-time = "2025-10-24T10:04:59.585Z" }, + { url = "https://files.pythonhosted.org/packages/c5/b4/7caf5d21930061444c3cf4fa7535c82faf5263e22ce43af7c2759ceb5b8b/pyarrow-22.0.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:1a812a5b727bc09c3d7ea072c4eebf657c2f7066155506ba31ebf4792f88f016", size = 50276964, upload-time = "2025-10-24T10:05:08.175Z" }, + { url = "https://files.pythonhosted.org/packages/ae/f3/cec89bd99fa3abf826f14d4e53d3d11340ce6f6af4d14bdcd54cd83b6576/pyarrow-22.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:ec5d40dd494882704fb876c16fa7261a69791e784ae34e6b5992e977bd2e238c", size = 28106517, upload-time = "2025-10-24T10:05:14.314Z" }, { url = 
"https://files.pythonhosted.org/packages/af/63/ba23862d69652f85b615ca14ad14f3bcfc5bf1b99ef3f0cd04ff93fdad5a/pyarrow-22.0.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:bea79263d55c24a32b0d79c00a1c58bb2ee5f0757ed95656b01c0fb310c5af3d", size = 34211578, upload-time = "2025-10-24T10:05:21.583Z" }, { url = "https://files.pythonhosted.org/packages/b1/d0/f9ad86fe809efd2bcc8be32032fa72e8b0d112b01ae56a053006376c5930/pyarrow-22.0.0-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:12fe549c9b10ac98c91cf791d2945e878875d95508e1a5d14091a7aaa66d9cf8", size = 35989906, upload-time = "2025-10-24T10:05:29.485Z" }, { url = "https://files.pythonhosted.org/packages/b4/a8/f910afcb14630e64d673f15904ec27dd31f1e009b77033c365c84e8c1e1d/pyarrow-22.0.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:334f900ff08ce0423407af97e6c26ad5d4e3b0763645559ece6fbf3747d6a8f5", size = 45021677, upload-time = "2025-10-24T10:05:38.274Z" }, @@ -1492,6 +1803,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/7b/03/f335d6c52b4a4761bcc83499789a1e2e16d9d201a58c327a9b5cc9a41bd9/pyarrow-22.0.0-cp314-cp314t-win_amd64.whl", hash = "sha256:0c34fe18094686194f204a3b1787a27456897d8a2d62caf84b61e8dfbc0252ae", size = 29185594, upload-time = "2025-10-24T10:09:53.111Z" }, ] +[[package]] +name = "pygments" +version = "2.19.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b0/77/a5b8c569bf593b0140bde72ea885a803b82086995367bf2037de0159d924/pygments-2.19.2.tar.gz", hash = "sha256:636cb2477cec7f8952536970bc533bc43743542f70392ae026374600add5b887", size = 4968631, upload-time = "2025-06-21T13:39:12.283Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b", size = 1225217, upload-time = "2025-06-21T13:39:07.939Z" }, +] + [[package]] name = "pyparsing" version = "3.2.5" @@ -1528,6 +1848,15 @@ version = "6.0.3" source = { registry = "https://pypi.org/simple" } sdist = { url = "https://files.pythonhosted.org/packages/05/8e/961c0007c59b8dd7729d542c61a4d537767a59645b82a0b521206e1e25c2/pyyaml-6.0.3.tar.gz", hash = "sha256:d76623373421df22fb4cf8817020cbb7ef15c725b9d5e45f17e189bfc384190f", size = 130960, upload-time = "2025-09-25T21:33:16.546Z" } wheels = [ + { url = "https://files.pythonhosted.org/packages/6d/16/a95b6757765b7b031c9374925bb718d55e0a9ba8a1b6a12d25962ea44347/pyyaml-6.0.3-cp311-cp311-macosx_10_13_x86_64.whl", hash = "sha256:44edc647873928551a01e7a563d7452ccdebee747728c1080d881d68af7b997e", size = 185826, upload-time = "2025-09-25T21:31:58.655Z" }, + { url = "https://files.pythonhosted.org/packages/16/19/13de8e4377ed53079ee996e1ab0a9c33ec2faf808a4647b7b4c0d46dd239/pyyaml-6.0.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:652cb6edd41e718550aad172851962662ff2681490a8a711af6a4d288dd96824", size = 175577, upload-time = "2025-09-25T21:32:00.088Z" }, + { url = "https://files.pythonhosted.org/packages/0c/62/d2eb46264d4b157dae1275b573017abec435397aa59cbcdab6fc978a8af4/pyyaml-6.0.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:10892704fc220243f5305762e276552a0395f7beb4dbf9b14ec8fd43b57f126c", size = 775556, upload-time = "2025-09-25T21:32:01.31Z" }, + { url = 
"https://files.pythonhosted.org/packages/10/cb/16c3f2cf3266edd25aaa00d6c4350381c8b012ed6f5276675b9eba8d9ff4/pyyaml-6.0.3-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:850774a7879607d3a6f50d36d04f00ee69e7fc816450e5f7e58d7f17f1ae5c00", size = 882114, upload-time = "2025-09-25T21:32:03.376Z" }, + { url = "https://files.pythonhosted.org/packages/71/60/917329f640924b18ff085ab889a11c763e0b573da888e8404ff486657602/pyyaml-6.0.3-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b8bb0864c5a28024fac8a632c443c87c5aa6f215c0b126c449ae1a150412f31d", size = 806638, upload-time = "2025-09-25T21:32:04.553Z" }, + { url = "https://files.pythonhosted.org/packages/dd/6f/529b0f316a9fd167281a6c3826b5583e6192dba792dd55e3203d3f8e655a/pyyaml-6.0.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:1d37d57ad971609cf3c53ba6a7e365e40660e3be0e5175fa9f2365a379d6095a", size = 767463, upload-time = "2025-09-25T21:32:06.152Z" }, + { url = "https://files.pythonhosted.org/packages/f2/6a/b627b4e0c1dd03718543519ffb2f1deea4a1e6d42fbab8021936a4d22589/pyyaml-6.0.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:37503bfbfc9d2c40b344d06b2199cf0e96e97957ab1c1b546fd4f87e53e5d3e4", size = 794986, upload-time = "2025-09-25T21:32:07.367Z" }, + { url = "https://files.pythonhosted.org/packages/45/91/47a6e1c42d9ee337c4839208f30d9f09caa9f720ec7582917b264defc875/pyyaml-6.0.3-cp311-cp311-win32.whl", hash = "sha256:8098f252adfa6c80ab48096053f512f2321f0b998f98150cea9bd23d83e1467b", size = 142543, upload-time = "2025-09-25T21:32:08.95Z" }, + { url = "https://files.pythonhosted.org/packages/da/e3/ea007450a105ae919a72393cb06f122f288ef60bba2dc64b26e2646fa315/pyyaml-6.0.3-cp311-cp311-win_amd64.whl", hash = "sha256:9f3bfb4965eb874431221a3ff3fdcddc7e74e3b07799e0e84ca4a0f867d449bf", size = 158763, upload-time = "2025-09-25T21:32:09.96Z" }, { url = "https://files.pythonhosted.org/packages/d1/33/422b98d2195232ca1826284a76852ad5a86fe23e31b009c9886b2d0fb8b2/pyyaml-6.0.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:7f047e29dcae44602496db43be01ad42fc6f1cc0d8cd6c83d342306c32270196", size = 182063, upload-time = "2025-09-25T21:32:11.445Z" }, { url = "https://files.pythonhosted.org/packages/89/a0/6cf41a19a1f2f3feab0e9c0b74134aa2ce6849093d5517a0c550fe37a648/pyyaml-6.0.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:fc09d0aa354569bc501d4e787133afc08552722d3ab34836a80547331bb5d4a0", size = 173973, upload-time = "2025-09-25T21:32:12.492Z" }, { url = "https://files.pythonhosted.org/packages/ed/23/7a778b6bd0b9a8039df8b1b1d80e2e2ad78aa04171592c8a5c43a56a6af4/pyyaml-6.0.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9149cad251584d5fb4981be1ecde53a1ca46c891a79788c0df828d2f166bda28", size = 775116, upload-time = "2025-09-25T21:32:13.652Z" }, @@ -1583,6 +1912,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/1e/db/4254e3eabe8020b458f1a747140d32277ec7a271daf1d235b70dc0b4e6e3/requests-2.32.5-py3-none-any.whl", hash = "sha256:2462f94637a34fd532264295e186976db0f5d453d1cdd31473c85a6a161affb6", size = 64738, upload-time = "2025-08-18T20:46:00.542Z" }, ] +[[package]] +name = "rich" +version = "14.2.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "markdown-it-py" }, + { name = "pygments" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/fb/d2/8920e102050a0de7bfabeb4c4614a49248cf8d5d7a8d01885fbb24dc767a/rich-14.2.0.tar.gz", hash = 
"sha256:73ff50c7c0c1c77c8243079283f4edb376f0f6442433aecb8ce7e6d0b92d1fe4", size = 219990, upload-time = "2025-10-09T14:16:53.064Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/25/7a/b0178788f8dc6cafce37a212c99565fa1fe7872c70c6c9c1e1a372d9d88f/rich-14.2.0-py3-none-any.whl", hash = "sha256:76bc51fe2e57d2b1be1f96c524b890b816e334ab4c1e45888799bfaab0021edd", size = 243393, upload-time = "2025-10-09T14:16:51.245Z" }, +] + [[package]] name = "setuptools" version = "80.9.0" @@ -1592,15 +1934,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a3/dc/17031897dae0efacfea57dfd3a82fdd2a2aeb58e0ff71b77b87e44edc772/setuptools-80.9.0-py3-none-any.whl", hash = "sha256:062d34222ad13e0cc312a4c02d73f059e86a4acbfbdea8f8f76b28c99f306922", size = 1201486, upload-time = "2025-05-27T00:56:49.664Z" }, ] -[[package]] -name = "shellingham" -version = "1.5.4" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/58/15/8b3609fd3830ef7b27b655beb4b4e9c62313a4e8da8c676e142cc210d58e/shellingham-1.5.4.tar.gz", hash = "sha256:8dbca0739d487e5bd35ab3ca4b36e11c4078f3a234bfce294b0a0291363404de", size = 10310, upload-time = "2023-10-24T04:13:40.426Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/e0/f9/0595336914c5619e5f28a1fb793285925a8cd4b432c9da0a987836c7f822/shellingham-1.5.4-py2.py3-none-any.whl", hash = "sha256:7ecfff8f2fd72616f7481040475a65b2bf8af90a56c89140852d1120324e8686", size = 9755, upload-time = "2023-10-24T04:13:38.866Z" }, -] - [[package]] name = "six" version = "1.17.0" @@ -1610,42 +1943,45 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050, upload-time = "2024-12-04T17:35:26.475Z" }, ] -[[package]] -name = "sniffio" -version = "1.3.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/a2/87/a6771e1546d97e7e041b6ae58d80074f81b7d5121207425c964ddf5cfdbd/sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc", size = 20372, upload-time = "2024-02-25T23:20:04.057Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", size = 10235, upload-time = "2024-02-25T23:20:01.196Z" }, -] - [[package]] name = "sqlalchemy" -version = "2.0.44" +version = "2.0.45" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "greenlet", marker = "platform_machine == 'AMD64' or platform_machine == 'WIN32' or platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'ppc64le' or platform_machine == 'win32' or platform_machine == 'x86_64'" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/f0/f2/840d7b9496825333f532d2e3976b8eadbf52034178aac53630d09fe6e1ef/sqlalchemy-2.0.44.tar.gz", hash = "sha256:0ae7454e1ab1d780aee69fd2aae7d6b8670a581d8847f2d1e0f7ddfbf47e5a22", size = 9819830, upload-time = "2025-10-10T14:39:12.935Z" } +sdist = { url = "https://files.pythonhosted.org/packages/be/f9/5e4491e5ccf42f5d9cfc663741d261b3e6e1683ae7812114e7636409fcc6/sqlalchemy-2.0.45.tar.gz", hash = "sha256:1632a4bda8d2d25703fdad6363058d882541bdaaee0e5e3ddfa0cd3229efce88", 
size = 9869912, upload-time = "2025-12-09T21:05:16.737Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/62/c4/59c7c9b068e6813c898b771204aad36683c96318ed12d4233e1b18762164/sqlalchemy-2.0.44-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:72fea91746b5890f9e5e0997f16cbf3d53550580d76355ba2d998311b17b2250", size = 2139675, upload-time = "2025-10-10T16:03:31.064Z" }, - { url = "https://files.pythonhosted.org/packages/d6/ae/eeb0920537a6f9c5a3708e4a5fc55af25900216bdb4847ec29cfddf3bf3a/sqlalchemy-2.0.44-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:585c0c852a891450edbb1eaca8648408a3cc125f18cf433941fa6babcc359e29", size = 2127726, upload-time = "2025-10-10T16:03:35.934Z" }, - { url = "https://files.pythonhosted.org/packages/d8/d5/2ebbabe0379418eda8041c06b0b551f213576bfe4c2f09d77c06c07c8cc5/sqlalchemy-2.0.44-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9b94843a102efa9ac68a7a30cd46df3ff1ed9c658100d30a725d10d9c60a2f44", size = 3327603, upload-time = "2025-10-10T15:35:28.322Z" }, - { url = "https://files.pythonhosted.org/packages/45/e5/5aa65852dadc24b7d8ae75b7efb8d19303ed6ac93482e60c44a585930ea5/sqlalchemy-2.0.44-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:119dc41e7a7defcefc57189cfa0e61b1bf9c228211aba432b53fb71ef367fda1", size = 3337842, upload-time = "2025-10-10T15:43:45.431Z" }, - { url = "https://files.pythonhosted.org/packages/41/92/648f1afd3f20b71e880ca797a960f638d39d243e233a7082c93093c22378/sqlalchemy-2.0.44-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:0765e318ee9179b3718c4fd7ba35c434f4dd20332fbc6857a5e8df17719c24d7", size = 3264558, upload-time = "2025-10-10T15:35:29.93Z" }, - { url = "https://files.pythonhosted.org/packages/40/cf/e27d7ee61a10f74b17740918e23cbc5bc62011b48282170dc4c66da8ec0f/sqlalchemy-2.0.44-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:2e7b5b079055e02d06a4308d0481658e4f06bc7ef211567edc8f7d5dce52018d", size = 3301570, upload-time = "2025-10-10T15:43:48.407Z" }, - { url = "https://files.pythonhosted.org/packages/3b/3d/3116a9a7b63e780fb402799b6da227435be878b6846b192f076d2f838654/sqlalchemy-2.0.44-cp312-cp312-win32.whl", hash = "sha256:846541e58b9a81cce7dee8329f352c318de25aa2f2bbe1e31587eb1f057448b4", size = 2103447, upload-time = "2025-10-10T15:03:21.678Z" }, - { url = "https://files.pythonhosted.org/packages/25/83/24690e9dfc241e6ab062df82cc0df7f4231c79ba98b273fa496fb3dd78ed/sqlalchemy-2.0.44-cp312-cp312-win_amd64.whl", hash = "sha256:7cbcb47fd66ab294703e1644f78971f6f2f1126424d2b300678f419aa73c7b6e", size = 2130912, upload-time = "2025-10-10T15:03:24.656Z" }, - { url = "https://files.pythonhosted.org/packages/45/d3/c67077a2249fdb455246e6853166360054c331db4613cda3e31ab1cadbef/sqlalchemy-2.0.44-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ff486e183d151e51b1d694c7aa1695747599bb00b9f5f604092b54b74c64a8e1", size = 2135479, upload-time = "2025-10-10T16:03:37.671Z" }, - { url = "https://files.pythonhosted.org/packages/2b/91/eabd0688330d6fd114f5f12c4f89b0d02929f525e6bf7ff80aa17ca802af/sqlalchemy-2.0.44-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:0b1af8392eb27b372ddb783b317dea0f650241cea5bd29199b22235299ca2e45", size = 2123212, upload-time = "2025-10-10T16:03:41.755Z" }, - { url = "https://files.pythonhosted.org/packages/b0/bb/43e246cfe0e81c018076a16036d9b548c4cc649de241fa27d8d9ca6f85ab/sqlalchemy-2.0.44-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2b61188657e3a2b9ac4e8f04d6cf8e51046e28175f79464c67f2fd35bceb0976", size = 3255353, 
upload-time = "2025-10-10T15:35:31.221Z" }, - { url = "https://files.pythonhosted.org/packages/b9/96/c6105ed9a880abe346b64d3b6ddef269ddfcab04f7f3d90a0bf3c5a88e82/sqlalchemy-2.0.44-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b87e7b91a5d5973dda5f00cd61ef72ad75a1db73a386b62877d4875a8840959c", size = 3260222, upload-time = "2025-10-10T15:43:50.124Z" }, - { url = "https://files.pythonhosted.org/packages/44/16/1857e35a47155b5ad927272fee81ae49d398959cb749edca6eaa399b582f/sqlalchemy-2.0.44-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:15f3326f7f0b2bfe406ee562e17f43f36e16167af99c4c0df61db668de20002d", size = 3189614, upload-time = "2025-10-10T15:35:32.578Z" }, - { url = "https://files.pythonhosted.org/packages/88/ee/4afb39a8ee4fc786e2d716c20ab87b5b1fb33d4ac4129a1aaa574ae8a585/sqlalchemy-2.0.44-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:1e77faf6ff919aa8cd63f1c4e561cac1d9a454a191bb864d5dd5e545935e5a40", size = 3226248, upload-time = "2025-10-10T15:43:51.862Z" }, - { url = "https://files.pythonhosted.org/packages/32/d5/0e66097fc64fa266f29a7963296b40a80d6a997b7ac13806183700676f86/sqlalchemy-2.0.44-cp313-cp313-win32.whl", hash = "sha256:ee51625c2d51f8baadf2829fae817ad0b66b140573939dd69284d2ba3553ae73", size = 2101275, upload-time = "2025-10-10T15:03:26.096Z" }, - { url = "https://files.pythonhosted.org/packages/03/51/665617fe4f8c6450f42a6d8d69243f9420f5677395572c2fe9d21b493b7b/sqlalchemy-2.0.44-cp313-cp313-win_amd64.whl", hash = "sha256:c1c80faaee1a6c3428cecf40d16a2365bcf56c424c92c2b6f0f9ad204b899e9e", size = 2127901, upload-time = "2025-10-10T15:03:27.548Z" }, - { url = "https://files.pythonhosted.org/packages/9c/5e/6a29fa884d9fb7ddadf6b69490a9d45fded3b38541713010dad16b77d015/sqlalchemy-2.0.44-py3-none-any.whl", hash = "sha256:19de7ca1246fbef9f9d1bff8f1ab25641569df226364a0e40457dc5457c54b05", size = 1928718, upload-time = "2025-10-10T15:29:45.32Z" }, + { url = "https://files.pythonhosted.org/packages/f3/f8/9be54ff620e5b796ca7b44670ef58bc678095d51b0e89d6e3102ea468216/sqlalchemy-2.0.45-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b8c8b41b97fba5f62349aa285654230296829672fc9939cd7f35aab246d1c08b", size = 3309379, upload-time = "2025-12-09T22:06:07.461Z" }, + { url = "https://files.pythonhosted.org/packages/f6/2b/60ce3ee7a5ae172bfcd419ce23259bb874d2cddd44f67c5df3760a1e22f9/sqlalchemy-2.0.45-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:12c694ed6468333a090d2f60950e4250b928f457e4962389553d6ba5fe9951ac", size = 3309948, upload-time = "2025-12-09T22:09:57.643Z" }, + { url = "https://files.pythonhosted.org/packages/a3/42/bac8d393f5db550e4e466d03d16daaafd2bad1f74e48c12673fb499a7fc1/sqlalchemy-2.0.45-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:f7d27a1d977a1cfef38a0e2e1ca86f09c4212666ce34e6ae542f3ed0a33bc606", size = 3261239, upload-time = "2025-12-09T22:06:08.879Z" }, + { url = "https://files.pythonhosted.org/packages/6f/12/43dc70a0528c59842b04ea1c1ed176f072a9b383190eb015384dd102fb19/sqlalchemy-2.0.45-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:d62e47f5d8a50099b17e2bfc1b0c7d7ecd8ba6b46b1507b58cc4f05eefc3bb1c", size = 3284065, upload-time = "2025-12-09T22:09:59.454Z" }, + { url = "https://files.pythonhosted.org/packages/cf/9c/563049cf761d9a2ec7bc489f7879e9d94e7b590496bea5bbee9ed7b4cc32/sqlalchemy-2.0.45-cp311-cp311-win32.whl", hash = "sha256:3c5f76216e7b85770d5bb5130ddd11ee89f4d52b11783674a662c7dd57018177", size = 2113480, upload-time = 
"2025-12-09T21:29:57.03Z" }, + { url = "https://files.pythonhosted.org/packages/bc/fa/09d0a11fe9f15c7fa5c7f0dd26be3d235b0c0cbf2f9544f43bc42efc8a24/sqlalchemy-2.0.45-cp311-cp311-win_amd64.whl", hash = "sha256:a15b98adb7f277316f2c276c090259129ee4afca783495e212048daf846654b2", size = 2138407, upload-time = "2025-12-09T21:29:58.556Z" }, + { url = "https://files.pythonhosted.org/packages/2d/c7/1900b56ce19bff1c26f39a4ce427faec7716c81ac792bfac8b6a9f3dca93/sqlalchemy-2.0.45-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b3ee2aac15169fb0d45822983631466d60b762085bc4535cd39e66bea362df5f", size = 3333760, upload-time = "2025-12-09T22:11:02.66Z" }, + { url = "https://files.pythonhosted.org/packages/0a/93/3be94d96bb442d0d9a60e55a6bb6e0958dd3457751c6f8502e56ef95fed0/sqlalchemy-2.0.45-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ba547ac0b361ab4f1608afbc8432db669bd0819b3e12e29fb5fa9529a8bba81d", size = 3348268, upload-time = "2025-12-09T22:13:49.054Z" }, + { url = "https://files.pythonhosted.org/packages/48/4b/f88ded696e61513595e4a9778f9d3f2bf7332cce4eb0c7cedaabddd6687b/sqlalchemy-2.0.45-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:215f0528b914e5c75ef2559f69dca86878a3beeb0c1be7279d77f18e8d180ed4", size = 3278144, upload-time = "2025-12-09T22:11:04.14Z" }, + { url = "https://files.pythonhosted.org/packages/ed/6a/310ecb5657221f3e1bd5288ed83aa554923fb5da48d760a9f7622afeb065/sqlalchemy-2.0.45-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:107029bf4f43d076d4011f1afb74f7c3e2ea029ec82eb23d8527d5e909e97aa6", size = 3313907, upload-time = "2025-12-09T22:13:50.598Z" }, + { url = "https://files.pythonhosted.org/packages/5c/39/69c0b4051079addd57c84a5bfb34920d87456dd4c90cf7ee0df6efafc8ff/sqlalchemy-2.0.45-cp312-cp312-win32.whl", hash = "sha256:0c9f6ada57b58420a2c0277ff853abe40b9e9449f8d7d231763c6bc30f5c4953", size = 2112182, upload-time = "2025-12-09T21:39:30.824Z" }, + { url = "https://files.pythonhosted.org/packages/f7/4e/510db49dd89fc3a6e994bee51848c94c48c4a00dc905e8d0133c251f41a7/sqlalchemy-2.0.45-cp312-cp312-win_amd64.whl", hash = "sha256:8defe5737c6d2179c7997242d6473587c3beb52e557f5ef0187277009f73e5e1", size = 2139200, upload-time = "2025-12-09T21:39:32.321Z" }, + { url = "https://files.pythonhosted.org/packages/6a/c8/7cc5221b47a54edc72a0140a1efa56e0a2730eefa4058d7ed0b4c4357ff8/sqlalchemy-2.0.45-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:fe187fc31a54d7fd90352f34e8c008cf3ad5d064d08fedd3de2e8df83eb4a1cf", size = 3277082, upload-time = "2025-12-09T22:11:06.167Z" }, + { url = "https://files.pythonhosted.org/packages/0e/50/80a8d080ac7d3d321e5e5d420c9a522b0aa770ec7013ea91f9a8b7d36e4a/sqlalchemy-2.0.45-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:672c45cae53ba88e0dad74b9027dddd09ef6f441e927786b05bec75d949fbb2e", size = 3293131, upload-time = "2025-12-09T22:13:52.626Z" }, + { url = "https://files.pythonhosted.org/packages/da/4c/13dab31266fc9904f7609a5dc308a2432a066141d65b857760c3bef97e69/sqlalchemy-2.0.45-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:470daea2c1ce73910f08caf10575676a37159a6d16c4da33d0033546bddebc9b", size = 3225389, upload-time = "2025-12-09T22:11:08.093Z" }, + { url = "https://files.pythonhosted.org/packages/74/04/891b5c2e9f83589de202e7abaf24cd4e4fa59e1837d64d528829ad6cc107/sqlalchemy-2.0.45-cp313-cp313-musllinux_1_2_x86_64.whl", hash = 
"sha256:9c6378449e0940476577047150fd09e242529b761dc887c9808a9a937fe990c8", size = 3266054, upload-time = "2025-12-09T22:13:54.262Z" }, + { url = "https://files.pythonhosted.org/packages/f1/24/fc59e7f71b0948cdd4cff7a286210e86b0443ef1d18a23b0d83b87e4b1f7/sqlalchemy-2.0.45-cp313-cp313-win32.whl", hash = "sha256:4b6bec67ca45bc166c8729910bd2a87f1c0407ee955df110d78948f5b5827e8a", size = 2110299, upload-time = "2025-12-09T21:39:33.486Z" }, + { url = "https://files.pythonhosted.org/packages/c0/c5/d17113020b2d43073412aeca09b60d2009442420372123b8d49cc253f8b8/sqlalchemy-2.0.45-cp313-cp313-win_amd64.whl", hash = "sha256:afbf47dc4de31fa38fd491f3705cac5307d21d4bb828a4f020ee59af412744ee", size = 2136264, upload-time = "2025-12-09T21:39:36.801Z" }, + { url = "https://files.pythonhosted.org/packages/3d/8d/bb40a5d10e7a5f2195f235c0b2f2c79b0bf6e8f00c0c223130a4fbd2db09/sqlalchemy-2.0.45-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:83d7009f40ce619d483d26ac1b757dfe3167b39921379a8bd1b596cf02dab4a6", size = 3521998, upload-time = "2025-12-09T22:13:28.622Z" }, + { url = "https://files.pythonhosted.org/packages/75/a5/346128b0464886f036c039ea287b7332a410aa2d3fb0bb5d404cb8861635/sqlalchemy-2.0.45-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:d8a2ca754e5415cde2b656c27900b19d50ba076aa05ce66e2207623d3fe41f5a", size = 3473434, upload-time = "2025-12-09T22:13:30.188Z" }, + { url = "https://files.pythonhosted.org/packages/cc/64/4e1913772646b060b025d3fc52ce91a58967fe58957df32b455de5a12b4f/sqlalchemy-2.0.45-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7f46ec744e7f51275582e6a24326e10c49fbdd3fc99103e01376841213028774", size = 3272404, upload-time = "2025-12-09T22:11:09.662Z" }, + { url = "https://files.pythonhosted.org/packages/b3/27/caf606ee924282fe4747ee4fd454b335a72a6e018f97eab5ff7f28199e16/sqlalchemy-2.0.45-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:883c600c345123c033c2f6caca18def08f1f7f4c3ebeb591a63b6fceffc95cce", size = 3277057, upload-time = "2025-12-09T22:13:56.213Z" }, + { url = "https://files.pythonhosted.org/packages/85/d0/3d64218c9724e91f3d1574d12eb7ff8f19f937643815d8daf792046d88ab/sqlalchemy-2.0.45-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:2c0b74aa79e2deade948fe8593654c8ef4228c44ba862bb7c9585c8e0db90f33", size = 3222279, upload-time = "2025-12-09T22:11:11.1Z" }, + { url = "https://files.pythonhosted.org/packages/24/10/dd7688a81c5bc7690c2a3764d55a238c524cd1a5a19487928844cb247695/sqlalchemy-2.0.45-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:8a420169cef179d4c9064365f42d779f1e5895ad26ca0c8b4c0233920973db74", size = 3244508, upload-time = "2025-12-09T22:13:57.932Z" }, + { url = "https://files.pythonhosted.org/packages/aa/41/db75756ca49f777e029968d9c9fee338c7907c563267740c6d310a8e3f60/sqlalchemy-2.0.45-cp314-cp314-win32.whl", hash = "sha256:e50dcb81a5dfe4b7b4a4aa8f338116d127cb209559124f3694c70d6cd072b68f", size = 2113204, upload-time = "2025-12-09T21:39:38.365Z" }, + { url = "https://files.pythonhosted.org/packages/89/a2/0e1590e9adb292b1d576dbcf67ff7df8cf55e56e78d2c927686d01080f4b/sqlalchemy-2.0.45-cp314-cp314-win_amd64.whl", hash = "sha256:4748601c8ea959e37e03d13dcda4a44837afcd1b21338e637f7c935b8da06177", size = 2138785, upload-time = "2025-12-09T21:39:39.503Z" }, + { url = 
"https://files.pythonhosted.org/packages/42/39/f05f0ed54d451156bbed0e23eb0516bcad7cbb9f18b3bf219c786371b3f0/sqlalchemy-2.0.45-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:cd337d3526ec5298f67d6a30bbbe4ed7e5e68862f0bf6dd21d289f8d37b7d60b", size = 3522029, upload-time = "2025-12-09T22:13:32.09Z" }, + { url = "https://files.pythonhosted.org/packages/54/0f/d15398b98b65c2bce288d5ee3f7d0a81f77ab89d9456994d5c7cc8b2a9db/sqlalchemy-2.0.45-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:9a62b446b7d86a3909abbcd1cd3cc550a832f99c2bc37c5b22e1925438b9367b", size = 3475142, upload-time = "2025-12-09T22:13:33.739Z" }, + { url = "https://files.pythonhosted.org/packages/bf/e1/3ccb13c643399d22289c6a9786c1a91e3dcbb68bce4beb44926ac2c557bf/sqlalchemy-2.0.45-py3-none-any.whl", hash = "sha256:5225a288e4c8cc2308dbdd874edad6e7d0fd38eac1e9e5f23503425c8eee20d0", size = 1936672, upload-time = "2025-12-09T21:54:52.608Z" }, ] [[package]] @@ -1660,6 +1996,23 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a2/09/77d55d46fd61b4a135c444fc97158ef34a095e5681d0a6c10b75bf356191/sympy-1.14.0-py3-none-any.whl", hash = "sha256:e091cc3e99d2141a0ba2847328f5479b05d94a6635cb96148ccb3f34671bd8f5", size = 6299353, upload-time = "2025-04-27T18:04:59.103Z" }, ] +[[package]] +name = "textual" +version = "6.8.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "markdown-it-py", extra = ["linkify"] }, + { name = "mdit-py-plugins" }, + { name = "platformdirs" }, + { name = "pygments" }, + { name = "rich" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c8/8f/aeccf7459e3d71cbca912a27a97f1fcb00735326f90714d22fa540d3848e/textual-6.8.0.tar.gz", hash = "sha256:7efe618ec9197466b8fe536aefabb678edf30658b9dc58a763365d7daed12b62", size = 1581639, upload-time = "2025-12-07T17:53:46.681Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/47/34/4f1bad936ac3ad94c8576b15660d4ce434f7dbd372baa53566a490bcdce3/textual-6.8.0-py3-none-any.whl", hash = "sha256:074d389ba8c6c98c74e2a4fe1493ea3a38f3ee5008697e98f71daa2cf8ab8fda", size = 714378, upload-time = "2025-12-07T17:53:44.501Z" }, +] + [[package]] name = "torch" version = "2.9.0" @@ -1684,12 +2037,16 @@ dependencies = [ { name = "nvidia-nvjitlink-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, { name = "nvidia-nvshmem-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, { name = "nvidia-nvtx-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "setuptools" }, + { name = "setuptools", marker = "python_full_version >= '3.12'" }, { name = "sympy" }, { name = "triton", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, { name = "typing-extensions" }, ] wheels = [ + { url = "https://files.pythonhosted.org/packages/58/fe/334225e6330e672b36aef23d77451fa906ea12881570c08638a91331a212/torch-2.9.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:c596708b5105d0b199215acf0c9be7c1db5f1680d88eddadf4b75a299259a677", size = 104230578, upload-time = "2025-10-15T15:46:08.182Z" }, + { url = "https://files.pythonhosted.org/packages/05/cc/49566caaa218872ec9a2912456f470ff92649894a4bc2e5274aa9ef87c4a/torch-2.9.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:51de31219c97c51cf4bf2be94d622e3deb5dcc526c6dc00e97c17eaec0fc1d67", size = 899815990, upload-time = "2025-10-15T15:48:03.336Z" }, + { url = 
"https://files.pythonhosted.org/packages/74/25/e9ab21d5925b642d008f139d4a3c9664fc9ee1faafca22913c080cc4c0a5/torch-2.9.0-cp311-cp311-win_amd64.whl", hash = "sha256:dd515c70059afd95f48b8192733764c08ca37a1d19803af6401b5ecad7c8676e", size = 109313698, upload-time = "2025-10-15T15:46:12.425Z" }, + { url = "https://files.pythonhosted.org/packages/b3/b7/205ef3e94de636feffd64b28bb59a0dfac0771221201b9871acf9236f5ca/torch-2.9.0-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:614a185e4986326d526a91210c8fc1397e76e8cfafa78baf6296a790e53a9eec", size = 74463678, upload-time = "2025-10-15T15:46:29.779Z" }, { url = "https://files.pythonhosted.org/packages/d1/d3/3985739f3b8e88675127bf70f82b3a48ae083e39cda56305dbd90398fec0/torch-2.9.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:e5f7af1dc4c0a7c4a260c2534f41ddaf209714f7c89145e644c44712fbd6b642", size = 104107898, upload-time = "2025-10-15T15:46:20.883Z" }, { url = "https://files.pythonhosted.org/packages/a5/4b/f4bb2e6c25d0272f798cd6d7a04ed315da76cec68c602d87040c7847287f/torch-2.9.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:01cff95ecd9a212ea2f141db28acccdceb6a4c54f64e6c51091146f5e2a772c6", size = 899738273, upload-time = "2025-10-15T15:50:04.188Z" }, { url = "https://files.pythonhosted.org/packages/66/11/c1c5ba6691cda6279087c35bd626536e4fd29521fe740abf5008377a9a02/torch-2.9.0-cp312-cp312-win_amd64.whl", hash = "sha256:4582b162f541651f0cb184d3e291c05c2f556c7117c64a9873e2ee158d40062b", size = 109280887, upload-time = "2025-10-15T15:46:26.228Z" }, @@ -1714,7 +2071,7 @@ wheels = [ [[package]] name = "torchdata" -version = "0.11.0" +version = "0.7.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "requests" }, @@ -1722,7 +2079,11 @@ dependencies = [ { name = "urllib3" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/95/d4/af694ef718aedbe95a72760ab9ff7a6a7a44ace2d7f70c27bfeb67c5c503/torchdata-0.11.0-py3-none-any.whl", hash = "sha256:52b940fbbe0e00fb21cabddf528449d1bec5bfb0d0823b7487b15f951658ee33", size = 61968, upload-time = "2025-02-20T22:26:30.666Z" }, + { url = "https://files.pythonhosted.org/packages/ad/9a/8b3c64a141b58228419110858acdd5eae7a1b54db9dd8f22a2af956ac53d/torchdata-0.7.1-cp311-cp311-macosx_10_13_x86_64.whl", hash = "sha256:91a78c677a3e4e2447d888587f7ea0b4ddde81ca95adf709ba0d3dc9a4e9542d", size = 1801812, upload-time = "2023-11-15T17:09:05.57Z" }, + { url = "https://files.pythonhosted.org/packages/35/b2/7ed3a80ae0673b940f2af14281dc02dee0f667c6094e6dcd399fa35249a7/torchdata-0.7.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:fa325d628aa6125c6b46b6fa27c94150ca9276edbba1042d3eb3cd9c1039b5a9", size = 4815618, upload-time = "2023-11-15T17:09:02.386Z" }, + { url = "https://files.pythonhosted.org/packages/c1/8d/b17138a9ad7e47dd602587dbcc142bd98374e0c16c0806c2026d8db54242/torchdata-0.7.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d256535648dfb94d1226f233768c6798d1841edfdbf0a09b2115e6cbbda614f9", size = 4657579, upload-time = "2023-11-15T17:09:12.272Z" }, + { url = "https://files.pythonhosted.org/packages/da/8d/e0413f91944f931cb5c685cbd6330ad450f9d5466c466822d25761ca772d/torchdata-0.7.1-cp311-cp311-win_amd64.whl", hash = "sha256:7460a5fa298e7cd5cef98e8e6455d481e5c73d39a462a89a38918389c8153e20", size = 1330404, upload-time = "2023-11-15T17:09:16.713Z" }, + { url = "https://files.pythonhosted.org/packages/e2/c8/34eda2bd6beb8a11c06cf905db74092bdbc3dec51a48f4f22cc474866a0a/torchdata-0.7.1-py3-none-any.whl", hash = 
"sha256:9f9476a26987d90fa3f87cb09ec82b78ce6031ddcaa91851c9fa9f732a987ab8", size = 184418, upload-time = "2023-11-15T17:09:10.159Z" }, ] [[package]] @@ -1735,6 +2096,10 @@ dependencies = [ { name = "torch" }, ] wheels = [ + { url = "https://files.pythonhosted.org/packages/a3/17/54ed2ec6944ea972b461a86424c8c7f98835982c90cbc45bf59bd962863a/torchvision-0.24.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f771cf918351ad509a28488be475f3e9cc71a750d6b1467842bfb64863a5e986", size = 1891719, upload-time = "2025-10-15T15:51:10.384Z" }, + { url = "https://files.pythonhosted.org/packages/f8/07/0cd6776eee784742ad3cb2bfd3295383d84cb2f9e87386119333d1587f0f/torchvision-0.24.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:bbd63bf4ebff84c48c50123eba90526cc9f794fe45bc9f5dd07cec19e8c62bce", size = 2420513, upload-time = "2025-10-15T15:51:18.087Z" }, + { url = "https://files.pythonhosted.org/packages/1a/f4/6026c08011ddcefcbc14161c5aa9dce55c35c6b045e04ef0952e88bf4594/torchvision-0.24.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:78fe414b3bb6dbf7e6f6da6f733ba96881f6b29a9b997228de7c5f603e5ed940", size = 8048018, upload-time = "2025-10-15T15:51:13.579Z" }, + { url = "https://files.pythonhosted.org/packages/2f/b4/362b4e67ed87cee0fb4f8f0363a852eaeef527968bf62c07ed56f764d729/torchvision-0.24.0-cp311-cp311-win_amd64.whl", hash = "sha256:629584b94e52f32a6278f2a35d85eeaae95fcc38730fcb765064f26c3c96df5d", size = 4027686, upload-time = "2025-10-15T15:51:19.189Z" }, { url = "https://files.pythonhosted.org/packages/47/ef/81e4e69e02e2c4650b30e8c11c8974f946682a30e0ab7e9803a831beff76/torchvision-0.24.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:c61d40bcd2e2451e932902a702ad495ba1ec6f279e90b1e15cef2bb55dc911e2", size = 1891726, upload-time = "2025-10-15T15:51:16.977Z" }, { url = "https://files.pythonhosted.org/packages/00/7b/e3809b3302caea9a12c13f3adebe4fef127188438e719fd6c8dc93db1da6/torchvision-0.24.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:b0531d1483fc322d7da0d83be52f0df860a75114ab87dbeeb9de765feaeda843", size = 2419495, upload-time = "2025-10-15T15:51:11.885Z" }, { url = "https://files.pythonhosted.org/packages/7e/e6/7324ead6793075a8c75c56abeed1236d1750de16a5613cfe2ddad164a92a/torchvision-0.24.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:26b9dd9c083f8e5f7ac827de6d5b88c615d9c582dc87666770fbdf16887e4c25", size = 8050480, upload-time = "2025-10-15T15:51:24.012Z" }, @@ -1774,6 +2139,7 @@ name = "triton" version = "3.5.0" source = { registry = "https://pypi.org/simple" } wheels = [ + { url = "https://files.pythonhosted.org/packages/3d/78/949a04391c21956c816523678f0e5fa308eb5b1e7622d88c4e4ef5fceca0/triton-3.5.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f34bfa21c5b3a203c0f0eab28dcc1e49bd1f67d22724e77fb6665a659200a4ec", size = 170433488, upload-time = "2025-10-13T16:37:57.132Z" }, { url = "https://files.pythonhosted.org/packages/f5/3a/e991574f3102147b642e49637e0281e9bb7c4ba254edb2bab78247c85e01/triton-3.5.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c9e71db82261c4ffa3921cd050cd5faa18322d2d405c30eb56084afaff3b0833", size = 170476535, upload-time = "2025-10-13T16:38:05.18Z" }, { url = "https://files.pythonhosted.org/packages/6c/29/10728de8a6e932e517c10773486b8e99f85d1b1d9dd87d9a9616e1fef4a1/triton-3.5.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e6bb9aa5519c084a333acdba443789e50012a4b851cd486c54f0b8dc2a8d3a12", size = 170487289, upload-time = "2025-10-13T16:38:11.662Z" }, { 
url = "https://files.pythonhosted.org/packages/5c/38/db80e48b9220c9bce872b0f616ad0446cdf554a40b85c7865cbca99ab3c2/triton-3.5.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c83f2343e1a220a716c7b3ab9fccfcbe3ad4020d189549200e2d2e8d5868bed9", size = 170577179, upload-time = "2025-10-13T16:38:17.865Z" }, @@ -1781,19 +2147,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/fb/b7/1dec8433ac604c061173d0589d99217fe7bf90a70bdc375e745d044b8aad/triton-3.5.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:317fe477ea8fd4524a6a8c499fb0a36984a56d0b75bf9c9cb6133a1c56d5a6e7", size = 170580176, upload-time = "2025-10-13T16:38:31.14Z" }, ] -[[package]] -name = "typer-slim" -version = "0.20.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "click" }, - { name = "typing-extensions" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/8e/45/81b94a52caed434b94da65729c03ad0fb7665fab0f7db9ee54c94e541403/typer_slim-0.20.0.tar.gz", hash = "sha256:9fc6607b3c6c20f5c33ea9590cbeb17848667c51feee27d9e314a579ab07d1a3", size = 106561, upload-time = "2025-10-20T17:03:46.642Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/5e/dd/5cbf31f402f1cc0ab087c94d4669cfa55bd1e818688b910631e131d74e75/typer_slim-0.20.0-py3-none-any.whl", hash = "sha256:f42a9b7571a12b97dddf364745d29f12221865acef7a2680065f9bb29c7dc89d", size = 47087, upload-time = "2025-10-20T17:03:44.546Z" }, -] - [[package]] name = "typing-extensions" version = "4.15.0" @@ -1813,12 +2166,21 @@ wheels = [ ] [[package]] -name = "urllib3" -version = "2.5.0" +name = "uc-micro-py" +version = "1.0.3" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/15/22/9ee70a2574a4f4599c47dd506532914ce044817c7752a79b6a51286319bc/urllib3-2.5.0.tar.gz", hash = "sha256:3fc47733c7e419d4bc3f6b3dc2b4f890bb743906a30d56ba4a5bfa4bbff92760", size = 393185, upload-time = "2025-06-18T14:07:41.644Z" } +sdist = { url = "https://files.pythonhosted.org/packages/91/7a/146a99696aee0609e3712f2b44c6274566bc368dfe8375191278045186b8/uc-micro-py-1.0.3.tar.gz", hash = "sha256:d321b92cff673ec58027c04015fcaa8bb1e005478643ff4a500882eaab88c48a", size = 6043, upload-time = "2024-02-09T16:52:01.654Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/a7/c2/fe1e52489ae3122415c51f387e221dd0773709bad6c6cdaa599e8a2c5185/urllib3-2.5.0-py3-none-any.whl", hash = "sha256:e6b01673c0fa6a13e374b50871808eb3bf7046c4b125b216f6bf1cc604cff0dc", size = 129795, upload-time = "2025-06-18T14:07:40.39Z" }, + { url = "https://files.pythonhosted.org/packages/37/87/1f677586e8ac487e29672e4b17455758fce261de06a0d086167bb760361a/uc_micro_py-1.0.3-py3-none-any.whl", hash = "sha256:db1dffff340817673d7b466ec86114a9dc0e9d4d9b5ba229d9d60e5c12600cd5", size = 6229, upload-time = "2024-02-09T16:52:00.371Z" }, +] + +[[package]] +name = "urllib3" +version = "2.6.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/5e/1d/0f3a93cca1ac5e8287842ed4eebbd0f7a991315089b1a0b01c7788aa7b63/urllib3-2.6.1.tar.gz", hash = "sha256:5379eb6e1aba4088bae84f8242960017ec8d8e3decf30480b3a1abdaa9671a3f", size = 432678, upload-time = "2025-12-08T15:25:26.773Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/bc/56/190ceb8cb10511b730b564fb1e0293fa468363dbad26145c34928a60cb0c/urllib3-2.6.1-py3-none-any.whl", hash = "sha256:e67d06fe947c36a7ca39f4994b08d73922d40e6cca949907be05efa6fd75110b", size = 131138, 
upload-time = "2025-12-08T15:25:25.51Z" }, ] [[package]] @@ -1827,6 +2189,21 @@ version = "3.6.0" source = { registry = "https://pypi.org/simple" } sdist = { url = "https://files.pythonhosted.org/packages/02/84/30869e01909fb37a6cc7e18688ee8bf1e42d57e7e0777636bd47524c43c7/xxhash-3.6.0.tar.gz", hash = "sha256:f0162a78b13a0d7617b2845b90c763339d1f1d82bb04a4b07f4ab535cc5e05d6", size = 85160, upload-time = "2025-10-02T14:37:08.097Z" } wheels = [ + { url = "https://files.pythonhosted.org/packages/17/d4/cc2f0400e9154df4b9964249da78ebd72f318e35ccc425e9f403c392f22a/xxhash-3.6.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b47bbd8cf2d72797f3c2772eaaac0ded3d3af26481a26d7d7d41dc2d3c46b04a", size = 32844, upload-time = "2025-10-02T14:34:14.037Z" }, + { url = "https://files.pythonhosted.org/packages/5e/ec/1cc11cd13e26ea8bc3cb4af4eaadd8d46d5014aebb67be3f71fb0b68802a/xxhash-3.6.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2b6821e94346f96db75abaa6e255706fb06ebd530899ed76d32cd99f20dc52fa", size = 30809, upload-time = "2025-10-02T14:34:15.484Z" }, + { url = "https://files.pythonhosted.org/packages/04/5f/19fe357ea348d98ca22f456f75a30ac0916b51c753e1f8b2e0e6fb884cce/xxhash-3.6.0-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:d0a9751f71a1a65ce3584e9cae4467651c7e70c9d31017fa57574583a4540248", size = 194665, upload-time = "2025-10-02T14:34:16.541Z" }, + { url = "https://files.pythonhosted.org/packages/90/3b/d1f1a8f5442a5fd8beedae110c5af7604dc37349a8e16519c13c19a9a2de/xxhash-3.6.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8b29ee68625ab37b04c0b40c3fafdf24d2f75ccd778333cfb698f65f6c463f62", size = 213550, upload-time = "2025-10-02T14:34:17.878Z" }, + { url = "https://files.pythonhosted.org/packages/c4/ef/3a9b05eb527457d5db13a135a2ae1a26c80fecd624d20f3e8dcc4cb170f3/xxhash-3.6.0-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:6812c25fe0d6c36a46ccb002f40f27ac903bf18af9f6dd8f9669cb4d176ab18f", size = 212384, upload-time = "2025-10-02T14:34:19.182Z" }, + { url = "https://files.pythonhosted.org/packages/0f/18/ccc194ee698c6c623acbf0f8c2969811a8a4b6185af5e824cd27b9e4fd3e/xxhash-3.6.0-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:4ccbff013972390b51a18ef1255ef5ac125c92dc9143b2d1909f59abc765540e", size = 445749, upload-time = "2025-10-02T14:34:20.659Z" }, + { url = "https://files.pythonhosted.org/packages/a5/86/cf2c0321dc3940a7aa73076f4fd677a0fb3e405cb297ead7d864fd90847e/xxhash-3.6.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:297b7fbf86c82c550e12e8fb71968b3f033d27b874276ba3624ea868c11165a8", size = 193880, upload-time = "2025-10-02T14:34:22.431Z" }, + { url = "https://files.pythonhosted.org/packages/82/fb/96213c8560e6f948a1ecc9a7613f8032b19ee45f747f4fca4eb31bb6d6ed/xxhash-3.6.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:dea26ae1eb293db089798d3973a5fc928a18fdd97cc8801226fae705b02b14b0", size = 210912, upload-time = "2025-10-02T14:34:23.937Z" }, + { url = "https://files.pythonhosted.org/packages/40/aa/4395e669b0606a096d6788f40dbdf2b819d6773aa290c19e6e83cbfc312f/xxhash-3.6.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:7a0b169aafb98f4284f73635a8e93f0735f9cbde17bd5ec332480484241aaa77", size = 198654, upload-time = "2025-10-02T14:34:25.644Z" }, + { url = 
"https://files.pythonhosted.org/packages/67/74/b044fcd6b3d89e9b1b665924d85d3f400636c23590226feb1eb09e1176ce/xxhash-3.6.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:08d45aef063a4531b785cd72de4887766d01dc8f362a515693df349fdb825e0c", size = 210867, upload-time = "2025-10-02T14:34:27.203Z" }, + { url = "https://files.pythonhosted.org/packages/bc/fd/3ce73bf753b08cb19daee1eb14aa0d7fe331f8da9c02dd95316ddfe5275e/xxhash-3.6.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:929142361a48ee07f09121fe9e96a84950e8d4df3bb298ca5d88061969f34d7b", size = 414012, upload-time = "2025-10-02T14:34:28.409Z" }, + { url = "https://files.pythonhosted.org/packages/ba/b3/5a4241309217c5c876f156b10778f3ab3af7ba7e3259e6d5f5c7d0129eb2/xxhash-3.6.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:51312c768403d8540487dbbfb557454cfc55589bbde6424456951f7fcd4facb3", size = 191409, upload-time = "2025-10-02T14:34:29.696Z" }, + { url = "https://files.pythonhosted.org/packages/c0/01/99bfbc15fb9abb9a72b088c1d95219fc4782b7d01fc835bd5744d66dd0b8/xxhash-3.6.0-cp311-cp311-win32.whl", hash = "sha256:d1927a69feddc24c987b337ce81ac15c4720955b667fe9b588e02254b80446fd", size = 30574, upload-time = "2025-10-02T14:34:31.028Z" }, + { url = "https://files.pythonhosted.org/packages/65/79/9d24d7f53819fe301b231044ea362ce64e86c74f6e8c8e51320de248b3e5/xxhash-3.6.0-cp311-cp311-win_amd64.whl", hash = "sha256:26734cdc2d4ffe449b41d186bbeac416f704a482ed835d375a5c0cb02bc63fef", size = 31481, upload-time = "2025-10-02T14:34:32.062Z" }, + { url = "https://files.pythonhosted.org/packages/30/4e/15cd0e3e8772071344eab2961ce83f6e485111fed8beb491a3f1ce100270/xxhash-3.6.0-cp311-cp311-win_arm64.whl", hash = "sha256:d72f67ef8bf36e05f5b6c65e8524f265bd61071471cd4cf1d36743ebeeeb06b7", size = 27861, upload-time = "2025-10-02T14:34:33.555Z" }, { url = "https://files.pythonhosted.org/packages/9a/07/d9412f3d7d462347e4511181dea65e47e0d0e16e26fbee2ea86a2aefb657/xxhash-3.6.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:01362c4331775398e7bb34e3ab403bc9ee9f7c497bc7dee6272114055277dd3c", size = 32744, upload-time = "2025-10-02T14:34:34.622Z" }, { url = "https://files.pythonhosted.org/packages/79/35/0429ee11d035fc33abe32dca1b2b69e8c18d236547b9a9b72c1929189b9a/xxhash-3.6.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b7b2df81a23f8cb99656378e72501b2cb41b1827c0f5a86f87d6b06b69f9f204", size = 30816, upload-time = "2025-10-02T14:34:36.043Z" }, { url = "https://files.pythonhosted.org/packages/b7/f2/57eb99aa0f7d98624c0932c5b9a170e1806406cdbcdb510546634a1359e0/xxhash-3.6.0-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:dc94790144e66b14f67b10ac8ed75b39ca47536bf8800eb7c24b50271ea0c490", size = 194035, upload-time = "2025-10-02T14:34:37.354Z" }, @@ -1902,6 +2279,11 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/9a/9a/c19c42c5b3f5a4aad748a6d5b4f23df3bed7ee5445accc65a0fb3ff03953/xxhash-3.6.0-cp314-cp314t-win32.whl", hash = "sha256:5851f033c3030dd95c086b4a36a2683c2ff4a799b23af60977188b057e467119", size = 31586, upload-time = "2025-10-02T14:36:15.603Z" }, { url = "https://files.pythonhosted.org/packages/03/d6/4cc450345be9924fd5dc8c590ceda1db5b43a0a889587b0ae81a95511360/xxhash-3.6.0-cp314-cp314t-win_amd64.whl", hash = "sha256:0444e7967dac37569052d2409b00a8860c2135cff05502df4da80267d384849f", size = 32526, upload-time = "2025-10-02T14:36:16.708Z" }, { url = "https://files.pythonhosted.org/packages/0f/c9/7243eb3f9eaabd1a88a5a5acadf06df2d83b100c62684b7425c6a11bcaa8/xxhash-3.6.0-cp314-cp314t-win_arm64.whl", 
hash = "sha256:bb79b1e63f6fd84ec778a4b1916dfe0a7c3fdb986c06addd5db3a0d413819d95", size = 28898, upload-time = "2025-10-02T14:36:17.843Z" }, + { url = "https://files.pythonhosted.org/packages/93/1e/8aec23647a34a249f62e2398c42955acd9b4c6ed5cf08cbea94dc46f78d2/xxhash-3.6.0-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:0f7b7e2ec26c1666ad5fc9dbfa426a6a3367ceaf79db5dd76264659d509d73b0", size = 30662, upload-time = "2025-10-02T14:37:01.743Z" }, + { url = "https://files.pythonhosted.org/packages/b8/0b/b14510b38ba91caf43006209db846a696ceea6a847a0c9ba0a5b1adc53d6/xxhash-3.6.0-pp311-pypy311_pp73-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:5dc1e14d14fa0f5789ec29a7062004b5933964bb9b02aae6622b8f530dc40296", size = 41056, upload-time = "2025-10-02T14:37:02.879Z" }, + { url = "https://files.pythonhosted.org/packages/50/55/15a7b8a56590e66ccd374bbfa3f9ffc45b810886c8c3b614e3f90bd2367c/xxhash-3.6.0-pp311-pypy311_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:881b47fc47e051b37d94d13e7455131054b56749b91b508b0907eb07900d1c13", size = 36251, upload-time = "2025-10-02T14:37:04.44Z" }, + { url = "https://files.pythonhosted.org/packages/62/b2/5ac99a041a29e58e95f907876b04f7067a0242cb85b5f39e726153981503/xxhash-3.6.0-pp311-pypy311_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c6dc31591899f5e5666f04cc2e529e69b4072827085c1ef15294d91a004bc1bd", size = 32481, upload-time = "2025-10-02T14:37:05.869Z" }, + { url = "https://files.pythonhosted.org/packages/7b/d9/8d95e906764a386a3d3b596f3c68bb63687dfca806373509f51ce8eea81f/xxhash-3.6.0-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:15e0dac10eb9309508bfc41f7f9deaa7755c69e35af835db9cb10751adebc35d", size = 31565, upload-time = "2025-10-02T14:37:06.966Z" }, ] [[package]] @@ -1915,6 +2297,22 @@ dependencies = [ ] sdist = { url = "https://files.pythonhosted.org/packages/57/63/0c6ebca57330cd313f6102b16dd57ffaf3ec4c83403dcb45dbd15c6f3ea1/yarl-1.22.0.tar.gz", hash = "sha256:bebf8557577d4401ba8bd9ff33906f1376c877aa78d1fe216ad01b4d6745af71", size = 187169, upload-time = "2025-10-06T14:12:55.963Z" } wheels = [ + { url = "https://files.pythonhosted.org/packages/4d/27/5ab13fc84c76a0250afd3d26d5936349a35be56ce5785447d6c423b26d92/yarl-1.22.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:1ab72135b1f2db3fed3997d7e7dc1b80573c67138023852b6efb336a5eae6511", size = 141607, upload-time = "2025-10-06T14:09:16.298Z" }, + { url = "https://files.pythonhosted.org/packages/6a/a1/d065d51d02dc02ce81501d476b9ed2229d9a990818332242a882d5d60340/yarl-1.22.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:669930400e375570189492dc8d8341301578e8493aec04aebc20d4717f899dd6", size = 94027, upload-time = "2025-10-06T14:09:17.786Z" }, + { url = "https://files.pythonhosted.org/packages/c1/da/8da9f6a53f67b5106ffe902c6fa0164e10398d4e150d85838b82f424072a/yarl-1.22.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:792a2af6d58177ef7c19cbf0097aba92ca1b9cb3ffdd9c7470e156c8f9b5e028", size = 94963, upload-time = "2025-10-06T14:09:19.662Z" }, + { url = "https://files.pythonhosted.org/packages/68/fe/2c1f674960c376e29cb0bec1249b117d11738db92a6ccc4a530b972648db/yarl-1.22.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3ea66b1c11c9150f1372f69afb6b8116f2dd7286f38e14ea71a44eee9ec51b9d", size = 368406, upload-time = "2025-10-06T14:09:21.402Z" }, + { url = 
"https://files.pythonhosted.org/packages/95/26/812a540e1c3c6418fec60e9bbd38e871eaba9545e94fa5eff8f4a8e28e1e/yarl-1.22.0-cp311-cp311-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:3e2daa88dc91870215961e96a039ec73e4937da13cf77ce17f9cad0c18df3503", size = 336581, upload-time = "2025-10-06T14:09:22.98Z" }, + { url = "https://files.pythonhosted.org/packages/0b/f5/5777b19e26fdf98563985e481f8be3d8a39f8734147a6ebf459d0dab5a6b/yarl-1.22.0-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:ba440ae430c00eee41509353628600212112cd5018d5def7e9b05ea7ac34eb65", size = 388924, upload-time = "2025-10-06T14:09:24.655Z" }, + { url = "https://files.pythonhosted.org/packages/86/08/24bd2477bd59c0bbd994fe1d93b126e0472e4e3df5a96a277b0a55309e89/yarl-1.22.0-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:e6438cc8f23a9c1478633d216b16104a586b9761db62bfacb6425bac0a36679e", size = 392890, upload-time = "2025-10-06T14:09:26.617Z" }, + { url = "https://files.pythonhosted.org/packages/46/00/71b90ed48e895667ecfb1eaab27c1523ee2fa217433ed77a73b13205ca4b/yarl-1.22.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4c52a6e78aef5cf47a98ef8e934755abf53953379b7d53e68b15ff4420e6683d", size = 365819, upload-time = "2025-10-06T14:09:28.544Z" }, + { url = "https://files.pythonhosted.org/packages/30/2d/f715501cae832651d3282387c6a9236cd26bd00d0ff1e404b3dc52447884/yarl-1.22.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:3b06bcadaac49c70f4c88af4ffcfbe3dc155aab3163e75777818092478bcbbe7", size = 363601, upload-time = "2025-10-06T14:09:30.568Z" }, + { url = "https://files.pythonhosted.org/packages/f8/f9/a678c992d78e394e7126ee0b0e4e71bd2775e4334d00a9278c06a6cce96a/yarl-1.22.0-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:6944b2dc72c4d7f7052683487e3677456050ff77fcf5e6204e98caf785ad1967", size = 358072, upload-time = "2025-10-06T14:09:32.528Z" }, + { url = "https://files.pythonhosted.org/packages/2c/d1/b49454411a60edb6fefdcad4f8e6dbba7d8019e3a508a1c5836cba6d0781/yarl-1.22.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:d5372ca1df0f91a86b047d1277c2aaf1edb32d78bbcefffc81b40ffd18f027ed", size = 385311, upload-time = "2025-10-06T14:09:34.634Z" }, + { url = "https://files.pythonhosted.org/packages/87/e5/40d7a94debb8448c7771a916d1861d6609dddf7958dc381117e7ba36d9e8/yarl-1.22.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:51af598701f5299012b8416486b40fceef8c26fc87dc6d7d1f6fc30609ea0aa6", size = 381094, upload-time = "2025-10-06T14:09:36.268Z" }, + { url = "https://files.pythonhosted.org/packages/35/d8/611cc282502381ad855448643e1ad0538957fc82ae83dfe7762c14069e14/yarl-1.22.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:b266bd01fedeffeeac01a79ae181719ff848a5a13ce10075adbefc8f1daee70e", size = 370944, upload-time = "2025-10-06T14:09:37.872Z" }, + { url = "https://files.pythonhosted.org/packages/2d/df/fadd00fb1c90e1a5a8bd731fa3d3de2e165e5a3666a095b04e31b04d9cb6/yarl-1.22.0-cp311-cp311-win32.whl", hash = "sha256:a9b1ba5610a4e20f655258d5a1fdc7ebe3d837bb0e45b581398b99eb98b1f5ca", size = 81804, upload-time = "2025-10-06T14:09:39.359Z" }, + { url = "https://files.pythonhosted.org/packages/b5/f7/149bb6f45f267cb5c074ac40c01c6b3ea6d8a620d34b337f6321928a1b4d/yarl-1.22.0-cp311-cp311-win_amd64.whl", hash = "sha256:078278b9b0b11568937d9509b589ee83ef98ed6d561dfe2020e24a9fd08eaa2b", size = 86858, upload-time = "2025-10-06T14:09:41.068Z" }, + { 
url = "https://files.pythonhosted.org/packages/2b/13/88b78b93ad3f2f0b78e13bfaaa24d11cbc746e93fe76d8c06bf139615646/yarl-1.22.0-cp311-cp311-win_arm64.whl", hash = "sha256:b6a6f620cfe13ccec221fa312139135166e47ae169f8253f72a0abc0dae94376", size = 81637, upload-time = "2025-10-06T14:09:42.712Z" }, { url = "https://files.pythonhosted.org/packages/75/ff/46736024fee3429b80a165a732e38e5d5a238721e634ab41b040d49f8738/yarl-1.22.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:e340382d1afa5d32b892b3ff062436d592ec3d692aeea3bef3a5cfe11bbf8c6f", size = 142000, upload-time = "2025-10-06T14:09:44.631Z" }, { url = "https://files.pythonhosted.org/packages/5a/9a/b312ed670df903145598914770eb12de1bac44599549b3360acc96878df8/yarl-1.22.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:f1e09112a2c31ffe8d80be1b0988fa6a18c5d5cad92a9ffbb1c04c91bfe52ad2", size = 94338, upload-time = "2025-10-06T14:09:46.372Z" }, { url = "https://files.pythonhosted.org/packages/ba/f5/0601483296f09c3c65e303d60c070a5c19fcdbc72daa061e96170785bc7d/yarl-1.22.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:939fe60db294c786f6b7c2d2e121576628468f65453d86b0fe36cb52f987bd74", size = 94909, upload-time = "2025-10-06T14:09:48.648Z" },