feat: model --> ready for a test training run
This commit is contained in:
parent
63d1b6f5ae
commit
b58682cb49
8 changed files with 382 additions and 17 deletions
|
|
@ -14,7 +14,6 @@ class CNNPredictor(nn.Module):
|
|||
def __init__(
|
||||
self,
|
||||
vocab_size=256,
|
||||
context_length=128,
|
||||
num_layers=3,
|
||||
hidden_dim=128,
|
||||
kernel_size=3,
|
||||
|
|
|
|||
|
|
@ -1,11 +0,0 @@
|
|||
import torch
|
||||
from torch.utils.data import TensorDataset
|
||||
|
||||
|
||||
def make_context_pairs(data: bytes, context_length: int) -> TensorDataset:
    """Build next-value prediction samples from a byte sequence.

    Every window of ``context_length`` consecutive values in ``data`` that
    is followed by at least one more value becomes an input row; the value
    right after the window is that row's target.

    Args:
        data: Raw byte values. Other iterables of integers are still
            accepted through the list-based fallback.
        context_length: Number of consecutive values in each input window.

    Returns:
        A ``TensorDataset`` wrapping ``(x, y)``: ``x`` has shape
        ``(len(data) - context_length, context_length)`` and ``y`` has
        shape ``(len(data) - context_length,)``. Both are ``torch.uint8``.

    Raises:
        RuntimeError: Propagated from ``Tensor.unfold`` when
            ``context_length`` is larger than ``len(data)``.
    """
    if isinstance(data, (bytes, bytearray)) and data:
        # Read the bytes through the buffer protocol instead of expanding
        # them into a Python list of ints first, which is slow and memory
        # hungry for large inputs.  Copying into a bytearray hands
        # frombuffer a writable buffer.
        values = torch.frombuffer(bytearray(data), dtype=torch.uint8)
    else:
        # Empty input and non-bytes iterables keep the original path.
        values = torch.tensor(list(data), dtype=torch.uint8)

    sample_count = values.shape[0] - context_length
    # unfold also produces the final window, which has no successor to
    # predict; slicing to sample_count drops it so x and y line up.
    x = values.unfold(0, context_length, 1)[:sample_count]
    y = values[context_length:]
    return TensorDataset(x, y)
|
||||
|
||||
|
|
@ -4,9 +4,10 @@ import torch.nn.functional as F
|
|||
import optuna.trial as tr
|
||||
from torch.utils.data import DataLoader
|
||||
from tqdm import tqdm
|
||||
import argparse
|
||||
|
||||
from optuna_trial import create_model
|
||||
from data_utils import make_context_pairs
|
||||
from utils import make_context_pairs, load_data
|
||||
import optuna
|
||||
|
||||
# hyper parameters
|
||||
|
|
@ -71,10 +72,22 @@ def objective_function(trial: tr.Trial, train_data: bytes, validation_data: byte
|
|||
return result["best_validation_loss"]
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: read the data files named on the command line and
    # run an Optuna study that minimizes the value returned by
    # objective_function.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--train-data",
        type=str,
        required=True,
        help="path of the file whose bytes are used as training data",
    )
    parser.add_argument(
        "--validation-data",
        type=str,
        required=True,
        help="path of the file whose bytes are used as validation data",
    )
    parser.add_argument(
        "--batch-size",
        type=int,
        default=128,
        help="batch size forwarded to the objective function",
    )

    args = parser.parse_args()
    print(args)

    # Module-level name; kept because code outside this block may read it.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # The former placeholder values (b"", b"", 0) were overwritten right
    # away, so the names are bound directly to the command-line values.
    train_data = load_data(args.train_data)
    validation_data = load_data(args.validation_data)
    batch_size = args.batch_size

    print(f"training data length: {len(train_data)}")
    print(f"validation data length: {len(validation_data)}")
    print(f"batch size: {batch_size}")

    study = optuna.create_study(study_name="CNN network", direction="minimize")
    study.optimize(
        lambda trial: objective_function(trial, train_data, validation_data, batch_size),
        n_trials=10,
    )
|
||||
|
|
@ -10,7 +10,6 @@ def create_model(trial: tr.Trial, vocab_size: int = 256, context_length: int = 1
|
|||
|
||||
return CNNPredictor(
|
||||
vocab_size=vocab_size,
|
||||
context_length=context_length,
|
||||
num_layers=num_layers,
|
||||
hidden_dim=hidden_dim,
|
||||
kernel_size=kernel_size,
|
||||
|
|
|
|||
20
CNN-model/utils.py
Normal file
20
CNN-model/utils.py
Normal file
|
|
@ -0,0 +1,20 @@
|
|||
import torch
|
||||
from torch.utils.data import TensorDataset
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
|
||||
def make_context_pairs(data: bytes, context_length: int) -> TensorDataset:
    """Build next-value prediction samples from a byte sequence.

    Every window of ``context_length`` consecutive values in ``data`` that
    is followed by at least one more value becomes an input row; the value
    right after the window is that row's target.

    Args:
        data: Raw byte values. Other iterables of integers are still
            accepted through the list-based fallback.
        context_length: Number of consecutive values in each input window.

    Returns:
        A ``TensorDataset`` wrapping ``(x, y)``: ``x`` has shape
        ``(len(data) - context_length, context_length)`` and ``y`` has
        shape ``(len(data) - context_length,)``. Both are ``torch.long``.

    Raises:
        RuntimeError: Propagated from ``Tensor.unfold`` when
            ``context_length`` is larger than ``len(data)``.
    """
    if isinstance(data, (bytes, bytearray)) and data:
        # Read the bytes through the buffer protocol instead of expanding
        # them into a Python list of ints first, which is slow and memory
        # hungry for large corpora.  Copying into a bytearray hands
        # frombuffer a writable buffer; the cast to long then produces an
        # independent tensor.
        tokens = torch.frombuffer(bytearray(data), dtype=torch.uint8).to(torch.long)
    else:
        # Empty input and non-bytes iterables keep the original path.
        tokens = torch.tensor(list(data), dtype=torch.long)

    sample_count = tokens.shape[0] - context_length
    # unfold also produces the final window, which has no successor to
    # predict; slicing to sample_count drops it so x and y line up.
    x = tokens.unfold(0, context_length, 1)[:sample_count]
    y = tokens[context_length:]
    return TensorDataset(x, y)
|
||||
|
||||
def print_distribution(from_to: tuple[int, int], probabilities: list[float]):
    """Display a weighted histogram of the integers in a half-open range.

    Args:
        from_to: ``(start, stop)`` pair; the plotted values are
            ``range(start, stop)``.
        probabilities: Weight attached to each value of that range, passed
            to matplotlib as ``weights``.

    No ``bins`` argument is given, so matplotlib's default binning applies.
    The figure is shown via ``plt.show()``.
    """
    lower = from_to[0]
    upper = from_to[1]
    values = range(lower, upper)
    plt.hist(values, weights=probabilities)
    plt.show()
|
||||
|
||||
|
||||
def load_data(path: str) -> bytes:
    """Return the complete contents of the file at ``path`` as bytes.

    The file is opened in binary mode, so no text decoding takes place.
    """
    with open(path, mode="rb") as handle:
        contents = handle.read()
    return contents
|
||||
Reference in a new issue