feat: Context CLI arg
This commit is contained in:
parent
cd74949b74
commit
a4583d402b
8 changed files with 38 additions and 31 deletions
|
|
@ -18,18 +18,15 @@ class HumanReferenceGenomeDataset(Dataset):
|
|||
split: str = "train",
|
||||
transform: Callable = None,
|
||||
size: int = -1,
|
||||
context_length: int = 1024,
|
||||
config: str = "6kbp",
|
||||
):
|
||||
super().__init__("human_reference_genome", root, split, transform, size)
|
||||
super().__init__("human_reference_genome", root, split, transform, size, context_length)
|
||||
|
||||
print(f"Loading from HuggingFace (config: {config}, split: {split})")
|
||||
ds = load_dataset("InstaDeepAI/human_reference_genome", config, split=split,
|
||||
data = load_dataset("InstaDeepAI/human_reference_genome", config, split=split,
|
||||
cache_dir=self.root, trust_remote_code=True)
|
||||
|
||||
# Dataset.process_data() expects a list[str], so keep only the 'sequence' field
|
||||
self.data = ds["sequence"]
|
||||
|
||||
self.context_length = 2048
|
||||
self.data = data["sequence"]
|
||||
|
||||
self.process_data()
|
||||
|
||||
|
|
|
|||
Reference in a new issue