Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
from pyhealth.datasets import SampleBaseDataset
from torch.utils.data import Dataset


class SampleKGDataset(SampleBaseDataset):
class SampleKGDataset(Dataset):
"""Sample KG dataset class.

This class inherits from `SampleBaseDataset` and is specifically designed
for KG datasets.
This class inherits from `torch.utils.data.Dataset` and is specifically
designed for KG datasets.

Args:
samples: a list of samples
samples: a list of samples
A sample is a dict containing following data:
{
'triple': a positive triple e.g., (0, 0, 2835)
Expand All @@ -24,19 +24,22 @@ class SampleKGDataset(SampleBaseDataset):
task_name: the name of the task. Default is None.
"""
def __init__(
self,
samples,
dataset_name="",
task_name="",
dev=False,
self,
samples,
dataset_name="",
task_name="",
dev=False,
entity_num=0,
relation_num=0,
entity2id=None,
relation2id=None,
**kwargs
):

super().__init__(samples, dataset_name, task_name)
super().__init__()
self.samples = samples
self.dataset_name = dataset_name
self.task_name = task_name
self.dev = dev
self.entity_num = entity_num
self.relation_num = relation_num
Expand Down Expand Up @@ -65,6 +68,10 @@ def __getitem__(self, index):
"""
return self.samples[index]

def __len__(self):
"""Returns the number of samples in the dataset."""
return len(self.samples)

def stat(self):
"""Returns some statistics of the base dataset."""
lines = list()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,16 @@
import numpy as np
import torch

from pyhealth.datasets import SampleBaseDataset


def split(
dataset: SampleBaseDataset,
dataset,
ratios: Union[Tuple[float, float, float], List[float]],
seed: Optional[int] = None,
):
"""Splits the dataset by its outermost indexed items

Args:
dataset: a `SampleBaseDataset` object
dataset: a `SampleKGDataset` object
ratios: a list/tuple of ratios for train / val / test
seed: random seed for shuffling the dataset

Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
from.kg_base import KGEBaseModel
from pyhealth.datasets import SampleBaseDataset
import torch


Expand All @@ -13,7 +12,7 @@ class ComplEx(KGEBaseModel):

def __init__(
self,
dataset: SampleBaseDataset,
dataset,
e_dim: int = 600,
r_dim: int = 600,
ns: str = "adv",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
from.kg_base import KGEBaseModel
from pyhealth.datasets import SampleBaseDataset
import torch


Expand All @@ -12,7 +11,7 @@ class DistMult(KGEBaseModel):
"""
def __init__(
self,
dataset: SampleBaseDataset,
dataset,
e_dim: int = 300,
r_dim: int = 300,
ns: str = "adv",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
from abc import ABC
from pyhealth.datasets import SampleBaseDataset

import torch
import time
Expand Down Expand Up @@ -32,7 +31,7 @@ def device(self):

def __init__(
self,
dataset: SampleBaseDataset,
dataset,
e_dim: int = 500,
r_dim: int = 500,
ns: str = "uniform",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
from.kg_base import KGEBaseModel
from pyhealth.datasets import SampleBaseDataset
import torch


Expand All @@ -13,7 +12,7 @@ class RotatE(KGEBaseModel):

def __init__(
self,
dataset: SampleBaseDataset,
dataset,
e_dim: int = 600,
r_dim: int = 300,
ns='adv',
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
from.kg_base import KGEBaseModel
from pyhealth.datasets import SampleBaseDataset
import torch


Expand All @@ -13,7 +12,7 @@ class TransE(KGEBaseModel):

def __init__(
self,
dataset: SampleBaseDataset,
dataset,
e_dim: int = 300,
r_dim: int = 300,
ns: str = "adv",
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ keywords = [
[project.optional-dependencies]
graph = [
"torch-geometric>=2.6.0",
"pandarallel",
]
nlp = [
"editdistance~=0.8.1",
Expand Down