Skip to content

beignet.datasets

beignet.datasets.ATOM3DDataset

Bases: LMDBDataset

Source code in src/beignet/datasets/_atom3d_dataset.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
class ATOM3DDataset(LMDBDataset):
    """
    Shared base class of the ATOM3D datasets.

    The database is opened from ``<root>/ATOM3D<name>/<path>``. When
    requested, the archive is first downloaded and extracted into
    ``<root>/ATOM3D<name>``.
    """

    def __init__(
        self,
        root: Union[str, Path],
        path: Union[str, Path],
        resource: str,
        name: str,
        *,
        checksum: Optional[str] = None,
        download: bool = False,
        transform: Union[Callable, Transform, None] = None,
    ):
        """
        Parameters
        ----------
        root : Union[str, Path]
            Directory that holds (or will hold) the ``ATOM3D<name>``
            folder. A `str` is converted to a resolved `Path`; any other
            value is stored unchanged.

        path : Union[str, Path]
            Location of the database relative to ``<root>/ATOM3D<name>``.

        resource : str
            Archive location handed to
            `beignet.io.download_and_extract_archive`.

        name : str
            Suffix used to build the ``ATOM3D<name>`` folder name.

        checksum : Optional[str], optional
            Checksum forwarded to the download helper, by default `None`.

        download : bool, optional
            If `True`, download and extract the archive before the parent
            class is initialized, by default `False`.

        transform : Union[Callable, Transform, None], optional
            Transform stored on the instance and forwarded to the parent
            class, by default `None`.
        """
        self._root = Path(root).resolve() if isinstance(root, str) else root
        self._transform_fn = transform

        # Both the download target and the database path hang off this
        # directory, so build it once.
        dataset_directory = self._root / f"ATOM3D{name}"

        if download:
            beignet.io.download_and_extract_archive(
                resource,
                dataset_directory,
                checksum=checksum,
            )

        super().__init__(dataset_directory / path, transform=transform)

beignet.datasets.ATOM3DMSPDataset

Bases: ATOM3DDataset

Source code in src/beignet/datasets/_atom3d_msp_dataset.py
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
class ATOM3DMSPDataset(ATOM3DDataset):
    """
    ATOM3D "MSP" dataset.

    Each sample is ``((structure, mutant), target)``: two data frames
    built from the record's ``"original_atoms"`` and ``"mutated_atoms"``
    entries, and a tensor built from its ``"label"`` entry.
    """

    def __init__(
        self,
        root: Union[str, Path],
        *,
        download: bool = False,
        transform: Union[Callable, Transform, None] = None,
        target_transform: Union[Callable, Transform, None] = None,
    ):
        """
        Parameters
        ----------
        root : Union[str, Path]
            The root directory of the dataset.

        download : bool, optional
            If `True`, download the dataset archive, by default `False`.

        transform : Union[Callable, Transform, None], optional
            Callable invoked as ``transform(structure, mutant)``; it must
            return the transformed pair. By default `None`.

        target_transform : Union[Callable, Transform, None], optional
            Callable applied to the target tensor, by default `None`.
        """
        super().__init__(
            root,
            path="raw/MSP/data",
            resource="https://zenodo.org/record/4962515/files/MSP-raw.tar.gz",
            name="MSP",
            checksum="77aeb79cfc80bd51cdfb2aa321bf6128",
            download=download,
        )

        # The parent is initialized without a transform; both transforms
        # are applied in ``__getitem__`` instead.
        self._transform_fn = transform
        self._target_transform_fn = target_transform

    def __getitem__(
        self,
        index: int,
    ) -> Tuple[Tuple[DataFrame, DataFrame], Tensor]:
        """
        Parameters
        ----------
        index : int
            The index of the item to retrieve from the dataset.

        Returns
        -------
        Tuple[Tuple[DataFrame, DataFrame], Tensor]
            The ``(structure, mutant)`` pair and the label tensor.
        """
        record = super().__getitem__(index)

        original = DataFrame(**record["original_atoms"])
        mutated = DataFrame(**record["mutated_atoms"])

        if self._transform_fn is not None:
            original, mutated = self._transform_fn(original, mutated)

        label = torch.tensor(int(record["label"]))

        if self._target_transform_fn is None:
            return (original, mutated), label

        return (original, mutated), self._target_transform_fn(label)

beignet.datasets.ATOM3DPPIDataset

Bases: ATOM3DDataset

Source code in src/beignet/datasets/_atom3d_ppi_dataset.py
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
class ATOM3DPPIDataset(ATOM3DDataset):
    """
    ATOM3D "PPI" dataset.

    Each sample is a ``(features, target)`` pair of data frames built
    from the record's ``"atoms_pairs"`` and ``"atoms_neighbors"``
    entries respectively.
    """

    def __init__(
        self,
        root: Union[str, Path],
        *,
        download: bool = False,
        transform: Union[Callable, Transform, None] = None,
        target_transform: Union[Callable, Transform, None] = None,
        joint_transform_fn: Union[Callable, Transform, None] = None,
    ):
        """
        Parameters
        ----------
        root : Union[str, Path]
            The root directory of the dataset.

        download : bool, optional
            If `True`, download the dataset archive, by default `False`.

        transform : Union[Callable, Transform, None], optional
            Callable applied to the features, by default `None`.

        target_transform : Union[Callable, Transform, None], optional
            Callable applied to the target, by default `None`.

        joint_transform_fn : Union[Callable, Transform, None], optional
            Callable invoked as ``joint_transform_fn(features, target)``
            before the individual transforms; it must return the
            transformed pair. By default `None`.
        """
        super().__init__(
            root,
            path="raw/DIPS/data",
            resource="https://zenodo.org/record/4911102/files/PPI-raw.tar.gz",
            name="PPI",
            checksum="621977d132b39957e3480a24a30a7358",
            download=download,
        )

        # The parent is initialized without a transform; all transforms
        # are applied in ``__getitem__`` instead.
        self._transform_fn = transform
        self._target_transform_fn = target_transform
        self._joint_transform_fn = joint_transform_fn

    def __getitem__(self, index: int) -> Tuple[DataFrame, DataFrame]:
        """
        Parameters
        ----------
        index : int
            The index of the item to retrieve from the dataset.

        Returns
        -------
        Tuple[DataFrame, DataFrame]
            A tuple containing the features and target of the item.
        """
        record = super().__getitem__(index)

        pairs = DataFrame(**record["atoms_pairs"])
        neighbors = DataFrame(**record["atoms_neighbors"])

        # The joint transform runs first so the per-side transforms see
        # its output.
        if self._joint_transform_fn is not None:
            pairs, neighbors = self._joint_transform_fn(pairs, neighbors)

        if self._transform_fn is not None:
            pairs = self._transform_fn(pairs)

        if self._target_transform_fn is not None:
            neighbors = self._target_transform_fn(neighbors)

        return pairs, neighbors

beignet.datasets.ATOM3DPSRDataset

Bases: ATOM3DDataset

Source code in src/beignet/datasets/_atom3d_psr_dataset.py
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
class ATOM3DPSRDataset(ATOM3DDataset):
    """
    ATOM3D "PSR" dataset.

    Each sample is a ``(features, target)`` pair: a data frame built from
    the record's ``"atoms"`` entry and a dictionary of tensors built from
    its ``"scores"`` entry.
    """

    def __init__(
        self,
        root: Union[str, Path],
        *,
        download: bool = False,
        transform: Union[Callable, Transform, None] = None,
        target_transform: Union[Callable, Transform, None] = None,
    ):
        """
        Parameters
        ----------
        root : Union[str, Path]
            The root directory of the dataset.

        download : bool, optional
            If `True`, download the dataset archive, by default `False`.

        transform : Union[Callable, Transform, None], optional
            Callable applied to the features, by default `None`.

        target_transform : Union[Callable, Transform, None], optional
            Callable applied to the target dictionary, by default `None`.
        """
        super().__init__(
            root,
            "raw/casp5_to_13/data",
            "https://zenodo.org/record/4915648/files/PSR-raw.tar.gz",
            "PSR",
            checksum="80caef3c98febb70951fa244c8303039",
            download=download,
        )

        self._transform_fn = transform

        self._target_transform_fn = target_transform

    def __getitem__(
        self,
        index: int,
    ) -> Tuple[DataFrame, Dict[str, Tensor]]:
        """
        Parameters
        ----------
        index : int
            The index of the item to retrieve from the dataset.

        Returns
        -------
        Tuple[DataFrame, Dict[str, Tensor]]
            A tuple containing the features and target of the item.
        """
        item = super().__getitem__(index)

        features = DataFrame(**item["atoms"])

        if self._transform_fn is not None:
            features = self._transform_fn(features)

        # Build a fresh dictionary rather than overwriting the values of
        # ``item["scores"]`` in place, so the record handed back by the
        # parent class is left untouched.
        target = {
            key: torch.tensor(value)
            for key, value in item["scores"].items()
        }

        if self._target_transform_fn is not None:
            target = self._target_transform_fn(target)

        return features, target

beignet.datasets.ATOM3DRESDataset

Bases: ATOM3DDataset

Source code in src/beignet/datasets/_atom3d_res_dataset.py
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
class ATOM3DRESDataset(ATOM3DDataset):
    def __init__(
        self,
        root: Union[str, Path],
        *,
        download: bool = False,
        transform: Union[Callable, Transform, None] = None,
        target_transform: Union[Callable, Transform, None] = None,
    ):
        """
        ATOM3D Residue Identity (RES) consists of atomic environments
        extracted from non-redundant structures in the Protein Data Bank.
        It is formulated as a classification task: the identity of the
        amino acid at the center of the environment is predicted from all
        other atoms.

        Each sample is a pair of features and a target, where features is
        a data frame built from the record's ``"atoms"`` entry and target
        is a data frame built from its ``"labels"`` entry.

        Parameters
        ----------
        root : Union[str, Path]
            The root directory of the dataset.

        download : bool, optional
            If `True`, download the dataset from the specified source,
            by default `False`.

        transform : Union[Callable, Transform, None], optional
            The transformation function to be applied to the features,
            by default `None`.

        target_transform : Union[Callable, Transform, None], optional
            The transformation function to be applied to the targets,
            by default `None`.
        """
        super().__init__(
            root,
            path="raw/RES/data",
            resource="https://zenodo.org/record/5026743/files/RES-raw.tar.gz",
            name="RES",
            checksum="3d6b6c61efb890a8baa303280b6589d9",
            download=download,
        )

        # The parent is initialized without a transform; both transforms
        # are applied in ``__getitem__`` instead.
        self._transform_fn = transform
        self._target_transform_fn = target_transform

    def __getitem__(self, index: int) -> Tuple[DataFrame, DataFrame]:
        """
        Load one sample and apply the configured transforms.

        Parameters
        ----------
        index : int
            The index of the item to retrieve from the dataset.

        Returns
        -------
        Tuple[DataFrame, DataFrame]
            A tuple containing the features and target of the item.
        """
        record = super().__getitem__(index)

        atoms = DataFrame(**record["atoms"])
        if self._transform_fn is not None:
            atoms = self._transform_fn(atoms)

        labels = DataFrame(**record["labels"])
        if self._target_transform_fn is not None:
            labels = self._target_transform_fn(labels)

        return atoms, labels
__getitem__
__getitem__(index)

Parameters:

Name Type Description Default
index int

The index of the item to retrieve from the dataset.

required

Returns:

Type Description
Tuple[DataFrame, DataFrame]

A tuple containing the features and target of the item.

Source code in src/beignet/datasets/_atom3d_res_dataset.py
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
def __getitem__(self, index: int) -> Tuple[DataFrame, DataFrame]:
    """
    Load one sample and apply the configured transforms.

    Parameters
    ----------
    index : int
        The index of the item to retrieve from the dataset.

    Returns
    -------
    Tuple[DataFrame, DataFrame]
        The features, built from the record's ``"atoms"`` entry, and the
        target, built from its ``"labels"`` entry.
    """
    record = super().__getitem__(index)

    atoms = DataFrame(**record["atoms"])
    if self._transform_fn is not None:
        atoms = self._transform_fn(atoms)

    labels = DataFrame(**record["labels"])
    if self._target_transform_fn is not None:
        labels = self._target_transform_fn(labels)

    return atoms, labels
__init__
__init__(root, *, download=False, transform=None, target_transform=None)

ATOM3D Residue Identity (RES) consists of atomic environments extracted from non-redundant structures in the Protein Data Bank. This is formulated as a classification task where the identity of the amino acid in the center of the environment is predicted based on all other atoms.

Each sample is a pair of features and a target, where features is the molecule’s atomic coordinates and target is the environment’s atomic coordinates.

Parameters:

Name Type Description Default
root Union[str, Path]

The root directory of the dataset.

required
download bool

If True, download the dataset from the specified source, by default False.

False
transform Union[Callable, Transform, None]

The transformation function to be applied to the features, by default None.

None
target_transform Union[Callable, Transform, None]

The transformation function to be applied to the targets, by default None.

None
Source code in src/beignet/datasets/_atom3d_res_dataset.py
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
def __init__(
    self,
    root: Union[str, Path],
    *,
    download: bool = False,
    transform: Union[Callable, Transform, None] = None,
    target_transform: Union[Callable, Transform, None] = None,
):
    """
    ATOM3D Residue Identity (RES) consists of atomic environments
    extracted from non-redundant structures in the Protein Data Bank.
    It is formulated as a classification task: the identity of the
    amino acid at the center of the environment is predicted from all
    other atoms.

    Each sample is a pair of features and a target, where features is
    the molecule's atomic coordinates and target is the environment's
    atomic coordinates.

    Parameters
    ----------
    root : Union[str, Path]
        The root directory of the dataset.

    download : bool, optional
        If `True`, download the dataset from the specified source,
        by default `False`.

    transform : Union[Callable, Transform, None], optional
        The transformation function to be applied to the features,
        by default `None`.

    target_transform : Union[Callable, Transform, None], optional
        The transformation function to be applied to the targets,
        by default `None`.
    """
    super().__init__(
        root,
        path="raw/RES/data",
        resource="https://zenodo.org/record/5026743/files/RES-raw.tar.gz",
        name="RES",
        checksum="3d6b6c61efb890a8baa303280b6589d9",
        download=download,
    )

    # The parent is initialized without a transform; the transforms are
    # only stored here.
    self._transform_fn = transform
    self._target_transform_fn = target_transform

beignet.datasets.ATOM3DRSRDataset

Bases: ATOM3DDataset

Source code in src/beignet/datasets/_atom3d_rsr_dataset.py
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
class ATOM3DRSRDataset(ATOM3DDataset):
    def __init__(
        self,
        root: Union[str, Path],
        *,
        download: bool = False,
        transform: Union[Callable, Transform, None] = None,
        target_transform: Union[Callable, Transform, None] = None,
    ):
        """
        The ATOM3D RNA Structure Ranking (RSR) task predicts the
        three-dimensional structure of an RNA molecule, given its sequence.
        A total of 21 RNAs are included, which consist of the first 21 RNAs
        from the RNA-Puzzles competition (Cruz et al., 2011).

        This problem is phrased as candidate ranking. For each RNA,
        candidate structural models are generated using FARFAR2 ("Silly
        Boy" Watkins et al., 2020), and the root mean squared deviation
        (RMSD) of each candidate's atoms to the experimentally determined
        structure is calculated.

        Each sample is a pair of features and a target, where features is
        the molecule's atomic coordinates and target is a dictionary of
        tensors keyed by the following scores (no per-key descriptions
        are documented):

            score, fa_atr, fa_rep, fa_intra_rep, lk_nonpolar,
            fa_elec_rna_phos_phos, rna_torsion, suiteness_bonus,
            rna_sugar_close, fa_stack, stack_elec, geom_sol_fast,
            hbond_sr_bb_sc, hbond_lr_bb_sc, hbond_sc, ref, free_suite,
            free_2HOprime, intermol, other_pose, loop_close,
            linear_chainbreak, rms, rms_stem, time, N_WC, N_NWC, N_BS,
            N_BP, natWC, natNWC, natBP, f_natWC, f_natNWC, f_natBP

        Parameters
        ----------
        root : Union[str, Path]
            The root directory of the dataset.

        download : bool, optional
            If `True`, download the dataset from the specified source,
            by default `False`.

        transform : Union[Callable, Transform, None], optional
            The transformation function to be applied to the features,
            by default `None`.

        target_transform : Union[Callable, Transform, None], optional
            The transformation function to be applied to the targets,
            by default `None`.
        """
        super().__init__(
            root,
            "raw/candidates/data",
            "https://zenodo.org/record/4961085/files/RSR-raw.tar.gz",
            "RSR",
            checksum="68830ab0ab95cf3d218785a4e4e7669c",
            download=download,
        )

        self._transform_fn = transform

        self._target_transform_fn = target_transform

    def __getitem__(self, index: int) -> Tuple[DataFrame, Dict[str, Tensor]]:
        """
        Parameters
        ----------
        index : int
            The index of the item to retrieve from the dataset.

        Returns
        -------
        Tuple[DataFrame, Dict[str, Tensor]]
            A tuple containing the features and target of the item.
        """
        item = super().__getitem__(index)

        features = DataFrame(**item["atoms"])

        if self._transform_fn is not None:
            features = self._transform_fn(features)

        # Build a fresh dictionary rather than overwriting the values of
        # ``item["scores"]`` in place, so the record handed back by the
        # parent class is left untouched.
        target = {
            key: torch.tensor(value)
            for key, value in item["scores"].items()
        }

        if self._target_transform_fn is not None:
            target = self._target_transform_fn(target)

        return features, target
__getitem__
__getitem__(index)

Parameters:

Name Type Description Default
index int

The index of the item to retrieve from the dataset.

required

Returns:

Type Description
Tuple[DataFrame, Dict[str, Tensor]]

A tuple containing the features and target of the item.

Source code in src/beignet/datasets/_atom3d_rsr_dataset.py
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
def __getitem__(self, index: int) -> Tuple[DataFrame, Dict[str, Tensor]]:
    """
    Load one sample and apply the configured transforms.

    Parameters
    ----------
    index : int
        The index of the item to retrieve from the dataset.

    Returns
    -------
    Tuple[DataFrame, Dict[str, Tensor]]
        The features, built from the record's ``"atoms"`` entry, and the
        target, a dictionary holding one tensor per entry of the
        record's ``"scores"`` mapping.
    """
    item = super().__getitem__(index)

    features = DataFrame(**item["atoms"])

    if self._transform_fn is not None:
        features = self._transform_fn(features)

    # Build a fresh dictionary rather than overwriting the values of
    # ``item["scores"]`` in place, so the record handed back by the
    # parent class is left untouched.
    target = {
        key: torch.tensor(value)
        for key, value in item["scores"].items()
    }

    if self._target_transform_fn is not None:
        target = self._target_transform_fn(target)

    return features, target
__init__
__init__(root, *, download=False, transform=None, target_transform=None)

The ATOM3D RNA Structure Ranking (RSR) task predicts the three-dimensional structure of an RNA molecule, given its sequence. A total of 21 RNAs are included, which consist of the first 21 RNAs from the RNA-Puzzles competition (Cruz et al., 2011).

This problem is phrased as candidate ranking. For each RNA, candidate structural models are generated using FARFAR2 (“Silly Boy” Watkins et al., 2020), and the root mean squared deviation (RMSD) of each candidate’s atoms to the experimentally determined structure is calculated.

Each sample is a pair of features and a target, where features is the molecule’s atomic coordinates and target is a dictionary of the following scores:

Target keys (table header: Key, Description; no descriptions are given):

    • Key
    • Description
    • score
    • fa_atr
    • fa_rep
    • fa_intra_rep
    • lk_nonpolar
    • fa_elec_rna_phos_phos
    • rna_torsion
    • suiteness_bonus
    • rna_sugar_close
    • fa_stack
    • stack_elec
    • geom_sol_fast
    • hbond_sr_bb_sc
    • hbond_lr_bb_sc
    • hbond_sc
    • ref
    • free_suite
    • free_2HOprime
    • intermol
    • other_pose
    • loop_close
    • linear_chainbreak
    • rms
    • rms_stem
    • time
    • N_WC
    • N_NWC
    • N_BS
    • N_BP
    • natWC
    • natNWC
    • natBP
    • f_natWC
    • f_natNWC
    • f_natBP

Parameters:

Name Type Description Default
root Union[str, Path]

The root directory of the dataset.

required
download bool

If True, download the dataset from the specified source, by default False.

False
transform Union[Callable, Transform, None]

The transformation function to be applied to the features, by default None.

None
target_transform Union[Callable, Transform, None]

The transformation function to be applied to the targets, by default None.

None
Source code in src/beignet/datasets/_atom3d_rsr_dataset.py
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
def __init__(
    self,
    root: Union[str, Path],
    *,
    download: bool = False,
    transform: Union[Callable, Transform, None] = None,
    target_transform: Union[Callable, Transform, None] = None,
):
    """
    The ATOM3D RNA Structure Ranking (RSR) task predicts the
    three-dimensional structure of an RNA molecule, given its sequence.
    A total of 21 RNAs are included, which consist of the first 21 RNAs
    from the RNA-Puzzles competition (Cruz et al., 2011).

    This problem is phrased as candidate ranking. For each RNA,
    candidate structural models are generated using FARFAR2 ("Silly
    Boy" Watkins et al., 2020), and the root mean squared deviation
    (RMSD) of each candidate's atoms to the experimentally determined
    structure is calculated.

    Each sample is a pair of features and a target, where features is
    the molecule's atomic coordinates and target is a dictionary keyed
    by the following scores (no per-key descriptions are documented):

        score, fa_atr, fa_rep, fa_intra_rep, lk_nonpolar,
        fa_elec_rna_phos_phos, rna_torsion, suiteness_bonus,
        rna_sugar_close, fa_stack, stack_elec, geom_sol_fast,
        hbond_sr_bb_sc, hbond_lr_bb_sc, hbond_sc, ref, free_suite,
        free_2HOprime, intermol, other_pose, loop_close,
        linear_chainbreak, rms, rms_stem, time, N_WC, N_NWC, N_BS,
        N_BP, natWC, natNWC, natBP, f_natWC, f_natNWC, f_natBP

    Parameters
    ----------
    root : Union[str, Path]
        The root directory of the dataset.

    download : bool, optional
        If `True`, download the dataset from the specified source,
        by default `False`.

    transform : Union[Callable, Transform, None], optional
        The transformation function to be applied to the features,
        by default `None`.

    target_transform : Union[Callable, Transform, None], optional
        The transformation function to be applied to the targets,
        by default `None`.
    """
    super().__init__(
        root,
        path="raw/candidates/data",
        resource="https://zenodo.org/record/4961085/files/RSR-raw.tar.gz",
        name="RSR",
        checksum="68830ab0ab95cf3d218785a4e4e7669c",
        download=download,
    )

    # The parent is initialized without a transform; the transforms are
    # only stored here.
    self._transform_fn = transform
    self._target_transform_fn = target_transform

beignet.datasets.ATOM3DSMPDataset

Bases: ATOM3DDataset

Source code in src/beignet/datasets/_atom3d_smp_dataset.py
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
class ATOM3DSMPDataset(ATOM3DDataset):
    def __init__(
        self,
        root: Union[str, Path],
        *,
        download: bool = False,
        transform: Union[Callable, Transform, None] = None,
        target_transform: Union[Callable, Transform, None] = None,
    ):
        """
        ATOM3D Small Molecule Properties (SMP) is a dataset of structures
        and energetic, electronic, and thermodynamic properties for 134,000
        stable small organic molecules, obtained from quantum-chemical
        calculations. The task is to predict the molecular properties from
        the ground-state structure.

        Some molecules have been excluded because they failed consistency
        tests or were not properly processed.

        Each sample is a pair of features and a target, where features is
        the molecule's atomic coordinates and target is a dictionary of the
        following energetic, electronic, and thermodynamic properties:

        =====  ===========  ==============================================
        Key    Unit         Description
        =====  ===========  ==============================================
        a      GHz          Rotational constant A
        b      GHz          Rotational constant B
        c      GHz          Rotational constant C
        mu     Debye        Dipole moment
        alpha  Bohr^3       Isotropic polarizability
        homo   Hartree      Energy of the highest occupied molecular
                            orbital (HOMO)
        lumo   Hartree      Energy of the lowest unoccupied molecular
                            orbital (LUMO)
        gap    Hartree      Gap, difference between LUMO and HOMO
        r2     Bohr^2       Electronic spatial extent
        zpve   Hartree      Zero point vibrational energy
        u0     Hartree      Internal energy at 0 K
        u      Hartree      Internal energy at 298.15 K
        h      Hartree      Enthalpy at 298.15 K
        g      Hartree      Free energy at 298.15 K
        cv     cal/(mol K)  Heat capacity at 298.15 K
        =====  ===========  ==============================================

        Parameters
        ----------
        root : Union[str, Path]
            The root directory of the dataset.

        download : bool, optional
            If `True`, download the dataset from the specified source,
            by default `False`.

        transform : Union[Callable, Transform, None], optional
            The transformation function to be applied to the features,
            by default `None`.

        target_transform : Union[Callable, Transform, None], optional
            The transformation function to be applied to the targets,
            by default `None`.
        """
        super().__init__(
            root,
            "raw/QM9/data",
            "https://zenodo.org/record/4911142/files/SMP-raw.tar.gz",
            "SMP",
            checksum="52cc7955c0f80f7dd9faf041e171f405",
            download=download,
        )

        self._transform_fn = transform

        self._target_transform_fn = target_transform

    def __getitem__(
        self,
        index: int,
    ) -> Tuple[DataFrame, Dict[str, Tensor]]:
        """
        Parameters
        ----------
        index : int
            The index of the item to retrieve from the dataset.

        Returns
        -------
        Tuple[DataFrame, Dict[str, Tensor]]
            A tuple containing the features and target of the item.
        """
        item = super().__getitem__(index)

        features = DataFrame(**item["atoms"])

        if self._transform_fn is not None:
            features = self._transform_fn(features)

        # Names are paired positionally with the values in
        # ``item["labels"]``.
        keys = (
            "a",
            "b",
            "c",
            "mu",
            "alpha",
            "homo",
            "lumo",
            "gap",
            "r2",
            "zpve",
            "u0",
            "u",
            "h",
            "g",
            "cv",
        )

        # ``strict=False`` is kept from the original: a length mismatch
        # silently truncates to the shorter side instead of raising.
        target = {
            key: torch.tensor(value)
            for key, value in zip(keys, item["labels"], strict=False)
        }

        if self._target_transform_fn is not None:
            target = self._target_transform_fn(target)

        return features, target
__getitem__
__getitem__(index)

Parameters:

Name Type Description Default
index int

The index of the item to retrieve from the dataset.

required

Returns:

Type Description
Tuple[Tuple[DataFrame], Dict[str, Tensor]]

A tuple containing the features and target of the item.

Source code in src/beignet/datasets/_atom3d_smp_dataset.py
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
def __getitem__(
    self,
    index: int,
) -> Tuple[Tuple[DataFrame], Dict[str, Tensor]]:
    """
    Fetch one record and split it into atom features and named targets.

    Parameters
    ----------
    index : int
        Position of the requested record in the dataset.

    Returns
    -------
    Tuple[Tuple[DataFrame], Dict[str, Tensor]]
        The features, a `DataFrame` built from the record's ``"atoms"``
        entry (passed through the feature transform when one is set),
        and the target, a dictionary of property name to tensor (passed
        through the target transform when one is set).

    """
    record = super().__getitem__(index)

    atoms = DataFrame(**record["atoms"])

    if self._transform_fn is not None:
        atoms = self._transform_fn(atoms)

    # Property names are matched to the stored label values by position.
    property_names = (
        "a",
        "b",
        "c",
        "mu",
        "alpha",
        "homo",
        "lumo",
        "gap",
        "r2",
        "zpve",
        "u0",
        "u",
        "h",
        "g",
        "cv",
    )

    # `strict=False`: pairing stops at the shorter of the two sequences.
    labels = {
        name: torch.tensor(value)
        for name, value in zip(
            property_names,
            record["labels"],
            strict=False,
        )
    }

    if self._target_transform_fn is not None:
        labels = self._target_transform_fn(labels)

    return atoms, labels
__init__
__init__(root, *, download=False, transform=None, target_transform=None)

ATOM3D Small Molecule Properties (SMP) is a dataset of structures and energetic, electronic, and thermodynamic properties for 134,000 stable small organic molecules, obtained from quantum-chemical calculations. The task is to predict the molecular properties from the ground-state structure.

Some molecules have been excluded because they failed consistency tests or were not properly processed.

Each sample is a pair of features and a target, where features is the molecule’s atomic coordinates and target is a dictionary of the following energetic, electronic, and thermodynamic properties:

.. list-table:: Target :widths: 20 20 60 :header-rows: 1

    • Key
    • Unit
    • Description
    • a
    • GHz
    • Rotational constant A
    • b
    • GHz
    • Rotational constant B
    • c
    • GHz
    • Rotational constant C
    • mu
    • Debye
    • Dipole moment
    • alpha
    • Bohr^3
    • Isotropic polarizability
    • homo
    • Hartree
    • Energy of Highest occupied molecular orbital (HOMO)
    • lumo
    • Hartree
    • Energy of Lowest unoccupied molecular orbital (LUMO)
    • gap
    • Hartree
    • Gap, difference between LUMO and HOMO
    • r2
    • Bohr^2
    • Electronic spatial extent
    • zpve
    • Hartree
    • Zero point vibrational energy
    • u0
    • Hartree
    • Internal energy at 0 K
    • u
    • Hartree
    • Internal energy at 298.15 K
    • h
    • Hartree
    • Enthalpy at 298.15 K
    • g
    • Hartree
    • Free energy at 298.15 K
    • cv
    • cal/(mol K)
    • Heat capacity at 298.15 K

Parameters:

Name Type Description Default
root Union[str, Path]

The root directory of the dataset.

required
download bool

If True, download the dataset from the specified source, by default False.

False
transform Union[Callable, Transform, None]

The transformation function to be applied to the features, by default None.

None
target_transform Union[Callable, Transform, None]

The transformation function to be applied to the targets, by default None.

None
Source code in src/beignet/datasets/_atom3d_smp_dataset.py
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
def __init__(
    self,
    root: Union[str, Path],
    *,
    download: bool = False,
    transform: Union[Callable, Transform, None] = None,
    target_transform: Union[Callable, Transform, None] = None,
):
    """
    ATOM3D Small Molecule Properties (SMP) is a dataset of structures
    and energetic, electronic, and thermodynamic properties for 134,000
    stable small organic molecules, obtained from quantum-chemical
    calculations. The task is to predict the molecular properties from
    the ground-state structure.

    Some molecules have been excluded because they failed consistency
    tests or were not properly processed.

    Each sample is a pair of features and a target, where features is
    the molecule’s atomic coordinates and target is a dictionary of the
    following energetic, electronic, and thermodynamic properties:

    .. list-table:: Target
       :widths: 20 20 60
       :header-rows: 1

       * - Key
         - Unit
         - Description
       * - a
         - GHz
         - Rotational constant A
       * - b
         - GHz
         - Rotational constant B
       * - c
         - GHz
         - Rotational constant C
       * - mu
         - Debye
         - Dipole moment
       * - alpha
         - Bohr^3
         - Isotropic polarizability
       * - homo
         - Hartree
         - Energy of Highest occupied molecular orbital (HOMO)
       * - lumo
         - Hartree
         - Energy of Lowest unoccupied molecular orbital (LUMO)
       * - gap
         - Hartree
         - Gap, difference between LUMO and HOMO
       * - r2
         - Bohr^2
         - Electronic spatial extent
       * - zpve
         - Hartree
         - Zero point vibrational energy
       * - u0
         - Hartree
         - Internal energy at 0 K
       * - u
         - Hartree
         - Internal energy at 298.15 K
       * - h
         - Hartree
         - Enthalpy at 298.15 K
       * - g
         - Hartree
         - Free energy at 298.15 K
       * - cv
         - cal/(mol K)
         - Heat capacity at 298.15 K

    Parameters
    ----------
    root : Union[str, Path]
        The root directory of the dataset.

    download : bool, optional
        If True, download the dataset from the specified source,
        by default `False`.

    transform : Union[Callable, Transform, None], optional
        The transformation function to be applied to the features,
        by default `None`.

    target_transform : Union[Callable, Transform, None], optional
        The transformation function to be applied to the targets,
        by default `None`.
    """
    # Neither transform is forwarded to the base initializer; both are
    # stored on the instance below, after the base class has been set up.
    super().__init__(
        root,
        "raw/QM9/data",
        "https://zenodo.org/record/4911142/files/SMP-raw.tar.gz",
        "SMP",
        checksum="52cc7955c0f80f7dd9faf041e171f405",
        download=download,
    )

    self._transform_fn = transform

    self._target_transform_fn = target_transform

beignet.datasets.AqSolDBSolubilityDataset

Bases: TDCDataset

Source code in src/beignet/datasets/_aqsoldb_solubility_dataset.py
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
class AqSolDBSolubilityDataset(TDCDataset):
    def __init__(
        self,
        root: str | Path,
        *,
        download: bool = False,
        transform: Callable | Transform | None = None,
        target_transform: Callable | Transform | None = None,
    ):
        """
        Configure the `TDCDataset` base with this dataset's fixed metadata.

        Parameters
        ----------
        root : str | Path
            Directory in which the dataset is stored.

        download : bool
            When `True`, the dataset is fetched into `root`; a copy that is
            already present is not fetched again. Defaults to `False`.

        transform : Callable | Transform | None
            Applied to the input.

        target_transform : Callable | Transform | None
            Applied to the target.
        """
        # Values that are fixed for this dataset; only the caller's options
        # below vary between instances.
        metadata = {
            "identifier": 4259610,
            "suffix": "tsv",
            "checksum": "md5:f7a675706bfe7e75c278f16dd2477b03",
            "x_keys": ["Drug"],
            "y_keys": ["Y"],
        }

        super().__init__(
            root=root,
            download=download,
            transform=transform,
            target_transform=target_transform,
            **metadata,
        )
__init__
__init__(root, *, download=False, transform=None, target_transform=None)

Parameters:

Name Type Description Default
root str | Path

Root directory of dataset.

required
download bool

If True, downloads the dataset to the root directory. If dataset already exists, it is not redownloaded. Default, False.

False
transform Callable | Transform | None

Transforms the input.

None
target_transform Callable | Transform | None

Transforms the target.

None
Source code in src/beignet/datasets/_aqsoldb_solubility_dataset.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
def __init__(
    self,
    root: str | Path,
    *,
    download: bool = False,
    transform: Callable | Transform | None = None,
    target_transform: Callable | Transform | None = None,
):
    """
    Configure the `TDCDataset` base with this dataset's fixed metadata.

    Parameters
    ----------
    root : str | Path
        Directory in which the dataset is stored.

    download : bool
        When `True`, the dataset is fetched into `root`; a copy that is
        already present is not fetched again. Defaults to `False`.

    transform : Callable | Transform | None
        Applied to the input.

    target_transform : Callable | Transform | None
        Applied to the target.
    """
    # Values that are fixed for this dataset; only the caller's options
    # below vary between instances.
    metadata = {
        "identifier": 4259610,
        "suffix": "tsv",
        "checksum": "md5:f7a675706bfe7e75c278f16dd2477b03",
        "x_keys": ["Drug"],
        "y_keys": ["Y"],
    }

    super().__init__(
        root=root,
        download=download,
        transform=transform,
        target_transform=target_transform,
        **metadata,
    )

beignet.datasets.AstraZenecaClearanceDataset

Bases: TDCDataset

Source code in src/beignet/datasets/_astrazeneca_clearance_dataset.py
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
class AstraZenecaClearanceDataset(TDCDataset):
    def __init__(
        self,
        root: str | Path,
        *,
        download: bool = False,
        transform: Callable | Transform | None = None,
        target_transform: Callable | Transform | None = None,
    ):
        """
        Configure the `TDCDataset` base with this dataset's fixed metadata.

        Parameters
        ----------
        root : str | Path
            Directory in which the dataset is stored.

        download : bool
            When `True`, the dataset is fetched into `root`; a copy that is
            already present is not fetched again. Defaults to `False`.

        transform : Callable | Transform | None
            Applied to the input.

        target_transform : Callable | Transform | None
            Applied to the target.
        """
        # Values that are fixed for this dataset; only the caller's options
        # below vary between instances.
        metadata = {
            "identifier": 4266187,
            "suffix": "tsv",
            "checksum": "md5:7036ab2a23f6db37843d0ecc072bbddc",
            "x_keys": ["X"],
            "y_keys": ["Y"],
        }

        super().__init__(
            root=root,
            download=download,
            transform=transform,
            target_transform=target_transform,
            **metadata,
        )
__init__
__init__(root, *, download=False, transform=None, target_transform=None)

Parameters:

Name Type Description Default
root str | Path

Root directory of dataset.

required
download bool

If True, downloads the dataset to the root directory. If dataset already exists, it is not redownloaded. Default, False.

False
transform Callable | Transform | None

Transforms the input.

None
target_transform Callable | Transform | None

Transforms the target.

None
Source code in src/beignet/datasets/_astrazeneca_clearance_dataset.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
def __init__(
    self,
    root: str | Path,
    *,
    download: bool = False,
    transform: Callable | Transform | None = None,
    target_transform: Callable | Transform | None = None,
):
    """
    Configure the `TDCDataset` base with this dataset's fixed metadata.

    Parameters
    ----------
    root : str | Path
        Directory in which the dataset is stored.

    download : bool
        When `True`, the dataset is fetched into `root`; a copy that is
        already present is not fetched again. Defaults to `False`.

    transform : Callable | Transform | None
        Applied to the input.

    target_transform : Callable | Transform | None
        Applied to the target.
    """
    # Values that are fixed for this dataset; only the caller's options
    # below vary between instances.
    metadata = {
        "identifier": 4266187,
        "suffix": "tsv",
        "checksum": "md5:7036ab2a23f6db37843d0ecc072bbddc",
        "x_keys": ["X"],
        "y_keys": ["Y"],
    }

    super().__init__(
        root=root,
        download=download,
        transform=transform,
        target_transform=target_transform,
        **metadata,
    )

beignet.datasets.AstraZenecaLipophilicityDataset

Bases: TDCDataset

Source code in src/beignet/datasets/_astrazeneca_lipophilicity_dataset.py
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
class AstraZenecaLipophilicityDataset(TDCDataset):
    def __init__(
        self,
        root: str | Path,
        *,
        download: bool = False,
        transform: Callable | Transform | None = None,
        target_transform: Callable | Transform | None = None,
    ):
        """
        Configure the `TDCDataset` base with this dataset's fixed metadata.

        Parameters
        ----------
        root : str | Path
            Directory in which the dataset is stored.

        download : bool
            When `True`, the dataset is fetched into `root`; a copy that is
            already present is not fetched again. Defaults to `False`.

        transform : Callable | Transform | None
            Applied to the input.

        target_transform : Callable | Transform | None
            Applied to the target.
        """
        # Values that are fixed for this dataset; only the caller's options
        # below vary between instances.
        metadata = {
            "identifier": 4259595,
            "suffix": "tsv",
            "checksum": "md5:77e72d4ec76530271bf4e296b62368ff",
            "x_keys": ["Drug"],
            "y_keys": ["Y"],
        }

        super().__init__(
            root=root,
            download=download,
            transform=transform,
            target_transform=target_transform,
            **metadata,
        )
__init__
__init__(root, *, download=False, transform=None, target_transform=None)

Parameters:

Name Type Description Default
root str | Path

Root directory of dataset.

required
download bool

If True, downloads the dataset to the root directory. If dataset already exists, it is not redownloaded. Default, False.

False
transform Callable | Transform | None

Transforms the input.

None
target_transform Callable | Transform | None

Transforms the target.

None
Source code in src/beignet/datasets/_astrazeneca_lipophilicity_dataset.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
def __init__(
    self,
    root: str | Path,
    *,
    download: bool = False,
    transform: Callable | Transform | None = None,
    target_transform: Callable | Transform | None = None,
):
    """
    Configure the `TDCDataset` base with this dataset's fixed metadata.

    Parameters
    ----------
    root : str | Path
        Directory in which the dataset is stored.

    download : bool
        When `True`, the dataset is fetched into `root`; a copy that is
        already present is not fetched again. Defaults to `False`.

    transform : Callable | Transform | None
        Applied to the input.

    target_transform : Callable | Transform | None
        Applied to the target.
    """
    # Values that are fixed for this dataset; only the caller's options
    # below vary between instances.
    metadata = {
        "identifier": 4259595,
        "suffix": "tsv",
        "checksum": "md5:77e72d4ec76530271bf4e296b62368ff",
        "x_keys": ["Drug"],
        "y_keys": ["Y"],
    }

    super().__init__(
        root=root,
        download=download,
        transform=transform,
        target_transform=target_transform,
        **metadata,
    )

beignet.datasets.AstraZenecaPlasmaProteinBindingRateDataset

Bases: TDCDataset

Source code in src/beignet/datasets/_astrazeneca_plasma_protein_binding_rate_dataset.py
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
class AstraZenecaPlasmaProteinBindingRateDataset(TDCDataset):
    def __init__(
        self,
        root: str | Path,
        *,
        download: bool = False,
        transform: Callable | Transform | None = None,
        target_transform: Callable | Transform | None = None,
    ):
        """
        Configure the `TDCDataset` base with this dataset's fixed metadata.

        Parameters
        ----------
        root : str | Path
            Directory in which the dataset is stored.

        download : bool
            When `True`, the dataset is fetched into `root`; a copy that is
            already present is not fetched again. Defaults to `False`.

        transform : Callable | Transform | None
            Applied to the input.

        target_transform : Callable | Transform | None
            Applied to the target.
        """
        # Values that are fixed for this dataset; only the caller's options
        # below vary between instances.
        metadata = {
            "identifier": 6413140,
            "suffix": "tsv",
            "checksum": "md5:f3b700ea6b1f624fdbcf6a1c67937b00",
            "x_keys": ["Drug", "Species"],
            "y_keys": ["Y"],
        }

        super().__init__(
            root=root,
            download=download,
            transform=transform,
            target_transform=target_transform,
            **metadata,
        )
__init__
__init__(root, *, download=False, transform=None, target_transform=None)

Parameters:

Name Type Description Default
root str | Path

Root directory of dataset.

required
download bool

If True, downloads the dataset to the root directory. If dataset already exists, it is not redownloaded. Default, False.

False
transform Callable | Transform | None

Transforms the input.

None
target_transform Callable | Transform | None

Transforms the target.

None
Source code in src/beignet/datasets/_astrazeneca_plasma_protein_binding_rate_dataset.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
def __init__(
    self,
    root: str | Path,
    *,
    download: bool = False,
    transform: Callable | Transform | None = None,
    target_transform: Callable | Transform | None = None,
):
    """
    Configure the `TDCDataset` base with this dataset's fixed metadata.

    Parameters
    ----------
    root : str | Path
        Directory in which the dataset is stored.

    download : bool
        When `True`, the dataset is fetched into `root`; a copy that is
        already present is not fetched again. Defaults to `False`.

    transform : Callable | Transform | None
        Applied to the input.

    target_transform : Callable | Transform | None
        Applied to the target.
    """
    # Values that are fixed for this dataset; only the caller's options
    # below vary between instances.
    metadata = {
        "identifier": 6413140,
        "suffix": "tsv",
        "checksum": "md5:f3b700ea6b1f624fdbcf6a1c67937b00",
        "x_keys": ["Drug", "Species"],
        "y_keys": ["Y"],
    }

    super().__init__(
        root=root,
        download=download,
        transform=transform,
        target_transform=target_transform,
        **metadata,
    )

beignet.datasets.BindingDBIC50Dataset

Bases: TDCDataset

Source code in src/beignet/datasets/_bindingdb_ic50_dataset.py
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
class BindingDBIC50Dataset(TDCDataset):
    def __init__(
        self,
        root: str | Path,
        *,
        download: bool = False,
        transform: Callable | Transform | None = None,
        target_transform: Callable | Transform | None = None,
    ):
        """
        Configure the `TDCDataset` base with this dataset's fixed metadata.

        Parameters
        ----------
        root : str | Path
            Directory in which the dataset is stored.

        download : bool
            When `True`, the dataset is fetched into `root`; a copy that is
            already present is not fetched again. Defaults to `False`.

        transform : Callable | Transform | None
            Applied to the input.

        target_transform : Callable | Transform | None
            Applied to the target.
        """
        # Values that are fixed for this dataset; only the caller's options
        # below vary between instances.
        metadata = {
            "identifier": 4291560,
            "suffix": "csv",
            "checksum": "md5:a6ca198002c335aa9a30248cf3795413",
            "x_keys": ["X1", "X2"],
            "y_keys": ["Y"],
        }

        super().__init__(
            root=root,
            download=download,
            transform=transform,
            target_transform=target_transform,
            **metadata,
        )
__init__
__init__(root, *, download=False, transform=None, target_transform=None)

Parameters:

Name Type Description Default
root str | Path

Root directory of dataset.

required
download bool

If True, downloads the dataset to the root directory. If dataset already exists, it is not redownloaded. Default, False.

False
transform Callable | Transform | None

Transforms the input.

None
target_transform Callable | Transform | None

Transforms the target.

None
Source code in src/beignet/datasets/_bindingdb_ic50_dataset.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
def __init__(
    self,
    root: str | Path,
    *,
    download: bool = False,
    transform: Callable | Transform | None = None,
    target_transform: Callable | Transform | None = None,
):
    """
    Configure the `TDCDataset` base with this dataset's fixed metadata.

    Parameters
    ----------
    root : str | Path
        Directory in which the dataset is stored.

    download : bool
        When `True`, the dataset is fetched into `root`; a copy that is
        already present is not fetched again. Defaults to `False`.

    transform : Callable | Transform | None
        Applied to the input.

    target_transform : Callable | Transform | None
        Applied to the target.
    """
    # Values that are fixed for this dataset; only the caller's options
    # below vary between instances.
    metadata = {
        "identifier": 4291560,
        "suffix": "csv",
        "checksum": "md5:a6ca198002c335aa9a30248cf3795413",
        "x_keys": ["X1", "X2"],
        "y_keys": ["Y"],
    }

    super().__init__(
        root=root,
        download=download,
        transform=transform,
        target_transform=target_transform,
        **metadata,
    )

beignet.datasets.BindingDBKdDataset

Bases: TDCDataset

Source code in src/beignet/datasets/_bindingdb_kd_dataset.py
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
class BindingDBKdDataset(TDCDataset):
    def __init__(
        self,
        root: str | Path,
        *,
        download: bool = False,
        transform: Callable | Transform | None = None,
        target_transform: Callable | Transform | None = None,
    ):
        """
        Configure the `TDCDataset` base with this dataset's fixed metadata.

        Parameters
        ----------
        root : str | Path
            Directory in which the dataset is stored.

        download : bool
            When `True`, the dataset is fetched into `root`; a copy that is
            already present is not fetched again. Defaults to `False`.

        transform : Callable | Transform | None
            Applied to the input.

        target_transform : Callable | Transform | None
            Applied to the target.
        """
        # Values that are fixed for this dataset; only the caller's options
        # below vary between instances.
        metadata = {
            "identifier": 4291555,
            "suffix": "csv",
            "checksum": "md5:c463f536eeec3f99cdab9365d86e7154",
            "x_keys": ["X1", "X2"],
            "y_keys": ["Y"],
        }

        super().__init__(
            root=root,
            download=download,
            transform=transform,
            target_transform=target_transform,
            **metadata,
        )
__init__
__init__(root, *, download=False, transform=None, target_transform=None)

Parameters:

Name Type Description Default
root str | Path

Root directory of dataset.

required
download bool

If True, downloads the dataset to the root directory. If dataset already exists, it is not redownloaded. Default, False.

False
transform Callable | Transform | None

Transforms the input.

None
target_transform Callable | Transform | None

Transforms the target.

None
Source code in src/beignet/datasets/_bindingdb_kd_dataset.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
def __init__(
    self,
    root: str | Path,
    *,
    download: bool = False,
    transform: Callable | Transform | None = None,
    target_transform: Callable | Transform | None = None,
):
    """
    Configure the `TDCDataset` base with this dataset's fixed metadata.

    Parameters
    ----------
    root : str | Path
        Directory in which the dataset is stored.

    download : bool
        When `True`, the dataset is fetched into `root`; a copy that is
        already present is not fetched again. Defaults to `False`.

    transform : Callable | Transform | None
        Applied to the input.

    target_transform : Callable | Transform | None
        Applied to the target.
    """
    # Values that are fixed for this dataset; only the caller's options
    # below vary between instances.
    metadata = {
        "identifier": 4291555,
        "suffix": "csv",
        "checksum": "md5:c463f536eeec3f99cdab9365d86e7154",
        "x_keys": ["X1", "X2"],
        "y_keys": ["Y"],
    }

    super().__init__(
        root=root,
        download=download,
        transform=transform,
        target_transform=target_transform,
        **metadata,
    )

beignet.datasets.BindingDBKiDataset

Bases: TDCDataset

Source code in src/beignet/datasets/_bindingdb_ki_dataset.py
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
class BindingDBKiDataset(TDCDataset):
    def __init__(
        self,
        root: str | Path,
        *,
        download: bool = False,
        transform: Callable | Transform | None = None,
        target_transform: Callable | Transform | None = None,
    ):
        """
        Configure the `TDCDataset` base with this dataset's fixed metadata.

        Parameters
        ----------
        root : str | Path
            Directory in which the dataset is stored.

        download : bool
            When `True`, the dataset is fetched into `root`; a copy that is
            already present is not fetched again. Defaults to `False`.

        transform : Callable | Transform | None
            Applied to the input.

        target_transform : Callable | Transform | None
            Applied to the target.
        """
        # Values that are fixed for this dataset; only the caller's options
        # below vary between instances.
        metadata = {
            "identifier": 4291556,
            "suffix": "csv",
            "checksum": "md5:187d6c8926c608e24f4469373811806d",
            "x_keys": ["X1", "X2"],
            "y_keys": ["Y"],
        }

        super().__init__(
            root=root,
            download=download,
            transform=transform,
            target_transform=target_transform,
            **metadata,
        )
__init__
__init__(root, *, download=False, transform=None, target_transform=None)

Parameters:

Name Type Description Default
root str | Path

Root directory of dataset.

required
download bool

If True, downloads the dataset to the root directory. If dataset already exists, it is not redownloaded. Default, False.

False
transform Callable | Transform | None

Transforms the input.

None
target_transform Callable | Transform | None

Transforms the target.

None
Source code in src/beignet/datasets/_bindingdb_ki_dataset.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
def __init__(
    self,
    root: str | Path,
    *,
    download: bool = False,
    transform: Callable | Transform | None = None,
    target_transform: Callable | Transform | None = None,
):
    """
    Configure the `TDCDataset` base with this dataset's fixed metadata.

    Parameters
    ----------
    root : str | Path
        Directory in which the dataset is stored.

    download : bool
        When `True`, the dataset is fetched into `root`; a copy that is
        already present is not fetched again. Defaults to `False`.

    transform : Callable | Transform | None
        Applied to the input.

    target_transform : Callable | Transform | None
        Applied to the target.
    """
    # Values that are fixed for this dataset; only the caller's options
    # below vary between instances.
    metadata = {
        "identifier": 4291556,
        "suffix": "csv",
        "checksum": "md5:187d6c8926c608e24f4469373811806d",
        "x_keys": ["X1", "X2"],
        "y_keys": ["Y"],
    }

    super().__init__(
        root=root,
        download=download,
        transform=transform,
        target_transform=target_transform,
        **metadata,
    )

beignet.datasets.BroccatelliPGlycoproteinInhibitionDataset

Bases: TDCDataset

Source code in src/beignet/datasets/_broccatelli_p_glycoprotein_inhibition_dataset.py
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
class BroccatelliPGlycoproteinInhibitionDataset(TDCDataset):
    def __init__(
        self,
        root: str | Path,
        *,
        download: bool = False,
        transform: Callable | Transform | None = None,
        target_transform: Callable | Transform | None = None,
    ):
        """
        Configure the `TDCDataset` base with this dataset's fixed metadata.

        Parameters
        ----------
        root : str | Path
            Directory in which the dataset is stored.

        download : bool
            When `True`, the dataset is fetched into `root`; a copy that is
            already present is not fetched again. Defaults to `False`.

        transform : Callable | Transform | None
            Applied to the input.

        target_transform : Callable | Transform | None
            Applied to the target.
        """
        # Values that are fixed for this dataset; only the caller's options
        # below vary between instances.
        metadata = {
            "identifier": 4259597,
            "suffix": "tsv",
            "checksum": "md5:6915ccf0b5d6b9c8fe4d98cb5759a88a",
            "x_keys": ["Drug"],
            "y_keys": ["Y"],
        }

        super().__init__(
            root=root,
            download=download,
            transform=transform,
            target_transform=target_transform,
            **metadata,
        )
__init__
__init__(root, *, download=False, transform=None, target_transform=None)

Parameters:

Name Type Description Default
root str | Path

Root directory of dataset.

required
download bool

If True, downloads the dataset to the root directory. If dataset already exists, it is not redownloaded. Default, False.

False
transform Callable | Transform | None

Transforms the input.

None
target_transform Callable | Transform | None

Transforms the target.

None
Source code in src/beignet/datasets/_broccatelli_p_glycoprotein_inhibition_dataset.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
def __init__(
    self,
    root: str | Path,
    *,
    download: bool = False,
    transform: Callable | Transform | None = None,
    target_transform: Callable | Transform | None = None,
):
    """
    Configure the `TDCDataset` base with this dataset's fixed metadata.

    Parameters
    ----------
    root : str | Path
        Directory in which the dataset is stored.

    download : bool
        When `True`, the dataset is fetched into `root`; a copy that is
        already present is not fetched again. Defaults to `False`.

    transform : Callable | Transform | None
        Applied to the input.

    target_transform : Callable | Transform | None
        Applied to the target.
    """
    # Values that are fixed for this dataset; only the caller's options
    # below vary between instances.
    metadata = {
        "identifier": 4259597,
        "suffix": "tsv",
        "checksum": "md5:6915ccf0b5d6b9c8fe4d98cb5759a88a",
        "x_keys": ["Drug"],
        "y_keys": ["Y"],
    }

    super().__init__(
        root=root,
        download=download,
        transform=transform,
        target_transform=target_transform,
        **metadata,
    )

beignet.datasets.CarbonMangelsCytochromeP4502C9SubstrateDataset

Bases: TDCDataset

Source code in src/beignet/datasets/_carbon_mangels_cytochrome_p450_2c9_substrate_dataset.py
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
class CarbonMangelsCytochromeP4502C9SubstrateDataset(TDCDataset):
    def __init__(
        self,
        root: str | Path,
        *,
        download: bool = False,
        transform: Callable | Transform | None = None,
        target_transform: Callable | Transform | None = None,
    ):
        """
        Configure the `TDCDataset` base with this dataset's fixed metadata.

        Parameters
        ----------
        root : str | Path
            Directory in which the dataset is stored.

        download : bool
            When `True`, the dataset is fetched into `root`; a copy that is
            already present is not fetched again. Defaults to `False`.

        transform : Callable | Transform | None
            Applied to the input.

        target_transform : Callable | Transform | None
            Applied to the target.
        """
        # Values that are fixed for this dataset; only the caller's options
        # below vary between instances.
        metadata = {
            "identifier": 4259584,
            "suffix": "tsv",
            "checksum": "md5:3f13c61b816868eb5d7b3b0c61023c04",
            "x_keys": ["Drug"],
            "y_keys": ["Y"],
        }

        super().__init__(
            root=root,
            download=download,
            transform=transform,
            target_transform=target_transform,
            **metadata,
        )
__init__
__init__(root, *, download=False, transform=None, target_transform=None)

Parameters:

Name Type Description Default
root str | Path

Root directory of dataset.

required
download bool

If True, downloads the dataset to the root directory. If dataset already exists, it is not redownloaded. Default, False.

False
transform Callable | Transform | None

Transforms the input.

None
target_transform Callable | Transform | None

Transforms the target.

None
Source code in src/beignet/datasets/_carbon_mangels_cytochrome_p450_2c9_substrate_dataset.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
def __init__(
    self,
    root: str | Path,
    *,
    download: bool = False,
    transform: Callable | Transform | None = None,
    target_transform: Callable | Transform | None = None,
):
    r"""
    Forward the caller's options to the parent initializer together with
    this dataset's fixed identifier, suffix, checksum, and column keys.

    Parameters
    ----------
    root : str | Path
        Root directory of dataset.

    download : bool
        When `True`, the dataset is downloaded into the root directory; an
        existing copy is not downloaded again. Default, `False`.

    transform : Callable | Transform | None
        Transforms the input.

    target_transform : Callable | Transform | None
        Transforms the target.
    """
    # Fixed, dataset-specific arguments.
    settings = {
        "identifier": 4259584,
        "suffix": "tsv",
        "checksum": "md5:3f13c61b816868eb5d7b3b0c61023c04",
        "x_keys": ["Drug"],
        "y_keys": ["Y"],
    }

    super().__init__(
        root=root,
        download=download,
        transform=transform,
        target_transform=target_transform,
        **settings,
    )

beignet.datasets.CarbonMangelsCytochromeP4502D6SubstrateDataset

Bases: TDCDataset

Source code in src/beignet/datasets/_carbon_mangels_cytochrome_p450_2d6_substrate_dataset.py
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
class CarbonMangelsCytochromeP4502D6SubstrateDataset(TDCDataset):
    r"""
    `TDCDataset` subclass that pins the identifier, file suffix, checksum,
    and input/target column keys for this dataset.
    """

    def __init__(
        self,
        root: str | Path,
        *,
        download: bool = False,
        transform: Callable | Transform | None = None,
        target_transform: Callable | Transform | None = None,
    ):
        r"""
        Configure the parent `TDCDataset` for this data file.

        Parameters
        ----------
        root : str | Path
            Root directory of dataset.

        download : bool
            When `True`, the dataset is downloaded into the root directory;
            an existing copy is not downloaded again. Default, `False`.

        transform : Callable | Transform | None
            Transforms the input.

        target_transform : Callable | Transform | None
            Transforms the target.
        """
        # Fixed, dataset-specific arguments; everything else is passed
        # through from the caller unchanged.
        settings = {
            "identifier": 4259578,
            "suffix": "tsv",
            "checksum": "md5:d1f45520803a0d47c2056abf8f5548c7",
            "x_keys": ["Drug"],
            "y_keys": ["Y"],
        }

        super().__init__(
            root=root,
            download=download,
            transform=transform,
            target_transform=target_transform,
            **settings,
        )
__init__
__init__(root, *, download=False, transform=None, target_transform=None)

Parameters:

Name Type Description Default
root str | Path

Root directory of dataset.

required
download bool

If True, downloads the dataset to the root directory. If dataset already exists, it is not redownloaded. Default, False.

False
transform Callable | Transform | None

Transforms the input.

None
target_transform Callable | Transform | None

Transforms the target.

None
Source code in src/beignet/datasets/_carbon_mangels_cytochrome_p450_2d6_substrate_dataset.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
def __init__(
    self,
    root: str | Path,
    *,
    download: bool = False,
    transform: Callable | Transform | None = None,
    target_transform: Callable | Transform | None = None,
):
    r"""
    Forward the caller's options to the parent initializer together with
    this dataset's fixed identifier, suffix, checksum, and column keys.

    Parameters
    ----------
    root : str | Path
        Root directory of dataset.

    download : bool
        When `True`, the dataset is downloaded into the root directory; an
        existing copy is not downloaded again. Default, `False`.

    transform : Callable | Transform | None
        Transforms the input.

    target_transform : Callable | Transform | None
        Transforms the target.
    """
    # Fixed, dataset-specific arguments.
    settings = {
        "identifier": 4259578,
        "suffix": "tsv",
        "checksum": "md5:d1f45520803a0d47c2056abf8f5548c7",
        "x_keys": ["Drug"],
        "y_keys": ["Y"],
    }

    super().__init__(
        root=root,
        download=download,
        transform=transform,
        target_transform=target_transform,
        **settings,
    )

beignet.datasets.CarbonMangelsCytochromeP4503A4SubstrateDataset

Bases: TDCDataset

Source code in src/beignet/datasets/_carbon_mangels_cytochrome_p450_3a4_substrate_dataset.py
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
class CarbonMangelsCytochromeP4503A4SubstrateDataset(TDCDataset):
    r"""
    `TDCDataset` subclass that pins the identifier, file suffix, checksum,
    and input/target column keys for this dataset.
    """

    def __init__(
        self,
        root: str | Path,
        *,
        download: bool = False,
        transform: Callable | Transform | None = None,
        target_transform: Callable | Transform | None = None,
    ):
        r"""
        Configure the parent `TDCDataset` for this data file.

        Parameters
        ----------
        root : str | Path
            Root directory of dataset.

        download : bool
            When `True`, the dataset is downloaded into the root directory;
            an existing copy is not downloaded again. Default, `False`.

        transform : Callable | Transform | None
            Transforms the input.

        target_transform : Callable | Transform | None
            Transforms the target.
        """
        # Fixed, dataset-specific arguments; everything else is passed
        # through from the caller unchanged.
        settings = {
            "identifier": 4259581,
            "suffix": "tsv",
            "checksum": "md5:25cfb80ef8f04e035d5944228194ca95",
            "x_keys": ["Drug"],
            "y_keys": ["Y"],
        }

        super().__init__(
            root=root,
            download=download,
            transform=transform,
            target_transform=target_transform,
            **settings,
        )
__init__
__init__(root, *, download=False, transform=None, target_transform=None)

Parameters:

Name Type Description Default
root str | Path

Root directory of dataset.

required
download bool

If True, downloads the dataset to the root directory. If dataset already exists, it is not redownloaded. Default, False.

False
transform Callable | Transform | None

Transforms the input.

None
target_transform Callable | Transform | None

Transforms the target.

None
Source code in src/beignet/datasets/_carbon_mangels_cytochrome_p450_3a4_substrate_dataset.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
def __init__(
    self,
    root: str | Path,
    *,
    download: bool = False,
    transform: Callable | Transform | None = None,
    target_transform: Callable | Transform | None = None,
):
    r"""
    Forward the caller's options to the parent initializer together with
    this dataset's fixed identifier, suffix, checksum, and column keys.

    Parameters
    ----------
    root : str | Path
        Root directory of dataset.

    download : bool
        When `True`, the dataset is downloaded into the root directory; an
        existing copy is not downloaded again. Default, `False`.

    transform : Callable | Transform | None
        Transforms the input.

    target_transform : Callable | Transform | None
        Transforms the target.
    """
    # Fixed, dataset-specific arguments.
    settings = {
        "identifier": 4259581,
        "suffix": "tsv",
        "checksum": "md5:25cfb80ef8f04e035d5944228194ca95",
        "x_keys": ["Drug"],
        "y_keys": ["Y"],
    }

    super().__init__(
        root=root,
        download=download,
        transform=transform,
        target_transform=target_transform,
        **settings,
    )

beignet.datasets.ChEMBLDataset

Bases: TDCDataset

Source code in src/beignet/datasets/_chembl_dataset.py
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
class ChEMBLDataset(TDCDataset):
    r"""
    `TDCDataset` subclass that pins the identifier, file suffix, checksum,
    and input column key for this dataset. No target columns are selected
    (`y_keys` is empty) and no target transform is accepted.
    """

    def __init__(
        self,
        root: str | Path,
        *,
        download: bool = False,
        transform: Callable | Transform | None = None,
    ):
        r"""
        Configure the parent `TDCDataset` for this data file.

        Parameters
        ----------
        root : str | Path
            Root directory of dataset.

        download : bool
            When `True`, the dataset is downloaded into the root directory;
            an existing copy is not downloaded again. Default, `False`.

        transform : Callable | Transform | None
            Transforms the input.
        """
        # Fixed, dataset-specific arguments; everything else is passed
        # through from the caller unchanged.
        settings = {
            "identifier": 4170965,
            "suffix": "csv",
            "checksum": "md5:b0cedfe468c6331487f7084236944bac",
            "x_keys": ["smiles"],
            "y_keys": [],
        }

        super().__init__(
            root=root,
            download=download,
            transform=transform,
            **settings,
        )
__init__
__init__(root, *, download=False, transform=None)

Parameters:

Name Type Description Default
root str | Path

Root directory of dataset.

required
download bool

If True, downloads the dataset to the root directory. If dataset already exists, it is not redownloaded. Default, False.

False
transform Callable | Transform | None

Transforms the input.

None
Source code in src/beignet/datasets/_chembl_dataset.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
def __init__(
    self,
    root: str | Path,
    *,
    download: bool = False,
    transform: Callable | Transform | None = None,
):
    r"""
    Forward the caller's options to the parent initializer together with
    this dataset's fixed identifier, suffix, checksum, and column keys.

    Parameters
    ----------
    root : str | Path
        Root directory of dataset.

    download : bool
        When `True`, the dataset is downloaded into the root directory; an
        existing copy is not downloaded again. Default, `False`.

    transform : Callable | Transform | None
        Transforms the input.
    """
    # Fixed, dataset-specific arguments; ``y_keys`` is intentionally passed
    # as an empty list, exactly as before.
    settings = {
        "identifier": 4170965,
        "suffix": "csv",
        "checksum": "md5:b0cedfe468c6331487f7084236944bac",
        "x_keys": ["smiles"],
        "y_keys": [],
    }

    super().__init__(
        root=root,
        download=download,
        transform=transform,
        **settings,
    )

beignet.datasets.ClinToxDataset

Bases: TDCDataset

Source code in src/beignet/datasets/_clintox_dataset.py
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
class ClinToxDataset(TDCDataset):
    r"""
    `TDCDataset` subclass that pins the identifier, file suffix, checksum,
    and input/target column keys for this dataset.
    """

    def __init__(
        self,
        root: str | Path,
        *,
        download: bool = False,
        transform: Callable | Transform | None = None,
        target_transform: Callable | Transform | None = None,
    ):
        r"""
        Configure the parent `TDCDataset` for this data file.

        Parameters
        ----------
        root : str | Path
            Root directory of dataset.

        download : bool
            When `True`, the dataset is downloaded into the root directory;
            an existing copy is not downloaded again. Default, `False`.

        transform : Callable | Transform | None
            Transforms the input.

        target_transform : Callable | Transform | None
            Transforms the target.
        """
        # Fixed, dataset-specific arguments; everything else is passed
        # through from the caller unchanged.
        settings = {
            "identifier": 4259572,
            "suffix": "tsv",
            "checksum": "md5:e8e7c5ba675129db0161913ba4871834",
            "x_keys": ["Drug"],
            "y_keys": ["Y"],
        }

        super().__init__(
            root=root,
            download=download,
            transform=transform,
            target_transform=target_transform,
            **settings,
        )
__init__
__init__(root, *, download=False, transform=None, target_transform=None)

Parameters:

Name Type Description Default
root str | Path

Root directory of dataset.

required
download bool

If True, downloads the dataset to the root directory. If dataset already exists, it is not redownloaded. Default, False.

False
transform Callable | Transform | None

Transforms the input.

None
target_transform Callable | Transform | None

Transforms the target.

None
Source code in src/beignet/datasets/_clintox_dataset.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
def __init__(
    self,
    root: str | Path,
    *,
    download: bool = False,
    transform: Callable | Transform | None = None,
    target_transform: Callable | Transform | None = None,
):
    r"""
    Forward the caller's options to the parent initializer together with
    this dataset's fixed identifier, suffix, checksum, and column keys.

    Parameters
    ----------
    root : str | Path
        Root directory of dataset.

    download : bool
        When `True`, the dataset is downloaded into the root directory; an
        existing copy is not downloaded again. Default, `False`.

    transform : Callable | Transform | None
        Transforms the input.

    target_transform : Callable | Transform | None
        Transforms the target.
    """
    # Fixed, dataset-specific arguments.
    settings = {
        "identifier": 4259572,
        "suffix": "tsv",
        "checksum": "md5:e8e7c5ba675129db0161913ba4871834",
        "x_keys": ["Drug"],
        "y_keys": ["Y"],
    }

    super().__init__(
        root=root,
        download=download,
        transform=transform,
        target_transform=target_transform,
        **settings,
    )

beignet.datasets.DAVISDataset

Bases: TDCDataset

Source code in src/beignet/datasets/_davis_dataset.py
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
class DAVISDataset(TDCDataset):
    r"""
    `TDCDataset` subclass that pins the identifier, file suffix, checksum,
    and input/target column keys for this dataset.
    """

    def __init__(
        self,
        root: str | Path,
        *,
        download: bool = False,
        transform: Callable | Transform | None = None,
        target_transform: Callable | Transform | None = None,
    ):
        r"""
        Parameters
        ----------
        root : str | Path
            Root directory of dataset.

        download: bool
            If `True`, downloads the dataset to the root directory. If dataset
            already exists, it is not redownloaded. Default, `False`.

        transform : Callable | Transform | None
            Transforms the input.

        target_transform : Callable | Transform | None
            Transforms the target.
        """
        super().__init__(
            root=root,
            download=download,
            identifier=5219748,
            # The MD5 digest belongs in ``checksum``; ``suffix`` carries the
            # file extension, as in the other ``TDCDataset`` subclasses.
            # NOTE(review): "pkl" matches the pickle format TDC distributes
            # DAVIS in — confirm against the downloaded file.
            suffix="pkl",
            checksum="md5:6c7949b81aea69e9d816db88602d771e",
            x_keys=["X1", "X2"],
            y_keys=["Y"],
            transform=transform,
            target_transform=target_transform,
        )
__init__
__init__(root, *, download=False, transform=None, target_transform=None)

Parameters:

Name Type Description Default
root str | Path

Root directory of dataset.

required
download bool

If True, downloads the dataset to the root directory. If dataset already exists, it is not redownloaded. Default, False.

False
transform Callable | Transform | None

Transforms the input.

None
target_transform Callable | Transform | None

Transforms the target.

None
Source code in src/beignet/datasets/_davis_dataset.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
def __init__(
    self,
    root: str | Path,
    *,
    download: bool = False,
    transform: Callable | Transform | None = None,
    target_transform: Callable | Transform | None = None,
):
    r"""
    Parameters
    ----------
    root : str | Path
        Root directory of dataset.

    download: bool
        If `True`, downloads the dataset to the root directory. If dataset
        already exists, it is not redownloaded. Default, `False`.

    transform : Callable | Transform | None
        Transforms the input.

    target_transform : Callable | Transform | None
        Transforms the target.
    """
    super().__init__(
        root=root,
        download=download,
        identifier=5219748,
        # The MD5 digest belongs in ``checksum``; ``suffix`` carries the
        # file extension, as in the other ``TDCDataset`` subclasses.
        # NOTE(review): "pkl" matches the pickle format TDC distributes
        # DAVIS in — confirm against the downloaded file.
        suffix="pkl",
        checksum="md5:6c7949b81aea69e9d816db88602d771e",
        x_keys=["X1", "X2"],
        y_keys=["Y"],
        transform=transform,
        target_transform=target_transform,
    )

beignet.datasets.DataFrameDataset

Bases: Dataset

Source code in src/beignet/datasets/_data_frame_dataset.py
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
class DataFrameDataset(Dataset):
    """
    Dataset whose items are the rows of the ``DataFrame`` stored in
    ``_data``.
    """

    # Backing table. It is only declared here; ``__init__`` does not assign
    # it (presumably subclasses populate it — verify against callers).
    _data: DataFrame

    def __init__(
        self,
        root: Union[str, Path],
        *,
        transform: Union[Callable, Transform, None] = None,
        target_transform: Union[Callable, Transform, None] = None,
    ) -> None:
        """
        :param root: Root directory of the dataset. A ``str`` is converted to
            a resolved ``Path``; any other value is stored as given.

        :param transform: A ``Callable`` or ``Transform`` that maps data to
            transformed data (default: ``None``).

        :param target_transform: ``Callable`` or ``Transform`` that maps a
            target to a transformed target (default: ``None``).
        """
        self._root = Path(root).resolve() if isinstance(root, str) else root

        # Both transforms are only stored; ``__getitem__`` below does not
        # apply them.
        self._transform_fn = transform
        self._target_transform_fn = target_transform

    def __len__(self) -> int:
        """Return the number of rows in ``_data``."""
        return len(self._data)

    def __getitem__(self, index: int) -> T:
        """Return the row of ``_data`` at integer position ``index``."""
        row = self._data.iloc[index]

        return row
__init__
__init__(root, *, transform=None, target_transform=None)

:param root: Root directory where the dataset subdirectory exists or, if :attr:download is True, the directory where the dataset subdirectory will be created and the dataset downloaded.

:param transform: A Callable or Transform that maps data to transformed data (default: None).

:param target_transform: Callable or Transform that maps a target to a transformed target (default: None).

Source code in src/beignet/datasets/_data_frame_dataset.py
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
def __init__(
    self,
    root: Union[str, Path],
    *,
    transform: Union[Callable, Transform, None] = None,
    target_transform: Union[Callable, Transform, None] = None,
) -> None:
    """
    :param root: Root directory of the dataset. A ``str`` is converted to a
        resolved ``Path``; any other value is stored as given.

    :param transform: A ``Callable`` or ``Transform`` that maps data to
        transformed data (default: ``None``).

    :param target_transform: ``Callable`` or ``Transform`` that maps a
        target to a transformed target (default: ``None``).
    """
    self._root = Path(root).resolve() if isinstance(root, str) else root

    # The transforms are stored for later use; nothing is applied here.
    self._transform_fn = transform
    self._target_transform_fn = target_transform

beignet.datasets.DisGeNETDataset

Bases: TDCDataset

Source code in src/beignet/datasets/_disgenet_dataset.py
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
class DisGeNETDataset(TDCDataset):
    r"""
    `TDCDataset` subclass that pins the identifier, file suffix, checksum,
    and input/target column keys for this dataset.
    """

    def __init__(
        self,
        root: str | Path,
        *,
        download: bool = False,
        transform: Callable | Transform | None = None,
        target_transform: Callable | Transform | None = None,
    ):
        r"""
        Configure the parent `TDCDataset` for this data file.

        Parameters
        ----------
        root : str | Path
            Root directory of dataset.

        download : bool
            When `True`, the dataset is downloaded into the root directory;
            an existing copy is not downloaded again. Default, `False`.

        transform : Callable | Transform | None
            Transforms the input.

        target_transform : Callable | Transform | None
            Transforms the target.
        """
        # Fixed, dataset-specific arguments; everything else is passed
        # through from the caller unchanged.
        # NOTE(review): the inputs pair `X1` with `ID2`, whereas other
        # two-input datasets in this package use `["X1", "X2"]` — confirm
        # this is intentional.
        settings = {
            "identifier": 4168282,
            "suffix": "disgenet.csv",
            "checksum": "md5:b7efdf1dc006ff04a33bb3a4aec5d746",
            "x_keys": ["X1", "ID2"],
            "y_keys": ["Y"],
        }

        super().__init__(
            root=root,
            download=download,
            transform=transform,
            target_transform=target_transform,
            **settings,
        )
__init__
__init__(root, *, download=False, transform=None, target_transform=None)

Parameters:

Name Type Description Default
root str | Path

Root directory of dataset.

required
download bool

If True, downloads the dataset to the root directory. If dataset already exists, it is not redownloaded. Default, False.

False
transform Callable | Transform | None

Transforms the input.

None
target_transform Callable | Transform | None

Transforms the target.

None
Source code in src/beignet/datasets/_disgenet_dataset.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
def __init__(
    self,
    root: str | Path,
    *,
    download: bool = False,
    transform: Callable | Transform | None = None,
    target_transform: Callable | Transform | None = None,
):
    r"""
    Forward the caller's options to the parent initializer together with
    this dataset's fixed identifier, suffix, checksum, and column keys.

    Parameters
    ----------
    root : str | Path
        Root directory of dataset.

    download : bool
        When `True`, the dataset is downloaded into the root directory; an
        existing copy is not downloaded again. Default, `False`.

    transform : Callable | Transform | None
        Transforms the input.

    target_transform : Callable | Transform | None
        Transforms the target.
    """
    # Fixed, dataset-specific arguments.
    # NOTE(review): the inputs pair `X1` with `ID2` rather than `X2` —
    # confirm this is intentional.
    settings = {
        "identifier": 4168282,
        "suffix": "disgenet.csv",
        "checksum": "md5:b7efdf1dc006ff04a33bb3a4aec5d746",
        "x_keys": ["X1", "ID2"],
        "y_keys": ["Y"],
    }

    super().__init__(
        root=root,
        download=download,
        transform=transform,
        target_transform=target_transform,
        **settings,
    )

beignet.datasets.DrugCombDataset

Bases: TDCDataset

Source code in src/beignet/datasets/_drugcomb_dataset.py
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
class DrugCombDataset(TDCDataset):
    r"""
    `TDCDataset` subclass that pins the identifier, file suffix, checksum,
    and input/target column keys for this dataset.
    """

    def __init__(
        self,
        root: str | Path,
        *,
        download: bool = False,
        transform: Callable | Transform | None = None,
        target_transform: Callable | Transform | None = None,
    ):
        r"""
        Configure the parent `TDCDataset` for this data file.

        Parameters
        ----------
        root : str | Path
            Root directory of dataset.

        download : bool
            When `True`, the dataset is downloaded into the root directory;
            an existing copy is not downloaded again. Default, `False`.

        transform : Callable | Transform | None
            Transforms the input.

        target_transform : Callable | Transform | None
            Transforms the target.
        """
        # Fixed, dataset-specific arguments; everything else is passed
        # through from the caller unchanged.
        # NOTE(review): both key lists hold a single empty string, which
        # looks like a placeholder — confirm the intended column names.
        settings = {
            "identifier": 4215720,
            "suffix": "pkl",
            "checksum": "md5:cfe52eeb5948f63e9c3bc562fc1958c3",
            "x_keys": [""],
            "y_keys": [""],
        }

        super().__init__(
            root=root,
            download=download,
            transform=transform,
            target_transform=target_transform,
            **settings,
        )
__init__
__init__(root, *, download=False, transform=None, target_transform=None)

Parameters:

Name Type Description Default
root str | Path

Root directory of dataset.

required
download bool

If True, downloads the dataset to the root directory. If dataset already exists, it is not redownloaded. Default, False.

False
transform Callable | Transform | None

Transforms the input.

None
target_transform Callable | Transform | None

Transforms the target.

None
Source code in src/beignet/datasets/_drugcomb_dataset.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
def __init__(
    self,
    root: str | Path,
    *,
    download: bool = False,
    transform: Callable | Transform | None = None,
    target_transform: Callable | Transform | None = None,
):
    r"""
    Forward the caller's options to the parent initializer together with
    this dataset's fixed identifier, suffix, checksum, and column keys.

    Parameters
    ----------
    root : str | Path
        Root directory of dataset.

    download : bool
        When `True`, the dataset is downloaded into the root directory; an
        existing copy is not downloaded again. Default, `False`.

    transform : Callable | Transform | None
        Transforms the input.

    target_transform : Callable | Transform | None
        Transforms the target.
    """
    # Fixed, dataset-specific arguments.
    # NOTE(review): both key lists hold a single empty string, which looks
    # like a placeholder — confirm the intended column names.
    settings = {
        "identifier": 4215720,
        "suffix": "pkl",
        "checksum": "md5:cfe52eeb5948f63e9c3bc562fc1958c3",
        "x_keys": [""],
        "y_keys": [""],
    }

    super().__init__(
        root=root,
        download=download,
        transform=transform,
        target_transform=target_transform,
        **settings,
    )

beignet.datasets.FASTADataset

Bases: SizedSequenceDataset

Source code in src/beignet/datasets/_fasta_dataset.py
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
class FASTADataset(SizedSequenceDataset):
    """
    Random-access dataset over the records of a FASTA file.

    An index of per-record start offsets and sequence sizes is loaded from
    ``<root>.offsets.npy`` when that file exists; otherwise it is built by
    scanning the FASTA file and then saved to that path.
    """

    def __init__(
        self,
        root: str | PathLike,
        *,
        transform: Callable | Transform | None = None,
    ):
        """
        Parameters
        ----------
        root : str | PathLike
            Path to the FASTA file.

        transform : Callable | Transform | None
            Applied to each ``(sequence, description)`` pair returned by
            ``get``.

        Raises
        ------
        FileNotFoundError
            If ``root`` does not exist.
        """
        # Convert unconditionally so that non-``str`` path-like arguments
        # are accepted too; ``self.root`` must be set before it is resolved.
        self.root = Path(root).resolve()

        if not self.root.exists():
            raise FileNotFoundError(f"FASTA file not found: {self.root}")

        self.data = ThreadSafeFile(self.root, open)

        offsets = Path(f"{self.root}.offsets.npy")

        if offsets.exists():
            # The cache stores the two index arrays stacked row-wise.
            self.offsets, sizes = numpy.load(f"{offsets}")
        else:
            self.offsets, sizes = self._build_index()

            numpy.save(f"{offsets}", numpy.stack([self.offsets, sizes]))

        self.transform = transform

        super().__init__(self.root, sizes)

    def __getitem__(self, index: int) -> Tuple[str, str]:
        """Return record ``index``, passed through ``transform`` if set."""
        x = self.get(index)

        if self.transform:
            x = self.transform(x)

        return x

    def __len__(self) -> int:
        """Return the number of indexed records."""
        return self.offsets.size

    def get(self, index: int) -> Tuple[str, str]:
        """
        Read record ``index`` from the file.

        Returns
        -------
        Tuple[str, str]
            The sequence (its lines joined without separators) followed by
            the description line.
        """
        # NOTE(review): offsets are counted in characters of the decoded
        # text by ``_build_index`` but used here as file positions — this
        # assumes the two coincide (e.g. ASCII content with ``\n`` line
        # endings); confirm for other inputs.
        self.data.seek(self.offsets[index])

        if index == len(self) - 1:
            # Last record: read to the end of the file.
            data = self.data.read()
        else:
            data = self.data.read(self.offsets[index + 1] - self.offsets[index])

        description, *sequence = data.split("\n")

        return "".join(sequence), description

    def _build_index(self) -> Tuple[numpy.ndarray, numpy.ndarray]:
        """
        Scan the FASTA file and index its records.

        Returns
        -------
        Tuple[numpy.ndarray, numpy.ndarray]
            ``int64`` arrays holding, per record, the offset of its ``>``
            header line and the total length of its sequence lines.
        """
        with open(self.root, "r") as file:
            content = file.read()

        offsets, sizes = [], []

        current_offset, current_size = 0, 0

        # Stays ``False`` until the first header, so any leading lines only
        # advance the offset.
        parsing = False

        for line in tqdm.tqdm(content.splitlines(keepends=True)):
            if line.startswith(">"):
                if parsing:
                    # A new header closes the previous record.
                    sizes.append(current_size)

                    current_size = 0

                offsets.append(current_offset)

                parsing = True
            elif parsing:
                current_size += len(line.rstrip("\n"))

            current_offset += len(line)

        if parsing:
            # Close the final record.
            sizes.append(current_size)

        offsets = numpy.array(offsets, dtype=numpy.int64)

        sizes = numpy.array(sizes, dtype=numpy.int64)

        return offsets, sizes

beignet.datasets.FreeSolvDataset

Bases: TDCDataset

Source code in src/beignet/datasets/_freesolv_dataset.py
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
class FreeSolvDataset(TDCDataset):
    r"""
    `TDCDataset` subclass that pins the identifier, file suffix, checksum,
    and input/target column keys for this dataset.
    """

    def __init__(
        self,
        root: str | Path,
        *,
        download: bool = False,
        transform: Callable | Transform | None = None,
        target_transform: Callable | Transform | None = None,
    ):
        r"""
        Configure the parent `TDCDataset` for this data file.

        Parameters
        ----------
        root : str | Path
            Root directory of dataset.

        download : bool
            When `True`, the dataset is downloaded into the root directory;
            an existing copy is not downloaded again. Default, `False`.

        transform : Callable | Transform | None
            Transforms the input.

        target_transform : Callable | Transform | None
            Transforms the target.
        """
        # Fixed, dataset-specific arguments; everything else is passed
        # through from the caller unchanged.
        settings = {
            "identifier": 4259594,
            "suffix": "tsv",
            "checksum": "md5:2f2a6325ea440b41e22cb14c7775d591",
            "x_keys": ["Drug"],
            "y_keys": ["Y"],
        }

        super().__init__(
            root=root,
            download=download,
            transform=transform,
            target_transform=target_transform,
            **settings,
        )
__init__
__init__(root, *, download=False, transform=None, target_transform=None)

Parameters:

Name Type Description Default
root str | Path

Root directory of dataset.

required
download bool

If True, downloads the dataset to the root directory. If dataset already exists, it is not redownloaded. Default, False.

False
transform Callable | Transform | None

Transforms the input.

None
target_transform Callable | Transform | None

Transforms the target.

None
Source code in src/beignet/datasets/_freesolv_dataset.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
def __init__(
    self,
    root: str | Path,
    *,
    download: bool = False,
    transform: Callable | Transform | None = None,
    target_transform: Callable | Transform | None = None,
):
    r"""
    Forward the caller's options to the parent initializer together with
    this dataset's fixed identifier, suffix, checksum, and column keys.

    Parameters
    ----------
    root : str | Path
        Root directory of dataset.

    download : bool
        When `True`, the dataset is downloaded into the root directory; an
        existing copy is not downloaded again. Default, `False`.

    transform : Callable | Transform | None
        Transforms the input.

    target_transform : Callable | Transform | None
        Transforms the target.
    """
    # Fixed, dataset-specific arguments.
    settings = {
        "identifier": 4259594,
        "suffix": "tsv",
        "checksum": "md5:2f2a6325ea440b41e22cb14c7775d591",
        "x_keys": ["Drug"],
        "y_keys": ["Y"],
    }

    super().__init__(
        root=root,
        download=download,
        transform=transform,
        target_transform=target_transform,
        **settings,
    )

beignet.datasets.GDSC1Dataset

Bases: TDCDataset

Source code in src/beignet/datasets/_gdsc1_dataset.py
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
class GDSC1Dataset(TDCDataset):
    r"""
    `TDCDataset` subclass that pins the identifier, file suffix, checksum,
    and input/target column keys for this dataset.
    """

    def __init__(
        self,
        root: str | Path,
        *,
        download: bool = False,
        transform: Callable | Transform | None = None,
        target_transform: Callable | Transform | None = None,
    ):
        r"""
        Configure the parent `TDCDataset` for this data file.

        Parameters
        ----------
        root : str | Path
            Root directory of dataset.

        download : bool
            When `True`, the dataset is downloaded into the root directory;
            an existing copy is not downloaded again. Default, `False`.

        transform : Callable | Transform | None
            Transforms the input.

        target_transform : Callable | Transform | None
            Transforms the target.
        """
        # Fixed, dataset-specific arguments; everything else is passed
        # through from the caller unchanged.
        settings = {
            "identifier": 4165726,
            "suffix": "gdsc1.pkl",
            "checksum": "md5:6bee1e2507090559b34ab626e229c0be",
            "x_keys": ["X1", "X2"],
            "y_keys": ["Y"],
        }

        super().__init__(
            root=root,
            download=download,
            transform=transform,
            target_transform=target_transform,
            **settings,
        )
__init__
__init__(root, *, download=False, transform=None, target_transform=None)

Parameters:

Name Type Description Default
root str | Path

Root directory of dataset.

required
download bool

If True, downloads the dataset to the root directory. If dataset already exists, it is not redownloaded. Default, False.

False
transform Callable | Transform | None

Transforms the input.

None
target_transform Callable | Transform | None

Transforms the target.

None
Source code in src/beignet/datasets/_gdsc1_dataset.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
def __init__(
    self,
    root: str | Path,
    *,
    download: bool = False,
    transform: Callable | Transform | None = None,
    target_transform: Callable | Transform | None = None,
):
    r"""
    Forward the caller's options to the parent initializer together with
    this dataset's fixed identifier, suffix, checksum, and column keys.

    Parameters
    ----------
    root : str | Path
        Root directory of dataset.

    download : bool
        When `True`, the dataset is downloaded into the root directory; an
        existing copy is not downloaded again. Default, `False`.

    transform : Callable | Transform | None
        Transforms the input.

    target_transform : Callable | Transform | None
        Transforms the target.
    """
    # Fixed, dataset-specific arguments.
    settings = {
        "identifier": 4165726,
        "suffix": "gdsc1.pkl",
        "checksum": "md5:6bee1e2507090559b34ab626e229c0be",
        "x_keys": ["X1", "X2"],
        "y_keys": ["Y"],
    }

    super().__init__(
        root=root,
        download=download,
        transform=transform,
        target_transform=target_transform,
        **settings,
    )

beignet.datasets.GDSC2Dataset

Bases: TDCDataset

Source code in src/beignet/datasets/_gdsc2_dataset.py
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
class GDSC2Dataset(TDCDataset):
    def __init__(
        self,
        root: str | Path,
        *,
        download: bool = False,
        transform: Callable | Transform | None = None,
        target_transform: Callable | Transform | None = None,
    ):
        r"""
        Initialize the dataset by forwarding its fixed settings
        (identifier, suffix, checksum, and x/y keys) to ``TDCDataset``.

        Parameters
        ----------
        root : str | Path
            Root directory of dataset.

        download : bool
            If `True`, downloads the dataset to the root directory. If the
            dataset already exists, it is not downloaded again. Defaults
            to `False`.

        transform : Callable | Transform | None
            Transforms the input.

        target_transform : Callable | Transform | None
            Transforms the target.
        """
        # Settings that are fixed for this dataset; the remaining
        # arguments are passed through from the caller unchanged.
        fixed = {
            "identifier": 4165727,
            "suffix": "gdsc2.pkl",
            "checksum": "md5:217ccb2c49dc43485924f8678eaf7e34",
            "x_keys": ["X1", "X2"],
            "y_keys": ["Y"],
        }

        super().__init__(
            root=root,
            download=download,
            transform=transform,
            target_transform=target_transform,
            **fixed,
        )
__init__
__init__(root, *, download=False, transform=None, target_transform=None)

Parameters:

Name Type Description Default
root str | Path

Root directory of dataset.

required
download bool

If True, downloads the dataset to the root directory. If the dataset already exists, it is not downloaded again. Defaults to False.

False
transform Callable | Transform | None

Transforms the input.

None
target_transform Callable | Transform | None

Transforms the target.

None
Source code in src/beignet/datasets/_gdsc2_dataset.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
def __init__(
    self,
    root: str | Path,
    *,
    download: bool = False,
    transform: Callable | Transform | None = None,
    target_transform: Callable | Transform | None = None,
):
    r"""
    Initialize the dataset by forwarding its fixed settings
    (identifier, suffix, checksum, and x/y keys) to the parent class.

    Parameters
    ----------
    root : str | Path
        Root directory of dataset.

    download : bool
        If `True`, downloads the dataset to the root directory. If the
        dataset already exists, it is not downloaded again. Defaults
        to `False`.

    transform : Callable | Transform | None
        Transforms the input.

    target_transform : Callable | Transform | None
        Transforms the target.
    """
    # Settings that are fixed for this dataset; the remaining arguments
    # are passed through from the caller unchanged.
    fixed = {
        "identifier": 4165727,
        "suffix": "gdsc2.pkl",
        "checksum": "md5:217ccb2c49dc43485924f8678eaf7e34",
        "x_keys": ["X1", "X2"],
        "y_keys": ["Y"],
    }

    super().__init__(
        root=root,
        download=download,
        transform=transform,
        target_transform=target_transform,
        **fixed,
    )

beignet.datasets.HDF5TrajectoryDataset

Bases: TrajectoryDataset

Source code in src/beignet/datasets/_hdf5_trajectory_dataset.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
class HDF5TrajectoryDataset(TrajectoryDataset):
    def __init__(
        self,
        root: str | PathLike,
        transform: Callable[[Trajectory], Any] | None = None,
        stride: int | None = None,
        **kwargs,
    ):
        """
        Initialize a ``TrajectoryDataset`` with ``mdtraj.load_hdf5`` as its
        ``func`` and ``"hdf5"`` as its ``extension``.

        Parameters
        ----------
        root : str | PathLike
            Forwarded to ``TrajectoryDataset`` as ``root``.

        transform : Callable[[Trajectory], Any] | None
            Forwarded to ``TrajectoryDataset`` as ``transform``.

        stride : int | None
            Forwarded to ``TrajectoryDataset`` as ``stride``.

        **kwargs
            Any further keyword arguments, forwarded unchanged.
        """
        # Caller-supplied arguments first, then the two values this
        # subclass pins; keyword order does not affect the call.
        super().__init__(
            root=root,
            transform=transform,
            stride=stride,
            func=mdtraj.load_hdf5,
            extension="hdf5",
            **kwargs,
        )

beignet.datasets.HouHumanIntestinalAbsorptionDataset

Bases: TDCDataset

Source code in src/beignet/datasets/_hou_human_intestinal_absorption_dataset.py
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
class HouHumanIntestinalAbsorptionDataset(TDCDataset):
    def __init__(
        self,
        root: str | Path,
        *,
        download: bool = False,
        transform: Callable | Transform | None = None,
        target_transform: Callable | Transform | None = None,
    ):
        r"""
        Initialize the dataset by forwarding its fixed settings
        (identifier, suffix, checksum, and x/y keys) to ``TDCDataset``.

        Parameters
        ----------
        root : str | Path
            Root directory of dataset.

        download : bool
            If `True`, downloads the dataset to the root directory. If the
            dataset already exists, it is not downloaded again. Defaults
            to `False`.

        transform : Callable | Transform | None
            Transforms the input.

        target_transform : Callable | Transform | None
            Transforms the target.
        """
        # Settings that are fixed for this dataset; the remaining
        # arguments are passed through from the caller unchanged.
        fixed = {
            "identifier": 4259591,
            "suffix": "tsv",
            "checksum": "md5:ff67500a5c7b1321114a9d1b4078d92e",
            "x_keys": ["Drug"],
            "y_keys": ["Y"],
        }

        super().__init__(
            root=root,
            download=download,
            transform=transform,
            target_transform=target_transform,
            **fixed,
        )
__init__
__init__(root, *, download=False, transform=None, target_transform=None)

Parameters:

Name Type Description Default
root str | Path

Root directory of dataset.

required
download bool

If True, downloads the dataset to the root directory. If the dataset already exists, it is not downloaded again. Defaults to False.

False
transform Callable | Transform | None

Transforms the input.

None
target_transform Callable | Transform | None

Transforms the target.

None
Source code in src/beignet/datasets/_hou_human_intestinal_absorption_dataset.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
def __init__(
    self,
    root: str | Path,
    *,
    download: bool = False,
    transform: Callable | Transform | None = None,
    target_transform: Callable | Transform | None = None,
):
    r"""
    Initialize the dataset by forwarding its fixed settings
    (identifier, suffix, checksum, and x/y keys) to the parent class.

    Parameters
    ----------
    root : str | Path
        Root directory of dataset.

    download : bool
        If `True`, downloads the dataset to the root directory. If the
        dataset already exists, it is not downloaded again. Defaults
        to `False`.

    transform : Callable | Transform | None
        Transforms the input.

    target_transform : Callable | Transform | None
        Transforms the target.
    """
    # Settings that are fixed for this dataset; the remaining arguments
    # are passed through from the caller unchanged.
    fixed = {
        "identifier": 4259591,
        "suffix": "tsv",
        "checksum": "md5:ff67500a5c7b1321114a9d1b4078d92e",
        "x_keys": ["Drug"],
        "y_keys": ["Y"],
    }

    super().__init__(
        root=root,
        download=download,
        transform=transform,
        target_transform=target_transform,
        **fixed,
    )

beignet.datasets.HuRIDataset

Bases: TDCDataset

Source code in src/beignet/datasets/_huri_dataset.py
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
class HuRIDataset(TDCDataset):
    def __init__(
        self,
        root: str | Path,
        *,
        download: bool = False,
        transform: Callable | Transform | None = None,
        target_transform: Callable | Transform | None = None,
    ):
        r"""
        Initialize the dataset by forwarding its fixed settings
        (identifier, suffix, checksum, and x/y keys) to ``TDCDataset``.

        Parameters
        ----------
        root : str | Path
            Root directory of dataset.

        download : bool
            If `True`, downloads the dataset to the root directory. If the
            dataset already exists, it is not downloaded again. Defaults
            to `False`.

        transform : Callable | Transform | None
            Transforms the input.

        target_transform : Callable | Transform | None
            Transforms the target.
        """
        # Settings that are fixed for this dataset; the remaining
        # arguments are passed through from the caller unchanged.
        fixed = {
            "identifier": 4139567,
            "suffix": "huri.tab",
            "checksum": "md5:d934f40f048fc8686c0137c273ceec57",
            "x_keys": ["X1", "X2"],
            "y_keys": ["Y"],
        }

        super().__init__(
            root=root,
            download=download,
            transform=transform,
            target_transform=target_transform,
            **fixed,
        )
__init__
__init__(root, *, download=False, transform=None, target_transform=None)

Parameters:

Name Type Description Default
root str | Path

Root directory of dataset.

required
download bool

If True, downloads the dataset to the root directory. If the dataset already exists, it is not downloaded again. Defaults to False.

False
transform Callable | Transform | None

Transforms the input.

None
target_transform Callable | Transform | None

Transforms the target.

None
Source code in src/beignet/datasets/_huri_dataset.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
def __init__(
    self,
    root: str | Path,
    *,
    download: bool = False,
    transform: Callable | Transform | None = None,
    target_transform: Callable | Transform | None = None,
):
    r"""
    Initialize the dataset by forwarding its fixed settings
    (identifier, suffix, checksum, and x/y keys) to the parent class.

    Parameters
    ----------
    root : str | Path
        Root directory of dataset.

    download : bool
        If `True`, downloads the dataset to the root directory. If the
        dataset already exists, it is not downloaded again. Defaults
        to `False`.

    transform : Callable | Transform | None
        Transforms the input.

    target_transform : Callable | Transform | None
        Transforms the target.
    """
    # Settings that are fixed for this dataset; the remaining arguments
    # are passed through from the caller unchanged.
    fixed = {
        "identifier": 4139567,
        "suffix": "huri.tab",
        "checksum": "md5:d934f40f048fc8686c0137c273ceec57",
        "x_keys": ["X1", "X2"],
        "y_keys": ["Y"],
    }

    super().__init__(
        root=root,
        download=download,
        transform=transform,
        target_transform=target_transform,
        **fixed,
    )

beignet.datasets.JespersenIEDBDataset

Bases: TDCDataset

Source code in src/beignet/datasets/_jespersen_iedb_dataset.py
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
class JespersenIEDBDataset(TDCDataset):
    def __init__(
        self,
        root: str | Path,
        *,
        download: bool = False,
        transform: Callable | Transform | None = None,
        target_transform: Callable | Transform | None = None,
    ):
        r"""
        Initialize the dataset by forwarding its fixed settings
        (identifier, suffix, checksum, and x/y keys) to ``TDCDataset``.

        Parameters
        ----------
        root : str | Path
            Root directory of dataset.

        download : bool
            If `True`, downloads the dataset to the root directory. If the
            dataset already exists, it is not downloaded again. Defaults
            to `False`.

        transform : Callable | Transform | None
            Transforms the input.

        target_transform : Callable | Transform | None
            Transforms the target.
        """
        # Settings that are fixed for this dataset; the remaining
        # arguments are passed through from the caller unchanged.
        fixed = {
            "identifier": 4165725,
            "suffix": "pkl",
            "checksum": "md5:6d5b7e005e8f8cafa117c5224698804f",
            "x_keys": ["X1", "X2"],
            "y_keys": ["Y"],
        }

        super().__init__(
            root=root,
            download=download,
            transform=transform,
            target_transform=target_transform,
            **fixed,
        )
__init__
__init__(root, *, download=False, transform=None, target_transform=None)

Parameters:

Name Type Description Default
root str | Path

Root directory of dataset.

required
download bool

If True, downloads the dataset to the root directory. If the dataset already exists, it is not downloaded again. Defaults to False.

False
transform Callable | Transform | None

Transforms the input.

None
target_transform Callable | Transform | None

Transforms the target.

None
Source code in src/beignet/datasets/_jespersen_iedb_dataset.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
def __init__(
    self,
    root: str | Path,
    *,
    download: bool = False,
    transform: Callable | Transform | None = None,
    target_transform: Callable | Transform | None = None,
):
    r"""
    Initialize the dataset by forwarding its fixed settings
    (identifier, suffix, checksum, and x/y keys) to the parent class.

    Parameters
    ----------
    root : str | Path
        Root directory of dataset.

    download : bool
        If `True`, downloads the dataset to the root directory. If the
        dataset already exists, it is not downloaded again. Defaults
        to `False`.

    transform : Callable | Transform | None
        Transforms the input.

    target_transform : Callable | Transform | None
        Transforms the target.
    """
    # Settings that are fixed for this dataset; the remaining arguments
    # are passed through from the caller unchanged.
    fixed = {
        "identifier": 4165725,
        "suffix": "pkl",
        "checksum": "md5:6d5b7e005e8f8cafa117c5224698804f",
        "x_keys": ["X1", "X2"],
        "y_keys": ["Y"],
    }

    super().__init__(
        root=root,
        download=download,
        transform=transform,
        target_transform=target_transform,
        **fixed,
    )

beignet.datasets.JespersenPDBDataset

Bases: TDCDataset

Source code in src/beignet/datasets/_jespersen_pdb_dataset.py
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
class JespersenPDBDataset(TDCDataset):
    def __init__(
        self,
        root: str | Path,
        *,
        download: bool = False,
        transform: Callable | Transform | None = None,
        target_transform: Callable | Transform | None = None,
    ):
        r"""
        Initialize the dataset by forwarding its fixed settings
        (identifier, suffix, checksum, and x/y keys) to ``TDCDataset``.

        Parameters
        ----------
        root : str | Path
            Root directory of dataset.

        download : bool
            If `True`, downloads the dataset to the root directory. If the
            dataset already exists, it is not downloaded again. Defaults
            to `False`.

        transform : Callable | Transform | None
            Transforms the input.

        target_transform : Callable | Transform | None
            Transforms the target.
        """
        # Settings that are fixed for this dataset; the remaining
        # arguments are passed through from the caller unchanged.
        fixed = {
            "identifier": 4165724,
            "suffix": "pkl",
            "checksum": "md5:78090626dc78bb925a3b65f44dc8e8da",
            "x_keys": ["X1", "X2"],
            "y_keys": ["Y"],
        }

        super().__init__(
            root=root,
            download=download,
            transform=transform,
            target_transform=target_transform,
            **fixed,
        )
__init__
__init__(root, *, download=False, transform=None, target_transform=None)

Parameters:

Name Type Description Default
root str | Path

Root directory of dataset.

required
download bool

If True, downloads the dataset to the root directory. If the dataset already exists, it is not downloaded again. Defaults to False.

False
transform Callable | Transform | None

Transforms the input.

None
target_transform Callable | Transform | None

Transforms the target.

None
Source code in src/beignet/datasets/_jespersen_pdb_dataset.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
def __init__(
    self,
    root: str | Path,
    *,
    download: bool = False,
    transform: Callable | Transform | None = None,
    target_transform: Callable | Transform | None = None,
):
    r"""
    Initialize the dataset by forwarding its fixed settings
    (identifier, suffix, checksum, and x/y keys) to the parent class.

    Parameters
    ----------
    root : str | Path
        Root directory of dataset.

    download : bool
        If `True`, downloads the dataset to the root directory. If the
        dataset already exists, it is not downloaded again. Defaults
        to `False`.

    transform : Callable | Transform | None
        Transforms the input.

    target_transform : Callable | Transform | None
        Transforms the target.
    """
    # Settings that are fixed for this dataset; the remaining arguments
    # are passed through from the caller unchanged.
    fixed = {
        "identifier": 4165724,
        "suffix": "pkl",
        "checksum": "md5:78090626dc78bb925a3b65f44dc8e8da",
        "x_keys": ["X1", "X2"],
        "y_keys": ["Y"],
    }

    super().__init__(
        root=root,
        download=download,
        transform=transform,
        target_transform=target_transform,
        **fixed,
    )

beignet.datasets.KIBADataset

Bases: TDCDataset

Source code in src/beignet/datasets/_kiba_dataset.py
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
class KIBADataset(TDCDataset):
    def __init__(
        self,
        root: str | Path,
        *,
        download: bool = False,
        transform: Callable | Transform | None = None,
        target_transform: Callable | Transform | None = None,
    ):
        r"""
        Initialize the dataset by forwarding its fixed settings
        (identifier, suffix, checksum, and x/y keys) to ``TDCDataset``.

        Parameters
        ----------
        root : str | Path
            Root directory of dataset.

        download : bool
            If `True`, downloads the dataset to the root directory. If the
            dataset already exists, it is not downloaded again. Defaults
            to `False`.

        transform : Callable | Transform | None
            Transforms the input.

        target_transform : Callable | Transform | None
            Transforms the target.
        """
        # Settings that are fixed for this dataset; the remaining
        # arguments are passed through from the caller unchanged.
        fixed = {
            "identifier": 5255037,
            "suffix": "tsv",
            "checksum": "md5:c6fb4d13f07ed8b9b980e71be4893720",
            "x_keys": ["X1", "X2"],
            "y_keys": ["Y"],
        }

        super().__init__(
            root=root,
            download=download,
            transform=transform,
            target_transform=target_transform,
            **fixed,
        )
__init__
__init__(root, *, download=False, transform=None, target_transform=None)

Parameters:

Name Type Description Default
root str | Path

Root directory of dataset.

required
download bool

If True, downloads the dataset to the root directory. If the dataset already exists, it is not downloaded again. Defaults to False.

False
transform Callable | Transform | None

Transforms the input.

None
target_transform Callable | Transform | None

Transforms the target.

None
Source code in src/beignet/datasets/_kiba_dataset.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
def __init__(
    self,
    root: str | Path,
    *,
    download: bool = False,
    transform: Callable | Transform | None = None,
    target_transform: Callable | Transform | None = None,
):
    r"""
    Initialize the dataset by forwarding its fixed settings
    (identifier, suffix, checksum, and x/y keys) to the parent class.

    Parameters
    ----------
    root : str | Path
        Root directory of dataset.

    download : bool
        If `True`, downloads the dataset to the root directory. If the
        dataset already exists, it is not downloaded again. Defaults
        to `False`.

    transform : Callable | Transform | None
        Transforms the input.

    target_transform : Callable | Transform | None
        Transforms the target.
    """
    # Settings that are fixed for this dataset; the remaining arguments
    # are passed through from the caller unchanged.
    fixed = {
        "identifier": 5255037,
        "suffix": "tsv",
        "checksum": "md5:c6fb4d13f07ed8b9b980e71be4893720",
        "x_keys": ["X1", "X2"],
        "y_keys": ["Y"],
    }

    super().__init__(
        root=root,
        download=download,
        transform=transform,
        target_transform=target_transform,
        **fixed,
    )

beignet.datasets.LMDBDataset

Bases: Dataset

Source code in src/beignet/datasets/_lmdb_dataset.py
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
class LMDBDataset(Dataset):
    """
    Dataset backed by an LMDB environment.

    The item count is read from the ``num_examples`` key when the dataset
    is constructed. The item at position ``i`` is stored under the key
    ``str(i).encode()`` as gzip-compressed JSON.
    """

    def __init__(
        self,
        root: Union[str, Path],
        *,
        lock: bool = False,
        max_readers: int = 1,
        meminit: bool = True,
        readahead: bool = True,
        readonly: bool = True,
        transform: Union[Callable, Transform, None] = None,
    ):
        """
        Parameters
        ----------
        root : str | Path
            Location handed to ``lmdb.open``. A ``str`` is first resolved
            to an absolute ``Path``; a ``Path`` is used as given.

        lock, max_readers, meminit, readahead, readonly
            Forwarded unchanged to ``lmdb.open``.

        transform : Callable | Transform | None
            Applied to every decoded item before it is returned.

        Raises
        ------
        ImportError
            If the ``lmdb`` package cannot be imported.
        """
        super().__init__()

        # Imported here rather than at module level; the error message
        # points at the `beignet[lmdb]` extra.
        try:
            import lmdb
        except ImportError as error:
            raise ImportError(
                """
                LMDB datasets require the `lmdb` dependency:

                    $ pip install "beignet[lmdb]"
                """
            ) from error

        self._root = root

        self._transform_fn = transform

        if isinstance(self._root, str):
            self._root = Path(self._root).resolve()

        self._data = lmdb.open(
            str(self._root),
            lock=lock,
            max_readers=max_readers,
            meminit=meminit,
            readahead=readahead,
            readonly=readonly,
        )

        # The dataset length is stored in the database itself.
        with self._data.begin(write=False) as transaction:
            self._size = int(transaction.get(b"num_examples"))

    def __getitem__(self, index: int) -> Dict[str, Any]:
        """
        Return the decoded (and optionally transformed) item at ``index``.

        Raises
        ------
        IndexError
            If ``index`` is outside ``[0, len(self))``. Negative indices
            are not supported.
        """
        if not 0 <= index < self._size:
            raise IndexError(index)

        with self._data.begin(write=False) as transaction:
            with GzipFile(
                fileobj=BytesIO(transaction.get(str(index).encode())),
                mode="rb",
            ) as descriptor:
                item = json.loads(descriptor.read())

        # Compare against `None` explicitly: a callable object may be
        # falsy (e.g. it defines `__len__` and is empty), and a truthiness
        # test would silently skip it.
        if self._transform_fn is not None:
            item = self._transform_fn(item)

        return item

    def __len__(self) -> int:
        """Return the item count read from ``num_examples``."""
        return self._size

beignet.datasets.LombardoVolumeOfDistributionAtSteadyStateDataset

Bases: TDCDataset

Source code in src/beignet/datasets/_lombardo_volume_of_distribution_at_steady_state_dataset.py
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
class LombardoVolumeOfDistributionAtSteadyStateDataset(TDCDataset):
    def __init__(
        self,
        root: str | Path,
        *,
        download: bool = False,
        transform: Callable | Transform | None = None,
        target_transform: Callable | Transform | None = None,
    ):
        r"""
        Initialize the dataset by forwarding its fixed settings
        (identifier, suffix, checksum, and x/y keys) to ``TDCDataset``.

        Parameters
        ----------
        root : str | Path
            Root directory of dataset.

        download : bool
            If `True`, downloads the dataset to the root directory. If the
            dataset already exists, it is not downloaded again. Defaults
            to `False`.

        transform : Callable | Transform | None
            Transforms the input.

        target_transform : Callable | Transform | None
            Transforms the target.
        """
        # Settings that are fixed for this dataset; the remaining
        # arguments are passed through from the caller unchanged.
        fixed = {
            "identifier": 4267387,
            "suffix": "tsv",
            "checksum": "md5:268fbc1b70e45c870373b238ffd36313",
            "x_keys": ["X"],
            "y_keys": ["Y"],
        }

        super().__init__(
            root=root,
            download=download,
            transform=transform,
            target_transform=target_transform,
            **fixed,
        )
__init__
__init__(root, *, download=False, transform=None, target_transform=None)

Parameters:

Name Type Description Default
root str | Path

Root directory of dataset.

required
download bool

If True, downloads the dataset to the root directory. If the dataset already exists, it is not downloaded again. Defaults to False.

False
transform Callable | Transform | None

Transforms the input.

None
target_transform Callable | Transform | None

Transforms the target.

None
Source code in src/beignet/datasets/_lombardo_volume_of_distribution_at_steady_state_dataset.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
def __init__(
    self,
    root: str | Path,
    *,
    download: bool = False,
    transform: Callable | Transform | None = None,
    target_transform: Callable | Transform | None = None,
):
    r"""
    Initialize the dataset by forwarding its fixed settings
    (identifier, suffix, checksum, and x/y keys) to the parent class.

    Parameters
    ----------
    root : str | Path
        Root directory of dataset.

    download : bool
        If `True`, downloads the dataset to the root directory. If the
        dataset already exists, it is not downloaded again. Defaults
        to `False`.

    transform : Callable | Transform | None
        Transforms the input.

    target_transform : Callable | Transform | None
        Transforms the target.
    """
    # Settings that are fixed for this dataset; the remaining arguments
    # are passed through from the caller unchanged.
    fixed = {
        "identifier": 4267387,
        "suffix": "tsv",
        "checksum": "md5:268fbc1b70e45c870373b238ffd36313",
        "x_keys": ["X"],
        "y_keys": ["Y"],
    }

    super().__init__(
        root=root,
        download=download,
        transform=transform,
        target_transform=target_transform,
        **fixed,
    )

beignet.datasets.MOSESDataset

Bases: TDCDataset

Source code in src/beignet/datasets/_moses_dataset.py
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
class MOSESDataset(TDCDataset):
    def __init__(
        self,
        root: str | Path,
        *,
        download: bool = False,
        transform: Callable | Transform | None = None,
    ):
        r"""
        Initialize the dataset by forwarding its fixed settings
        (identifier, suffix, checksum, and x keys) to ``TDCDataset``.
        No y keys or target transform are passed.

        Parameters
        ----------
        root : str | Path
            Root directory of dataset.

        download : bool
            If `True`, downloads the dataset to the root directory. If the
            dataset already exists, it is not downloaded again. Defaults
            to `False`.

        transform : Callable | Transform | None
            Transforms the input.
        """
        # Settings that are fixed for this dataset; the remaining
        # arguments are passed through from the caller unchanged.
        fixed = {
            "identifier": 4170962,
            "suffix": "moses.tab",
            "checksum": "md5:b684443540f42cbdebb63ad090a1b4b3",
            "x_keys": ["smiles"],
        }

        super().__init__(
            root=root,
            download=download,
            transform=transform,
            **fixed,
        )
__init__
__init__(root, *, download=False, transform=None)

Parameters:

Name Type Description Default
root str | Path

Root directory of dataset.

required
download bool

If True, downloads the dataset to the root directory. If the dataset already exists, it is not downloaded again. Defaults to False.

False
transform Callable | Transform | None

Transforms the input.

None
Source code in src/beignet/datasets/_moses_dataset.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
def __init__(
    self,
    root: str | Path,
    *,
    download: bool = False,
    transform: Callable | Transform | None = None,
):
    r"""
    Initialize the dataset by forwarding its fixed settings
    (identifier, suffix, checksum, and x keys) to the parent class.
    No y keys or target transform are passed.

    Parameters
    ----------
    root : str | Path
        Root directory of dataset.

    download : bool
        If `True`, downloads the dataset to the root directory. If the
        dataset already exists, it is not downloaded again. Defaults
        to `False`.

    transform : Callable | Transform | None
        Transforms the input.
    """
    # Settings that are fixed for this dataset; the remaining arguments
    # are passed through from the caller unchanged.
    fixed = {
        "identifier": 4170962,
        "suffix": "moses.tab",
        "checksum": "md5:b684443540f42cbdebb63ad090a1b4b3",
        "x_keys": ["smiles"],
    }

    super().__init__(
        root=root,
        download=download,
        transform=transform,
        **fixed,
    )

beignet.datasets.MaBioavailabilityDataset

Bases: TDCDataset

Source code in src/beignet/datasets/_ma_bioavailability_dataset.py
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
class MaBioavailabilityDataset(TDCDataset):
    def __init__(
        self,
        root: str | Path,
        *,
        download: bool = False,
        transform: Callable | Transform | None = None,
        target_transform: Callable | Transform | None = None,
    ):
        r"""
        Initialize the dataset by forwarding its fixed settings
        (identifier, suffix, checksum, and x/y keys) to ``TDCDataset``.

        Parameters
        ----------
        root : str | Path
            Root directory of dataset.

        download : bool
            If `True`, downloads the dataset to the root directory. If the
            dataset already exists, it is not downloaded again. Defaults
            to `False`.

        transform : Callable | Transform | None
            Transforms the input.

        target_transform : Callable | Transform | None
            Transforms the target.
        """
        # Settings that are fixed for this dataset; the remaining
        # arguments are passed through from the caller unchanged.
        fixed = {
            "identifier": 4259567,
            "suffix": "tsv",
            "checksum": "md5:e1f3ee03667caf09ee007f4a14bca530",
            "x_keys": ["Drug"],
            "y_keys": ["Y"],
        }

        super().__init__(
            root=root,
            download=download,
            transform=transform,
            target_transform=target_transform,
            **fixed,
        )
__init__
__init__(root, *, download=False, transform=None, target_transform=None)

Parameters:

Name Type Description Default
root str | Path

Root directory of dataset.

required
download bool

If True, downloads the dataset to the root directory. If the dataset already exists, it is not downloaded again. Defaults to False.

False
transform Callable | Transform | None

Transforms the input.

None
target_transform Callable | Transform | None

Transforms the target.

None
Source code in src/beignet/datasets/_ma_bioavailability_dataset.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
def __init__(
    self,
    root: str | Path,
    *,
    download: bool = False,
    transform: Callable | Transform | None = None,
    target_transform: Callable | Transform | None = None,
):
    r"""
    Initialize the dataset by forwarding its fixed settings
    (identifier, suffix, checksum, and x/y keys) to the parent class.

    Parameters
    ----------
    root : str | Path
        Root directory of dataset.

    download : bool
        If `True`, downloads the dataset to the root directory. If the
        dataset already exists, it is not downloaded again. Defaults
        to `False`.

    transform : Callable | Transform | None
        Transforms the input.

    target_transform : Callable | Transform | None
        Transforms the target.
    """
    # Settings that are fixed for this dataset; the remaining arguments
    # are passed through from the caller unchanged.
    fixed = {
        "identifier": 4259567,
        "suffix": "tsv",
        "checksum": "md5:e1f3ee03667caf09ee007f4a14bca530",
        "x_keys": ["Drug"],
        "y_keys": ["Y"],
    }

    super().__init__(
        root=root,
        download=download,
        transform=transform,
        target_transform=target_transform,
        **fixed,
    )

beignet.datasets.MartinsBloodBrainBarrierDataset

Bases: TDCDataset

Source code in src/beignet/datasets/_martins_blood_brain_barrier_dataset.py
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
class MartinsBloodBrainBarrierDataset(TDCDataset):
    def __init__(
        self,
        root: str | Path,
        *,
        download: bool = False,
        transform: Callable | Transform | None = None,
        target_transform: Callable | Transform | None = None,
    ):
        r"""
        Initialize the dataset by forwarding its fixed settings
        (identifier, suffix, checksum, and x/y keys) to ``TDCDataset``.

        Parameters
        ----------
        root : str | Path
            Root directory of dataset.

        download : bool
            If `True`, downloads the dataset to the root directory. If the
            dataset already exists, it is not downloaded again. Defaults
            to `False`.

        transform : Callable | Transform | None
            Transforms the input.

        target_transform : Callable | Transform | None
            Transforms the target.
        """
        # Settings that are fixed for this dataset; the remaining
        # arguments are passed through from the caller unchanged.
        fixed = {
            "identifier": 4259566,
            "suffix": "tsv",
            "checksum": "md5:4c7ddf7260f9573476ba2d4ca877957f",
            "x_keys": ["Drug"],
            "y_keys": ["Y"],
        }

        super().__init__(
            root=root,
            download=download,
            transform=transform,
            target_transform=target_transform,
            **fixed,
        )
__init__
__init__(root, *, download=False, transform=None, target_transform=None)

Parameters:

Name Type Description Default
root str | Path

Root directory of dataset.

required
download bool

If True, downloads the dataset to the root directory. If the dataset already exists, it is not downloaded again. Defaults to False.

False
transform Callable | Transform | None

Transforms the input.

None
target_transform Callable | Transform | None

Transforms the target.

None
Source code in src/beignet/datasets/_martins_blood_brain_barrier_dataset.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
def __init__(
    self,
    root: str | Path,
    *,
    download: bool = False,
    transform: Callable | Transform | None = None,
    target_transform: Callable | Transform | None = None,
):
    r"""
    Initialize the dataset by forwarding its fixed settings
    (identifier, suffix, checksum, and x/y keys) to the parent class.

    Parameters
    ----------
    root : str | Path
        Root directory of dataset.

    download : bool
        If `True`, downloads the dataset to the root directory. If the
        dataset already exists, it is not downloaded again. Defaults
        to `False`.

    transform : Callable | Transform | None
        Transforms the input.

    target_transform : Callable | Transform | None
        Transforms the target.
    """
    # Settings that are fixed for this dataset; the remaining arguments
    # are passed through from the caller unchanged.
    fixed = {
        "identifier": 4259566,
        "suffix": "tsv",
        "checksum": "md5:4c7ddf7260f9573476ba2d4ca877957f",
        "x_keys": ["Drug"],
        "y_keys": ["Y"],
    }

    super().__init__(
        root=root,
        download=download,
        transform=transform,
        target_transform=target_transform,
        **fixed,
    )

beignet.datasets.NCATSPAMPAPermeabilityDataset

Bases: TDCDataset

Source code in src/beignet/datasets/_ncats_pampa_permeability_dataset.py
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
class NCATSPAMPAPermeabilityDataset(TDCDataset):
    """``TDCDataset`` specialised with a fixed identifier, file suffix,
    checksum and key lists."""

    def __init__(
        self,
        root: str | Path,
        *,
        download: bool = False,
        transform: Callable | Transform | None = None,
        target_transform: Callable | Transform | None = None,
    ):
        r"""
        Forward the caller's options, together with this dataset's fixed
        source settings, to ``TDCDataset``.

        Parameters
        ----------
        root : str | Path
            Root directory of dataset.

        download : bool
            If `True`, downloads the dataset to the root directory. If
            dataset already exists, it is not redownloaded. Default,
            `False`.

        transform : Callable | Transform | None
            Transforms the input.

        target_transform : Callable | Transform | None
            Transforms the target.
        """
        # Settings that never vary for this dataset; everything else is
        # supplied by the caller.
        fixed = {
            "identifier": 6695857,
            "suffix": "tsv",
            "checksum": "md5:ec813b5c6f829f92490faf03302d0960",
            "x_keys": ["Drug"],
            "y_keys": ["Y"],
        }

        super().__init__(
            root=root,
            download=download,
            transform=transform,
            target_transform=target_transform,
            **fixed,
        )
__init__
__init__(root, *, download=False, transform=None, target_transform=None)

Parameters:

Name Type Description Default
root str | Path

Root directory of dataset.

required
download bool

If True, downloads the dataset to the root directory. If dataset already exists, it is not redownloaded. Default, False.

False
transform Callable | Transform | None

Transforms the input.

None
target_transform Callable | Transform | None

Transforms the target.

None
Source code in src/beignet/datasets/_ncats_pampa_permeability_dataset.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
def __init__(
    self,
    root: str | Path,
    *,
    download: bool = False,
    transform: Callable | Transform | None = None,
    target_transform: Callable | Transform | None = None,
):
    r"""
    Forward the caller's options, together with this dataset's fixed
    source settings, to the parent constructor.

    Parameters
    ----------
    root : str | Path
        Root directory of dataset.

    download : bool
        If `True`, downloads the dataset to the root directory. If dataset
        already exists, it is not redownloaded. Default, `False`.

    transform : Callable | Transform | None
        Transforms the input.

    target_transform : Callable | Transform | None
        Transforms the target.
    """
    # Key lists handed to the parent; presumably column names of the TSV
    # file -- confirm against the parent class.
    x_keys, y_keys = ["Drug"], ["Y"]

    super().__init__(
        identifier=6695857,
        suffix="tsv",
        checksum="md5:ec813b5c6f829f92490faf03302d0960",
        x_keys=x_keys,
        y_keys=y_keys,
        root=root,
        download=download,
        transform=transform,
        target_transform=target_transform,
    )

beignet.datasets.ObachHalfLifeDataset

Bases: TDCDataset

Source code in src/beignet/datasets/_obach_half_life_dataset.py
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
class ObachHalfLifeDataset(TDCDataset):
    """``TDCDataset`` specialised with a fixed identifier, file suffix,
    checksum and key lists."""

    def __init__(
        self,
        root: str | Path,
        *,
        download: bool = False,
        transform: Callable | Transform | None = None,
        target_transform: Callable | Transform | None = None,
    ):
        r"""
        Forward the caller's options, together with this dataset's fixed
        source settings, to ``TDCDataset``.

        Parameters
        ----------
        root : str | Path
            Root directory of dataset.

        download : bool
            If `True`, downloads the dataset to the root directory. If
            dataset already exists, it is not redownloaded. Default,
            `False`.

        transform : Callable | Transform | None
            Transforms the input.

        target_transform : Callable | Transform | None
            Transforms the target.
        """
        # Settings that never vary for this dataset; everything else is
        # supplied by the caller.
        fixed = {
            "identifier": 4266799,
            "suffix": "tsv",
            "checksum": "md5:7872345be7e2f62215e91f12ac865ce1",
            "x_keys": ["X"],
            "y_keys": ["Y"],
        }

        super().__init__(
            root=root,
            download=download,
            transform=transform,
            target_transform=target_transform,
            **fixed,
        )
__init__
__init__(root, *, download=False, transform=None, target_transform=None)

Parameters:

Name Type Description Default
root str | Path

Root directory of dataset.

required
download bool

If True, downloads the dataset to the root directory. If dataset already exists, it is not redownloaded. Default, False.

False
transform Callable | Transform | None

Transforms the input.

None
target_transform Callable | Transform | None

Transforms the target.

None
Source code in src/beignet/datasets/_obach_half_life_dataset.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
def __init__(
    self,
    root: str | Path,
    *,
    download: bool = False,
    transform: Callable | Transform | None = None,
    target_transform: Callable | Transform | None = None,
):
    r"""
    Forward the caller's options, together with this dataset's fixed
    source settings, to the parent constructor.

    Parameters
    ----------
    root : str | Path
        Root directory of dataset.

    download : bool
        If `True`, downloads the dataset to the root directory. If dataset
        already exists, it is not redownloaded. Default, `False`.

    transform : Callable | Transform | None
        Transforms the input.

    target_transform : Callable | Transform | None
        Transforms the target.
    """
    # Key lists handed to the parent; presumably column names of the TSV
    # file -- confirm against the parent class.
    x_keys, y_keys = ["X"], ["Y"]

    super().__init__(
        identifier=4266799,
        suffix="tsv",
        checksum="md5:7872345be7e2f62215e91f12ac865ce1",
        x_keys=x_keys,
        y_keys=y_keys,
        root=root,
        download=download,
        transform=transform,
        target_transform=target_transform,
    )

beignet.datasets.PDBTrajectoryDataset

Bases: TrajectoryDataset

Source code in src/beignet/datasets/_pdb_trajectory_dataset.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
class PDBTrajectoryDataset(TrajectoryDataset):
    """``TrajectoryDataset`` preconfigured with ``mdtraj.load_pdb`` as its
    ``func`` and ``"pdb"`` as its ``extension``."""

    def __init__(
        self,
        root: str | PathLike,
        transform: Callable[[Trajectory], Any] | None = None,
        stride: int | None = None,
        **kwargs,
    ):
        r"""
        Parameters
        ----------
        root : str | PathLike
            Forwarded to the parent constructor as ``root``.

        transform : Callable[[Trajectory], Any] | None
            Forwarded to the parent constructor as ``transform``. Default,
            `None`.

        stride : int | None
            Forwarded to the parent constructor as ``stride``. Default,
            `None`.

        **kwargs
            Any further keyword arguments, forwarded unchanged to the
            parent constructor.
        """
        # The loader and file extension are the only PDB-specific parts.
        loader = mdtraj.load_pdb

        super().__init__(
            root=root,
            func=loader,
            extension="pdb",
            stride=stride,
            transform=transform,
            **kwargs,
        )

beignet.datasets.PDBbindDataset

Bases: TDCDataset

Source code in src/beignet/datasets/_pdbbind_dataset.py
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
class PDBbindDataset(TDCDataset):
    """``TDCDataset`` subclass whose source settings are currently a zero
    identifier and empty strings."""

    def __init__(
        self,
        root: str | Path,
        *,
        download: bool = False,
        transform: Callable | Transform | None = None,
        target_transform: Callable | Transform | None = None,
    ):
        r"""
        Forward the caller's options, together with this dataset's fixed
        source settings, to ``TDCDataset``.

        Parameters
        ----------
        root : str | Path
            Root directory of dataset.

        download : bool
            If `True`, downloads the dataset to the root directory. If
            dataset already exists, it is not redownloaded. Default,
            `False`.

        transform : Callable | Transform | None
            Transforms the input.

        target_transform : Callable | Transform | None
            Transforms the target.
        """
        # NOTE(review): identifier 0 and the empty suffix/checksum/keys look
        # like placeholders -- confirm whether this dataset is usable yet.
        fixed = {
            "identifier": 0,
            "suffix": "",
            "checksum": "",
            "x_keys": [""],
            "y_keys": [""],
        }

        super().__init__(
            root=root,
            download=download,
            transform=transform,
            target_transform=target_transform,
            **fixed,
        )
__init__
__init__(root, *, download=False, transform=None, target_transform=None)

Parameters:

Name Type Description Default
root str | Path

Root directory of dataset.

required
download bool

If True, downloads the dataset to the root directory. If dataset already exists, it is not redownloaded. Default, False.

False
transform Callable | Transform | None

Transforms the input.

None
target_transform Callable | Transform | None

Transforms the target.

None
Source code in src/beignet/datasets/_pdbbind_dataset.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
def __init__(
    self,
    root: str | Path,
    *,
    download: bool = False,
    transform: Callable | Transform | None = None,
    target_transform: Callable | Transform | None = None,
):
    r"""
    Forward the caller's options, together with this dataset's fixed
    source settings, to the parent constructor.

    Parameters
    ----------
    root : str | Path
        Root directory of dataset.

    download : bool
        If `True`, downloads the dataset to the root directory. If dataset
        already exists, it is not redownloaded. Default, `False`.

    transform : Callable | Transform | None
        Transforms the input.

    target_transform : Callable | Transform | None
        Transforms the target.
    """
    # NOTE(review): identifier 0 and the empty suffix/checksum/keys look
    # like placeholders -- confirm whether this dataset is usable yet.
    x_keys, y_keys = [""], [""]

    super().__init__(
        identifier=0,
        suffix="",
        checksum="",
        x_keys=x_keys,
        y_keys=y_keys,
        root=root,
        download=download,
        transform=transform,
        target_transform=target_transform,
    )

beignet.datasets.ParquetDataset

Bases: DataFrameDataset

Source code in src/beignet/datasets/_parquet_dataset.py
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
class ParquetDataset(DataFrameDataset):
    """``DataFrameDataset`` whose data is read from a Parquet file with
    ``pandas.read_parquet``."""

    def __init__(
        self,
        root: Union[str, Path],
        path: Union[str, Path],
        *,
        columns: Optional[Sequence[str]],
        target_columns: Optional[Sequence[str]],
        transform: Union[Callable, Transform, None] = None,
        target_transform: Union[Callable, Transform, None] = None,
        **kwargs,
    ) -> None:
        """
        Initialise the parent, remember the column selections and load the
        Parquet data into ``self._data``.

        :param root: Forwarded as the first positional argument to the
            ``DataFrameDataset`` constructor.

        :param path: Location of the Parquet data; passed as-is to
            ``pandas.read_parquet``.

        :param columns: x features of the dataset. Items in the dataset
            are of the form ((columns), (target_columns)).

        :param target_columns: y features of the dataset. Items in the
            dataset are of the form ((columns), (target_columns)).

        :param transform: A ``Callable`` or ``Transform`` that maps data to
            transformed data (default: ``None``).

        :param target_transform: ``Callable`` or ``Transform`` that maps a
            target to a transformed target (default: ``None``).

        :param kwargs: Extra keyword arguments forwarded to
            ``pandas.read_parquet``.
        """
        super().__init__(
            root,
            target_transform=target_transform,
            transform=transform,
        )

        # Assigned left to right, i.e. in the same order as separate
        # statements would be.
        self._path, self._columns, self._target_columns = (
            path,
            columns,
            target_columns,
        )

        # The file is read once, here, at construction time.
        self._data = pandas.read_parquet(self._path, **kwargs)
__init__
__init__(root, path, *, columns, target_columns, transform=None, target_transform=None, **kwargs)

:param root: Root directory where the dataset subdirectory exists or, if :attr:download is True, the directory where the dataset subdirectory will be created and the dataset downloaded.

:param columns: x features of the dataset. items in the dataset are of the form ((columns), (target_columns)).

:param target_columns: y features of the dataset. items in the dataset are of the form ((columns), (target_columns)).

:param transform: A Callable or Transform that maps data to transformed data (default: None).

:param target_transform: Callable or Transform that maps a target to a transformed target (default: None).

Source code in src/beignet/datasets/_parquet_dataset.py
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
def __init__(
    self,
    root: Union[str, Path],
    path: Union[str, Path],
    *,
    columns: Optional[Sequence[str]],
    target_columns: Optional[Sequence[str]],
    transform: Union[Callable, Transform, None] = None,
    target_transform: Union[Callable, Transform, None] = None,
    **kwargs,
) -> None:
    """
    Initialise the parent, remember the column selections and load the
    Parquet data into ``self._data``.

    :param root: Forwarded as the first positional argument to the parent
        constructor.

    :param path: Location of the Parquet data; passed as-is to
        ``pandas.read_parquet``.

    :param columns: x features of the dataset. Items in the dataset are
        of the form ((columns), (target_columns)).

    :param target_columns: y features of the dataset. Items in the dataset
        are of the form ((columns), (target_columns)).

    :param transform: A ``Callable`` or ``Transform`` that maps data to
        transformed data (default: ``None``).

    :param target_transform: ``Callable`` or ``Transform`` that maps a
        target to a transformed target (default: ``None``).

    :param kwargs: Extra keyword arguments forwarded to
        ``pandas.read_parquet``.
    """
    super().__init__(
        root,
        target_transform=target_transform,
        transform=transform,
    )

    # Assigned left to right, i.e. in the same order as separate
    # statements would be.
    self._path, self._columns, self._target_columns = (
        path,
        columns,
        target_columns,
    )

    # The file is read once, here, at construction time.
    self._data = pandas.read_parquet(self._path, **kwargs)

beignet.datasets.QM7Dataset

Bases: TDCDataset

Source code in src/beignet/datasets/_qm7_dataset.py
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
class QM7Dataset(TDCDataset):
    """``TDCDataset`` specialised with a fixed identifier, file suffix,
    checksum and key lists."""

    def __init__(
        self,
        root: str | Path,
        *,
        download: bool = False,
        transform: Callable | Transform | None = None,
        target_transform: Callable | Transform | None = None,
    ):
        r"""
        Forward the caller's options, together with this dataset's fixed
        source settings, to ``TDCDataset``.

        Parameters
        ----------
        root : str | Path
            Root directory of dataset.

        download : bool
            If `True`, downloads the dataset to the root directory. If
            dataset already exists, it is not redownloaded. Default,
            `False`.

        transform : Callable | Transform | None
            Transforms the input.

        target_transform : Callable | Transform | None
            Transforms the target.
        """
        # NOTE(review): both key lists hold a single empty string --
        # confirm this is what the parent expects for a "pkl" source.
        fixed = {
            "identifier": 6358510,
            "suffix": "pkl",
            "checksum": "md5:d7fb621e931864f547e5f6d362904dc4",
            "x_keys": [""],
            "y_keys": [""],
        }

        super().__init__(
            root=root,
            download=download,
            transform=transform,
            target_transform=target_transform,
            **fixed,
        )
__init__
__init__(root, *, download=False, transform=None, target_transform=None)

Parameters:

Name Type Description Default
root str | Path

Root directory of dataset.

required
download bool

If True, downloads the dataset to the root directory. If dataset already exists, it is not redownloaded. Default, False.

False
transform Callable | Transform | None

Transforms the input.

None
target_transform Callable | Transform | None

Transforms the target.

None
Source code in src/beignet/datasets/_qm7_dataset.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
def __init__(
    self,
    root: str | Path,
    *,
    download: bool = False,
    transform: Callable | Transform | None = None,
    target_transform: Callable | Transform | None = None,
):
    r"""
    Forward the caller's options, together with this dataset's fixed
    source settings, to the parent constructor.

    Parameters
    ----------
    root : str | Path
        Root directory of dataset.

    download : bool
        If `True`, downloads the dataset to the root directory. If dataset
        already exists, it is not redownloaded. Default, `False`.

    transform : Callable | Transform | None
        Transforms the input.

    target_transform : Callable | Transform | None
        Transforms the target.
    """
    # NOTE(review): both key lists hold a single empty string -- confirm
    # this is what the parent expects for a "pkl" source.
    x_keys, y_keys = [""], [""]

    super().__init__(
        identifier=6358510,
        suffix="pkl",
        checksum="md5:d7fb621e931864f547e5f6d362904dc4",
        x_keys=x_keys,
        y_keys=y_keys,
        root=root,
        download=download,
        transform=transform,
        target_transform=target_transform,
    )

beignet.datasets.QM7bDataset

Bases: TDCDataset

Source code in src/beignet/datasets/_qm7b_dataset.py
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
class QM7bDataset(TDCDataset):
    """``TDCDataset`` specialised with a fixed identifier, file suffix,
    checksum and key lists."""

    def __init__(
        self,
        root: str | Path,
        *,
        download: bool = False,
        transform: Callable | Transform | None = None,
        target_transform: Callable | Transform | None = None,
    ):
        r"""
        Forward the caller's options, together with this dataset's fixed
        source settings, to ``TDCDataset``.

        Parameters
        ----------
        root : str | Path
            Root directory of dataset.

        download : bool
            If `True`, downloads the dataset to the root directory. If
            dataset already exists, it is not redownloaded. Default,
            `False`.

        transform : Callable | Transform | None
            Transforms the input.

        target_transform : Callable | Transform | None
            Transforms the target.
        """
        # NOTE(review): both key lists hold a single empty string --
        # confirm this is what the parent expects for a "pkl" source.
        fixed = {
            "identifier": 6358512,
            "suffix": "pkl",
            "checksum": "md5:9b3e6c8d359ab560d47692a523fb2311",
            "x_keys": [""],
            "y_keys": [""],
        }

        super().__init__(
            root=root,
            download=download,
            transform=transform,
            target_transform=target_transform,
            **fixed,
        )
__init__
__init__(root, *, download=False, transform=None, target_transform=None)

Parameters:

Name Type Description Default
root str | Path

Root directory of dataset.

required
download bool

If True, downloads the dataset to the root directory. If dataset already exists, it is not redownloaded. Default, False.

False
transform Callable | Transform | None

Transforms the input.

None
target_transform Callable | Transform | None

Transforms the target.

None
Source code in src/beignet/datasets/_qm7b_dataset.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
def __init__(
    self,
    root: str | Path,
    *,
    download: bool = False,
    transform: Callable | Transform | None = None,
    target_transform: Callable | Transform | None = None,
):
    r"""
    Forward the caller's options, together with this dataset's fixed
    source settings, to the parent constructor.

    Parameters
    ----------
    root : str | Path
        Root directory of dataset.

    download : bool
        If `True`, downloads the dataset to the root directory. If dataset
        already exists, it is not redownloaded. Default, `False`.

    transform : Callable | Transform | None
        Transforms the input.

    target_transform : Callable | Transform | None
        Transforms the target.
    """
    # NOTE(review): both key lists hold a single empty string -- confirm
    # this is what the parent expects for a "pkl" source.
    x_keys, y_keys = [""], [""]

    super().__init__(
        identifier=6358512,
        suffix="pkl",
        checksum="md5:9b3e6c8d359ab560d47692a523fb2311",
        x_keys=x_keys,
        y_keys=y_keys,
        root=root,
        download=download,
        transform=transform,
        target_transform=target_transform,
    )

beignet.datasets.QM8Dataset

Bases: TDCDataset

Source code in src/beignet/datasets/_qm8_dataset.py
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
class QM8Dataset(TDCDataset):
    """``TDCDataset`` specialised with a fixed identifier, file suffix,
    checksum and key lists."""

    def __init__(
        self,
        root: str | Path,
        *,
        download: bool = False,
        transform: Callable | Transform | None = None,
        target_transform: Callable | Transform | None = None,
    ):
        r"""
        Forward the caller's options, together with this dataset's fixed
        source settings, to ``TDCDataset``.

        Parameters
        ----------
        root : str | Path
            Root directory of dataset.

        download : bool
            If `True`, downloads the dataset to the root directory. If
            dataset already exists, it is not redownloaded. Default,
            `False`.

        transform : Callable | Transform | None
            Transforms the input.

        target_transform : Callable | Transform | None
            Transforms the target.
        """
        # NOTE(review): both key lists hold a single empty string --
        # confirm this is what the parent expects for a "pkl" source.
        fixed = {
            "identifier": 6358513,
            "suffix": "pkl",
            "checksum": "md5:8c8798d0f0d1dd8461f29ffaf0fff9fb",
            "x_keys": [""],
            "y_keys": [""],
        }

        super().__init__(
            root=root,
            download=download,
            transform=transform,
            target_transform=target_transform,
            **fixed,
        )
__init__
__init__(root, *, download=False, transform=None, target_transform=None)

Parameters:

Name Type Description Default
root str | Path

Root directory of dataset.

required
download bool

If True, downloads the dataset to the root directory. If dataset already exists, it is not redownloaded. Default, False.

False
transform Callable | Transform | None

Transforms the input.

None
target_transform Callable | Transform | None

Transforms the target.

None
Source code in src/beignet/datasets/_qm8_dataset.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
def __init__(
    self,
    root: str | Path,
    *,
    download: bool = False,
    transform: Callable | Transform | None = None,
    target_transform: Callable | Transform | None = None,
):
    r"""
    Forward the caller's options, together with this dataset's fixed
    source settings, to the parent constructor.

    Parameters
    ----------
    root : str | Path
        Root directory of dataset.

    download : bool
        If `True`, downloads the dataset to the root directory. If dataset
        already exists, it is not redownloaded. Default, `False`.

    transform : Callable | Transform | None
        Transforms the input.

    target_transform : Callable | Transform | None
        Transforms the target.
    """
    # NOTE(review): both key lists hold a single empty string -- confirm
    # this is what the parent expects for a "pkl" source.
    x_keys, y_keys = [""], [""]

    super().__init__(
        identifier=6358513,
        suffix="pkl",
        checksum="md5:8c8798d0f0d1dd8461f29ffaf0fff9fb",
        x_keys=x_keys,
        y_keys=y_keys,
        root=root,
        download=download,
        transform=transform,
        target_transform=target_transform,
    )

beignet.datasets.QM9Dataset

Bases: TDCDataset

Source code in src/beignet/datasets/_qm9_dataset.py
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
class QM9Dataset(TDCDataset):
    """``TDCDataset`` specialised with a fixed identifier, file suffix,
    checksum and key lists."""

    def __init__(
        self,
        root: str | Path,
        *,
        download: bool = False,
        transform: Callable | Transform | None = None,
        target_transform: Callable | Transform | None = None,
    ):
        r"""
        Forward the caller's options, together with this dataset's fixed
        source settings, to ``TDCDataset``.

        Parameters
        ----------
        root : str | Path
            Root directory of dataset.

        download : bool
            If `True`, downloads the dataset to the root directory. If
            dataset already exists, it is not redownloaded. Default,
            `False`.

        transform : Callable | Transform | None
            Transforms the input.

        target_transform : Callable | Transform | None
            Transforms the target.
        """
        # NOTE(review): both key lists hold a single empty string --
        # confirm this is what the parent expects for a "pkl" source.
        fixed = {
            "identifier": 6179310,
            "suffix": "pkl",
            "checksum": "md5:172bfbd89f7536dfebcfe6ca440538f0",
            "x_keys": [""],
            "y_keys": [""],
        }

        super().__init__(
            root=root,
            download=download,
            transform=transform,
            target_transform=target_transform,
            **fixed,
        )
__init__
__init__(root, *, download=False, transform=None, target_transform=None)

Parameters:

Name Type Description Default
root str | Path

Root directory of dataset.

required
download bool

If True, downloads the dataset to the root directory. If dataset already exists, it is not redownloaded. Default, False.

False
transform Callable | Transform | None

Transforms the input.

None
target_transform Callable | Transform | None

Transforms the target.

None
Source code in src/beignet/datasets/_qm9_dataset.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
def __init__(
    self,
    root: str | Path,
    *,
    download: bool = False,
    transform: Callable | Transform | None = None,
    target_transform: Callable | Transform | None = None,
):
    r"""
    Forward the caller's options, together with this dataset's fixed
    source settings, to the parent constructor.

    Parameters
    ----------
    root : str | Path
        Root directory of dataset.

    download : bool
        If `True`, downloads the dataset to the root directory. If dataset
        already exists, it is not redownloaded. Default, `False`.

    transform : Callable | Transform | None
        Transforms the input.

    target_transform : Callable | Transform | None
        Transforms the target.
    """
    # NOTE(review): both key lists hold a single empty string -- confirm
    # this is what the parent expects for a "pkl" source.
    x_keys, y_keys = [""], [""]

    super().__init__(
        identifier=6179310,
        suffix="pkl",
        checksum="md5:172bfbd89f7536dfebcfe6ca440538f0",
        x_keys=x_keys,
        y_keys=y_keys,
        root=root,
        download=download,
        transform=transform,
        target_transform=target_transform,
    )

beignet.datasets.REALDatabaseDataset

Bases: TDCDataset

Source code in src/beignet/datasets/_real_database_dataset.py
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
class REALDatabaseDataset(TDCDataset):
    """``TDCDataset`` subclass whose source settings are currently a zero
    identifier and empty strings."""

    def __init__(
        self,
        root: str | Path,
        *,
        download: bool = False,
        transform: Callable | Transform | None = None,
        target_transform: Callable | Transform | None = None,
    ):
        r"""
        Forward the caller's options, together with this dataset's fixed
        source settings, to ``TDCDataset``.

        Parameters
        ----------
        root : str | Path
            Root directory of dataset.

        download : bool
            If `True`, downloads the dataset to the root directory. If
            dataset already exists, it is not redownloaded. Default,
            `False`.

        transform : Callable | Transform | None
            Transforms the input.

        target_transform : Callable | Transform | None
            Transforms the target.
        """
        # NOTE(review): identifier 0 and the empty suffix/checksum/keys look
        # like placeholders -- confirm whether this dataset is usable yet.
        fixed = {
            "identifier": 0,
            "suffix": "",
            "checksum": "",
            "x_keys": [""],
            "y_keys": [""],
        }

        super().__init__(
            root=root,
            download=download,
            transform=transform,
            target_transform=target_transform,
            **fixed,
        )
__init__
__init__(root, *, download=False, transform=None, target_transform=None)

Parameters:

Name Type Description Default
root str | Path

Root directory of dataset.

required
download bool

If True, downloads the dataset to the root directory. If dataset already exists, it is not redownloaded. Default, False.

False
transform Callable | Transform | None

Transforms the input.

None
target_transform Callable | Transform | None

Transforms the target.

None
Source code in src/beignet/datasets/_real_database_dataset.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
def __init__(
    self,
    root: str | Path,
    *,
    download: bool = False,
    transform: Callable | Transform | None = None,
    target_transform: Callable | Transform | None = None,
):
    r"""
    Forward the caller's options, together with this dataset's fixed
    source settings, to the parent constructor.

    Parameters
    ----------
    root : str | Path
        Root directory of dataset.

    download : bool
        If `True`, downloads the dataset to the root directory. If dataset
        already exists, it is not redownloaded. Default, `False`.

    transform : Callable | Transform | None
        Transforms the input.

    target_transform : Callable | Transform | None
        Transforms the target.
    """
    # NOTE(review): identifier 0 and the empty suffix/checksum/keys look
    # like placeholders -- confirm whether this dataset is usable yet.
    x_keys, y_keys = [""], [""]

    super().__init__(
        identifier=0,
        suffix="",
        checksum="",
        x_keys=x_keys,
        y_keys=y_keys,
        root=root,
        download=download,
        transform=transform,
        target_transform=target_transform,
    )

beignet.datasets.RandomEulerAngleDataset

Bases: RandomRotationDataset

Source code in src/beignet/datasets/_random_euler_angle_dataset.py
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
class RandomEulerAngleDataset(RandomRotationDataset):
    """``RandomRotationDataset`` whose data is the result of a single
    ``beignet.random_euler_angle`` call made at construction time."""

    def __init__(
        self,
        size: int,
        axes: str,
        degrees: bool | None = False,
        *,
        device: torch.device | None = None,
        dtype: torch.dtype | None = None,
        generator: Generator | None = None,
        layout: torch.layout | None = torch.strided,
        pin_memory: bool | None = False,
        requires_grad: bool | None = False,
        transform: Callable | Transform | None = None,
    ):
        r"""
        Generate the Euler angles and hand them to the parent constructor.

        Parameters
        ----------
        size : int
            Output size.

        axes : str
            Axes. 1-3 characters belonging to the set {'X', 'Y', 'Z'} for
            intrinsic rotations, or {'x', 'y', 'z'} for extrinsic
            rotations. Extrinsic and intrinsic rotations cannot be mixed.

        degrees : bool, optional
            If `True`, Euler angles are assumed to be in degrees. Default,
            `False`.

        device : torch.device, optional
            Device of the returned tensor. Default, current device for the
            default tensor type.

        dtype : torch.dtype, optional
            Type of the returned tensor. Default, global default.

        generator : torch.Generator, optional
            Pseudo-random number generator. Default, `None`.

        layout : torch.layout, optional
            Layout of the returned tensor. Default, `torch.strided`.

        pin_memory : bool, optional
            If `True`, returned tensor is allocated in pinned memory.
            Default, `False`.

        requires_grad : bool, optional
            Whether autograd records operations on the returned tensor.
            Default, `False`.

        transform : Callable | Transform | None, optional
            Forwarded to the parent constructor as ``transform``. Default,
            `None`.
        """
        # Every tensor-creation option goes to the sampler; only
        # ``transform`` is meant for the parent class.
        euler_angles = beignet.random_euler_angle(
            size,
            axes,
            degrees,
            device=device,
            dtype=dtype,
            generator=generator,
            layout=layout,
            pin_memory=pin_memory,
            requires_grad=requires_grad,
        )

        super().__init__(euler_angles, transform=transform)
__init__
__init__(size, axes, degrees=False, *, device=None, dtype=None, generator=None, layout=torch.strided, pin_memory=False, requires_grad=False, transform=None)

Parameters:

Name Type Description Default
size int

Output size.

required
axes str

Axes. 1-3 characters belonging to the set {‘X’, ‘Y’, ‘Z’} for intrinsic rotations, or {‘x’, ‘y’, ‘z’} for extrinsic rotations. Extrinsic and intrinsic rotations cannot be mixed.

required
degrees bool

If True, Euler angles are assumed to be in degrees. Default, False.

False
generator Generator

Pseudo-random number generator. Default, None.

None
dtype dtype

Type of the returned tensor. Default, global default.

None
layout layout

Layout of the returned tensor. Default, torch.strided.

strided
device device

Device of the returned tensor. Default, current device for the default tensor type.

None
requires_grad bool

Whether autograd records operations on the returned tensor. Default, False.

False
pin_memory bool

If True, returned tensor is allocated in pinned memory. Default, False.

False
Source code in src/beignet/datasets/_random_euler_angle_dataset.py
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
def __init__(
    self,
    size: int,
    axes: str,
    degrees: bool | None = False,
    *,
    device: torch.device | None = None,
    dtype: torch.dtype | None = None,
    generator: Generator | None = None,
    layout: torch.layout | None = torch.strided,
    pin_memory: bool | None = False,
    requires_grad: bool | None = False,
    transform: Callable | Transform | None = None,
):
    r"""
    Generate Euler angles with `beignet.random_euler_angle` and pass them,
    together with `transform`, to the parent initializer.

    Parameters
    ----------
    size : int
        Output size.

    axes : str
        Axes. 1-3 characters belonging to the set {‘X’, ‘Y’, ‘Z’} for
        intrinsic rotations, or {‘x’, ‘y’, ‘z’} for extrinsic rotations.
        Extrinsic and intrinsic rotations cannot be mixed.

    degrees : bool, optional
        If `True`, Euler angles are assumed to be in degrees. Default,
        `False`.

    generator : torch.Generator, optional
        Pseudo-random number generator. Default, `None`.

    dtype : torch.dtype, optional
        Type of the returned tensor. Default, global default.

    layout : torch.layout, optional
        Layout of the returned tensor. Default, `torch.strided`.

    device : torch.device, optional
        Device of the returned tensor. Default, current device for the
        default tensor type.

    requires_grad : bool, optional
        Whether autograd records operations on the returned tensor.
        Default, `False`.

    pin_memory : bool, optional
        If `True`, returned tensor is allocated in pinned memory. Default,
        `False`.

    transform : Callable | Transform | None, optional
        Forwarded unchanged to the parent initializer. Default, `None`.
    """
    # Tensor-construction options are passed through by keyword.
    tensor_options = {
        "generator": generator,
        "dtype": dtype,
        "layout": layout,
        "device": device,
        "requires_grad": requires_grad,
        "pin_memory": pin_memory,
    }

    euler_angles = beignet.random_euler_angle(
        size,
        axes,
        degrees,
        **tensor_options,
    )

    super().__init__(euler_angles, transform=transform)

beignet.datasets.RandomQuaternionDataset

Bases: RandomRotationDataset

Source code in src/beignet/datasets/_random_quaternion_dataset.py
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
class RandomQuaternionDataset(RandomRotationDataset):
    """Dataset whose data is produced by `beignet.random_quaternion`."""

    def __init__(
        self,
        size: int,
        canonical: bool = False,
        *,
        device: torch.device | None = None,
        dtype: torch.dtype | None = None,
        generator: Generator | None = None,
        layout: torch.layout | None = torch.strided,
        pin_memory: bool | None = False,
        requires_grad: bool | None = False,
        transform: Callable | Transform | None = None,
    ):
        r"""
        Generate quaternions with `beignet.random_quaternion` and pass them,
        together with `transform`, to the parent initializer.

        Parameters
        ----------
        size : int
            Output size.

        canonical : bool, optional
            Whether to map the redundant double cover of rotation space to a
            unique canonical single cover. If `True`, then the rotation
            quaternion is chosen from :math:`{q, -q}` such that the :math:`w`
            term is positive. If the :math:`w` term is :math:`0`, then the
            rotation quaternion is chosen such that the first non-zero term of
            the :math:`x`, :math:`y`, and :math:`z` terms is positive.

        generator : torch.Generator, optional
            Pseudo-random number generator. Default, `None`.

        dtype : torch.dtype, optional
            Type of the returned tensor. Default, global default.

        layout : torch.layout, optional
            Layout of the returned tensor. Default, `torch.strided`.

        device : torch.device, optional
            Device of the returned tensor. Default, current device for the
            default tensor type.

        requires_grad : bool, optional
            Whether autograd records operations on the returned tensor.
            Default, `False`.

        pin_memory : bool, optional
            If `True`, returned tensor is allocated in pinned memory.
            Default, `False`.

        transform : Callable | Transform | None, optional
            Forwarded unchanged to the parent initializer. Default, `None`.
        """
        # Tensor-construction options are passed through by keyword.
        tensor_options = {
            "generator": generator,
            "dtype": dtype,
            "layout": layout,
            "device": device,
            "requires_grad": requires_grad,
            "pin_memory": pin_memory,
        }

        quaternions = beignet.random_quaternion(
            size,
            canonical,
            **tensor_options,
        )

        super().__init__(quaternions, transform=transform)
__init__
__init__(size, canonical=False, *, device=None, dtype=None, generator=None, layout=torch.strided, pin_memory=False, requires_grad=False, transform=None)

Parameters:

Name Type Description Default
size int

Output size.

required
canonical bool

Whether to map the redundant double cover of rotation space to a unique canonical single cover. If True, then the rotation quaternion is chosen from :math:{q, -q} such that the :math:w term is positive. If the :math:w term is :math:0, then the rotation quaternion is chosen such that the first non-zero term of the :math:x, :math:y, and :math:z terms is positive.

False
generator Generator

Pseudo-random number generator. Default, None.

None
dtype dtype

Type of the returned tensor. Default, global default.

None
layout layout

Layout of the returned tensor. Default, torch.strided.

strided
device device

Device of the returned tensor. Default, current device for the default tensor type.

None
requires_grad bool

Whether autograd records operations on the returned tensor. Default, False.

False
pin_memory bool

If True, returned tensor is allocated in pinned memory. Default, False.

False
Source code in src/beignet/datasets/_random_quaternion_dataset.py
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
def __init__(
    self,
    size: int,
    canonical: bool = False,
    *,
    device: torch.device | None = None,
    dtype: torch.dtype | None = None,
    generator: Generator | None = None,
    layout: torch.layout | None = torch.strided,
    pin_memory: bool | None = False,
    requires_grad: bool | None = False,
    transform: Callable | Transform | None = None,
):
    r"""
    Generate quaternions with `beignet.random_quaternion` and pass them,
    together with `transform`, to the parent initializer.

    Parameters
    ----------
    size : int
        Output size.

    canonical : bool, optional
        Whether to map the redundant double cover of rotation space to a
        unique canonical single cover. If `True`, then the rotation
        quaternion is chosen from :math:`{q, -q}` such that the :math:`w`
        term is positive. If the :math:`w` term is :math:`0`, then the
        rotation quaternion is chosen such that the first non-zero term of
        the :math:`x`, :math:`y`, and :math:`z` terms is positive.

    generator : torch.Generator, optional
        Pseudo-random number generator. Default, `None`.

    dtype : torch.dtype, optional
        Type of the returned tensor. Default, global default.

    layout : torch.layout, optional
        Layout of the returned tensor. Default, `torch.strided`.

    device : torch.device, optional
        Device of the returned tensor. Default, current device for the
        default tensor type.

    requires_grad : bool, optional
        Whether autograd records operations on the returned tensor.
        Default, `False`.

    pin_memory : bool, optional
        If `True`, returned tensor is allocated in pinned memory.
        Default, `False`.

    transform : Callable | Transform | None, optional
        Forwarded unchanged to the parent initializer. Default, `None`.
    """
    # Tensor-construction options are passed through by keyword.
    tensor_options = {
        "generator": generator,
        "dtype": dtype,
        "layout": layout,
        "device": device,
        "requires_grad": requires_grad,
        "pin_memory": pin_memory,
    }

    quaternions = beignet.random_quaternion(
        size,
        canonical,
        **tensor_options,
    )

    super().__init__(quaternions, transform=transform)

beignet.datasets.RandomRotationDataset

Bases: Dataset

Source code in src/beignet/datasets/_random_rotation_dataset.py
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
class RandomRotationDataset(Dataset):
    def __init__(
        self,
        data: Tensor,
        *,
        transform: Callable | Transform | None = None,
    ):
        super().__init__()

        self.data = data

        self.transform = transform

    def __getitem__(self, index: int) -> Tensor:
        x = self.data[index]

        if self.transform:
            x = self.transform(x)

        return x

    def __len__(self) -> int:
        return len(self.data)

beignet.datasets.RandomRotationMatrixDataset

Bases: RandomRotationDataset

Source code in src/beignet/datasets/_random_rotation_matrix_dataset.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
class RandomRotationMatrixDataset(RandomRotationDataset):
    """Dataset whose data is produced by `beignet.random_rotation_matrix`."""

    def __init__(
        self,
        size: int,
        *,
        device: torch.device | None = None,
        dtype: torch.dtype | None = None,
        generator: Generator | None = None,
        layout: torch.layout | None = torch.strided,
        pin_memory: bool | None = False,
        requires_grad: bool | None = False,
        transform: Callable | Transform | None = None,
    ):
        r"""
        Generate rotation matrices with `beignet.random_rotation_matrix` and
        pass them, together with `transform`, to the parent initializer.

        Parameters
        ----------
        size : int
            Output size.

        generator : torch.Generator, optional
            Pseudo-random number generator. Default, `None`.

        dtype : torch.dtype, optional
            Type of the returned tensor. Default, global default.

        layout : torch.layout, optional
            Layout of the returned tensor. Default, `torch.strided`.

        device : torch.device, optional
            Device of the returned tensor. Default, current device for the
            default tensor type.

        requires_grad : bool, optional
            Whether autograd records operations on the returned tensor.
            Default, `False`.

        pin_memory : bool, optional
            If `True`, returned tensor is allocated in pinned memory. Default,
            `False`.

        transform : Callable | Transform | None, optional
            Forwarded unchanged to the parent initializer. Default, `None`.
        """
        # Tensor-construction options are passed through by keyword.
        tensor_options = {
            "generator": generator,
            "dtype": dtype,
            "layout": layout,
            "device": device,
            "requires_grad": requires_grad,
            "pin_memory": pin_memory,
        }

        rotation_matrices = beignet.random_rotation_matrix(
            size,
            **tensor_options,
        )

        super().__init__(rotation_matrices, transform=transform)
__init__
__init__(size, *, device=None, dtype=None, generator=None, layout=torch.strided, pin_memory=False, requires_grad=False, transform=None)

Parameters:

Name Type Description Default
size int

Output size.

required
generator Generator

Pseudo-random number generator. Default, None.

None
dtype dtype

Type of the returned tensor. Default, global default.

None
layout layout

Layout of the returned tensor. Default, torch.strided.

strided
device device

Device of the returned tensor. Default, current device for the default tensor type.

None
requires_grad bool

Whether autograd records operations on the returned tensor. Default, False.

False
pin_memory bool

If True, returned tensor is allocated in pinned memory. Default, False.

False
Source code in src/beignet/datasets/_random_rotation_matrix_dataset.py
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
def __init__(
    self,
    size: int,
    *,
    device: torch.device | None = None,
    dtype: torch.dtype | None = None,
    generator: Generator | None = None,
    layout: torch.layout | None = torch.strided,
    pin_memory: bool | None = False,
    requires_grad: bool | None = False,
    transform: Callable | Transform | None = None,
):
    r"""
    Generate rotation matrices with `beignet.random_rotation_matrix` and
    pass them, together with `transform`, to the parent initializer.

    Parameters
    ----------
    size : int
        Output size.

    generator : torch.Generator, optional
        Pseudo-random number generator. Default, `None`.

    dtype : torch.dtype, optional
        Type of the returned tensor. Default, global default.

    layout : torch.layout, optional
        Layout of the returned tensor. Default, `torch.strided`.

    device : torch.device, optional
        Device of the returned tensor. Default, current device for the
        default tensor type.

    requires_grad : bool, optional
        Whether autograd records operations on the returned tensor.
        Default, `False`.

    pin_memory : bool, optional
        If `True`, returned tensor is allocated in pinned memory. Default,
        `False`.

    transform : Callable | Transform | None, optional
        Forwarded unchanged to the parent initializer. Default, `None`.
    """
    # Tensor-construction options are passed through by keyword.
    tensor_options = {
        "generator": generator,
        "dtype": dtype,
        "layout": layout,
        "device": device,
        "requires_grad": requires_grad,
        "pin_memory": pin_memory,
    }

    rotation_matrices = beignet.random_rotation_matrix(
        size,
        **tensor_options,
    )

    super().__init__(rotation_matrices, transform=transform)

beignet.datasets.RandomRotationVectorDataset

Bases: RandomRotationDataset

Source code in src/beignet/datasets/_random_rotation_vector_dataset.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
class RandomRotationVectorDataset(RandomRotationDataset):
    """Dataset whose data is produced by `beignet.random_rotation_vector`."""

    def __init__(
        self,
        size: int,
        degrees: bool = False,
        *,
        device: torch.device | None = None,
        dtype: torch.dtype | None = None,
        generator: Generator | None = None,
        layout: torch.layout | None = torch.strided,
        pin_memory: bool | None = False,
        requires_grad: bool | None = False,
        transform: Callable | Transform | None = None,
    ):
        r"""
        Generate rotation vectors with `beignet.random_rotation_vector` and
        pass them, together with `transform`, to the parent initializer.

        Parameters
        ----------
        size : int
            Output size.

        degrees : bool
            If `True`, rotation vector magnitudes are assumed to be in degrees.
            Default, `False`.

        generator : torch.Generator, optional
            Pseudo-random number generator. Default, `None`.

        dtype : torch.dtype, optional
            Type of the returned tensor. Default, global default.

        layout : torch.layout, optional
            Layout of the returned tensor. Default, `torch.strided`.

        device : torch.device, optional
            Device of the returned tensor. Default, current device for the
            default tensor type.

        requires_grad : bool, optional
            Whether autograd records operations on the returned tensor.
            Default, `False`.

        pin_memory : bool, optional
            If `True`, returned tensor is allocated in pinned memory. Default,
            `False`.

        transform : Callable | Transform | None, optional
            Forwarded unchanged to the parent initializer. Default, `None`.
        """
        # Tensor-construction options are passed through by keyword.
        tensor_options = {
            "generator": generator,
            "dtype": dtype,
            "layout": layout,
            "device": device,
            "requires_grad": requires_grad,
            "pin_memory": pin_memory,
        }

        rotation_vectors = beignet.random_rotation_vector(
            size,
            degrees,
            **tensor_options,
        )

        super().__init__(rotation_vectors, transform=transform)
__init__
__init__(size, degrees=False, *, device=None, dtype=None, generator=None, layout=torch.strided, pin_memory=False, requires_grad=False, transform=None)

Parameters:

Name Type Description Default
size int

Output size.

required
degrees bool

If True, rotation vector magnitudes are assumed to be in degrees. Default, False.

False
generator Generator

Pseudo-random number generator. Default, None.

None
dtype dtype

Type of the returned tensor. Default, global default.

None
layout layout

Layout of the returned tensor. Default, torch.strided.

strided
device device

Device of the returned tensor. Default, current device for the default tensor type.

None
requires_grad bool

Whether autograd records operations on the returned tensor. Default, False.

False
pin_memory bool

If True, returned tensor is allocated in pinned memory. Default, False.

False
Source code in src/beignet/datasets/_random_rotation_vector_dataset.py
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
def __init__(
    self,
    size: int,
    degrees: bool = False,
    *,
    device: torch.device | None = None,
    dtype: torch.dtype | None = None,
    generator: Generator | None = None,
    layout: torch.layout | None = torch.strided,
    pin_memory: bool | None = False,
    requires_grad: bool | None = False,
    transform: Callable | Transform | None = None,
):
    r"""
    Generate rotation vectors with `beignet.random_rotation_vector` and
    pass them, together with `transform`, to the parent initializer.

    Parameters
    ----------
    size : int
        Output size.

    degrees : bool
        If `True`, rotation vector magnitudes are assumed to be in degrees.
        Default, `False`.

    generator : torch.Generator, optional
        Pseudo-random number generator. Default, `None`.

    dtype : torch.dtype, optional
        Type of the returned tensor. Default, global default.

    layout : torch.layout, optional
        Layout of the returned tensor. Default, `torch.strided`.

    device : torch.device, optional
        Device of the returned tensor. Default, current device for the
        default tensor type.

    requires_grad : bool, optional
        Whether autograd records operations on the returned tensor.
        Default, `False`.

    pin_memory : bool, optional
        If `True`, returned tensor is allocated in pinned memory. Default,
        `False`.

    transform : Callable | Transform | None, optional
        Forwarded unchanged to the parent initializer. Default, `None`.
    """
    # Tensor-construction options are passed through by keyword.
    tensor_options = {
        "generator": generator,
        "dtype": dtype,
        "layout": layout,
        "device": device,
        "requires_grad": requires_grad,
        "pin_memory": pin_memory,
    }

    rotation_vectors = beignet.random_rotation_vector(
        size,
        degrees,
        **tensor_options,
    )

    super().__init__(rotation_vectors, transform=transform)

beignet.datasets.SAbDabDataset

Bases: TDCDataset

Source code in src/beignet/datasets/_sabdab_dataset.py
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
class SAbDabDataset(TDCDataset):
    """`TDCDataset` configured with fixed SAbDab resource settings."""

    def __init__(
        self,
        root: str | Path,
        *,
        download: bool = False,
        transform: Callable | Transform | None = None,
        target_transform: Callable | Transform | None = None,
    ):
        r"""
        Forward the caller's options to the parent initializer along with
        the fixed identifier, file suffix, checksum, and column keys used
        for this dataset.

        Parameters
        ----------
        root : str | Path
            Root directory of dataset.

        download: bool
            If `True`, downloads the dataset to the root directory. If dataset
            already exists, it is not redownloaded. Default, `False`.

        transform : Callable | Transform | None
            Transforms the input.

        target_transform : Callable | Transform | None
            Transforms the target.
        """
        # Column keys handed to the parent as inputs and targets.
        input_keys = ["X1", "X2"]
        target_keys = ["Y"]

        super().__init__(
            root=root,
            download=download,
            identifier=4167357,
            suffix="csv",
            checksum="md5:f4d0dba68859f7ae2a042bd90423b22b",
            x_keys=input_keys,
            y_keys=target_keys,
            transform=transform,
            target_transform=target_transform,
        )
__init__
__init__(root, *, download=False, transform=None, target_transform=None)

Parameters:

Name Type Description Default
root str | Path

Root directory of dataset.

required
download bool

If True, downloads the dataset to the root directory. If dataset already exists, it is not redownloaded. Default, False.

False
transform Callable | Transform | None

Transforms the input.

None
target_transform Callable | Transform | None

Transforms the target.

None
Source code in src/beignet/datasets/_sabdab_dataset.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
def __init__(
    self,
    root: str | Path,
    *,
    download: bool = False,
    transform: Callable | Transform | None = None,
    target_transform: Callable | Transform | None = None,
):
    r"""
    Forward the caller's options to the parent initializer along with the
    fixed identifier, file suffix, checksum, and column keys used for this
    dataset.

    Parameters
    ----------
    root : str | Path
        Root directory of dataset.

    download: bool
        If `True`, downloads the dataset to the root directory. If dataset
        already exists, it is not redownloaded. Default, `False`.

    transform : Callable | Transform | None
        Transforms the input.

    target_transform : Callable | Transform | None
        Transforms the target.
    """
    # Column keys handed to the parent as inputs and targets.
    input_keys = ["X1", "X2"]
    target_keys = ["Y"]

    super().__init__(
        root=root,
        download=download,
        identifier=4167357,
        suffix="csv",
        checksum="md5:f4d0dba68859f7ae2a042bd90423b22b",
        x_keys=input_keys,
        y_keys=target_keys,
        transform=transform,
        target_transform=target_transform,
    )

beignet.datasets.SKEMPIDataset

Bases: ParquetDataset

The Structural Kinetic and Energetic database of Mutant Protein Interactions (SKEMPI) database is a compilation of experimental data on the thermodynamics of mutations in protein-protein interactions. The database includes protein names, protein structures from the Protein Data Bank (PDB), mutation information, and the change in free energy upon mutation. The change in free energy gives an indication of how the mutation affects the binding affinity of the two proteins.

Source code in src/beignet/datasets/_skempi_dataset.py
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
class SKEMPIDataset(ParquetDataset):
    """
    The Structural Kinetic and Energetic database of Mutant Protein
    Interactions (SKEMPI) database is a compilation of experimental data on
    the thermodynamics of mutations in protein-protein interactions. The
    database includes protein names, protein structures from the Protein
    Data Bank (PDB), mutation information, and the change in free energy
    upon mutation. The change in free energy gives an indication of how the
    mutation affects the binding affinity of the two proteins.
    """

    def __init__(
        self,
        root: Union[str, Path],
        *,
        download: bool = False,
        sequence_transform_fn: Union[Callable, Transform, None] = None,
        structure_transform_fn: Union[Callable, Transform, None] = None,
        target_transform: Union[Callable, Transform, None] = None,
    ) -> None:
        """
        :param root: Root directory where the dataset subdirectory exists or,
            if :attr:`download` is ``True``, the directory where the dataset
            subdirectory will be created and the dataset downloaded.

        :param download: If ``True``, download the dataset to the :attr:`root`
            directory (default: ``False``). If the dataset is already
            downloaded, it is not redownloaded.

        :param sequence_transform_fn: A ``Callable`` or ``Transform`` that maps
            sequences to transformed sequences (default: ``None``).

        :param structure_transform_fn: A ``Callable`` or ``Transform`` that
            maps structures to transformed structures (default: ``None``).

        :param target_transform: ``Callable`` or ``Transform`` that maps a
            target to a transformed target (default: ``None``).
        """
        # A string root is resolved to an absolute ``Path``; other values
        # are used as given.
        if isinstance(root, str):
            root = Path(root).resolve()

        self._root = root

        if download:
            beignet.io.download(
                source="s3://beignet-data-dev/designdb/lake/thirdparty/skempi/cc5952a4a37f4f1fbe14ce484a00eb87_0.snappy.parquet",
                destination=self._root / "SKEMPI-v2.0",
                filename="SKEMPI-v2.0.parquet",
            )

            beignet.io.download_and_extract_archive(
                resource="https://life.bsc.es/pid/skempi2/database/download/SKEMPI2_PDBs.tgz",
                source=self._root,
                destination=self._root,
                name="SKEMPI-v2.0.tar.gz",
                remove_archive=True,
            )

        super().__init__(
            self._root / "SKEMPI-v2.0",
            self._root / "SKEMPI-v2.0" / "SKEMPI-v2.0.parquet",
        )

        self._sequence_transform_fn = sequence_transform_fn

        self._structure_transform_fn = structure_transform_fn

        self._target_transform_fn = target_transform

        # Keep only rows that have every column read by ``__getitem__``.
        self._data = self._data.dropna(
            subset=[
                "affinity_antigen_sequence",
                "affinity_pkd",
                "fv_heavy",
                "fv_light",
            ],
        )

        self._parser = PDBParser()

        # NOTE(review): glob order is filesystem-dependent, and
        # ``__getitem__`` indexes this list with the dataset index; confirm
        # the intended pairing between table rows and structure files.
        self._structure_paths = [*self._root.glob("PDBs/*.pdb")]

    def __getitem__(
        self,
        index: int,
    ) -> (((str, str, str), (Tensor, [str])), (float, ...)):
        """
        :param index: index of the record to return.

        :returns: A pair of the form:

            .. math::

                \\left(\\text{antibodies},\\;\\text{targets}\\right).

            Each antibody in :math:`\\text{antibodies}` is a pair of the form:

            .. math::

                \\left(\\text{sequences},\\;\\text{structures}\\right).

            :math:`\\text{sequences}` is a :math:`3`-tuple of the form:

            .. math::

                \\left(\\text{VH},\\;\\text{VL},\\;\\text{Ag}\\right)

            where :math:`\\text{VH}` is a ``str`` that represents the
            immunoglobulin heavy chain variable region sequence,
            :math:`\\text{VL}` is a ``str`` that represents the
            immunoglobulin light chain variable region sequence, and
            :math:`\\text{Ag}` is a ``str`` that represents the antigen
            sequence.

            :math:`\\text{structures}` is a pair made of the stacked
            per-atom coordinates (a ``Tensor``) and a list holding one
            identifier per atom. Each transform, when set, replaces its
            own element (sequences, structures, or target).

            An antibody is made up of two heavy chains and two light chains.
            Each heavy and light chain has a variable (:math:`V`) region and
            a constant (:math:`C`) region. The variable regions of the heavy
            and light chains form the antigen-binding site of the antibody.
            Each variable region is unique and gives the antibody its
            specificity for binding to a particular antigen. The heavy and
            light chain variable regions are named for their extensive
            sequence variability among different antibodies. This
            variability allows the immune system to produce antibodies that
            can specifically recognize and bind to a vast array of antigens.

            Antigens are molecules capable of stimulating an immune
            response. They are typically proteins or polysaccharides. This
            includes components of bacterial cell walls, capsules, pili,
            and bacterial flagella, as well as proteins in viruses.

            The immune system recognizes antigens as foreign and mounts an
            immune response against them. Antigens are recognized by
            specific antibodies, which bind to the antigen. This binding can
            neutralize the antigen, mark it for destruction by other immune
            cells, or trigger other types of immune responses. Each type of
            antibody recognizes and binds to a specific antigen; this
            specificity is determined by the variable regions of the
            antibody's heavy and light chains.

            :math:`\\text{pKd}` is the negative logarithm of the
            dissociation constant (:math:`\\text{Kd}`). The dissociation
            constant is a measure of how tightly a ligand (e.g., a drug)
            binds to a receptor. The smaller the ``Kd`` value, the tighter
            or stronger the binding between the ligand and its receptor.
            Because :math:`\\text{pKd}` is the negative logarithm of
            :math:`\\text{Kd}`, a larger :math:`\\text{pKd}` value therefore
            represents stronger binding affinity. The :math:`\\text{pKd}`
            value is commonly used in pharmacology and medicinal chemistry
            because it allows easier comparison of binding affinities across
            different ligand-receptor pairs. It’s an important metric when
            assessing the potential efficacy of a drug.
        """
        item = super().__getitem__(index)

        sequence = (
            item["fv_heavy"],
            item["fv_light"],
            item["affinity_antigen_sequence"],
        )

        if self._sequence_transform_fn is not None:
            sequence = self._sequence_transform_fn(sequence)

        structure_path = self._structure_paths[index]

        # The structure is named after the file, without its extension.
        structure = self._parser.get_structure(
            structure_path.stem,
            structure_path,
        )

        atomic_coordinates = []

        residue_names = []

        for atom in structure.get_atoms():
            atomic_coordinates.append(torch.from_numpy(atom.coord))

            # NOTE(review): if ``PDBParser`` is Biopython's, its docs give
            # ``get_full_id()`` as (structure id, model id, chain id,
            # residue id, atom id), so the third field kept here looks like
            # the chain id rather than a residue name; confirm intent.
            _, _, residue_name, _, _ = atom.get_full_id()

            residue_names.append(residue_name)

        structure = (
            torch.stack(atomic_coordinates),
            residue_names,
        )

        # Each transform receives the value it is meant to transform.
        if self._structure_transform_fn is not None:
            structure = self._structure_transform_fn(structure)

        target = item["affinity_pkd"]

        if self._target_transform_fn is not None:
            target = self._target_transform_fn(target)

        return (sequence, structure), target
__getitem__
__getitem__(index)

:param index: index of the record to return.

:returns: A pair of the form:

.. math::

    \left(\text{antibodies},\;\text{targets}\right).

Each antibody in :math:`\text{antibodies}` is a pair of the form:

.. math::

    \left(\text{sequences},\;\text{structures}\right).

:math:`\text{sequences}` is a :math:`3`-tuple of the form:

.. math::

    \left(\text{VH},\;\text{VL},\;\text{Ag}\right)

where :math:`\text{VH}` is a ``str`` that represents the
immunoglobulin heavy chain variable region sequence,
:math:`\text{VL}` is a ``str`` that represents the immunoglobulin light chain
variable region sequence, and :math:`\text{Ag}` is a ``str`` that
represents the antigen sequence.

An antibody is made up of two heavy chains and two light chains.
Each heavy and light chain has a variable (:math:`V`) region and
a constant (:math:`C`) region. The variable regions of the heavy
and light chains form the antigen-binding site of the antibody.
Each variable region is unique and gives the antibody its
specificity for binding to a particular antigen. The heavy and
light chain variable regions are named for their extensive
sequence variability among different antibodies. This
variability allows the immune system to produce antibodies that
can specifically recognize and bind to a vast array of antigens.

Antigens are molecules capable of stimulating an immune
response. They are typically proteins or polysaccharides. This
includes components of bacterial cell walls, capsules, pili,
and bacterial flagella, as well as proteins in viruses.

The immune system recognizes antigens as foreign and mounts an
immune response against them. Antigens are recognized by
specific antibodies, which bind to the antigen. This binding can
neutralize the antigen, mark it for destruction by other immune
cells, or trigger other types of immune responses. Each type of
antibody recognizes and binds to a specific antigen; this
specificity is determined by the variable regions of the
antibody's heavy and light chains.

:math:`\text{pKd}` is the negative logarithm of the
dissociation constant (:math:`\text{Kd}`). The dissociation
constant is a measure of how tightly a ligand (e.g., a drug)
binds to a receptor. The smaller the ``Kd`` value, the tighter
or stronger the binding between the ligand and its receptor.
Because :math:`\text{pKd}` is the negative logarithm of
:math:`\text{Kd}`, a larger :math:`\text{pKd}` value therefore
represents stronger binding affinity. The :math:`\text{pKd}`
value is commonly used in pharmacology and medicinal chemistry
because it allows easier comparison of binding affinities across
different ligand-receptor pairs. It’s an important metric when
assessing the potential efficacy of a drug.
Source code in src/beignet/datasets/_skempi_dataset.py
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
def __getitem__(
    self,
    index: int,
) -> (((str, str, str), (Tensor, [str])), (float, ...)):
    """
    Return the record at ``index`` as ``((sequence, structure), target)``.

    :param index: index of the record to return.

    :returns: A pair of the form:

        .. math::

            \\left(\\text{antibodies},\\;\\text{targets}\\right).

        Each antibody in :math:`\\text{antibodies}` is a pair of the form:

        .. math::

            \\left(\\text{sequences},\\;\\text{structures}\\right).

        :math:`\\text{sequences}` is a :math:`3`-tuple of the form:

        .. math::

            \\left(\\text{VH},\\;\\text{VL},\\;\\text{Ag}\\right)

        where :math:`\\text{VH}` is a ``str`` that represents the
        immunoglobulin heavy chain variable region sequence,
        :math:`\\text{VL}` is a ``str`` that represents the immunoglobulin
        light chain variable region sequence, and :math:`\\text{Ag}` is a
        ``str`` that represents the antigen sequence.

        :math:`\\text{structures}` is a pair whose first element is the
        stacked per-atom coordinate tensor of the parsed structure file
        and whose second element is a list holding, for every atom, the
        third component of the atom's full identifier.

        :math:`\\text{targets}` is the ``"affinity_pkd"`` field of the
        record.

        Each of the three parts is passed through its own transform
        (sequence, structure, target) when that transform is not
        ``None``.

        An antibody is made up of two heavy chains and two light chains.
        Each heavy and light chain has a variable (:math:`V`) region and
        a constant (:math:`C`) region. The variable regions of the heavy
        and light chains form the antigen-binding site of the antibody.
        Each variable region is unique and gives the antibody its
        specificity for binding to a particular antigen. The heavy and
        light chain variable regions are named for their extensive
        sequence variability among different antibodies. This
        variability allows the immune system to produce antibodies that
        can specifically recognize and bind to a vast array of antigens.

        Antigens are molecules capable of stimulating an immune
        response. They are typically proteins or polysaccharides. This
        includes components of bacterial cell walls, capsules, pili,
        and bacterial flagella, as well as proteins in viruses.

        The immune system recognizes antigens as foreign and mounts an
        immune response against them. Antigens are recognized by
        specific antibodies, which bind to the antigen. This binding can
        neutralize the antigen, mark it for destruction by other immune
        cells, or trigger other types of immune responses. Each type of
        antibody recognizes and binds to a specific antigen; this
        specificity is determined by the variable regions of the
        antibody's heavy and light chains.

        :math:`\\text{pKd}` is the negative logarithm of the
        dissociation constant (:math:`\\text{Kd}`). The dissociation
        constant is a measure of how tightly a ligand (e.g., a drug)
        binds to a receptor. The smaller the ``Kd`` value, the tighter
        or stronger the binding between the ligand and its receptor.
        Because :math:`\\text{pKd}` is the negative logarithm of
        :math:`\\text{Kd}`, a larger :math:`\\text{pKd}` value therefore
        represents stronger binding affinity. The :math:`\\text{pKd}`
        value is commonly used in pharmacology and medicinal chemistry
        because it allows easier comparison of binding affinities across
        different ligand-receptor pairs. It is an important metric when
        assessing the potential efficacy of a drug.
    """
    item = super().__getitem__(index)

    sequence = (
        item["fv_heavy"],
        item["fv_light"],
        item["affinity_antigen_sequence"],
    )

    if self._sequence_transform_fn is not None:
        sequence = self._sequence_transform_fn(sequence)

    # NOTE(review): the structure file is picked by position in the globbed
    # path list; whether that order lines up with the table rows cannot be
    # confirmed from this method -- verify against the dataset layout.
    path = self._structure_paths[index]

    # The structure is named after the file stem.
    name, _ = os.path.splitext(os.path.basename(path))

    structure = self._parser.get_structure(name, path)

    atomic_coordinates = []

    residue_names = []

    for atom in structure.get_atoms():
        atomic_coordinates.append(torch.from_numpy(atom.coord))

        # NOTE(review): if the parser is Biopython's ``PDBParser``, the full
        # id is (structure, model, chain, residue id, (atom name, altloc)),
        # so this third field would be the chain identifier rather than a
        # residue name -- confirm before relying on the label semantics.
        _, _, residue_name, _, _ = atom.get_full_id()

        residue_names.append(residue_name)

    structure = (
        torch.stack(atomic_coordinates),
        residue_names,
    )

    # Each transform receives the value it is meant to transform; the
    # structure and target transforms were previously handed the sequence.
    if self._structure_transform_fn is not None:
        structure = self._structure_transform_fn(structure)

    target = item["affinity_pkd"]

    if self._target_transform_fn is not None:
        target = self._target_transform_fn(target)

    return (sequence, structure), target
__init__
__init__(root, *, download=False, sequence_transform_fn=None, structure_transform_fn=None, target_transform=None)

:param root: Root directory where the dataset subdirectory exists or, if :attr:download is True, the directory where the dataset subdirectory will be created and the dataset downloaded.

:param download: If True, download the dataset to the :attr:root directory (default: False). If the dataset is already downloaded, it is not redownloaded.

:param sequence_transform_fn: A Callable or Transform that maps sequences to transformed sequences (default: None).

:param structure_transform_fn: A Callable or Transform that maps structures to transformed structures (default: None).

:param target_transform: Callable or Transform that maps a target to a transformed target (default: None).

Source code in src/beignet/datasets/_skempi_dataset.py
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
def __init__(
    self,
    root: Union[str, Path],
    *,
    download: bool = False,
    sequence_transform_fn: Union[Callable, Transform, None] = None,
    structure_transform_fn: Union[Callable, Transform, None] = None,
    target_transform: Union[Callable, Transform, None] = None,
) -> None:
    """
    Set up the SKEMPI v2.0 table, transforms and structure file list.

    :param root: Root directory of the dataset. The table is read from
        ``<root>/SKEMPI-v2.0/SKEMPI-v2.0.parquet`` and structure files are
        collected from ``<root>/PDBs/*.pdb``. A ``str`` is converted to a
        resolved ``Path``; a ``Path`` is used as given.

    :param download: If ``True``, fetch the parquet table into
        ``<root>/SKEMPI-v2.0`` and download and extract the PDB archive
        with :attr:`root` as destination (default: ``False``). Whether an
        existing copy is reused is decided by ``beignet.io`` -- TODO
        confirm.

    :param sequence_transform_fn: A ``Callable`` or ``Transform`` that maps
        sequences to transformed sequences (default: ``None``).

    :param structure_transform_fn: A ``Callable`` or ``Transform`` that
        maps structures to transformed structures (default: ``None``).

    :param target_transform: ``Callable`` or ``Transform`` that maps a
        target to a transformed target (default: ``None``).
    """
    self._root = Path(root).resolve() if isinstance(root, str) else root

    dataset_directory = self._root / "SKEMPI-v2.0"

    if download:
        beignet.io.download(
            source="s3://beignet-data-dev/designdb/lake/thirdparty/skempi/cc5952a4a37f4f1fbe14ce484a00eb87_0.snappy.parquet",
            destination=dataset_directory,
            filename="SKEMPI-v2.0.parquet",
        )

        beignet.io.download_and_extract_archive(
            resource="https://life.bsc.es/pid/skempi2/database/download/SKEMPI2_PDBs.tgz",
            source=self._root,
            destination=self._root,
            name="SKEMPI-v2.0.tar.gz",
            remove_archive=True,
        )

    super().__init__(
        dataset_directory,
        dataset_directory / "SKEMPI-v2.0.parquet",
    )

    self._sequence_transform_fn = sequence_transform_fn
    self._structure_transform_fn = structure_transform_fn
    self._target_transform_fn = target_transform

    # Rows missing any field that ``__getitem__`` reads are discarded.
    required_columns = [
        "affinity_antigen_sequence",
        "affinity_pkd",
        "fv_heavy",
        "fv_light",
    ]

    self._data = self._data.dropna(subset=required_columns)

    self._parser = PDBParser()

    self._structure_paths = list(self._root.glob("PDBs/*.pdb"))

beignet.datasets.SequenceDataset

Bases: Dataset

Source code in src/beignet/datasets/_sequence_dataset.py
 7
 8
 9
10
11
12
class SequenceDataset(Dataset):
    """Base dataset that records its root directory as a resolved path."""

    def __init__(self, root: str | PathLike, *args, **kwargs):
        """
        Parameters
        ----------
        root : str | PathLike
            Root directory of the dataset. It is converted to a `Path`,
            resolved, and stored as `self.root`.

        *args, **kwargs
            Accepted so subclasses can forward extra arguments; they are
            not used here.
        """
        # ``Path`` accepts strings and any ``os.PathLike``, so objects that
        # only implement ``__fspath__`` (and have no ``resolve``) work too.
        self.root = Path(root).resolve()

beignet.datasets.SizedSequenceDataset

Bases: SequenceDataset

Source code in src/beignet/datasets/_sized_sequence_dataset.py
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
class SizedSequenceDataset(SequenceDataset):
    """Sequence dataset whose length is the length of a supplied ``sizes``."""

    def __init__(self, root: str | PathLike, sizes: ArrayLike, *args, **kwargs):
        """
        Parameters
        ----------
        root : str | PathLike
            Root directory, forwarded to `SequenceDataset`.

        sizes : ArrayLike
            Stored unchanged as `self.sizes`; its length is the dataset
            length.

        *args, **kwargs
            Forwarded unchanged to `SequenceDataset`.
        """
        super().__init__(root, *args, **kwargs)

        self.sizes = sizes

    def __len__(self) -> int:
        """Return the number of entries in `self.sizes`."""
        count = len(self.sizes)

        return count

beignet.datasets.SwissProtDataset

Bases: UniProtDataset

Source code in src/beignet/datasets/_swissprot_dataset.py
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
class SwissProtDataset(UniProtDataset):
    def __init__(
        self,
        root: str | PathLike | None = None,
        *,
        transform: Callable | Transform | None = None,
        target_transform: Callable | Transform | None = None,
    ):
        """
        Configure `UniProtDataset` for the Swiss-Prot FASTA release.

        Parameters
        ----------
        root : str | PathLike, optional
            Root directory of the dataset, forwarded unchanged to
            `UniProtDataset` (default: `None`).

        transform : Callable, optional
            A `Callable` or `Transform` that maps a sequence to a
            transformed sequence (default: `None`).

        target_transform : Callable, optional
            A `Callable` or `Transform` that maps a target to a transformed
            target (default: `None`).
        """
        # Location of the compressed FASTA file and its expected checksum.
        url = "https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot.fasta.gz"
        checksum = "md5:0766df3e5785fc5f1cfc496aa89e86ad"

        super().__init__(
            url,
            root,
            checksum,
            transform=transform,
            target_transform=target_transform,
        )
__init__
__init__(root=None, *, transform=None, target_transform=None)

Parameters:

Name Type Description Default
root str | PathLike

Root directory where the dataset subdirectory exists or, if download is True, the directory where the dataset subdirectory will be created and the dataset downloaded.

None
transform Callable

A Callable or Transform that maps a sequence to a transformed sequence (default: None).

None
target_transform Callable

A Callable or Transform that maps a target to a transformed target (default: None).

None
Source code in src/beignet/datasets/_swissprot_dataset.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
def __init__(
    self,
    root: str | PathLike | None = None,
    *,
    transform: Callable | Transform | None = None,
    target_transform: Callable | Transform | None = None,
):
    """
    Configure the parent dataset for the Swiss-Prot FASTA release.

    Parameters
    ----------
    root : str | PathLike, optional
        Root directory of the dataset, forwarded unchanged to the parent
        constructor (default: `None`).

    transform : Callable, optional
        A `Callable` or `Transform` that maps a sequence to a transformed
        sequence (default: `None`).

    target_transform : Callable, optional
        A `Callable` or `Transform` that maps a target to a transformed
        target (default: `None`).
    """
    # Location of the compressed FASTA file and its expected checksum.
    fasta_url = "https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot.fasta.gz"
    fasta_checksum = "md5:0766df3e5785fc5f1cfc496aa89e86ad"

    super().__init__(
        fasta_url,
        root,
        fasta_checksum,
        transform=transform,
        target_transform=target_transform,
    )

beignet.datasets.TDCDataset

Bases: Dataset

Source code in src/beignet/datasets/_tdc_dataset.py
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
class TDCDataset(Dataset):
    _x: List[T]
    _y: List[T]

    def __init__(
        self,
        root: str | Path,
        download: bool = False,
        *,
        identifier: int,
        suffix: str,
        checksum: str,
        x_keys: List[str],
        y_keys: List[str] | None = None,
        transform: Callable | Transform | None = None,
        target_transform: Callable | Transform | None = None,
    ):
        super().__init__()

        if isinstance(root, str):
            root = Path(root)

        if download:
            pooch.retrieve(
                f"https://dataverse.harvard.edu/api/access/datafile/{identifier}",
                fname=f"{self.__class__.__name__}.{suffix}",
                known_hash=checksum,
                path=root / self.__class__.__name__,
                progressbar=True,
            )

        path = root / self.__class__.__name__ / f"{self.__class__.__name__}.{suffix}"

        match path.suffix:
            case ".csv":
                self._data = pandas.read_csv(path)
            case ".pkl":
                self._data = pandas.read_pickle(path)
            case ".tab" | ".tsv":
                self._data = pandas.read_csv(path, sep="\t")
            case _:
                raise ValueError

        self._x_keys = x_keys
        self._y_keys = y_keys

        self.transform = transform
        self.target_transform = target_transform

        self._x = self._data[self._x_keys].apply(tuple, axis=1)

        if self._y_keys is not None:
            self._y = self._data[self._y_keys].apply(tuple, axis=1)

    def __getitem__(self, index: int) -> Tuple[T, T]:
        x = self._x[index]

        if len(x) == 1:
            x = x[0]

        if self.transform is not None:
            x = self.transform(x)

        if self._y_keys is None:
            return x

        y = self._y[index]

        if len(y) == 1:
            y = y[0]

        if self.target_transform is not None:
            y = self.target_transform(y)

        return x, y

    def __len__(self) -> int:
        return len(self._data)

beignet.datasets.TherapeuticAntibodyProfilerDataset

Bases: TDCDataset

Source code in src/beignet/datasets/_therapeutic_antibody_profiler_dataset.py
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
class TherapeuticAntibodyProfilerDataset(TDCDataset):
    def __init__(
        self,
        root: str | Path,
        *,
        download: bool = False,
        transform: Callable | Transform | None = None,
        target_transform: Callable | Transform | None = None,
    ):
        r"""
        Configure `TDCDataset` for the Therapeutic Antibody Profiler table.

        Parameters
        ----------
        root : str | Path
            Root directory of dataset.

        download: bool
            Forwarded to `TDCDataset`; if `True`, the dataset file is
            retrieved into the root directory. Default, `False`.

        transform : Callable | Transform | None
            Transforms the input (the `X` column).

        target_transform : Callable | Transform | None
            Transforms the target (the five profiler columns).
        """
        target_columns = ["CDR_Length", "PNC", "PPC", "PSH", "SFvCSP"]

        super().__init__(
            root=root,
            download=download,
            identifier=4167113,
            suffix="tsv",
            checksum="md5:0a1b07fe1bdc9f67636f72878097841e",
            x_keys=["X"],
            y_keys=target_columns,
            transform=transform,
            target_transform=target_transform,
        )
__init__
__init__(root, *, download=False, transform=None, target_transform=None)

Parameters:

Name Type Description Default
root str | Path

Root directory of dataset.

required
download bool

If True, downloads the dataset to the root directory. If dataset already exists, it is not redownloaded. Default, False.

False
transform Callable | Transform | None

Transforms the input.

None
target_transform Callable | Transform | None

Transforms the target.

None
Source code in src/beignet/datasets/_therapeutic_antibody_profiler_dataset.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
def __init__(
    self,
    root: str | Path,
    *,
    download: bool = False,
    transform: Callable | Transform | None = None,
    target_transform: Callable | Transform | None = None,
):
    r"""
    Configure the parent dataset for the Therapeutic Antibody Profiler table.

    Parameters
    ----------
    root : str | Path
        Root directory of dataset.

    download: bool
        Forwarded to the parent constructor; if `True`, the dataset file is
        retrieved into the root directory. Default, `False`.

    transform : Callable | Transform | None
        Transforms the input (the `X` column).

    target_transform : Callable | Transform | None
        Transforms the target (the five profiler columns).
    """
    input_columns = ["X"]
    target_columns = ["CDR_Length", "PNC", "PPC", "PSH", "SFvCSP"]

    super().__init__(
        root=root,
        download=download,
        identifier=4167113,
        suffix="tsv",
        checksum="md5:0a1b07fe1bdc9f67636f72878097841e",
        x_keys=input_columns,
        y_keys=target_columns,
        transform=transform,
        target_transform=target_transform,
    )

beignet.datasets.Tox21Dataset

Bases: TDCDataset

Source code in src/beignet/datasets/_tox21_dataset.py
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
class Tox21Dataset(TDCDataset):
    def __init__(
        self,
        root: str | Path,
        *,
        download: bool = False,
        transform: Callable | Transform | None = None,
        target_transform: Callable | Transform | None = None,
    ):
        r"""
        Configure `TDCDataset` for the Tox21 table.

        Parameters
        ----------
        root : str | Path
            Root directory of dataset.

        download: bool
            Forwarded to `TDCDataset`; if `True`, the dataset file is
            retrieved into the root directory. Default, `False`.

        transform : Callable | Transform | None
            Transforms the input (the `X` column).

        target_transform : Callable | Transform | None
            Transforms the target (the twelve `NR-*` / `SR-*` columns).
        """
        # Target columns, in the order they appear in each target tuple.
        nuclear_receptor_columns = [
            "NR-AR",
            "NR-AR-LBD",
            "NR-AhR",
            "NR-Aromatase",
            "NR-ER",
            "NR-ER-LBD",
            "NR-PPAR-gamma",
        ]
        stress_response_columns = [
            "SR-ARE",
            "SR-ATAD5",
            "SR-HSE",
            "SR-MMP",
            "SR-p53",
        ]

        super().__init__(
            root=root,
            download=download,
            identifier=4259612,
            suffix="tsv",
            checksum="md5:6f926279d60d413f0524894fdcb9ba5e",
            x_keys=["X"],
            y_keys=nuclear_receptor_columns + stress_response_columns,
            transform=transform,
            target_transform=target_transform,
        )
__init__
__init__(root, *, download=False, transform=None, target_transform=None)

Parameters:

Name Type Description Default
root str | Path

Root directory of dataset.

required
download bool

If True, downloads the dataset to the root directory. If dataset already exists, it is not redownloaded. Default, False.

False
transform Callable | Transform | None

Transforms the input.

None
target_transform Callable | Transform | None

Transforms the target.

None
Source code in src/beignet/datasets/_tox21_dataset.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
def __init__(
    self,
    root: str | Path,
    *,
    download: bool = False,
    transform: Callable | Transform | None = None,
    target_transform: Callable | Transform | None = None,
):
    r"""
    Configure the parent dataset for the Tox21 table.

    Parameters
    ----------
    root : str | Path
        Root directory of dataset.

    download: bool
        Forwarded to the parent constructor; if `True`, the dataset file is
        retrieved into the root directory. Default, `False`.

    transform : Callable | Transform | None
        Transforms the input (the `X` column).

    target_transform : Callable | Transform | None
        Transforms the target (the twelve `NR-*` / `SR-*` columns).
    """
    # Target columns, in the order they appear in each target tuple.
    target_columns = [
        "NR-AR",
        "NR-AR-LBD",
        "NR-AhR",
        "NR-Aromatase",
        "NR-ER",
        "NR-ER-LBD",
        "NR-PPAR-gamma",
        "SR-ARE",
        "SR-ATAD5",
        "SR-HSE",
        "SR-MMP",
        "SR-p53",
    ]

    super().__init__(
        root=root,
        download=download,
        identifier=4259612,
        suffix="tsv",
        checksum="md5:6f926279d60d413f0524894fdcb9ba5e",
        x_keys=["X"],
        y_keys=target_columns,
        transform=transform,
        target_transform=target_transform,
    )

beignet.datasets.ToxCastDataset

Bases: TDCDataset

Source code in src/beignet/datasets/_toxcast_dataset.py
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
class ToxCastDataset(TDCDataset):
    def __init__(
        self,
        root: str | Path,
        *,
        download: bool = False,
        transform: Callable | Transform | None = None,
        target_transform: Callable | Transform | None = None,
    ):
        r"""
        Configure `TDCDataset` for the ToxCast table.

        Parameters
        ----------
        root : str | Path
            Root directory of dataset.

        download: bool
            Forwarded to `TDCDataset`; if `True`, the dataset file is
            retrieved into the root directory. Default, `False`.

        transform : Callable | Transform | None
            Transforms the input.

        target_transform : Callable | Transform | None
            Transforms the target.
        """
        # NOTE(review): the file suffix and both column lists are empty
        # strings, which look like unfinished placeholders -- TODO confirm
        # the real suffix and column names before relying on this class.
        file_suffix = ""
        input_columns = [""]
        target_columns = [""]

        super().__init__(
            root=root,
            download=download,
            identifier=4259613,
            suffix=file_suffix,
            checksum="md5:a0791c8232b86fdb657f714ffa05e92a",
            x_keys=input_columns,
            y_keys=target_columns,
            transform=transform,
            target_transform=target_transform,
        )
__init__
__init__(root, *, download=False, transform=None, target_transform=None)

Parameters:

Name Type Description Default
root str | Path

Root directory of dataset.

required
download bool

If True, downloads the dataset to the root directory. If dataset already exists, it is not redownloaded. Default, False.

False
transform Callable | Transform | None

Transforms the input.

None
target_transform Callable | Transform | None

Transforms the target.

None
Source code in src/beignet/datasets/_toxcast_dataset.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
def __init__(
    self,
    root: str | Path,
    *,
    download: bool = False,
    transform: Callable | Transform | None = None,
    target_transform: Callable | Transform | None = None,
):
    r"""
    Configure the parent dataset for the ToxCast table.

    Parameters
    ----------
    root : str | Path
        Root directory of dataset.

    download: bool
        Forwarded to the parent constructor; if `True`, the dataset file is
        retrieved into the root directory. Default, `False`.

    transform : Callable | Transform | None
        Transforms the input.

    target_transform : Callable | Transform | None
        Transforms the target.
    """
    # NOTE(review): the file suffix and both column lists are empty strings,
    # which look like unfinished placeholders -- TODO confirm the real
    # suffix and column names before relying on this constructor.
    placeholder = ""

    super().__init__(
        root=root,
        download=download,
        identifier=4259613,
        suffix=placeholder,
        checksum="md5:a0791c8232b86fdb657f714ffa05e92a",
        x_keys=[placeholder],
        y_keys=[placeholder],
        transform=transform,
        target_transform=target_transform,
    )

beignet.datasets.TrEMBLDataset

Bases: UniProtDataset

Source code in src/beignet/datasets/_trembl_dataset.py
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
class TrEMBLDataset(UniProtDataset):
    def __init__(
        self,
        root: str | PathLike | None = None,
        *,
        transform: Callable | Transform | None = None,
        target_transform: Callable | Transform | None = None,
    ):
        """
        Configure `UniProtDataset` for the TrEMBL FASTA release.

        Parameters
        ----------
        root : str | PathLike, optional
            Root directory of the dataset, forwarded unchanged to
            `UniProtDataset` (default: `None`).

        transform : Callable, optional
            A `Callable` or `Transform` that maps a sequence to a
            transformed sequence (default: `None`).

        target_transform : Callable, optional
            A `Callable` or `Transform` that maps a target to a transformed
            target (default: `None`). Earlier documentation describes the
            target as a cluster identifier -- TODO confirm.
        """
        # Location of the compressed FASTA file and its expected checksum.
        url = "https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_trembl.fasta.gz"
        checksum = "md5:56f0f20479a88d28fb51db7ef4df90ed"

        super().__init__(
            url,
            root,
            checksum,
            transform=transform,
            target_transform=target_transform,
        )
__init__
__init__(root=None, *, transform=None, target_transform=None)

Parameters:

Name Type Description Default
root str | PathLike

Root directory where the dataset subdirectory exists or, if download is True, the directory where the dataset subdirectory will be created and the dataset downloaded.

None
transform Callable

A Callable or Transform that maps a sequence to a transformed sequence (default: None).

None
target_transform Callable

A Callable or Transform that maps a target (a cluster identifier) to a transformed target (default: None).

None
Source code in src/beignet/datasets/_trembl_dataset.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
def __init__(
    self,
    root: str | PathLike | None = None,
    *,
    transform: Callable | Transform | None = None,
    target_transform: Callable | Transform | None = None,
):
    """
    Configure the parent dataset for the TrEMBL FASTA release.

    Parameters
    ----------
    root : str | PathLike, optional
        Root directory of the dataset, forwarded unchanged to the parent
        constructor (default: `None`).

    transform : Callable, optional
        A `Callable` or `Transform` that maps a sequence to a transformed
        sequence (default: `None`).

    target_transform : Callable, optional
        A `Callable` or `Transform` that maps a target to a transformed
        target (default: `None`). Earlier documentation describes the
        target as a cluster identifier -- TODO confirm.
    """
    # Location of the compressed FASTA file and its expected checksum.
    fasta_url = "https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_trembl.fasta.gz"
    fasta_checksum = "md5:56f0f20479a88d28fb51db7ef4df90ed"

    super().__init__(
        fasta_url,
        root,
        fasta_checksum,
        transform=transform,
        target_transform=target_transform,
    )

beignet.datasets.TrajectoryDataset

Bases: Dataset

Source code in src/beignet/datasets/_trajectory_dataset.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
class TrajectoryDataset(Dataset):
    """
    Dataset with one item per file in ``root`` that has a given extension.

    Each item is produced by calling a loader function on the file's path.
    """

    def __init__(
        self,
        func: Callable,
        extension: str,
        root: str | PathLike,
        transform: Callable[[Trajectory], Any] | None = None,
        stride: int | None = None,
        **kwargs,
    ):
        """
        Parameters
        ----------
        func : Callable
            Loader called as `func(path, stride=stride, **kwargs)` for each
            requested item.

        extension : str
            File extension, without the leading dot, used to select files
            directly inside `root`.

        root : str | PathLike
            Directory that holds the files; stored resolved as `self.root`.

        transform : Callable, optional
            Applied to each loaded item when not `None` (default: `None`).

        stride : int, optional
            Passed to `func` as the `stride` keyword (default: `None`).

        **kwargs
            Extra keyword arguments bound to `func`.
        """
        self.func = functools.partial(func, **kwargs)

        # ``Path`` accepts strings and any ``os.PathLike``, so objects that
        # only implement ``__fspath__`` (and have no ``resolve``) work too.
        self.root = Path(root).resolve()

        self.transform = transform

        self.stride = stride

        # NOTE(review): glob order is whatever the file system yields, so
        # the index-to-file mapping may differ between machines.
        self.paths = [*self.root.glob(f"*.{extension}")]

        super().__init__()

    def __getitem__(self, index: int) -> Trajectory:
        """Load the file at position `index` and apply `transform` if set."""
        item = self.func(self.paths[index], stride=self.stride)

        # Compare against ``None`` rather than truthiness so a callable that
        # happens to evaluate falsy (for example an empty container-like
        # transform) is still applied.
        if self.transform is not None:
            item = self.transform(item)

        return item

    def __len__(self) -> int:
        """Return the number of matching files."""
        return len(self.paths)

beignet.datasets.USPTODataset

Bases: TDCDataset

Source code in src/beignet/datasets/_uspto_dataset.py
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
class USPTODataset(TDCDataset):
    def __init__(
        self,
        root: str | Path,
        *,
        download: bool = False,
        transform: Callable | Transform | None = None,
        target_transform: Callable | Transform | None = None,
    ):
        r"""
        Configure `TDCDataset` for the USPTO reactant/product table.

        Parameters
        ----------
        root : str | Path
            Root directory of dataset.

        download: bool
            Forwarded to `TDCDataset`; if `True`, the dataset file is
            retrieved into the root directory. Default, `False`.

        transform : Callable | Transform | None
            Transforms the input (the `reactant` column).

        target_transform : Callable | Transform | None
            Transforms the target (the `product` column).
        """
        input_columns = ["reactant"]
        target_columns = ["product"]

        super().__init__(
            root=root,
            download=download,
            identifier=4171642,
            suffix="csv",
            checksum="md5:818b06cd7dff707d5ae2c82109ff8668",
            x_keys=input_columns,
            y_keys=target_columns,
            transform=transform,
            target_transform=target_transform,
        )
__init__
__init__(root, *, download=False, transform=None, target_transform=None)

Parameters:

Name Type Description Default
root str | Path

Root directory of dataset.

required
download bool

If True, downloads the dataset to the root directory. If dataset already exists, it is not redownloaded. Default, False.

False
transform Callable | Transform | None

Transforms the input.

None
target_transform Callable | Transform | None

Transforms the target.

None
Source code in src/beignet/datasets/_uspto_dataset.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
def __init__(
    self,
    root: str | Path,
    *,
    download: bool = False,
    transform: Callable | Transform | None = None,
    target_transform: Callable | Transform | None = None,
):
    r"""
    Configure the parent dataset for the USPTO reactant/product table.

    Parameters
    ----------
    root : str | Path
        Root directory of dataset.

    download: bool
        Forwarded to the parent constructor; if `True`, the dataset file is
        retrieved into the root directory. Default, `False`.

    transform : Callable | Transform | None
        Transforms the input (the `reactant` column).

    target_transform : Callable | Transform | None
        Transforms the target (the `product` column).
    """
    # Column selection: reactants are the inputs, products the targets.
    columns = {"x_keys": ["reactant"], "y_keys": ["product"]}

    super().__init__(
        root=root,
        download=download,
        identifier=4171642,
        suffix="csv",
        checksum="md5:818b06cd7dff707d5ae2c82109ff8668",
        transform=transform,
        target_transform=target_transform,
        **columns,
    )

beignet.datasets.USPTOReactionProductDataset

Bases: TDCDataset

Source code in src/beignet/datasets/_uspto_reaction_product_dataset.py
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
class USPTOReactionProductDataset(TDCDataset):
    def __init__(
        self,
        root: str | Path,
        *,
        download: bool = False,
        transform: Callable | Transform | None = None,
        target_transform: Callable | Transform | None = None,
    ):
        r"""
        Forward this dataset's fixed source settings, together with the
        caller's options, to the parent initializer.

        Parameters
        ----------
        root : str | Path
            Directory that serves as the root of the dataset.

        download : bool
            When `True`, the dataset is downloaded into the root directory;
            a dataset that already exists is not downloaded again. Defaults
            to `False`.

        transform : Callable | Transform | None
            Applied to the input.

        target_transform : Callable | Transform | None
            Applied to the target.
        """
        # NOTE(review): every dataset-specific value below is a zero or
        # empty placeholder — confirm the real values.
        settings = {
            "identifier": 0,
            "suffix": "",
            "checksum": "",
            "x_keys": [""],
            "y_keys": [""],
        }

        super().__init__(
            root=root,
            download=download,
            transform=transform,
            target_transform=target_transform,
            **settings,
        )
__init__
__init__(root, *, download=False, transform=None, target_transform=None)

Parameters:

Name Type Description Default
root str | Path

Root directory of dataset.

required
download bool

If True, downloads the dataset to the root directory. If dataset already exists, it is not redownloaded. Default, False.

False
transform Callable | Transform | None

Transforms the input.

None
target_transform Callable | Transform | None

Transforms the target.

None
Source code in src/beignet/datasets/_uspto_reaction_product_dataset.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
def __init__(
    self,
    root: str | Path,
    *,
    download: bool = False,
    transform: Callable | Transform | None = None,
    target_transform: Callable | Transform | None = None,
):
    r"""
    Forward this dataset's fixed source settings, together with the
    caller's options, to the parent initializer.

    Parameters
    ----------
    root : str | Path
        Directory that serves as the root of the dataset.

    download : bool
        When `True`, the dataset is downloaded into the root directory; a
        dataset that already exists is not downloaded again. Defaults to
        `False`.

    transform : Callable | Transform | None
        Applied to the input.

    target_transform : Callable | Transform | None
        Applied to the target.
    """
    # NOTE(review): every dataset-specific value below is a zero or empty
    # placeholder — confirm the real values.
    settings = {
        "identifier": 0,
        "suffix": "",
        "checksum": "",
        "x_keys": [""],
        "y_keys": [""],
    }

    super().__init__(
        root=root,
        download=download,
        transform=transform,
        target_transform=target_transform,
        **settings,
    )

beignet.datasets.UniProtDataset

Bases: FASTADataset

Source code in src/beignet/datasets/_uniprot_dataset.py
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
class UniProtDataset(FASTADataset):
    def __init__(
        self,
        url: str,
        root: str | PathLike | None = None,
        known_hash: str | None = None,
        *,
        transform: Callable | Transform | None = None,
        target_transform: Callable | Transform | None = None,
    ):
        """
        Retrieve a FASTA archive with `pooch.retrieve` and pass the result
        to the parent initializer.

        Parameters
        ----------
        url : str
            URL to the file that needs to be downloaded. Ideally, the URL
            should end with a file name (e.g., `uniref50.fasta.gz`).

        root : str | PathLike, optional
            Root directory under which the dataset subdirectory (the class
            name without the `Dataset` suffix) is located. If `None`,
            `pooch.os_cache("beignet")` is used.

        known_hash : str, optional
            Value forwarded to `pooch.retrieve` as the known hash of the
            file (default: `None`).

        transform : Callable | Transform, optional
            A `Callable` or `Transform` that maps a sequence to a
            transformed sequence (default: `None`).

        target_transform : Callable | Transform, optional
            A `Callable` or `Transform` that maps a target (a cluster
            identifier) to a transformed target (default: `None`).
        """
        if root is None:
            root = pooch.os_cache("beignet")

        # Normalize every accepted input (`str`, `Path`, or any other
        # path-like object) so that `.resolve()` and `/` are available.
        root = Path(root)

        self.root = root.resolve()

        name = self.__class__.__name__.replace("Dataset", "")

        super().__init__(
            pooch.retrieve(
                url,
                known_hash,
                f"{name}.fasta.gz",
                root / name,
                processor=Decompress(
                    name=f"{name}.fasta",
                ),
                progressbar=True,
            ),
        )

        # Assigned after the parent initializer has run, as before.
        self.transform = transform

        self.target_transform = target_transform

    def __getitem__(self, index: int) -> tuple[str, str]:
        """
        Return the (input, target) pair at `index`, applying the configured
        transforms.

        Parameters
        ----------
        index : int
            Position of the record, forwarded to `self.get`.

        Returns
        -------
        tuple
            The input and the target, each passed through its transform
            when one was supplied.
        """
        sequence, target = self.get(index)

        # Compare against `None` so that a callable which happens to be
        # falsy is still applied.
        if self.transform is not None:
            sequence = self.transform(sequence)

        if self.target_transform is not None:
            target = self.target_transform(target)

        return sequence, target
__init__
__init__(url, root=None, known_hash=None, *, transform=None, target_transform=None)

Parameters:

Name Type Description Default
url str

URL to the file that needs to be downloaded. Ideally, the URL should end with a file name (e.g., uniref50.fasta.gz).

required
root str | PathLike

Root directory where the dataset subdirectory exists or, if the dataset is not there yet, the directory where the dataset subdirectory will be created and the dataset downloaded. If None, the pooch cache directory for beignet is used.

None
transform Callable | Transform

A Callable or Transform that maps a sequence to a transformed sequence (default: None).

None
target_transform Callable | Transform

A Callable or Transform that maps a target (a cluster identifier) to a transformed target (default: None).

None
Source code in src/beignet/datasets/_uniprot_dataset.py
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
def __init__(
    self,
    url: str,
    root: str | PathLike | None = None,
    known_hash: str | None = None,
    *,
    transform: Callable | Transform | None = None,
    target_transform: Callable | Transform | None = None,
):
    """
    Retrieve a FASTA archive with `pooch.retrieve` and pass the result to
    the parent initializer.

    Parameters
    ----------
    url : str
        URL to the file that needs to be downloaded. Ideally, the URL
        should end with a file name (e.g., `uniref50.fasta.gz`).

    root : str | PathLike, optional
        Root directory under which the dataset subdirectory (the class
        name without the `Dataset` suffix) is located. If `None`,
        `pooch.os_cache("beignet")` is used.

    known_hash : str, optional
        Value forwarded to `pooch.retrieve` as the known hash of the file
        (default: `None`).

    transform : Callable | Transform, optional
        A `Callable` or `Transform` that maps a sequence to a transformed
        sequence (default: `None`).

    target_transform : Callable | Transform, optional
        A `Callable` or `Transform` that maps a target (a cluster
        identifier) to a transformed target (default: `None`).
    """
    if root is None:
        root = pooch.os_cache("beignet")

    # Normalize every accepted input (`str`, `Path`, or any other path-like
    # object) so that `.resolve()` and `/` are available.
    root = Path(root)

    self.root = root.resolve()

    name = self.__class__.__name__.replace("Dataset", "")

    super().__init__(
        pooch.retrieve(
            url,
            known_hash,
            f"{name}.fasta.gz",
            root / name,
            processor=Decompress(
                name=f"{name}.fasta",
            ),
            progressbar=True,
        ),
    )

    # Assigned after the parent initializer has run, as before.
    self.transform = transform

    self.target_transform = target_transform

beignet.datasets.UniRef100Dataset

Bases: UniProtDataset

Source code in src/beignet/datasets/_uniref100_dataset.py
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
class UniRef100Dataset(UniProtDataset):
    def __init__(
        self,
        root: str | Path,
        *,
        transform: Callable | Transform | None = None,
        target_transform: Callable | Transform | None = None,
    ):
        r"""
        Initialize the parent dataset with the UniRef100 FASTA archive URL
        and its MD5 hash.

        Parameters
        ----------
        root : str | Path
            Root directory under which the dataset subdirectory lives;
            passed through to the parent initializer.

        transform : Callable | Transform, optional
            Maps a sequence to a transformed sequence (default: `None`).

        target_transform : Callable | Transform, optional
            Maps a target (a cluster identifier) to a transformed target
            (default: `None`).
        """
        # NOTE(review): this URL uses plain HTTP — consider whether an HTTPS
        # URL should be used instead.
        url = "http://ftp.uniprot.org/pub/databases/uniprot/uniref/uniref100/uniref100.fasta.gz"
        known_hash = "md5:0354240a56f4ca91ff426f8241cfeb7d"

        super().__init__(
            url,
            root,
            known_hash,
            transform=transform,
            target_transform=target_transform,
        )
__init__
__init__(root, *, transform=None, target_transform=None)

Parameters:

Name Type Description Default
root str | Path

Root directory where the dataset subdirectory exists or, if the dataset is not there yet, the directory where the dataset subdirectory will be created and the dataset downloaded.

required
transform Callable

A Callable or Transform that maps a sequence to a transformed sequence (default: None).

None
target_transform Callable

A Callable or Transform that maps a target (a cluster identifier) to a transformed target (default: None).

None
Source code in src/beignet/datasets/_uniref100_dataset.py
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
def __init__(
    self,
    root: str | Path,
    *,
    transform: Callable | Transform | None = None,
    target_transform: Callable | Transform | None = None,
):
    r"""
    Initialize the parent dataset with the UniRef100 FASTA archive URL and
    its MD5 hash.

    Parameters
    ----------
    root : str | Path
        Root directory under which the dataset subdirectory lives; passed
        through to the parent initializer.

    transform : Callable | Transform, optional
        Maps a sequence to a transformed sequence (default: `None`).

    target_transform : Callable | Transform, optional
        Maps a target (a cluster identifier) to a transformed target
        (default: `None`).
    """
    # NOTE(review): this URL uses plain HTTP — consider whether an HTTPS URL
    # should be used instead.
    url = "http://ftp.uniprot.org/pub/databases/uniprot/uniref/uniref100/uniref100.fasta.gz"
    known_hash = "md5:0354240a56f4ca91ff426f8241cfeb7d"

    super().__init__(
        url,
        root,
        known_hash,
        transform=transform,
        target_transform=target_transform,
    )

beignet.datasets.UniRef50Dataset

Bases: UniProtDataset

Source code in src/beignet/datasets/_uniref50_dataset.py
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
class UniRef50Dataset(UniProtDataset):
    def __init__(
        self,
        root: str | PathLike | None = None,
        *,
        transform: Callable | Transform | None = None,
        target_transform: Callable | Transform | None = None,
    ):
        """
        Initialize the parent dataset with the UniRef50 FASTA archive URL
        and its MD5 hash.

        Parameters
        ----------
        root : str | PathLike, optional
            Root directory under which the dataset subdirectory lives;
            passed through to the parent initializer (default: `None`).

        transform : Callable | Transform, optional
            Maps a sequence to a transformed sequence (default: `None`).

        target_transform : Callable | Transform, optional
            Maps a target (a cluster identifier) to a transformed target
            (default: `None`).
        """
        # NOTE(review): this URL uses plain HTTP — consider whether an HTTPS
        # URL should be used instead.
        url = "http://ftp.uniprot.org/pub/databases/uniprot/uniref/uniref50/uniref50.fasta.gz"
        known_hash = "md5:e638c63230d13ad5e2098115b9cb5d8f"

        super().__init__(
            url,
            root,
            known_hash,
            transform=transform,
            target_transform=target_transform,
        )
__init__
__init__(root=None, *, transform=None, target_transform=None)

Parameters:

Name Type Description Default
root str | PathLike

Root directory where the dataset subdirectory exists or, if the dataset is not there yet, the directory where the dataset subdirectory will be created and the dataset downloaded. If None, the pooch cache directory for beignet is used.

None
transform Callable

A Callable or Transform that maps a sequence to a transformed sequence (default: None).

None
target_transform Callable

A Callable or Transform that maps a target (a cluster identifier) to a transformed target (default: None).

None
Source code in src/beignet/datasets/_uniref50_dataset.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
def __init__(
    self,
    root: str | PathLike | None = None,
    *,
    transform: Callable | Transform | None = None,
    target_transform: Callable | Transform | None = None,
):
    """
    Initialize the parent dataset with the UniRef50 FASTA archive URL and
    its MD5 hash.

    Parameters
    ----------
    root : str | PathLike, optional
        Root directory under which the dataset subdirectory lives; passed
        through to the parent initializer (default: `None`).

    transform : Callable | Transform, optional
        Maps a sequence to a transformed sequence (default: `None`).

    target_transform : Callable | Transform, optional
        Maps a target (a cluster identifier) to a transformed target
        (default: `None`).
    """
    # NOTE(review): this URL uses plain HTTP — consider whether an HTTPS URL
    # should be used instead.
    url = "http://ftp.uniprot.org/pub/databases/uniprot/uniref/uniref50/uniref50.fasta.gz"
    known_hash = "md5:e638c63230d13ad5e2098115b9cb5d8f"

    super().__init__(
        url,
        root,
        known_hash,
        transform=transform,
        target_transform=target_transform,
    )

beignet.datasets.UniRef90Dataset

Bases: UniProtDataset

Source code in src/beignet/datasets/_uniref90_dataset.py
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
class UniRef90Dataset(UniProtDataset):
    def __init__(
        self,
        root: str | Path,
        *,
        transform: Callable | Transform | None = None,
        target_transform: Callable | Transform | None = None,
    ):
        r"""
        Initialize the parent dataset with the UniRef90 FASTA archive URL
        and its MD5 hash.

        Parameters
        ----------
        root : str | Path
            Root directory under which the dataset subdirectory lives;
            passed through to the parent initializer.

        transform : Callable | Transform, optional
            Maps a sequence to a transformed sequence (default: `None`).

        target_transform : Callable | Transform, optional
            Maps a target (a cluster identifier) to a transformed target
            (default: `None`).
        """
        # NOTE(review): this URL uses plain HTTP — consider whether an HTTPS
        # URL should be used instead.
        url = "http://ftp.uniprot.org/pub/databases/uniprot/uniref/uniref90/uniref90.fasta.gz"
        known_hash = "md5:6161bad4d7506365aee882fd5ff9c833"

        super().__init__(
            url,
            root,
            known_hash,
            transform=transform,
            target_transform=target_transform,
        )
__init__
__init__(root, *, transform=None, target_transform=None)

Parameters:

Name Type Description Default
root str | Path

Root directory where the dataset subdirectory exists or, if the dataset is not there yet, the directory where the dataset subdirectory will be created and the dataset downloaded.

required
transform Callable

A Callable or Transform that maps a sequence to a transformed sequence (default: None).

None
target_transform Callable

A Callable or Transform that maps a target (a cluster identifier) to a transformed target (default: None).

None
Source code in src/beignet/datasets/_uniref90_dataset.py
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
def __init__(
    self,
    root: str | Path,
    *,
    transform: Callable | Transform | None = None,
    target_transform: Callable | Transform | None = None,
):
    r"""
    Initialize the parent dataset with the UniRef90 FASTA archive URL and
    its MD5 hash.

    Parameters
    ----------
    root : str | Path
        Root directory under which the dataset subdirectory lives; passed
        through to the parent initializer.

    transform : Callable | Transform, optional
        Maps a sequence to a transformed sequence (default: `None`).

    target_transform : Callable | Transform, optional
        Maps a target (a cluster identifier) to a transformed target
        (default: `None`).
    """
    # NOTE(review): this URL uses plain HTTP — consider whether an HTTPS URL
    # should be used instead.
    url = "http://ftp.uniprot.org/pub/databases/uniprot/uniref/uniref90/uniref90.fasta.gz"
    known_hash = "md5:6161bad4d7506365aee882fd5ff9c833"

    super().__init__(
        url,
        root,
        known_hash,
        transform=transform,
        target_transform=target_transform,
    )

beignet.datasets.VeithCytochromeP4501A2InhibitionDataset

Bases: TDCDataset

Source code in src/beignet/datasets/_veith_cytochrome_p450_1a2_inhibition_dataset.py
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
class VeithCytochromeP4501A2InhibitionDataset(TDCDataset):
    def __init__(
        self,
        root: str | Path,
        *,
        download: bool = False,
        transform: Callable | Transform | None = None,
        target_transform: Callable | Transform | None = None,
    ):
        r"""
        Forward this dataset's fixed source settings, together with the
        caller's options, to the parent initializer.

        Parameters
        ----------
        root : str | Path
            Directory that serves as the root of the dataset.

        download : bool
            When `True`, the dataset is downloaded into the root directory;
            a dataset that already exists is not downloaded again. Defaults
            to `False`.

        transform : Callable | Transform | None
            Applied to the input.

        target_transform : Callable | Transform | None
            Applied to the target.
        """
        # Dataset-specific values forwarded unchanged to the parent
        # initializer.
        settings = {
            "identifier": 4259573,
            "suffix": "tsv",
            "checksum": "md5:ab58d48970ff880fd5a03f3f6eaadb76",
            "x_keys": ["Drug"],
            "y_keys": ["Y"],
        }

        super().__init__(
            root=root,
            download=download,
            transform=transform,
            target_transform=target_transform,
            **settings,
        )
__init__
__init__(root, *, download=False, transform=None, target_transform=None)

Parameters:

Name Type Description Default
root str | Path

Root directory of dataset.

required
download bool

If True, downloads the dataset to the root directory. If dataset already exists, it is not redownloaded. Default, False.

False
transform Callable | Transform | None

Transforms the input.

None
target_transform Callable | Transform | None

Transforms the target.

None
Source code in src/beignet/datasets/_veith_cytochrome_p450_1a2_inhibition_dataset.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
def __init__(
    self,
    root: str | Path,
    *,
    download: bool = False,
    transform: Callable | Transform | None = None,
    target_transform: Callable | Transform | None = None,
):
    r"""
    Forward this dataset's fixed source settings, together with the
    caller's options, to the parent initializer.

    Parameters
    ----------
    root : str | Path
        Directory that serves as the root of the dataset.

    download : bool
        When `True`, the dataset is downloaded into the root directory; a
        dataset that already exists is not downloaded again. Defaults to
        `False`.

    transform : Callable | Transform | None
        Applied to the input.

    target_transform : Callable | Transform | None
        Applied to the target.
    """
    # Dataset-specific values forwarded unchanged to the parent initializer.
    settings = {
        "identifier": 4259573,
        "suffix": "tsv",
        "checksum": "md5:ab58d48970ff880fd5a03f3f6eaadb76",
        "x_keys": ["Drug"],
        "y_keys": ["Y"],
    }

    super().__init__(
        root=root,
        download=download,
        transform=transform,
        target_transform=target_transform,
        **settings,
    )

beignet.datasets.VeithCytochromeP4502C19InhibitionDataset

Bases: TDCDataset

Source code in src/beignet/datasets/_veith_cytochrome_p450_2c19_inhibition_dataset.py
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
class VeithCytochromeP4502C19InhibitionDataset(TDCDataset):
    def __init__(
        self,
        root: str | Path,
        *,
        download: bool = False,
        transform: Callable | Transform | None = None,
        target_transform: Callable | Transform | None = None,
    ):
        r"""
        Forward this dataset's fixed source settings, together with the
        caller's options, to the parent initializer.

        Parameters
        ----------
        root : str | Path
            Directory that serves as the root of the dataset.

        download : bool
            When `True`, the dataset is downloaded into the root directory;
            a dataset that already exists is not downloaded again. Defaults
            to `False`.

        transform : Callable | Transform | None
            Applied to the input.

        target_transform : Callable | Transform | None
            Applied to the target.
        """
        # Dataset-specific values forwarded unchanged to the parent
        # initializer.
        settings = {
            "identifier": 4259576,
            "suffix": "tsv",
            "checksum": "md5:fe0c4420effb5df2417fa9c9a2ba07ae",
            "x_keys": ["Drug"],
            "y_keys": ["Y"],
        }

        super().__init__(
            root=root,
            download=download,
            transform=transform,
            target_transform=target_transform,
            **settings,
        )
__init__
__init__(root, *, download=False, transform=None, target_transform=None)

Parameters:

Name Type Description Default
root str | Path

Root directory of dataset.

required
download bool

If True, downloads the dataset to the root directory. If dataset already exists, it is not redownloaded. Default, False.

False
transform Callable | Transform | None

Transforms the input.

None
target_transform Callable | Transform | None

Transforms the target.

None
Source code in src/beignet/datasets/_veith_cytochrome_p450_2c19_inhibition_dataset.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
def __init__(
    self,
    root: str | Path,
    *,
    download: bool = False,
    transform: Callable | Transform | None = None,
    target_transform: Callable | Transform | None = None,
):
    r"""
    Forward this dataset's fixed source settings, together with the
    caller's options, to the parent initializer.

    Parameters
    ----------
    root : str | Path
        Directory that serves as the root of the dataset.

    download : bool
        When `True`, the dataset is downloaded into the root directory; a
        dataset that already exists is not downloaded again. Defaults to
        `False`.

    transform : Callable | Transform | None
        Applied to the input.

    target_transform : Callable | Transform | None
        Applied to the target.
    """
    # Dataset-specific values forwarded unchanged to the parent initializer.
    settings = {
        "identifier": 4259576,
        "suffix": "tsv",
        "checksum": "md5:fe0c4420effb5df2417fa9c9a2ba07ae",
        "x_keys": ["Drug"],
        "y_keys": ["Y"],
    }

    super().__init__(
        root=root,
        download=download,
        transform=transform,
        target_transform=target_transform,
        **settings,
    )

beignet.datasets.VeithCytochromeP4502C9InhibitionDataset

Bases: TDCDataset

Source code in src/beignet/datasets/_veith_cytochrome_p450_2c9_inhibition_dataset.py
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
class VeithCytochromeP4502C9InhibitionDataset(TDCDataset):
    def __init__(
        self,
        root: str | Path,
        *,
        download: bool = False,
        transform: Callable | Transform | None = None,
        target_transform: Callable | Transform | None = None,
    ):
        r"""
        Forward this dataset's fixed source settings, together with the
        caller's options, to the parent initializer.

        Parameters
        ----------
        root : str | Path
            Directory that serves as the root of the dataset.

        download : bool
            When `True`, the dataset is downloaded into the root directory;
            a dataset that already exists is not downloaded again. Defaults
            to `False`.

        transform : Callable | Transform | None
            Applied to the input.

        target_transform : Callable | Transform | None
            Applied to the target.
        """
        # Dataset-specific values forwarded unchanged to the parent
        # initializer.
        settings = {
            "identifier": 4259577,
            "suffix": "tsv",
            "checksum": "md5:87d21d2666e8e2bfc76f7d693e060c0c",
            "x_keys": ["Drug"],
            "y_keys": ["Y"],
        }

        super().__init__(
            root=root,
            download=download,
            transform=transform,
            target_transform=target_transform,
            **settings,
        )
__init__
__init__(root, *, download=False, transform=None, target_transform=None)

Parameters:

Name Type Description Default
root str | Path

Root directory of dataset.

required
download bool

If True, downloads the dataset to the root directory. If dataset already exists, it is not redownloaded. Default, False.

False
transform Callable | Transform | None

Transforms the input.

None
target_transform Callable | Transform | None

Transforms the target.

None
Source code in src/beignet/datasets/_veith_cytochrome_p450_2c9_inhibition_dataset.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
def __init__(
    self,
    root: str | Path,
    *,
    download: bool = False,
    transform: Callable | Transform | None = None,
    target_transform: Callable | Transform | None = None,
):
    r"""
    Forward this dataset's fixed source settings, together with the
    caller's options, to the parent initializer.

    Parameters
    ----------
    root : str | Path
        Directory that serves as the root of the dataset.

    download : bool
        When `True`, the dataset is downloaded into the root directory; a
        dataset that already exists is not downloaded again. Defaults to
        `False`.

    transform : Callable | Transform | None
        Applied to the input.

    target_transform : Callable | Transform | None
        Applied to the target.
    """
    # Dataset-specific values forwarded unchanged to the parent initializer.
    settings = {
        "identifier": 4259577,
        "suffix": "tsv",
        "checksum": "md5:87d21d2666e8e2bfc76f7d693e060c0c",
        "x_keys": ["Drug"],
        "y_keys": ["Y"],
    }

    super().__init__(
        root=root,
        download=download,
        transform=transform,
        target_transform=target_transform,
        **settings,
    )

beignet.datasets.VeithCytochromeP4502D6InhibitionDataset

Bases: TDCDataset

Source code in src/beignet/datasets/_veith_cytochrome_p450_2d6_inhibition_dataset.py
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
class VeithCytochromeP4502D6InhibitionDataset(TDCDataset):
    def __init__(
        self,
        root: str | Path,
        *,
        download: bool = False,
        transform: Callable | Transform | None = None,
        target_transform: Callable | Transform | None = None,
    ):
        r"""
        Forward this dataset's fixed source settings, together with the
        caller's options, to the parent initializer.

        Parameters
        ----------
        root : str | Path
            Directory that serves as the root of the dataset.

        download : bool
            When `True`, the dataset is downloaded into the root directory;
            a dataset that already exists is not downloaded again. Defaults
            to `False`.

        transform : Callable | Transform | None
            Applied to the input.

        target_transform : Callable | Transform | None
            Applied to the target.
        """
        # Dataset-specific values forwarded unchanged to the parent
        # initializer.
        settings = {
            "identifier": 4259580,
            "suffix": "tsv",
            "checksum": "md5:9f82eae1ecccec93c8fc4249955e8694",
            "x_keys": ["Drug"],
            "y_keys": ["Y"],
        }

        super().__init__(
            root=root,
            download=download,
            transform=transform,
            target_transform=target_transform,
            **settings,
        )
__init__
__init__(root, *, download=False, transform=None, target_transform=None)

Parameters:

Name Type Description Default
root str | Path

Root directory of dataset.

required
download bool

If True, downloads the dataset to the root directory. If dataset already exists, it is not redownloaded. Default, False.

False
transform Callable | Transform | None

Transforms the input.

None
target_transform Callable | Transform | None

Transforms the target.

None
Source code in src/beignet/datasets/_veith_cytochrome_p450_2d6_inhibition_dataset.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
def __init__(
    self,
    root: str | Path,
    *,
    download: bool = False,
    transform: Callable | Transform | None = None,
    target_transform: Callable | Transform | None = None,
):
    r"""
    Forward this dataset's fixed source settings, together with the
    caller's options, to the parent initializer.

    Parameters
    ----------
    root : str | Path
        Directory that serves as the root of the dataset.

    download : bool
        When `True`, the dataset is downloaded into the root directory; a
        dataset that already exists is not downloaded again. Defaults to
        `False`.

    transform : Callable | Transform | None
        Applied to the input.

    target_transform : Callable | Transform | None
        Applied to the target.
    """
    # Dataset-specific values forwarded unchanged to the parent initializer.
    settings = {
        "identifier": 4259580,
        "suffix": "tsv",
        "checksum": "md5:9f82eae1ecccec93c8fc4249955e8694",
        "x_keys": ["Drug"],
        "y_keys": ["Y"],
    }

    super().__init__(
        root=root,
        download=download,
        transform=transform,
        target_transform=target_transform,
        **settings,
    )

beignet.datasets.VeithCytochromeP4503A4InhibitionDataset

Bases: TDCDataset

Source code in src/beignet/datasets/_veith_cytochrome_p450_3a4_inhibition_dataset.py
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
class VeithCytochromeP4503A4InhibitionDataset(TDCDataset):
    def __init__(
        self,
        root: str | Path,
        *,
        download: bool = False,
        transform: Callable | Transform | None = None,
        target_transform: Callable | Transform | None = None,
    ):
        r"""
        Forward this dataset's fixed source settings, together with the
        caller's options, to the parent initializer.

        Parameters
        ----------
        root : str | Path
            Directory that serves as the root of the dataset.

        download : bool
            When `True`, the dataset is downloaded into the root directory;
            a dataset that already exists is not downloaded again. Defaults
            to `False`.

        transform : Callable | Transform | None
            Applied to the input.

        target_transform : Callable | Transform | None
            Applied to the target.
        """
        # Dataset-specific values forwarded unchanged to the parent
        # initializer.
        settings = {
            "identifier": 4259582,
            "suffix": "tsv",
            "checksum": "md5:73258e31495abd95072a6e06acbee83a",
            "x_keys": ["Drug"],
            "y_keys": ["Y"],
        }

        super().__init__(
            root=root,
            download=download,
            transform=transform,
            target_transform=target_transform,
            **settings,
        )
__init__
__init__(root, *, download=False, transform=None, target_transform=None)

Parameters:

Name Type Description Default
root str | Path

Root directory of dataset.

required
download bool

If True, downloads the dataset to the root directory. If dataset already exists, it is not redownloaded. Default, False.

False
transform Callable | Transform | None

Transforms the input.

None
target_transform Callable | Transform | None

Transforms the target.

None
Source code in src/beignet/datasets/_veith_cytochrome_p450_3a4_inhibition_dataset.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
def __init__(
    self,
    root: str | Path,
    *,
    download: bool = False,
    transform: Callable | Transform | None = None,
    target_transform: Callable | Transform | None = None,
):
    r"""
    Forward this dataset's fixed source settings, together with the
    caller's options, to the parent initializer.

    Parameters
    ----------
    root : str | Path
        Directory that serves as the root of the dataset.

    download : bool
        When `True`, the dataset is downloaded into the root directory; a
        dataset that already exists is not downloaded again. Defaults to
        `False`.

    transform : Callable | Transform | None
        Applied to the input.

    target_transform : Callable | Transform | None
        Applied to the target.
    """
    # Dataset-specific values forwarded unchanged to the parent initializer.
    settings = {
        "identifier": 4259582,
        "suffix": "tsv",
        "checksum": "md5:73258e31495abd95072a6e06acbee83a",
        "x_keys": ["Drug"],
        "y_keys": ["Y"],
    }

    super().__init__(
        root=root,
        download=download,
        transform=transform,
        target_transform=target_transform,
        **settings,
    )

beignet.datasets.WangEffectivePermeabilityDataset

Bases: TDCDataset

Source code in src/beignet/datasets/_wang_effective_permeability_dataset.py
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
class WangEffectivePermeabilityDataset(TDCDataset):
    # A `TDCDataset` preconfigured with identifier 4259569; it adds no
    # behaviour of its own beyond the constants passed to the parent.
    def __init__(
        self,
        root: str | Path,
        *,
        download: bool = False,
        transform: Callable | Transform | None = None,
        target_transform: Callable | Transform | None = None,
    ):
        r"""
        Configure the dataset by forwarding to `TDCDataset.__init__`.

        The parent is called with the fixed values `identifier=4259569`,
        `suffix="tsv"`, a `checksum` string prefixed with `md5:`,
        `x_keys=["Drug"]` and `y_keys=["Y"]`. The caller-supplied
        arguments are passed through unchanged.

        Parameters
        ----------
        root : str | Path
            Root directory of dataset.

        download : bool
            If `True`, downloads the dataset to the root directory. If the
            dataset already exists, it is not downloaded again. Defaults
            to `False`.

        transform : Callable | Transform | None
            Transforms the input.

        target_transform : Callable | Transform | None
            Transforms the target.
        """
        # Everything dataset-specific is expressed as constants here;
        # loading and indexing are left entirely to the parent class.
        super().__init__(
            root=root,
            download=download,
            identifier=4259569,
            suffix="tsv",
            checksum="md5:11681ff33d65be3a751a3fb0e45fa1a6",
            x_keys=["Drug"],
            y_keys=["Y"],
            transform=transform,
            target_transform=target_transform,
        )
__init__
__init__(root, *, download=False, transform=None, target_transform=None)

Parameters:

Name Type Description Default
root str | Path

Root directory of dataset.

required
download bool

If True, downloads the dataset to the root directory. If the dataset already exists, it is not downloaded again. Defaults to False.

False
transform Callable | Transform | None

Transforms the input.

None
target_transform Callable | Transform | None

Transforms the target.

None
Source code in src/beignet/datasets/_wang_effective_permeability_dataset.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
def __init__(
    self,
    root: str | Path,
    *,
    download: bool = False,
    transform: Callable | Transform | None = None,
    target_transform: Callable | Transform | None = None,
):
    r"""
    Configure the dataset by forwarding to the parent initializer.

    The parent is called with the fixed values `identifier=4259569`,
    `suffix="tsv"`, a `checksum` string prefixed with `md5:`,
    `x_keys=["Drug"]` and `y_keys=["Y"]`. The caller-supplied arguments
    are passed through unchanged.

    Parameters
    ----------
    root : str | Path
        Root directory of dataset.

    download : bool
        If `True`, downloads the dataset to the root directory. If the
        dataset already exists, it is not downloaded again. Defaults to
        `False`.

    transform : Callable | Transform | None
        Transforms the input.

    target_transform : Callable | Transform | None
        Transforms the target.
    """
    # Everything dataset-specific is expressed as constants here; loading
    # and indexing are left entirely to the parent class.
    super().__init__(
        root=root,
        download=download,
        identifier=4259569,
        suffix="tsv",
        checksum="md5:11681ff33d65be3a751a3fb0e45fa1a6",
        x_keys=["Drug"],
        y_keys=["Y"],
        transform=transform,
        target_transform=target_transform,
    )

beignet.datasets.ZINCDataset

Bases: TDCDataset

Source code in src/beignet/datasets/_zinc_dataset.py
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
class ZINCDataset(TDCDataset):
    # A `TDCDataset` preconfigured with identifier 4170963. Unlike the
    # neighbouring datasets it requests no target columns and accepts no
    # `target_transform`.
    def __init__(
        self,
        root: str | Path,
        *,
        download: bool = False,
        transform: Callable | Transform | None = None,
    ):
        r"""
        Configure the dataset by forwarding to `TDCDataset.__init__`.

        The parent is called with the fixed values `identifier=4170963`,
        `suffix="tsv"`, a `checksum` string prefixed with `md5:`,
        `x_keys=["smiles"]` and an empty `y_keys` list. The caller-supplied
        arguments are passed through unchanged.

        Parameters
        ----------
        root : str | Path
            Root directory of dataset.

        download : bool
            If `True`, downloads the dataset to the root directory. If the
            dataset already exists, it is not downloaded again. Defaults
            to `False`.

        transform : Callable | Transform | None
            Transforms the input.
        """
        super().__init__(
            root=root,
            download=download,
            identifier=4170963,
            suffix="tsv",
            checksum="md5:9e4754d72db297d496def3498a926979",
            x_keys=["smiles"],
            # No target columns are requested, and `target_transform` is
            # left at whatever default the parent defines.
            y_keys=[],
            transform=transform,
        )
__init__
__init__(root, *, download=False, transform=None)

Parameters:

Name Type Description Default
root str | Path

Root directory of dataset.

required
download bool

If True, downloads the dataset to the root directory. If the dataset already exists, it is not downloaded again. Defaults to False.

False
transform Callable | Transform | None

Transforms the input.

None
Source code in src/beignet/datasets/_zinc_dataset.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
def __init__(
    self,
    root: str | Path,
    *,
    download: bool = False,
    transform: Callable | Transform | None = None,
):
    r"""
    Configure the dataset by forwarding to the parent initializer.

    The parent is called with the fixed values `identifier=4170963`,
    `suffix="tsv"`, a `checksum` string prefixed with `md5:`,
    `x_keys=["smiles"]` and an empty `y_keys` list. The caller-supplied
    arguments are passed through unchanged.

    Parameters
    ----------
    root : str | Path
        Root directory of dataset.

    download : bool
        If `True`, downloads the dataset to the root directory. If the
        dataset already exists, it is not downloaded again. Defaults to
        `False`.

    transform : Callable | Transform | None
        Transforms the input.
    """
    super().__init__(
        root=root,
        download=download,
        identifier=4170963,
        suffix="tsv",
        checksum="md5:9e4754d72db297d496def3498a926979",
        x_keys=["smiles"],
        # No target columns are requested, and `target_transform` is left
        # at whatever default the parent defines.
        y_keys=[],
        transform=transform,
    )

beignet.datasets.ZhuAcuteToxicityLD50Dataset

Bases: TDCDataset

Source code in src/beignet/datasets/_zhu_acute_toxicity_ld50_dataset.py
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
class ZhuAcuteToxicityLD50Dataset(TDCDataset):
    # A `TDCDataset` preconfigured with identifier 4267146; it adds no
    # behaviour of its own beyond the constants passed to the parent.
    def __init__(
        self,
        root: str | Path,
        *,
        download: bool = False,
        transform: Callable | Transform | None = None,
        target_transform: Callable | Transform | None = None,
    ):
        r"""
        Configure the dataset by forwarding to `TDCDataset.__init__`.

        The parent is called with the fixed values `identifier=4267146`,
        `suffix="tsv"`, a `checksum` string prefixed with `md5:`,
        `x_keys=["X"]` and `y_keys=["Y"]`. The caller-supplied arguments
        are passed through unchanged.

        Parameters
        ----------
        root : str | Path
            Root directory of dataset.

        download : bool
            If `True`, downloads the dataset to the root directory. If the
            dataset already exists, it is not downloaded again. Defaults
            to `False`.

        transform : Callable | Transform | None
            Transforms the input.

        target_transform : Callable | Transform | None
            Transforms the target.
        """
        super().__init__(
            root=root,
            download=download,
            identifier=4267146,
            suffix="tsv",
            checksum="md5:d655bc7921566e84713aeb91b3298526",
            # NOTE(review): the Veith and Wang datasets in this file use
            # "Drug" as the input key; confirm that "X" is the intended
            # key for this file.
            x_keys=["X"],
            y_keys=["Y"],
            transform=transform,
            target_transform=target_transform,
        )
__init__
__init__(root, *, download=False, transform=None, target_transform=None)

Parameters:

Name Type Description Default
root str | Path

Root directory of dataset.

required
download bool

If True, downloads the dataset to the root directory. If the dataset already exists, it is not downloaded again. Defaults to False.

False
transform Callable | Transform | None

Transforms the input.

None
target_transform Callable | Transform | None

Transforms the target.

None
Source code in src/beignet/datasets/_zhu_acute_toxicity_ld50_dataset.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
def __init__(
    self,
    root: str | Path,
    *,
    download: bool = False,
    transform: Callable | Transform | None = None,
    target_transform: Callable | Transform | None = None,
):
    r"""
    Configure the dataset by forwarding to the parent initializer.

    The parent is called with the fixed values `identifier=4267146`,
    `suffix="tsv"`, a `checksum` string prefixed with `md5:`,
    `x_keys=["X"]` and `y_keys=["Y"]`. The caller-supplied arguments are
    passed through unchanged.

    Parameters
    ----------
    root : str | Path
        Root directory of dataset.

    download : bool
        If `True`, downloads the dataset to the root directory. If the
        dataset already exists, it is not downloaded again. Defaults to
        `False`.

    transform : Callable | Transform | None
        Transforms the input.

    target_transform : Callable | Transform | None
        Transforms the target.
    """
    super().__init__(
        root=root,
        download=download,
        identifier=4267146,
        suffix="tsv",
        checksum="md5:d655bc7921566e84713aeb91b3298526",
        # NOTE(review): the Veith and Wang datasets in this file use
        # "Drug" as the input key; confirm that "X" is the intended key
        # for this file.
        x_keys=["X"],
        y_keys=["Y"],
        transform=transform,
        target_transform=target_transform,
    )