Skip to content

Commit 322bddc

Browse files
Merge pull request #30 from yhr91/CRISPROutcome
Added CRISPR DNA repair outcomes dataset
2 parents a87f4ed + 00ec22c commit 322bddc

3 files changed

Lines changed: 41 additions & 7 deletions

File tree

tdc/label_name_list.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -272,9 +272,13 @@
272272
drugcomb_targets = ['CSS', 'Synergy_ZIP', 'Synergy_Bliss',
273273
'Synergy_Loewe','Synergy_HSA']
274274

275+
leenay_targets = ['Fraction_Insertions', 'Avg_Insertion_Length', 'Avg_Deletion_Length',
276+
'Indel_Diversity', 'Fraction_Frameshifts']
277+
275278
dataset2target_lists = {'qm7b': QM7_targets,
276279
'qm8': QM8_targets,
277280
'qm9': QM9_targets,
278281
'tap': TAP_targets,
279282
'toxcast': ToxCast_targets,
280-
'tox21': Tox21_targets}
283+
'tox21': Tox21_targets,
284+
'leenay': leenay_targets}

tdc/metadata.py

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,8 @@
6464

6565
gda_dataset_names = ['disgenet']
6666

67+
crisproutcome_dataset_names = ['leenay']
68+
6769
drugres_dataset_names = ['gdsc1', 'gdsc2']
6870

6971
drugsyn_dataset_names = ['oncopolypharmacology', 'drugcomb_nci60']
@@ -222,7 +224,8 @@
222224
"Develop",
223225
"QM",
224226
"Paratope",
225-
"Yields"],
227+
"Yields",
228+
"CRISPROutcome"],
226229
'multi_pred': ["DTI",
227230
"PPI",
228231
"DDI",
@@ -270,7 +273,8 @@ def get_task2category():
270273
"Yields": yield_dataset_names,
271274
"Catalyst": catalyst_dataset_names,
272275
"CompoundLibrary": compound_library_names,
273-
"BioKG": biokg_library_names
276+
"BioKG": biokg_library_names,
277+
"CRISPROutcome": crisproutcome_dataset_names
274278
}
275279

276280
benchmark_names = {"admet_group": admet_benchmark}
@@ -359,8 +363,8 @@ def get_task2category():
359363
'clearance_hepatocyte_az': 'tab',
360364
'half_life_obach': 'tab',
361365
'ld50_zhu': 'tab',
362-
'vdss_lombardo': 'tab'
363-
}
366+
'vdss_lombardo': 'tab',
367+
'leenay':'tab'}
364368

365369
name2id = {'bbb_adenot': 4259565,
366370
'bbb_martins': 4259566,
@@ -439,8 +443,8 @@ def get_task2category():
439443
'clearance_hepatocyte_az': 4266187,
440444
'ld50_zhu': 4267146,
441445
'half_life_obach': 4266799,
442-
'vdss_lombardo': 4267387
443-
}
446+
'vdss_lombardo': 4267387,
447+
'leenay':4279966 }
444448

445449
oracle2type = {'drd2': 'pkl',
446450
'jnk3': 'pkl',

tdc/single_pred/dataloader.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -219,3 +219,29 @@ def __init__(self, name, path='./data', label_name=None, print_stats=False):
219219
if print_stats:
220220
self.print_stats()
221221
print('Done!', flush = True, file = sys.stderr)
222+
223+
class CRISPROutcome(single_pred_dataset.DataLoader):
    """DNA repair outcomes following a CRISPR experiment.

    Thin wrapper around ``single_pred_dataset.DataLoader`` that restricts the
    accepted dataset names to ``dataset_names["CRISPROutcome"]`` and labels
    the input entity as ``'GuideSeq'``.

    Parameters
    ----------
    name : str
        Name of the dataset to load. Forwarded to the parent loader together
        with the ``dataset_names["CRISPROutcome"]`` list.

    path : str, optional (default="./data")
        Data directory, forwarded unchanged to the parent loader.

    label_name : str, optional (default=None)
        Label to load, forwarded unchanged to the parent loader.
        NOTE(review): presumably must be given for multi-label datasets;
        confirm against the parent loader.

    print_stats : bool, optional (default=False)
        If True, ``self.print_stats()`` is called once loading finishes.
        The flag is also forwarded to the parent loader.
    """

    def __init__(self, name, path='./data', label_name=None, print_stats=False):
        super().__init__(name, path, label_name, print_stats,
                         dataset_names=dataset_names["CRISPROutcome"])
        # Set after the parent initialiser so this value is the one in effect.
        self.entity1_name = 'GuideSeq'
        # NOTE(review): print_stats was already passed to the parent above; if
        # the parent also reports stats, they will be printed twice - confirm.
        if print_stats:
            self.print_stats()
        # Progress message goes to stderr, keeping stdout free of status text.
        print('Done!', flush=True, file=sys.stderr)

0 commit comments

Comments
 (0)