habdine committed on
Commit
045f65d
1 Parent(s): 183290f

Delete utils_dataset.py

Browse files
Files changed (1) hide show
  1. utils_dataset.py +0 -60
utils_dataset.py DELETED
@@ -1,60 +0,0 @@
1
- import numpy as np
2
- import csv
3
-
4
def load_GO_annot(filename):
    """Load Gene Ontology (GO) annotations from a tab-separated file.

    The file is consumed in this order:

    * for each ontology in ``['mf', 'bp', 'cc']`` (molecular function,
      biological process, cellular component): a header line, a row of GO
      terms, another header line, and a row of GO names;
    * one more header line;
    * one row per protein: the protein identifier followed by three
      columns (mf, bp, cc), each a comma-separated list of GO terms
      (an empty column means "no annotation").

    Blank lines among the protein rows are ignored.

    Args:
        filename: Path of the annotation file.

    Returns:
        A tuple ``(prot2annot, goterms, gonames, counts)`` where

        * ``prot2annot[prot][ont]`` is a float array with one slot per entry
          of ``goterms[ont]``, set to 1.0 where the protein is annotated;
        * ``goterms[ont]`` / ``gonames[ont]`` are the term and name rows as
          read from the file;
        * ``counts[ont]`` is a float array holding, per term, the number of
          proteins annotated with it.

    Raises:
        ValueError: If a protein row names a GO term that is missing from
            the corresponding ontology's term row.
    """
    onts = ['mf', 'bp', 'cc']
    prot2annot = {}
    goterms = {ont: [] for ont in onts}
    gonames = {ont: [] for ont in onts}
    # newline='' lets the csv module do its own line-ending handling, as its
    # documentation requires for file objects.
    with open(filename, mode='r', newline='') as tsvfile:
        reader = csv.reader(tsvfile, delimiter='\t')

        # Same four-line layout for every ontology, in the order of `onts`.
        for ont in onts:
            next(reader, None)  # skip the headers
            goterms[ont] = next(reader)
            next(reader, None)  # skip the headers
            gonames[ont] = next(reader)

        next(reader, None)  # skip the headers of the protein table

        # Term -> column position, built once per ontology so each lookup is
        # O(1) instead of a list scan. setdefault keeps the first position of
        # a repeated term, which is what list.index() would have returned.
        term_index = {}
        for ont in onts:
            positions = {}
            for position, term in enumerate(goterms[ont]):
                positions.setdefault(term, position)
            term_index[ont] = positions

        counts = {ont: np.zeros(len(goterms[ont]), dtype=float) for ont in onts}
        for row in reader:
            if not row:
                # csv yields [] for a blank line (e.g. a trailing newline).
                continue
            prot, prot_goterms = row[0], row[1:]
            annot = {}
            for i, ont in enumerate(onts):
                try:
                    goterm_indices = [
                        term_index[ont][goterm]
                        for goterm in prot_goterms[i].split(',')
                        if goterm != ''
                    ]
                except KeyError as err:
                    raise ValueError(
                        "GO term {!r} of protein {!r} is not listed among the "
                        "'{}' terms of {}".format(err.args[0], prot, ont, filename)
                    ) from err
                annot[ont] = np.zeros(len(goterms[ont]))
                annot[ont][goterm_indices] = 1.0
                counts[ont][goterm_indices] += 1.0
            prot2annot[prot] = annot
    return prot2annot, goterms, gonames, counts
42
-
43
-
44
def load_EC_annot(filename):
    """Load Enzyme Commission (EC) annotations from a tab-separated file.

    The file is consumed in this order: a header line, a row of EC numbers,
    another header line, then one row per protein whose first column is the
    protein identifier and whose second column is a comma-separated list of
    EC numbers (an empty column means "no annotation"). Blank lines among
    the protein rows are ignored.

    Args:
        filename: Path of the annotation file.

    Returns:
        A tuple ``(prot2annot, ec_numbers, ec_numbers, counts)`` shaped like
        the result of ``load_GO_annot``. The file has no separate names row,
        so the ``{'ec': [...]}`` dict of EC numbers fills both the "terms"
        and the "names" slot.

        * ``prot2annot[prot]['ec']`` is an int64 array with one slot per EC
          number, set to 1 where the protein is annotated;
        * ``counts['ec']`` is a float array holding, per EC number, the
          number of proteins annotated with it.

    Raises:
        ValueError: If a protein row names an EC number that is missing from
            the EC-number row.
    """
    prot2annot = {}
    # newline='' lets the csv module do its own line-ending handling, as its
    # documentation requires for file objects.
    with open(filename, mode='r', newline='') as tsvfile:
        reader = csv.reader(tsvfile, delimiter='\t')

        next(reader, None)  # skip the headers
        ec_numbers = {'ec': next(reader)}
        next(reader, None)  # skip the headers of the protein table

        num_ecs = len(ec_numbers['ec'])
        # EC number -> column position for O(1) lookups; setdefault keeps the
        # first position of a repeated entry, like list.index() would.
        ec_index = {}
        for position, ec_num in enumerate(ec_numbers['ec']):
            ec_index.setdefault(ec_num, position)

        counts = {'ec': np.zeros(num_ecs, dtype=float)}
        for row in reader:
            if not row:
                # csv yields [] for a blank line (e.g. a trailing newline).
                continue
            prot, prot_ec_numbers = row[0], row[1]
            try:
                # Skip empty fields so an unannotated protein gets an
                # all-zero vector, matching how load_GO_annot treats them.
                ec_indices = [
                    ec_index[ec_num]
                    for ec_num in prot_ec_numbers.split(',')
                    if ec_num != ''
                ]
            except KeyError as err:
                raise ValueError(
                    "EC number {!r} of protein {!r} is not listed in the "
                    "EC-number row of {}".format(err.args[0], prot, filename)
                ) from err
            annot = np.zeros(num_ecs, dtype=np.int64)
            annot[ec_indices] = 1
            prot2annot[prot] = {'ec': annot}
            counts['ec'][ec_indices] += 1
    # Without this return, everything parsed above is discarded.
    return prot2annot, ec_numbers, ec_numbers, counts