Spaces:
Sleeping
Sleeping
Kevin Louis
committed on
Commit
·
4b032bf
1
Parent(s):
a388195
Upload DNAseq.py
Browse files
DNAseq.py
CHANGED
@@ -1,119 +1,51 @@
|
|
1 |
-
|
2 |
-
def __init__(self, sequence):
    """Store the given sequence on the instance in lower case.

    The counting methods of this class match lower-case base letters,
    so the sequence is normalised once here.
    """
    normalized = sequence.lower()
    self.sequence = normalized
|
4 |
|
5 |
-
def get_total_bases(self):
    """Return the total length of the sequence."""
    sequence_length = len(self.sequence)
    return sequence_length
|
8 |
-
|
9 |
-
def get_base_count(self, base):
    """Return how many times the given base occurs in the sequence.

    The base can be A, T, G or C; it is lower-cased before counting, so
    the lookup is case-insensitive with respect to the argument.
    """
    return self.sequence.count(base.lower())
|
13 |
|
|
|
14 |
def get_base_counts(self):
    """Return the total number of each base in the sequence as a dictionary.

    Keys are 'a', 't', 'g' and 'c' (in that order); each value comes from
    ``self.get_base_count`` for that base.
    """
    # The per-base calls were truncated to "self." in this copy of the file;
    # get_base_count is the per-base counter defined by this class.
    return {base: self.get_base_count(base) for base in ('a', 't', 'g', 'c')}
|
23 |
|
24 |
-
def get_base_percentage(self, base):
    """Return the content of one base (A, T, G or C) as a percentage.

    Returns 0.0 for an empty sequence instead of raising
    ZeroDivisionError.
    """
    total_bases = self.get_total_bases()
    if total_bases == 0:
        # No bases at all: avoid dividing by zero.
        return 0.0
    base_count = self.get_base_count(base)
    return (base_count / total_bases) * 100
|
30 |
-
|
31 |
def get_base_percentages(self):
    """Return the percentage content of each base as a dictionary.

    Keys are 'a', 't', 'g' and 'c' (in that order); each value comes from
    ``self.get_base_percentage`` for that base.
    """
    # The per-base calls were truncated to "self." in this copy of the file;
    # get_base_percentage is the per-base percentage method of this class.
    return {base: self.get_base_percentage(base) for base in ('a', 't', 'g', 'c')}
|
40 |
|
41 |
def get_gc_content(self):
    """Return the Guanine + Cytosine (GC) content as a percentage.

    Returns 0.0 for an empty sequence instead of raising
    ZeroDivisionError.
    """
    # The call was truncated to "self." in this copy of the file;
    # get_total_bases is this class's length accessor.
    total_bases = self.get_total_bases()
    if total_bases == 0:
        return 0.0
    gc_count = self.sequence.count('g') + self.sequence.count('c')
    return (gc_count / total_bases) * 100
|
47 |
|
48 |
def get_at_content(self):
    """Return the Adenine + Thymine (AT) content as a percentage.

    Returns 0.0 for an empty sequence instead of raising
    ZeroDivisionError.
    """
    # The call was truncated to "self." in this copy of the file;
    # get_total_bases is this class's length accessor.
    total_bases = self.get_total_bases()
    if total_bases == 0:
        return 0.0
    at_count = self.sequence.count('a') + self.sequence.count('t')
    return (at_count / total_bases) * 100
|
54 |
|
55 |
def get_purine_content(self):
    """Return the Adenine + Guanine (purine) content as a percentage.

    Returns 0.0 for an empty sequence instead of raising
    ZeroDivisionError.
    """
    # The call was truncated to "self." in this copy of the file;
    # get_total_bases is this class's length accessor.
    total_bases = self.get_total_bases()
    if total_bases == 0:
        return 0.0
    ag_count = self.sequence.count('a') + self.sequence.count('g')
    return (ag_count / total_bases) * 100
|
61 |
|
62 |
def get_pyrimidine_content(self):
    """Return the Cytosine + Thymine (pyrimidine) content as a percentage.

    Returns 0.0 for an empty sequence instead of raising
    ZeroDivisionError.
    """
    # The call was truncated to "self." in this copy of the file;
    # get_total_bases is this class's length accessor.
    total_bases = self.get_total_bases()
    if total_bases == 0:
        return 0.0
    ct_count = self.sequence.count('c') + self.sequence.count('t')
    return (ct_count / total_bases) * 100
|
68 |
-
|
69 |
-
def get_base_at_position(self, position):
    """Return the base at a 1-based position in the sequence.

    If the position falls outside the sequence, a message string naming
    the valid range is returned instead of a base.
    """
    seq_len = len(self.sequence)
    index = position - 1  # convert the 1-based position to a 0-based index

    if not (0 <= index < seq_len):
        return "Position is out of range. Positions should be 1 - {}".format(seq_len)

    return self.sequence[index]
|
78 |
-
|
79 |
-
def get_base_at_positions(self, position_list):
    """Return a dictionary mapping each 1-based position to its base.

    All positions are validated with ``self.check_positions`` first. If
    any of them is rejected, a message string naming the valid range is
    returned instead of a dictionary.
    """
    if not self.check_positions(position_list):
        return "Position is out of range. Positions should be 1 - {}".format(len(self.sequence))

    # Positions were already validated above. The former extra filter
    # (0 <= i < len) was written for 0-based indices and silently dropped
    # the last position of the sequence.
    return {position: self.sequence[position - 1] for position in position_list}
|
86 |
-
|
87 |
-
def check_positions(self, position_list):
    """Return True when every position lies within 1..len(sequence).

    An empty list is considered valid. Every position is examined before
    the verdict is computed.
    """
    seq_len = len(self.sequence)

    # One flag per position: True -> within range, False -> out of range.
    in_range_flags = [not (pos <= 0 or pos > seq_len) for pos in position_list]

    return all(in_range_flags)
|
108 |
-
|
109 |
-
def get_subsequence(self, start_position, end_position):
    """Return the subsequence between two 1-based, inclusive positions.

    Both positions must lie within 1..len(sequence); otherwise a message
    string naming the valid range is returned. When both are in range
    but the start is after the end, the result is an empty string.
    """
    seq_len = len(self.sequence)

    # Check both bounds on both positions: without the lower bound on the
    # end, a negative end position would slice relative to the tail of
    # the sequence instead of being reported as out of range.
    start_ok = 1 <= start_position <= seq_len
    end_ok = 1 <= end_position <= seq_len
    if not (start_ok and end_ok):
        return "Position is out of range. Positions should be 1 - {}".format(seq_len)

    return self.sequence[start_position - 1:end_position]
|
116 |
-
|
117 |
-
def subsequence_total_bases(self, start_position, end_position):
    """Return the length of whatever ``get_subsequence`` returns.

    NOTE(review): ``get_subsequence`` returns a message string for
    out-of-range positions; in that case this is the length of the
    message, not a base count.
    """
    subsequence = self.get_subsequence(start_position, end_position)
    return len(subsequence)
|
119 |
-
|
|
|
1 |
+
from sequence import Sequence
|
|
|
|
|
2 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
|
4 |
+
class DNAseq(Sequence):
    """DNA-specific statistics on top of ``Sequence``.

    Relies on the base class for ``self.sequence``, ``get_unit_count``,
    ``get_unit_percentage`` and ``get_seq_length``. The counting below
    matches lower-case letters only, so ``self.sequence`` is presumably
    stored lower-cased by the base class -- TODO confirm.
    """

    # Bases reported by the dictionary-returning methods, in output order.
    _BASES = ('a', 't', 'g', 'c')

    def get_base_counts(self):
        """Return the total number of each base as a dictionary."""
        return {base: self.get_unit_count(base) for base in self._BASES}

    def get_base_percentages(self):
        """Return the percentage content of each base as a dictionary."""
        return {base: self.get_unit_percentage(base) for base in self._BASES}

    def _pair_percentage(self, first, second):
        """Return the combined percentage of two bases in the sequence.

        Returns 0.0 when the sequence length is zero, rather than raising
        ZeroDivisionError.
        """
        total_bases = self.get_seq_length()
        if not total_bases:
            return 0.0
        pair_count = self.sequence.count(first) + self.sequence.count(second)
        return (pair_count / total_bases) * 100

    def get_gc_content(self):
        """Return the Guanine + Cytosine (GC) content as a percentage."""
        return self._pair_percentage('g', 'c')

    def get_at_content(self):
        """Return the Adenine + Thymine (AT) content as a percentage."""
        return self._pair_percentage('a', 't')

    def get_purine_content(self):
        """Return the Adenine + Guanine (purine) content as a percentage."""
        return self._pair_percentage('a', 'g')

    def get_pyrimidine_content(self):
        """Return the Cytosine + Thymine (pyrimidine) content as a percentage."""
        return self._pair_percentage('c', 't')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|