# type() reports the class of a value; a number literal written with a decimal point is a float.
type(5.0)

float


a = 3.14
# Adding an int to a float gives a float. 3.14 has no exact binary
# floating-point representation, which is why the displayed result is
# 5.140000000000001 rather than 5.14.
a + 2

5.140000000000001


x = [1,5,3,7,8]             # list of integers
y = ['a','b','c']           # list of strings
# Evaluates to list, but nothing is displayed: the notebook only echoes the
# value of the last expression in a cell, and this cell ends with an assignment.
type(y)
z = [1, 2, 3, 'a', 'b']     # a single list may mix element types


a = 2                # int
b = 5.46             # float: the operand that triggers the deliberate TypeError on the last line
c = [1,2,3,4]        # list: a sequence can only be multiplied (repeated) by an int
d = [5,6,7,8]
e = 7
c * b

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Input In [11], in <cell line: 6>()
      4 d = [5,6,7,8]
      5 e = 7
----> 6 c * b

TypeError: can't multiply sequence by non-int of type 'float'


a = [1,2,3,4,5,6,7,8]
b = 5
c = 10
b in a               # True: 5 is an element of a (not displayed)
b < c or c == 1      # True: 5 < 10 already satisfies the `or` (not displayed)
b not in a           # False: only this last expression is displayed

False


a = [1,2,3,4,5]
b = ['a','b','c']
c = 'a random string'

c[2]       # 'r': indexes start at 0 (not displayed)
c[1:4]     # ' ra': the slice covers indexes 1, 2 and 3; the end index 4 is excluded

' ra'


a = [1,2,3,4,5]         # mutable
b = ['a','b','c']       # mutable
c = 'a random string'   # immutable
# Deliberate error: item assignment on a str raises TypeError, so the lines after it never run.
c[0] = 'A'
#a[0] = 42
c

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Input In [19], in <cell line: 5>()
      2 b = ['a','b','c']       # mutable
      3 c = 'a random string'   # immutable
----> 5 c[0] = 'A'
      6 #a[0] = 42
      7 c

TypeError: 'str' object does not support item assignment


myTuple = (1,2,3,4,'a','b','c')
myTuple[0] = 42
#print(myTuple)
print(len(myTuple))
#for i in myTuple:
#     print(i)
# NOTE: the item assignment on the second line raises TypeError on purpose
# (tuples are immutable), so print(len(myTuple)) is never reached in this run.

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Input In [24], in <cell line: 2>()
      1 myTuple = (1,2,3,4,'a','b','c')
----> 2 myTuple[0] = 42
      3 #print(myTuple)
      4 print(len(myTuple))

TypeError: 'tuple' object does not support item assignment


# Report whether the value a occurs in the list b.
a = 3
b = [1, 2, 3, 4]
if a not in b:
    # a is missing from b
    print(str(a) + ' is not in the list')
else:
    # a is one of the elements of b
    print(str(a) + ' is found in the list b')

3 is found in the list b


# Print every line of the file with leading/trailing whitespace removed.
# The with-block closes the file automatically, also when reading or
# printing raises an exception (a trailing fh.close() would be skipped then).
with open('../files/somerandomfile.txt', 'r', encoding='utf-8') as fh:
    for line in fh:
        print(line.strip())

just a strange
file with
some
nonsense lines


# Print each element of numbers by walking an index from 0 up to the last position.
numbers = [5, 6, 7, 8]
i = 0
while len(numbers) > i:     # stop once i has moved past the last valid index
    print(numbers[i])
    i = i + 1               # advance to the next element


# Print the first data line of the VCF file, i.e. the first line that does
# not start with '#', then stop reading.
# The with-block guarantees the file is closed, also when the loop is left
# early with break or an exception occurs.
with open('/mnt/c/Users/Nina/Documents/courses/Python_Beginner_Course/genotypes.vcf', 'r', encoding='utf-8') as fh:
    for line in fh:
        if not line.startswith('#'):
            print(line.strip())
            break
# Next, find chromosome 5

1	10492	.	C	T	550.31	LOW_VQSLOD	AN=26;AC=2	GT:AD:DP:GQ:PGT:PID:PL	./.:0,0:0:.:.:.:.	./.:0,0:0:.:.:.:.	./.:0,0:0:.:.:.:.	./.:0,0:0:.:.:.:.	./.:0,0:0:.:.:.:.	0/1:12,7:19:99:0|1:10403_ACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAAC_A:196,0,340	./.:0,0:0:.:.:.:.	./.:0,0:0:.:.:.:.	./.:0,0:0:.:.:.:.	./.:0,0:0:.:.:.:.	0/1:18,4:22:48:.:.:48,0,504	./.:0,0:0:.:.:.:.	./.:0,0:0:.:.:.:.


# Print the columns of the first data line whose first column is '5'
# (chromosome 5), then stop reading.
# The with-block guarantees the file is closed, also on break or on an exception.
with open('/mnt/c/Users/Nina/Documents/courses/Python_Beginner_Course/genotypes.vcf', 'r', encoding='utf-8') as fh:
    for line in fh:
        if line.startswith('#'):
            continue                        # skip lines starting with '#'
        cols = line.strip().split('\t')     # tab-separated columns
        if cols[0] == '5':
            print(cols)
            break

# Next, find the correct region

['5', '12041', '.', 'A', 'T', '18075.2', 'PASS', 'AN=26;AC=2', 'GT:AD:DP:GQ:PL', './.:0,0:0:.:.', './.:0,0:0:.:.', './.:0,0:0:.:.', './.:0,0:0:.:.', './.:0,0:0:.:.', './.:0,0:0:.:.', './.:0,0:0:.:.', './.:0,0:0:.:.', './.:0,0:0:.:.', './.:0,0:0:.:.', '0/1:15,6:21:99:142,0,391', './.:0,0:0:.:.', '0/1:16,17:33:99:442,0,422']


# Print the columns of the first data line on chromosome 5 whose second
# column (converted to int) lies in 1000000..1005000 inclusive, then stop.
# The with-block guarantees the file is closed, also on break or on an exception.
with open('/mnt/c/Users/Nina/Documents/courses/Python_Beginner_Course/genotypes.vcf', 'r', encoding='utf-8') as fh:
    for line in fh:
        if line.startswith('#'):
            continue                        # skip lines starting with '#'
        cols = line.strip().split('\t')     # tab-separated columns
        # Chained comparison: the position is converted to int only once.
        if cols[0] == '5' and 1000000 <= int(cols[1]) <= 1005000:
            print(cols)
            break
# Next, find the genotypes for sample1

['5', '1000080', '.', 'A', 'T', '2557.1', 'PASS', 'AN=26;AC=2', 'GT:AD:DP:GQ:PL', '0/1:15,18:33:99:489,0,357', './.:0,0:0:.:.', './.:0,0:0:.:.', './.:0,0:0:.:.', './.:0,0:0:.:.', './.:0,0:0:.:.', './.:0,0:0:.:.', './.:0,0:0:.:.', '0/1:21,19:40:99:481,0,542', './.:0,0:0:.:.', './.:0,0:0:.:.', './.:0,0:0:.:.', './.:0,0:0:.:.']


# Print column 9 (the field used for sample1) of the first data line on
# chromosome 5 with a position in 1000000..1005000 inclusive, then stop.
# The with-block guarantees the file is closed, also on break or on an exception.
with open('/mnt/c/Users/Nina/Documents/courses/Python_Beginner_Course/genotypes.vcf', 'r', encoding='utf-8') as fh:
    for line in fh:
        if line.startswith('#'):
            continue                        # skip lines starting with '#'
        cols = line.strip().split('\t')     # tab-separated columns
        # Chained comparison: the position is converted to int only once.
        if cols[0] == '5' and 1000000 <= int(cols[1]) <= 1005000:
            geno = cols[9]
            print(geno)
            break
# Next, extract the genotypes only

0/1:15,18:33:99:489,0,357


# Print only the genotype of sample1 (the part of column 9 before the first
# ':') for the first data line on chromosome 5 with a position in
# 1000000..1005000 inclusive, then stop.
# The with-block guarantees the file is closed, also on break or on an exception.
with open('/mnt/c/Users/Nina/Documents/courses/Python_Beginner_Course/genotypes.vcf', 'r', encoding='utf-8') as fh:
    for line in fh:
        if line.startswith('#'):
            continue                        # skip lines starting with '#'
        cols = line.strip().split('\t')     # tab-separated columns
        # Chained comparison: the position is converted to int only once.
        if cols[0] == '5' and 1000000 <= int(cols[1]) <= 1005000:
            geno = cols[9].split(':')[0]    # first ':'-separated item, e.g. '0/1'
            print(geno)
            break
# Next, find in which positions sample1 has alternate alleles

0/1


# For every data line on chromosome 5 with a position in 1000000..1005000
# inclusive, print the genotype of sample1 when it is '0/1' or '1/1'.
# No break here: the whole file is scanned so all matching lines are reported.
# The with-block guarantees the file is closed, also when an exception occurs.
with open('/mnt/c/Users/Nina/Documents/courses/Python_Beginner_Course/genotypes.vcf', 'r', encoding='utf-8') as fh:
    for line in fh:
        if line.startswith('#'):
            continue                        # skip lines starting with '#'
        cols = line.strip().split('\t')     # tab-separated columns
        # Chained comparison: the position is converted to int only once.
        if cols[0] == '5' and 1000000 <= int(cols[1]) <= 1005000:
            geno = cols[9].split(':')[0]    # first ':'-separated item of column 9
            if geno in ('0/1', '1/1'):
                print(geno)
#Next, print nicely

0/1
0/1
0/1
0/1
0/1
0/1
0/1
0/1
0/1
0/1
0/1
0/1
0/1
0/1
0/1
0/1
0/1
0/1
0/1
0/1
0/1
0/1


# Collect a label for every data line on chromosome 5 with a position in
# 1000000..1005000 inclusive where sample1 (column 9) has genotype '0/1' or
# '1/1', then print the list of labels.
# Label layout: <col 0>:<col 1>_<col 3>-<col 4>, e.g. '5:1000080_A-T'
# (columns 3 and 4 are presumably the reference and alternate allele - verify
# against the VCF header).
# The with-block guarantees the file is closed, also when an exception occurs.
res = []
with open('/mnt/c/Users/Nina/Documents/courses/Python_Beginner_Course/genotypes.vcf', 'r', encoding='utf-8') as fh:
    for line in fh:
        if line.startswith('#'):
            continue                        # skip lines starting with '#'
        cols = line.strip().split('\t')     # tab-separated columns
        # Chained comparison: the position is converted to int only once.
        if cols[0] == '5' and 1000000 <= int(cols[1]) <= 1005000:
            geno = cols[9].split(':')[0]    # first ':'-separated item of column 9
            if geno in ('0/1', '1/1'):
                var = cols[0]+':'+cols[1]+'_'+cols[3]+'-'+cols[4]
                # print(var+' has genotype: '+geno)
                res.append(var)
print(res)

['5:1000080_A-T', '5:1000156_G-A', '5:1001097_C-A', '5:1001193_C-T', '5:1001245_T-C', '5:1001339_C-T', '5:1001344_G-C', '5:1001683_G-T', '5:1001755_G-A', '5:1002374_G-A', '5:1002382_G-C', '5:1002620_T-C', '5:1002722_G-A', '5:1002819_C-A', '5:1003043_G-T', '5:1003099_C-T', '5:1003135_G-A', '5:1004648_A-G', '5:1004650_A-C', '5:1004665_A-G', '5:1004702_G-T', '5:1004879_T-C']


len([1,2,3])      # len() is a function: the same call works on a list ...
len('a string')
# strip() is a method of str, so it only exists on strings; the list call on the last line raises AttributeError on purpose.
'a string  '.strip()
[1,2,3].strip()

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
Input In [41], in <cell line: 5>()
      2 len('a string')
      4 'a string  '.strip()
----> 5 [1,2,3].strip()

AttributeError: 'list' object has no attribute 'strip'


# abs() returns the absolute value of a number.
abs(-5)

5


# sum() adds up the items of an iterable of numbers.
sum([1,2,35,23,88,4])

153


# The optional second argument is the start value added to the total: 10 + (1+2+3+4) = 20.
sum([1,2,3,4],10)

20


b = round(3.234556, 2)   # round to 2 decimal places
a = 'my string'          # not used further in this cell
print(b)

3.23


# With an argument, strip() removes any of the listed characters ('m', 'o', '.', 'c')
# from both ends - not the literal substring 'mo.c'. The leading spaces stay
# because ' ' is not among the listed characters.
'    spaciou   sWith5678.com'.strip('mo.c')

'    spaciou   sWith5678'


# lstrip() without an argument removes leading whitespace only; the trailing newline is kept.
'    spaciou   sWith5678.com\n'.lstrip()

'spaciou   sWith5678.com\n'


# rstrip() without an argument removes trailing whitespace only (the newline here); the leading spaces are kept.
'    spaciou   sWith5678.com\n'.rstrip()

'    spaciou   sWith5678.com'


a = '  split a string into a list '
# Without a separator, split() cuts on runs of whitespace and ignores leading whitespace.
# maxsplit=3 stops after three cuts, so the remainder ('into a list ', trailing
# space included) is returned as one item.
a.split(maxsplit=3)

['split', 'a', 'string', 'into a list ']


'|'.join('a string already')     # a string is iterated character by character, so '|' goes between every character (not displayed)
' '.join(['a', 'b', 'c', 'd'])   # joins the list items with a single space between them
#' '.join([1,2,3])               # would raise TypeError: join() only accepts str items

'a b c d'


'long string'.startswith('ng', 2)   # the second argument is the index where the test begins: 'long string'[2:] starts with 'ng'
#'long string'.endswith('nt')

True


'LongRandomString'.lower()   # 'longrandomstring' (not displayed)
'LongRandomString'.upper()   # only this last expression is displayed

'LONGRANDOMSTRING'


a = [1,2,3,4,5,5,5,5]
a.append(6)     # add 6 at the end                  -> [1,2,3,4,5,5,5,5,6]
a.pop(2)        # remove and return item at index 2 -> [1,2,4,5,5,5,5,6]
a.reverse()     # reverse the list in place         -> [6,5,5,5,5,4,2,1]
a.remove(5)     # remove only the first 5           -> [6,5,5,5,4,2,1]

b = (1,2,3,4)   # tuple: immutable
c = [1,2,3,4]   # list: mutable, so append() works
c.append(5)
c               # [1,2,3,4,5]


# Find the entry with the highest rating in the '|'-separated IMDb file and
# print it as [rating, movie].
# The with-block guarantees the file is closed, also when a line cannot be
# parsed and float() or the column lookup raises.
best = [0, '']           # here we save the rating and which movie
with open('../downloads/250.imdb', 'r', encoding='utf-8') as fh:
    for line in fh:
        if line.startswith('#'):
            continue                        # skip lines starting with '#'
        cols = line.strip().split('|')
        rating = float(cols[1].strip())     # column 1 holds the rating, possibly padded with spaces
        if rating > best[0]:                # strictly higher than the previous highest: update best
            best = [rating, cols[6]]        # on ties the earlier entry is kept
print(best)

Introduction to Python¶

with Application to Bioinformatics¶

- Day 2¶

Review Day 1¶

Variables and Types¶

Literals¶

Variables¶

Lists¶

Comments¶

Operations¶

Basic operations¶

Comparison/Logical/Membership operators¶

Sequences¶

Indexing¶

Mutable / Immutable sequences and iterables¶

New data type: `tuples`¶

If/ Else statements¶

Files and loops¶

Questions?¶

Day 2¶

How to approach a coding task¶

Always write pseudocode!¶

What is your input?¶

Basic Pseudocode:¶

More useful functions and methods¶

Functions¶

From Python documentation¶

Methods¶

Useful operations on strings¶

Useful operations on Mutable sequences¶

Summary¶

IMDb¶

Find the movie with the highest rating¶

Introduction to Python¶

with Application to Bioinformatics¶

- Day 2¶

Review Day 1¶

Variables and Types¶

Literals¶

Variables¶

Lists¶

Comments¶

Operations¶

Basic operations¶

Comparison/Logical/Membership operators¶

Sequences¶

Indexing¶

Mutable / Immutable sequences and iterables¶

New data type: tuples¶

If/ Else statements¶

Files and loops¶

Questions?¶

Day 2¶

How to approach a coding task¶

Always write pseudocode!¶

What is your input?¶

Basic Pseudocode:¶

More useful functions and methods¶

Functions¶

From Python documentation¶

Methods¶

Useful operations on strings¶

Useful operations on Mutable sequences¶

Summary¶

IMDb¶

Find the movie with the highest rating¶

New data type: `tuples`¶