# A tuple (round brackets) and a list (square brackets) with the same values.
myTuple    = (1, 2, 3)
myList     = [1, 2 ,3]


# Lists are mutable: item assignment replaces the third element.
myList[2]  = 4
myList


# Tuples are immutable: item assignment on a tuple raises a TypeError.
myTuple[2] = 4


# A tuple can hold a mutable object. The tuple itself still cannot be
# assigned to, but the list stored at index 2 can be changed in place.
myTuple = (1, 2, [1,2,3])
print(myTuple)
myTuple[2][2] = 4
print(myTuple)


# Mean of the list values, rounded to one decimal place.
myList = [1, 2, 3.5, 5 ,6.2]
round(sum(myList)/len(myList),1)


# Join the words with single spaces, then upper-case the whole string.
my_list   = ['I','know','Python']

" ".join(my_list).upper()


# Code Snippet for Finding the Movie with the Highest Rating
# Note that this is just one of the solutions
with open('../downloads/250.imdb', 'r') as fh:
    movieList = []          # one (rating, title) tuple per data line
    highestRating = -100    # starting value; assumes every rating is above -100

    for line in fh:
        if line.startswith('#'):
            continue        # lines starting with '#' are skipped
        cols = line.strip().split('|')
        rating = float(cols[1].strip())     # second '|'-separated column
        title = cols[6].strip()             # seventh '|'-separated column
        movieList.append((rating, title))
        if rating > highestRating:
            highestRating = rating

print("Movie(s) with highest rating " + str(highestRating) + ":" )
# More than one movie can share the top rating, so every match is printed.
for movieRating, movieTitle in movieList:
    if movieRating == highestRating:
        print(movieTitle)

# sortedMovieList has to be assigned before it is printed. The tuples are
# sorted in descending order, so the best-rated movies come first (equal
# ratings are ordered by title, also descending).
sortedMovieList = sorted(movieList, reverse=True)
print(sortedMovieList)

# Manual open / read / close. The try/finally makes sure the file is closed
# even when read() raises; a `with open(...)` block does this automatically.
file = open("filename.txt", "r")
try:
    content = file.read()
finally:
    file.close()

setName = set() # Create an empty set


# Loop over the elements of a set.
mySet = {1,2,3,4,5}
for e in mySet:
    print(e)


# Repeated values are stored only once, so this set has three elements.
mySet = {"1", "1", "2", "2", "3"}
print(mySet)


# Hashable values of different types can be mixed in one set.
mySet = {1, "tga", (3, 4), 5.6, False}
print(mySet)


# A list is not hashable, so building this set raises a TypeError.
mySet = {1, "tga", [3, 4], 5.6, False}


# A tuple that contains a list is not hashable either: TypeError again.
mySet = {1, "tga", (3, 4, [1, 2]), 5.6, False}


# Add elements to a set
myset = set()
myset.add(1)
myset.add(100)
myset.add(100)   # adding a value that is already present changes nothing
myset.add(100)
print(myset)


# get the number of elements of a set
# (len() needs the set itself; len(1) would raise a TypeError)
len(myset)


# membership checking
12 in myset


import time, random

# Create a large list and set
large_list = list(range(10000000))
large_set = set(large_list)
# The sampling range has one value more than the list, so a sampled element
# may be absent from both containers.
elements_to_find = random.sample(range(10000001), 10)

# Measure time for list membership check
# time.perf_counter() is the clock intended for measuring short durations;
# time.time() can have too coarse a resolution for the fast set lookups.
list_start = time.perf_counter()
for e in elements_to_find:
    e in large_list
list_time = time.perf_counter() - list_start

# Measure time for set membership check
set_start = time.perf_counter()
for e in elements_to_find:
    e in large_set
set_time = time.perf_counter() - set_start

print(f"List check: {list_time:.6f} seconds")
print(f"Set check: {set_time:.6f} seconds")
# Guard the division: a set check faster than the clock resolution is 0.0.
if set_time > 0:
    print(f"Set is approximately {list_time / set_time:.2f} times faster.")
else:
    print("Set check was too fast to measure, so no ratio can be given.")

d = {} # Create an empty dictionary


# A dictionary literal: each entry is written as key: value.
myDict = {'drama': 4, 
          'thriller': 2, 
          'romance': 5}
myDict


# Keys of different types (here str and int) can be mixed.
myDict = {'drama': 4, 
          5: 2, 
          'romance': 5}
myDict


# items() yields the (key, value) pairs; list() collects them into a list.
list(myDict.items())


myDict = {'drama': 182, 
          'war': 30, 
          'adventure': 55, 
          'comedy': 46, 
          'family': 24, 
          'animation': 17, 
          'biography': 25}


# Look up the value stored under a key.
myDict['comedy']


# Assigning to a key that is not in the dictionary yet adds it.
myDict['newkey'] = 25
myDict


# Count how often each genre occurs in genreList.
genreList = ["drama", "action", "drama", "horror", "thriller", "comedy", "drama", "comedy"]
myDict = {}
for g in genreList:
    # get() returns 0 for a genre that has not been counted yet
    myDict[g] = myDict.get(g, 0) + 1

myDict


# Three built-in functions that have been used so far.

# print() writes its argument to standard output.
print("Hello Python")


# len() returns the number of characters in the string.
len("ACCCCTTGAACCCC")


# max() returns the largest value in the list.
max([87, 131, 69, 112, 147, 55, 68, 130, 119, 50])

# Syntax template only, not runnable as written: the `...` in the parameter
# list stands for "any further parameters" and `result` is a placeholder.
def function_name(arg1, arg2, ...):
    # Block of code
    return result


def SayHi(name):
    """Print a greeting of the form 'Hi <name>'."""
    greeting = " ".join(("Hi", str(name)))
    print(greeting)


# Greet two people in turn.
for person in ('Anna', 'Mike'):
    SayHi(person)


# Calculate the average duration of movies in the genre 'drama'
# genreDict is not defined in this snippet; it is indexed by genre and each
# entry is summed and measured with len(), so presumably it maps a genre to a
# list of durations in seconds -- confirm against where it is built.
genre = "drama"
average = sum(genreDict[genre])/len(genreDict[genre])  # calculate average length per genre
# Round to whole minutes first and split afterwards. Rounding the minutes
# after the split could give 60 (e.g. 7199 s would print as '1h60min').
hours, minutes = divmod(round(average/60), 60)
reformattedTime = str(hours)+'h'+str(minutes)+'min'
print('The average length for movies in genre '+ genre +\
      ' is '+ reformattedTime)


# Copy the previous code here


# Print the formatted average length for three genres.
# genreDict is not defined in this snippet; presumably it maps a genre to a
# list of durations in seconds -- confirm against where it is built.
for genre in ['drama', 'horror', 'comedy']:
    average = sum(genreDict[genre])/len(genreDict[genre])  # calculate average length per genre
    # Round to whole minutes first and split afterwards. Rounding the minutes
    # after the split could give 60 (e.g. 7199 s would print as '1h60min').
    hours, minutes = divmod(round(average/60), 60)
    reformattedTime = str(hours)+'h'+str(minutes)+'min'
    print('The average length for movies in genre '+ genre +\
          ' is '+ reformattedTime)


# Print the formatted average length for three genres, with the calculation
# moved into a helper function.
# NOTE(review): formatSec is called here with (genre, genreDict), which does
# not match the one-argument formatSec(seconds) defined further down in this
# file; presumably an earlier two-argument version is meant -- confirm.
for genre in ['drama', 'horror', 'comedy']:
    print('The average length for movies in genre '+ genre +\
          ' is '+ formatSec(genre, genreDict))

# The same loop again, unchanged.
for genre in ['drama', 'horror', 'comedy']:
    print('The average length for movies in genre '+ genre +\
          ' is '+ formatSec(genre, genreDict))


WEIGHT = 5


def addWeight(value):
    """Return `value` multiplied by the module-level WEIGHT.

    WEIGHT is only read here, so the function sees the global variable.
    """
    weighted = value * WEIGHT
    return weighted


print(addWeight(4))


# Scope demo: assigning to a name inside a function creates a local variable.
WEIGHT = 5
def changeWeight():
    """Assign 10 to a local WEIGHT and print it; always returns None."""
    # This assignment creates a new local name. The module-level WEIGHT is
    # left untouched because there is no `global WEIGHT` statement.
    WEIGHT = 10
    print("WEIGHT inside the function is", WEIGHT)
    return None
changeWeight()
# Still prints 5: the function only changed its own local variable.
print("WEIGHT outside the function is", WEIGHT)


# Using a module before importing it raises a NameError.
math.sqrt(5)


# After `import math`, its functions are reached through the module name.
import math
math.sqrt(5)


# The bare name sqrt is not defined by `import math`: NameError.
sqrt(5)


# `from math import sqrt` binds the function name directly.
from math import sqrt
sqrt(5)


def formatSec(seconds):
    """Format a duration in seconds as '<hours>h<minutes>min'.

    The duration is rounded to the nearest whole minute before it is split
    into hours and minutes, so the minutes part is always in 0-59
    (7199 seconds gives '2h0min', not '1h60min').

    Args:
        seconds: Duration in seconds (int or float), expected to be
            non-negative.

    Returns:
        str: The formatted duration, e.g. '1h0min' for 3601.
    """
    hours, minutes = divmod(round(seconds/60), 60)
    return str(hours)+'h'+str(minutes)+'min'


def toSec(days, hours, minutes, seconds):
    """Convert days, hours, minutes and seconds to a total number of seconds.

    Returns:
        str: The total followed by the letter 's', e.g. '3601s'.
    """
    # Each unit is converted to seconds and added up from largest to smallest.
    total = 0 + days*60*60*24 + hours*60*60 + minutes*60 + seconds
    return f"{total!s}s"


# Import the two functions from the module myTimeTools. This needs a module
# of that name (presumably a file myTimeTools.py holding the functions shown
# in this lesson -- confirm) to be importable.
from myTimeTools import formatSec, toSec
print(formatSec(3601))


# Keyword arguments make clear which value is which unit.
toSec(days=0, hours=1, minutes=0, seconds=1)


Ref  Alternative
A    T,G                                1/1

python myscript.py

import sys

# sys.argv is the list of command-line words; index 0 is the script itself
# and the arguments given after it follow from index 1.
program_name = sys.argv[0]
arg1 = sys.argv[1] # raises IndexError if the first argument is not provided in the command
arg2 = sys.argv[2] # raises IndexError if the second argument is not provided in the command


# Copy a text file line by line, using hard-coded file names.
input_file = "../downloads/250.imdb"
output_file = "newfile.txt"

# One `with` statement manages both files; each is closed when the block ends.
with open(input_file, "r") as fi, open(output_file, "w") as fo:
    for line in fi:
        fo.write(line)


!ls -lah mynewseq.fa


# Code that can deal with command line arguments
import sys

usage = f"{sys.argv[0]} inputFile outputFile"

# Both file names are required: without them, show the usage line and stop
# with exit status 1.
if not len(sys.argv) >= 3:
    print(usage)
    sys.exit(1)

input_file, output_file = sys.argv[1], sys.argv[2]

# Copy the input file to the output file line by line.
with open(input_file, "r") as fi, open(output_file, "w") as fo:
    for line in fi:
        fo.write(line)


# String concatenation with +: every piece must be a string, so the integer
# pos has to be converted with str() first.
chrom = "5"
pos = 1235651
ref = "C"
alt = "T"
geno = "1/1"
info = chrom + ":" + str(pos) + "_" + ref + "-" + alt + " has genotype: "+ geno
print(info)


# The same text built with an f-string: each {name} is replaced by the
# variable's value and no str() call is needed.
chrom = "5"
pos = 1235651
ref = "C"
alt = "T"
geno = "1/1"
info = f"{chrom}:{pos}_{ref}-{alt} has genotype: {geno}"
print(info)


# The concatenation version again, for comparison.
info = chrom + ":" + str(pos) + "_" + ref + "-" + alt + " has genotype: "+ geno


# The same text built with str.format(): each {} is filled with the
# arguments in the order they are given.
chrom = "5"
pos = 1235651
ref = "C"
alt = "T"
geno = "1/1"
info = "{}:{}_{}-{} has genotype: {}".format(chrom, pos, ref, alt, geno)
print(info)


# f-strings accept values of other types too, here two lists.
genes = ["TP53", "COX2"]
lengths = [355,  458]
print(f"Lengths of genes {genes} are {lengths}")


# A float is inserted as it is when no format is given.
gene = "COX1"
exp_level = 45.123253
print(f"Expression level of gene {gene} is {exp_level}")


# A format specification follows the colon: .2e is scientific notation with
# two digits after the decimal point.
print(f"Expression level of gene {gene} is {exp_level:.2e}")


# Function calls can be placed inside the braces.
seq = "ATCGTAGCCCATAGC"
print(f"The length of sequence {seq} is {len(seq)}")


# Method calls and nested calls work as well. The two adjacent f-string
# literals are joined into one string.
text = "a string with many words  "
print(f"the text \"{text}\" is divided in to a list {text.split()}, " 
      f"and it has {len(text.split())} elements")


# The older %-style formatting: %s takes a string and %f a float.
gene = "COX1"
exp_level = 45.123253
print("Expression level of gene %s is %f"%(gene, exp_level))

Function	Method
Standalone block of code	Function associated with an object
Called independently, e.g. `functionName()`	Called on an instance of a class, e.g. `obj.methodName()`
Not tied to any object or class	Tied to the objects they are called on
Defined outside of a class	Defined within a class

Introduction to Python¶

with Application to Bioinformatics¶

- Day 3¶

Day 3¶

Quiz: Review Day 2¶

Tuples (Q 1&2)¶

Is it true that we can never modify the content of a tuple?¶

How to structure the code (Q 3)¶

Things to Consider When Writing Pseudocode¶

Functions and methods (Q 4&5)¶

What are the differences between a function and a method?¶

Exercise from yesterday¶

Find the movie with the highest rating in the file 250.imdb¶

The with keyword¶

However, for Python 3, the default encoding is usually 'utf-8', so it's not needed.¶

New data type: set¶

Syntax:¶

Set is unordered¶

Set has unique elements¶

Set can only have hashable elements¶

Although tuples are immutable, a tuple that contains mutable items is not hashable. Be careful!¶

Basic operations on set¶

Learn more on https://www.w3schools.com/python/python_sets.asp¶

When the size of list is large, membership checking with set tends to be much faster than with list¶

Day 3, Exercise 1 (~30 min)¶

Find the number of unique genres in the file 250.imdb¶

Session 2¶

New data type: dictionary¶

Syntax:¶

Basic operations on Dictionaries¶

Live Exercise¶

How to add a new key to a dictionary¶

How to count occurrences of each genre¶

Day 3, Exercise 2 (~50 min)¶

Take a break after the exercise (~10 min)¶

PyQuiz 3.1 - set, list and dictionary (before lunch)¶

Lunch¶

Session 3¶

We have used many built-in functions¶

How to write your own functions?¶

Syntax of function¶

Now let's convert this code into a function¶

Why use functions?¶

Scope¶

We will talk more about the scope of variables tomorrow¶

Use external libraries in Python¶

Why use libraries¶

How to define your own libraries¶

A simple library is just a file with some Python functions¶

Summary¶

Day 3, Exercise 3 (~30 min)¶

Take a break after the exercise (~10 min)¶

Session 4¶

How to pass arguments to a Python script from the command line?¶

Not just¶

But also¶

sys.argv¶

How to use it¶

Code to copy a text file¶

String formatting¶

Other (better) ways of formatting strings:¶

format method¶

`f-strings` formatting is recommended¶

It works for other data types as well¶

It works for functions and operations as well¶

The ancient way (for Python 2, but still working)¶

Summary¶

Day 3, Exercise 4 (~30 min)¶

PyQuiz 3.2¶

Project time¶

What are the differences between a `function` and a `method`?¶

Find the movie with the highest rating in the file `250.imdb`¶

The `with` keyword¶

New data type: `set`¶

Basic operations on `set`¶

Learn more on https://www.w3schools.com/python/python_sets.asp ¶

When the size of list is large, membership checking with `set` tends to be much faster than with `list`¶

Find the number of unique genres in the file `250.imdb`¶

New data type: `dictionary`¶

`sys.argv`¶

`format` method¶