set
dict
sys.argv and string formatting
sys.argv
1. Which of the following variables are of the type tuple?
a = (1, 2, 3, 4)
a = ([1, 2], 'a', 'b')
2. What is the difference between a tuple and a list?
A tuple is immutable while a list is mutable
# Tuple vs. list: same kind of contents, different mutability.
myTuple = (1, 2, 3)
myList = [1, 2 ,3]
myList[2] = 4  # lists are mutable: item assignment works
myList  # notebook-style display of the list, now [1, 2, 4]
myTuple[2] = 4  # raises TypeError: tuples do not support item assignment
# NOTE: when run as a plain script, the TypeError above stops execution here.
myTuple = (1, 2, [1,2,3])
print(myTuple)
myTuple[2][2] = 4  # allowed: this mutates the list stored inside the tuple
print(myTuple)  # prints (1, 2, [1, 2, 4])
3. What does pseudocode mean?
Writing down the steps you intend to include in your code in more general language
Writing pseudocode before actual coding is a good habit.
4. What are the following examples of?
len([1, 2, 3, 4])
print("my text")
Functions
5. What are the following examples of?
"my\ttext".split("\t")
[1, 2, 3].pop()
Methods
What is the difference between a function and a method?
| Function | Method |
|---|---|
| Standalone block of code | Function associated with an object |
| Called independently, e.g. functionName() | Called on an instance of a class, e.g. obj.methodName() |
| Not tied to any object or class | Tied to the objects they are called on |
| Defined outside of a class | Defined within a class |
6. Calculate the average of the list [1,2,3.5,5,6.2]
to one decimal, using Python
myList = [1, 2, 3.5, 5 ,6.2]
# Mean = sum of the elements divided by their count, rounded to one decimal.
round(sum(myList)/len(myList),1)
7. Take the list ['I','know','Python']
as input and output the string 'I KNOW PYTHON'
my_list = ['I','know','Python']
# Join the words with single spaces, then upper-case the whole string.
" ".join(my_list).upper()
# Code Snippet for Finding the Movie with the Highest Rating
# Note that this is just one of the solutions
movieList = []        # (rating, title) tuples, one per data line
highestRating = -100  # start value; replaced by the first rating above it

with open('../downloads/250.imdb', 'r') as fh:
    for line in fh:
        # Skip blank lines and header/comment lines (those starting with '#').
        if not line.strip() or line.startswith('#'):
            continue
        cols = line.strip().split('|')
        rating = float(cols[1].strip())  # second '|'-separated column
        title = cols[6].strip()          # seventh '|'-separated column
        movieList.append((rating, title))
        if rating > highestRating:
            highestRating = rating

print("Movie(s) with highest rating " + str(highestRating) + ":" )
# Several movies can share the top rating, so print every match.
for movieRating, movieTitle in movieList:
    if movieRating == highestRating:
        print(movieTitle)

# NOTE(review): the original printed `sortedMovieList` without defining it
# anywhere in view (NameError). Sorting by rating, highest first, is the
# presumed intent -- confirm.
sortedMovieList = sorted(movieList, reverse=True)
print(sortedMovieList)
The with keyword
Use the with keyword to ensure the file handle is closed automatically
file = open("filename.txt", "r")
content = file.read()
file.close()
# The context manager closes the file when the block exits, so no explicit
# file.close() call is needed.
with open("filename.txt", "r") as file:
    content = file.read()

# Same read, with the text encoding stated explicitly.
with open("filename.txt", "r", encoding='utf-8') as file:
    content = file.read()
set
setName = set()  # Create an empty set
setName = {1,2,3,4,5}  # Create a populated set
setName = set([1,2,3,4,5])  # Alternative way

# A set can be iterated over like any other collection.
mySet = {1,2,3,4,5}
for e in mySet:
    print(e)

# Duplicates are stored only once.
mySet = {"1", "1", "2", "2", "3"}
print(mySet)

# Elements of different (hashable) types can be mixed.
mySet = {1, "tga", (3, 4), 5.6, False}
print(mySet)

# Unhashable elements are rejected: a list, or a tuple that contains a list,
# raises TypeError. The error is caught and printed so that the remaining
# examples still run when this is executed as one script.
try:
    mySet = {1, "tga", [3, 4], 5.6, False}
except TypeError as err:
    print("TypeError:", err)

try:
    mySet = {1, "tga", (3, 4, [1, 2]), 5.6, False}
except TypeError as err:
    print("TypeError:", err)
set
# Add elements to a set
myset = set()
myset.add(1)
myset.add(100)
myset.add(100)  # adding an element that is already present changes nothing
myset.add(100)
print(myset)
# get the number of elements of a set
# (the original called len(1), which raises TypeError: int has no len())
len(myset)
# membership checking
12 in myset
Membership checking with set tends to be much faster than with list
import random
import time

# Create a large list and set
large_list = list(range(10000000))
large_set = set(large_list)
elements_to_find = random.sample(range(10000001), 10)

# Measure time for list membership check.
# perf_counter() is used instead of time() because it is the clock intended
# for measuring short durations.
list_time = time.perf_counter()
for e in elements_to_find:
    e in large_list
list_time = time.perf_counter() - list_time

# Measure time for set membership check
set_time = time.perf_counter()
for e in elements_to_find:
    e in large_set
set_time = time.perf_counter() - set_time

print(f"List check: {list_time:.6f} seconds")
print(f"Set check: {set_time:.6f} seconds")
# Guard the ratio: a zero elapsed time would otherwise raise ZeroDivisionError.
if set_time > 0:
    print(f"Set is approximately {list_time / set_time:.2f} times faster.")
else:
    print("Set check was too fast to measure a ratio.")
dictionary
A dictionary stores key-value pairs. Keys must be unique and hashable (like the elements of a set), while the values associated with keys can be of any data type and can be duplicated
d = dict()  # Create an empty dictionary
d = dict(key1=1, key2=2, key3=3)  # create a populated dictionary

# String keys mapped to integer values.
myDict = {
    'drama': 4,
    'thriller': 2,
    'romance': 5,
}
myDict

# Keys of different types can be mixed in one dictionary.
myDict = {
    'drama': 4,
    5: 2,
    'romance': 5,
}
myDict
list(myDict.items())  # the (key, value) pairs as a list of tuples

# Number of movies per genre.
myDict = {
    'drama': 182,
    'war': 30,
    'adventure': 55,
    'comedy': 46,
    'family': 24,
    'animation': 17,
    'biography': 25,
}
myDict['comedy']  # look up a value by its key
How many movies are in the comedy genre?
Add 29 movies in the fantasy genre to this dictionary.
Increase the number of movies in the comedy genre by one.
myDict['newkey'] = 25
myDict
genreList = ["drama", "action", "drama", "horror", "thriller", "comedy", "drama", "comedy"]

# Count how many times each genre occurs in genreList.
myDict = {}
for g in genreList:
    if g not in myDict:
        myDict[g] = 1   # first occurrence: create the entry
    else:
        myDict[g] += 1  # seen before: increment the count
myDict
# Examples of calling built-in functions.
print("Hello Python")
len("ACCCCTTGAACCCC")  # number of characters in the string
max([87, 131, 69, 112, 147, 55, 68, 130, 119, 50])  # largest element of the list
def function_name(arg1, arg2, ...):
# Block of code
return result
def SayHi(name):
    """Print a greeting of the form ``Hi <name>``.

    Args:
        name: The value to greet; it is passed to ``print`` after "Hi".
    """
    print("Hi", name)


SayHi('Anna')
SayHi('Mike')
# Calculate the average duration of movies in the genre 'drama'
# NOTE(review): genreDict is not defined in this snippet; presumably it maps
# each genre to a list of durations in seconds -- confirm.
genre = "drama"
average = sum(genreDict[genre])/len(genreDict[genre])  # calculate average length per genre
# Round to whole minutes first and only then split into hours and minutes.
# Rounding the leftover minutes on their own could produce "60min"
# (e.g. 7199 s -> "1h60min" instead of "2h0min").
hours, minutes = divmod(round(average/60), 60)
reformattedTime = str(hours)+'h'+str(minutes)+'min'
print('The average length for movies in genre '+ genre +
      ' is '+ reformattedTime)
# Copy the previous code here
# NOTE(review): genreDict is not defined in this snippet; presumably it maps
# each genre to a list of durations in seconds -- confirm.
for genre in ['drama', 'horror', 'comedy']:
    average = sum(genreDict[genre])/len(genreDict[genre])  # calculate average length per genre
    # Round to whole minutes before splitting, so the minutes part can never
    # be printed as "60min".
    hours, minutes = divmod(round(average/60), 60)
    reformattedTime = str(hours)+'h'+str(minutes)+'min'
    print('The average length for movies in genre '+ genre +
          ' is '+ reformattedTime)
# Same report as before, with the formatting moved into a function.
# NOTE(review): formatSec is called here with (genre, genreDict); neither that
# two-argument version nor genreDict is defined in this snippet -- presumably
# both come from an earlier cell. Confirm.
for genre in ['drama', 'horror', 'comedy']:
    print('The average length for movies in genre '+ genre +
          ' is '+ formatSec(genre, genreDict))

for genre in ['drama', 'horror', 'comedy']:
    print('The average length for movies in genre '+ genre +
          ' is '+ formatSec(genre, genreDict))
WEIGHT = 5


def addWeight(value):
    """Return ``value`` multiplied by the module-level ``WEIGHT``.

    The function only reads ``WEIGHT``, so the global name is visible inside
    it without any declaration.
    """
    return value * WEIGHT


print(addWeight(4))
WEIGHT = 5


def changeWeight():
    """Show that assigning to a name inside a function creates a local.

    Prints the local value (10) and returns ``None``; the module-level
    ``WEIGHT`` is left unchanged.
    """
    WEIGHT = 10  # local variable: shadows the global, does not modify it
    print("WEIGHT inside the function is", WEIGHT)
    return None


changeWeight()
print("WEIGHT outside the function is", WEIGHT)
# Using a module before importing it fails.
math.sqrt(5)  # NameError if `math` has not been imported yet
import math
math.sqrt(5)  # works: the module name `math` is now bound
sqrt(5)  # NameError if `sqrt` has not been imported yet: `import math` binds only `math`
from math import sqrt
sqrt(5)  # works: `sqrt` was imported into the current namespace
def formatSec(seconds):
    """Format a duration given in seconds as ``'<hours>h<minutes>min'``.

    The duration is rounded to the nearest whole minute before it is split
    into hours and minutes. Rounding the leftover minutes on their own could
    yield 60 (e.g. 3599 s used to give ``'0h60min'``).

    Args:
        seconds: Duration in seconds (int or float); expected to be
            non-negative.

    Returns:
        A string such as ``'1h0min'`` for 3601 seconds.
    """
    total_minutes = round(seconds / 60)
    hours, minutes = divmod(total_minutes, 60)
    return str(hours)+'h'+str(minutes)+'min'
def toSec(days, hours, minutes, seconds):
    """Convert a duration split into days/hours/minutes/seconds to seconds.

    Args:
        days: Number of days.
        hours: Number of hours.
        minutes: Number of minutes.
        seconds: Number of seconds.

    Returns:
        The total number of seconds as a string with an ``'s'`` suffix,
        e.g. ``'3601s'`` for one hour and one second.
    """
    total = days*60*60*24 + hours*60*60 + minutes*60 + seconds
    return str(total)+'s'
# Import the two helpers by name from the module myTimeTools.
# NOTE(review): presumably myTimeTools.py is the file that holds the
# formatSec/toSec definitions -- confirm.
from myTimeTools import formatSec, toSec
print(formatSec(3601))
# Keyword arguments make it explicit which value is which unit.
toSec(days=0, hours=1, minutes=0, seconds=1)
Ref Alternative
A T,G 1/1
sys.argv and string formatting
sys.argv
sys.argv
import sys

# sys.argv is the list of command-line arguments; element 0 is the program name.
program_name = sys.argv[0]
arg1 = sys.argv[1] # index error if the first argument is not provided in the command
arg2 = sys.argv[2] # index error if the second argument is not provided in the command
input_file = "../downloads/250.imdb"
output_file = "newfile.txt"

# Copy the input file to the output file line by line. Both files are opened
# in one `with` statement, so both are closed when the block exits.
with open(input_file, "r") as fi, open(output_file, "w") as fo:
    for line in fi:
        fo.write(line)
!ls -lah mynewseq.fa
# Code that can deal with command line arguments
import sys

usage = f"{sys.argv[0]} inputFile outputFile"

# Both an input and an output path are required; otherwise print the usage
# line and exit with a non-zero status.
if len(sys.argv) < 3:
    print(usage)
    sys.exit(1)

input_file = sys.argv[1]
output_file = sys.argv[2]

# Copy the input file to the output file line by line.
with open(input_file, "r") as fi, open(output_file, "w") as fo:
    for line in fi:
        fo.write(line)
# Build a message by string concatenation.
chrom = "5"
pos = 1235651
ref = "C"
alt = "T"
geno = "1/1"
# pos is an int, so it must be converted with str() before it can be
# concatenated with the other strings.
info = chrom + ":" + str(pos) + "_" + ref + "-" + alt + " has genotype: "+ geno
print(info)
f-strings (since python 3.6)
# Build the same message with an f-string.
chrom = "5"
pos = 1235651
ref = "C"
alt = "T"
geno = "1/1"
# Expressions inside {} are formatted in place; no explicit str() is needed for pos.
info = f"{chrom}:{pos}_{ref}-{alt} has genotype: {geno}"
print(info)
# The equivalent concatenation, for comparison.
info = chrom + ":" + str(pos) + "_" + ref + "-" + alt + " has genotype: "+ geno
format method
chrom = "5"
# NOTE: `chrom` is used below but not assigned in these lines; it must
# already be defined at this point.
pos = 1235651
ref = "C"
alt = "T"
geno = "1/1"
# str.format fills each {} placeholder with the arguments, in order.
info = "{}:{}_{}-{} has genotype: {}".format(chrom, pos, ref, alt, geno)
print(info)
# Whole lists can be embedded in an f-string.
genes = ["TP53", "COX2"]
lengths = [355, 458]
print(f"Lengths of genes {genes} are {lengths}")
gene = "COX1"
exp_level = 45.123253
print(f"Expression level of gene {gene} is {exp_level}")
# A format spec after ':' controls the presentation; .2e is scientific
# notation with two decimals.
print(f"Expression level of gene {gene} is {exp_level:.2e}")
# Any expression, including a function call, can go inside the braces.
seq = "ATCGTAGCCCATAGC"
print(f"The length of sequence {seq} is {len(seq)}")
text = "a string with many words "
# Adjacent string literals inside the parentheses are joined into one string.
print(f"the text \"{text}\" is divided in to a list {text.split()}, "
f"and it has {len(text.split())} elements")
gene = "COX1"
exp_level = 45.123253
# Older %-style formatting: %s inserts a string, %f a float.
print("Expression level of gene %s is %f"%(gene, exp_level))
Use sys.argv to deal with arguments passed to the Python script from the command line
sys.argv[0] is the program name
sys.argv[1] is the first argument, and so on
f-string formatting is a convenient and recommended way to format strings