The variable inside the function has a separate scope and does not affect the one outside
name = "Max"
def changeName():
    """Show that assigning to a name inside a function creates a local variable.

    The assignment below binds a new, function-local ``name``; a module-level
    variable with the same name is left untouched.
    """
    # Local binding only: no `global` statement, so outer scope is unaffected.
    name = "Niko"
    print(f"name inside the function: {name}")
changeName()
print(f"name outside of the function: {name}")
Keyword arguments can be given in any order, while positional arguments depend on the function's order
def sum3(a, b, c):
    """Print each of the three arguments and return their sum.

    Args:
        a: First operand.
        b: Second operand.
        c: Third operand.

    Returns:
        The result of ``a + b + c``.
    """
    # Echo the arguments so the caller can see which value landed in which
    # parameter (useful when comparing positional and keyword calls).
    print(f"a={a}")
    print(f"b={b}")
    print(f"c={c}")
    return a + b + c
sum3(1, 2, 3)
def add(x, y, z=0):
    """Return the sum of ``x``, ``y`` and the optional ``z``.

    Args:
        x: First operand.
        y: Second operand.
        z: Optional third operand; defaults to 0 so two-argument calls work.

    Returns:
        The result of ``x + y + z``.
    """
    return x + y + z
print(add(1, 2))
print(add(1, y=2, z=3))
"""
and '''
can be used for docstrings.
# Add docstring and comments to the following function
def add(x, y, z=0):
    """Add two values, plus an optional third one.

    Args:
        x: First operand.
        y: Second operand.
        z: Optional third operand (default 0).

    Returns:
        The result of ``x + y + z``.
    """
    # z defaults to 0, so add(x, y) is simply x + y
    return x + y + z
help(print)
math
in the same script?
import math
math = "great"
math.sqrt(5)
If you run
import myModule
and then update myModule
and then reload with import myModule
in Jupyter notebook, the module will not be updated. You will need to run
from importlib import reload
reload(myModule)
or
del sys.modules['myModule']
df
where age
is greater than 30, which command would you use?
import pandas as pd
df = pd.DataFrame({
'name': ['Alice', 'Bob', 'Charlie', 'David'],
'age': [25, 30, 35, 40],
'height': [165.4, 175.3, 168.5, 180.6]
})
print(df)
df[df['age'] > 30]
df
, which method should you use?
If you don't specify the key columns, it renames the rows
df = pd.DataFrame({
'name': ['Alice', 'Bob', 'Charlie', 'David'],
'age': [25, 30, 35, 40],
'height': [165.4, 175.3, 168.5, 180.6]
})
"MVR???A"
"ATG???TAG"
!grep -E "furniture.*sell" ../downloads/blocket_listings.txt
.
matches any character (once)
?
repeat previous pattern 0 or 1 times
*
repeat previous pattern 0 or more times
+
repeat previous pattern 1 or more times
\w
matches any letter or number, and the underscore
\d
matches any digit
\D
matches any non-digit
\s
matches any whitespace (spaces, tabs, ...)
\S
matches any non-whitespace
\w
matches any letter or number, and the underscore
\w+
\d
matches any digit
\d+
\s
matches any whitespace (spaces, tabs, ...)
\s+
[abc]
matches a single character defined in this set {a, b, c}
[^abc]
matches a single character that is not a, b or c
[a-z]
matches all letters between a and z (the English alphabet).
[a-z]+
matches any (lowercased) English word.
salt?pet[er]+
Example - finding patterns in a VCF file
1 920760 rs80259304 T C . PASS AA=T;AC=18;AN=120;DP=190;GP=1:930897;BN=131 GT:DP:CB 0/1:1:SM 0/0:4:SM...
0/0:1:SM
0/0:4:SM
...
"[01]/[01]"
(or "\d/\d")
\s[01]/[01]:
!grep -E "[01]/[01]" ../downloads/genotypes_small.vcf | head -n 2
Example - finding patterns in vcf
1 920760 rs80259304 T C . PASS AA=T;AC=18;AN=120;DP=190;GP=1:930897;BN=131 GT:DP:CB 0/1:1:SM 0/0:4:SM...
... 1/1:... ... 1/1:... ...
.*1/1.*1/1.*
.*\s1/1:.*\s1/1:.*
!grep -E ".*\s1/1:.*\s1/1:.*" ../downloads/genotypes_small.vcf | head -n 1
.
matches any character (once)
?
repeat previous pattern 0 or 1 times
*
repeat previous pattern 0 or more times
+
repeat previous pattern 1 or more times
\w
matches any letter or number, and the underscore
\d
matches any digit
\D
matches any non-digit
\s
matches any whitespace (spaces, tabs, ...)
\S
matches any non-whitespace
[abc]
matches a single character defined in this set {a, b, c}
[^abc]
matches a single character that is not a, b or c
[a-z]
matches any (lowercased) letter from the English alphabet
.*
matches anything
# Import module
import re
pattern = "col[ou]+r.*"
text = "The colour of the wall is very vibrant, \
but the color of the sky is even more spectacular."
# Try to find a hit
result = re.search(pattern, text)
print(result)
help(re.search)
result.group()
: Return the string matched by the expression
result.start()
: Return the starting position of the match
result.end()
: Return the ending position of the match
result.span()
: Return both (start, end)
print(result.start())
print(result.end())
print(result.span())
print(f"Found text: '{result.group()}'")
pattern = "col[ou]+r\w*"
result = re.search(pattern, text)
print(result)
print(result.span())
print(f"Found text: '{result.group()}'")
re.finditer
# Iterate over every match of the pattern with re.finditer.
# Raw string: keeps the backslash in \w for the regex engine instead of
# relying on Python passing an unknown escape sequence through unchanged.
pattern = r"col[ou]+r\w*"
text = "The colour of the wall is very vibrant, \
but the color of the sky is even more spectacular."
for result in re.finditer(pattern, text):
    print(result)

# Search pattern in string
# Compile once, then reuse the compiled pattern object for several searches.
pattern = r"col[ou]+r\w*"
p_find_colour = re.compile(pattern)
p_find_colour.search(text)
for result in p_find_colour.finditer(text):
    print(f"Find colour at the position {result.span()}, the word is '{result.group()}'")
# findall returns the matched strings directly instead of match objects.
p_find_colour.findall(text)
# Remember, [a-z]+ matches any lower case english word
p = re.compile('[a-z]+')
result = p.search('123 ATGAAA 456')
print(result)
p = re.compile('[a-z]+', flags=re.IGNORECASE)
result = p.search('123 ATGAAA 456')
result
text = "The first full stop is here: ."
# An unescaped "." matches ANY character, so this finds the first character
# of the text rather than the full stop.
p_find_fullstop = re.compile(".")
result = p_find_fullstop.search(text)
print(f"Found {result.group()} at position {result.start()}")
# Use escape character to search
# Raw string so the backslash reaches the regex engine as written.
p_find_fullstop = re.compile(r'\.')
# Search with the pattern compiled just above (not an unrelated earlier one).
result = p_find_fullstop.search(text)
print(f"Found {result.group()} at position {result.start()}")
\
escaping a character
^
beginning of the string
$
end of string
|
boolean or
^hello$
p_find_hello = re.compile('^hello$')
text = "hello Python"
result = p_find_hello.search(text)
print(result)
text = "hello"
result = p_find_hello.search(text)
print(result)
salt?pet(er|re) | nit(er|re) | KNO3
# Alternation: match any of the accepted spellings/names of the compound.
p_find_salpeter = re.compile('salt?pet(er|re)|nit(er|re)|KNO3')
text = "saltepter or salpeter88 or KNO3 or niter or nitre, \
just too many forms of salpeter!"
# Print one match object per hit; misspellings that fit no alternative
# are skipped.
for result in p_find_salpeter.finditer(text):
    print(result)
text = "Do it becuase I say so, not becuase you want!"
# Spell the word because correctly
p_fix_because = re.compile('becuase')
p_fix_because.sub('because', text)
print(text)
text = p_fix_because.sub('because', text)
print(text)
# Remove additional spaces
p_remove_extra_space = re.compile('\s+')
p_remove_extra_space.sub(' ', text)
p = re.compile()
p.search(text)
Substitution
p.sub(replacement, text)
Typical code structure for text matching:
pattern = re.compile( ... )
match = pattern.search('string goes here')
if match:
print('Match found: ', match.group())
else:
print('No match')
# Basic file-reading pattern: the with-block closes the file automatically.
with open('myfile.txt', 'r') as fh:
    # Iterating over the handle yields one line at a time.
    for line in fh:
        do_stuff(line)
# Count the lines read and collect the per-line results.
iterations = 0
information = []
with open('myfile.txt', 'r') as fh:
    for line in fh:
        iterations += 1
        # += extends the list with whatever do_stuff returns for this line
        information += do_stuff(line)
Base types:
str "hello"
int 5
float 5.2
bool True
Collections:
list ["a", "b", "c"]
dict {"a": "alligator", "b": "bear", "c": "cat"}
tuple ("this", "that")
set {"drama", "sci-fi"}
iterations = 0
score = 5.2
# variable = literal
+, -, *,... # mathematical
and, or, not # logical
==, != # (in)equality
<, >, <=, >= # comparison
in # membership
value = 4
nextvalue = 1
nextvalue += value
print(f"nextvalue: {nextvalue}, value: {value}")
x = 5
y = 7
z = 0
x > 4 or y == 7 and z > 1
(x > 4 or y == 7) and z > 1
Works like a list of characters
mystr = "one"
mystr += " two" # string concatenation
mystr
len(mystr) # get the length
"one" in mystr # membership checking
mystr = "one"
mystr[1] = "W"
mystr = "one"
print(mystr)
mystr = "two"
print(mystr)
mystr = "one"
print(f"mystr = {mystr}, address = {id(mystr)}")
mystr = "two"
print(f"mystr = {mystr}, address = {id(mystr)}")
s.strip() # remove unwanted spacing
s.split() # split line into columns
s.upper(), s.lower() # change the case
p = re.compile('A.A.A')
p.search(dnastring)
p = re.compile('T')
p.sub('U', dnastring)
import re
p = re.compile('p.*\sp') # the greedy star!
p.search('a python programmer writes python code').group()
Can contain strings, integer, booleans...
Most collections are mutable (not tuple): you can add, remove, change values
Lists:
mylist.append('value')
Dicts:
mydict['key'] = 'value'
Sets:
myset.add('value')
Test for membership:
value in myobj
Check size:
len(myobj)
todolist = ["work", "sleep", "eat", "work"]
todolist.sort()
todolist.reverse()
todolist[2]
todolist[-1]
todolist[2:6]
todolist = ["work", "sleep", "eat", "work"]
todolist.sort()
print(todolist)
todolist.reverse()
print(todolist)
todolist[2]
todolist[-1]
todolist[2:]
mydict = {"a": "alligator", "b": "bear", "c": "cat"}
counter = {"cats": 55, "dogs": 8}
mydict["a"]
mydict.keys()
mydict.values()
# Tally cats versus every other animal in a dictionary.
counter = {'cats': 0, 'others': 0}
for animal in ['zebra', 'cat', 'dog', 'cat']:
    if animal == 'cat':
        counter['cats'] += 1
    else:
        counter['others'] += 1
# Bare expression: displays the final counts when run as a notebook cell.
counter
Bag of values
myset = {"drama", "sci-fi"}
myset.add("comedy")
myset.remove("drama")
set1 = set(["1", "2", "3", "4", "5"])
set1
set1.add("1")
set1
set2 = set(["3", "6"])
set1.intersection(set2)
set1.union(set2)
set1.difference(set2)
tup = (max_length, sequence)
length = tup[0] # get content at index 0
tup = (2, 'xy')
tup[0]
tup[0] = 2
def find_longest_seq(file):
# some code here...
return length, sequence
answer = find_longest_seq(filepath)
print('length', answer[0])
print('sequence', answer[1])
answer = find_longest_seq(filepath) # return as a tuple
length, sequence = find_longest_seq(filepath) # return as two variables
if count > 10:
print('big')
elif count > 5:
print('medium')
else:
print('small')
shopping_list = ['bread', 'egg', ' butter', 'milk']
tired = True
if len(shopping_list) > 4:
print('Really need to go shopping!')
elif not tired:
print('Not tired? Then go shopping!')
else:
print('Better to stay at home')
# Read a file line by line, treating comment lines and data lines differently.
information = []
with open('myfile.txt', 'r') as fh:
    for line in fh:
        if is_comment(line):
            use_comment(line)
        else:
            information.append(read_data(line))  # read_data returns a list
# Same reading task written with an explicit while loop and readline().
information = []
with open('myfile.txt', 'r') as fh:
    # Read the first line
    line = fh.readline()
    # Continue to read lines until an empty string is returned
    while line:
        information.append(read_data(line))  # read_data returns a list
        # Read the next line from the same handle opened above
        line = fh.readline()
For
loop
is a control flow statement that performs operations over a known amount of steps.
While
loop
is a control flow statement that allows code to be executed repeatedly based on a given Boolean condition.
Which one to use?
For
loops - standard for iterations over lists and other iterable objects
While
loops - more flexible and can iterate an unspecified number of times
# For loop example
# You know the number of iterations beforehand
user_input = "thank god it's friday"
for letter in user_input:
    print(letter.upper())

# While loop example
# The number of iterations is unknown beforehand
i = 0
go_on = True
while go_on:
    c = user_input[i]
    print(c.upper())
    i += 1
    # Stop after the first 'd' has been printed.
    if c == 'd':
        go_on = False
break
- stop the loop
continue
- go on to the next iteration
# example for break
user_input = "thank god it's friday"
for letter in user_input:
if letter == 'd':
break
print(letter.upper())
# example for continue
user_input = "thank god it's friday"
for letter in user_input:
if letter == ' ' or letter == '\'': # Skip spaces and apostrophes
continue
print(letter.upper())
Watch out!
# DON'T RUN THIS
i = 0
while i < 10:
    print(user_input[i])
While loops may be infinite!
In: Read from files
with open(filename, 'r') as fh:
for line in fh:
do_stuff(line)
Read information from command line: sys.argv[1:]
Out: Write to files:
with open(filename, 'w') as fh:
fh.write(text)
print('my_information')
Open files should be closed:
fh.close()
or use the with
clause
with open(filename, "r") as fh:
do_something
None
def prettyprinter(name, value, delim=":", end=None):
    """Build the sentence ``"The <name> is <delim> <value>"``.

    Args:
        name: Label placed after "The "; must be a string.
        value: Text placed after the delimiter; must be a string.
        delim: Separator inserted between "is" and the value (default ":").
        end: Optional suffix appended to the sentence; skipped when it is
            None or empty.

    Returns:
        The assembled string.
    """
    out = "The " + name + " is " + delim + " " + value
    # Truthiness test: both None and "" mean "no suffix".
    if end:
        out += end
    return out
None
)Any longer pieces of code that have been used and will be re-used should be saved
Save it as a file mycode.py
To run it:
python3 mycode.py
or python mycode.py
Import it:
import mycode
""" This is a doc-string explaining what the purpose of this function/module is """
# This is a comment that helps understanding the code
Endless possibilities!
Google Form for anonymous evaluation
Canvas -> Module -> Day 5 -> Exercise 3 - day 5