# Module-level (global) variable.
name = "Max"
def changeName():
    """Assign "Niko" to a function-local `name` and print it."""
    # This assignment creates a new local variable; there is no `global`
    # statement, so the module-level `name` is left untouched.
    name = "Niko"
    print(f"name inside the function: {name}")
changeName()
# Still prints the module-level value assigned before the function was defined.
print(f"name outside of the function: {name}")


def sum3(a, b, c):
    """Print each argument as ``name=value`` and return ``a + b + c``."""
    for label, number in (("a", a), ("b", b), ("c", c)):
        print(f"{label}={number}")
    # Same left-to-right evaluation as a + b + c.
    total = a + b
    total = total + c
    return total
sum3(1, 2, 3)


def add(x, y, z=0):
    """Return ``x + y + z``; ``z`` is optional and defaults to 0."""
    partial = x + y
    return partial + z
print(add(1, 2))
print(add(1, y=2, z=3))


# Add docstring and comments to the following function
def add(x, y, z=0):
    """Return the sum of ``x``, ``y`` and ``z``.

    Args:
        x: First operand.
        y: Second operand.
        z: Optional third operand; defaults to 0.

    Returns:
        The result of ``x + y + z``.
    """
    # Evaluated left to right: (x + y) + z.
    return x + y + z


help(print)


import math
# Rebinding `math` to a string shadows the module imported on the previous line.
math = "great"
# `math` is now a str, and str has no `sqrt` attribute, so this line raises AttributeError.
math.sqrt(5)

import myMoudle


import pandas as pd
# Build a small example table: each dict key becomes a column, each list entry a row.
df = pd.DataFrame({
    'name': ['Alice', 'Bob', 'Charlie', 'David'],
    'age': [25, 30, 35, 40],
    'height': [165.4, 175.3, 168.5, 180.6]
})
print(df)
# Boolean-mask selection: keep only the rows whose 'age' value is greater than 30.
df[df['age'] > 30]


# Recreate the example table with 'name', 'age' and 'height' columns (four rows).
df = pd.DataFrame({
    'name': ['Alice', 'Bob', 'Charlie', 'David'],
    'age': [25, 30, 35, 40],
    'height': [165.4, 175.3, 168.5, 180.6]
})


!grep -E "furniture.*sell" ../downloads/blocket_listings.txt


!grep -E "[01]/[01]" ../downloads/genotypes_small.vcf | head -n 2


!grep -E ".*\s1/1:.*\s1/1:.*" ../downloads/genotypes_small.vcf | head -n 1


# Import module
import re


# "col", then one or more of "o"/"u", then "r", then `.*` which greedily
# consumes every remaining character up to the end of the line.
pattern = "col[ou]+r.*"
text = "The colour of the wall is very vibrant, \
but the color of the sky is even more spectacular."
# Try to find a hit
result = re.search(pattern, text)
print(result)
help(re.search)


# start()/end() give the boundaries of the hit, span() gives both as a tuple,
# and group() gives the matched text itself.
print(result.start())
print(result.end())
print(result.span())
print(f"Found text: '{result.group()}'")


# Raw string so that `\w` reaches the regex engine as-is instead of being
# treated as an (invalid) Python string escape.
# `\w*` only consumes word characters, so the hit ends with the word itself.
pattern = r"col[ou]+r\w*"
result = re.search(pattern, text)
print(result)
print(result.span())
print(f"Found text: '{result.group()}'")

re.finditer


# Raw string so that `\w` reaches the regex engine as-is instead of being
# treated as an (invalid) Python string escape.
pattern = r"col[ou]+r\w*"
text = "The colour of the wall is very vibrant, \
but the color of the sky is even more spectacular."

# finditer() yields one match object per non-overlapping hit, left to right.
for result in re.finditer(pattern, text):
    print(result)


# Search pattern in string
# Raw string so that `\w` reaches the regex engine as-is instead of being
# treated as an (invalid) Python string escape.
pattern = r"col[ou]+r\w*"
# Compile once so the same pattern object can be reused for several searches.
p_find_colour = re.compile(pattern)


# search() returns a match object for the first hit only (or None).
p_find_colour.search(text)


# finditer() yields one match object per non-overlapping hit.
for result in p_find_colour.finditer(text):
    print(f"Find colour at the position {result.span()}, the word is '{result.group()}'")


# findall() collects all hits into a list.
p_find_colour.findall(text)


# Remember, [a-z]+ matches any run of one or more lower-case English letters
p = re.compile('[a-z]+')
# The searched string contains only digits, spaces and upper-case letters.
result = p.search('123 ATGAAA 456')
print(result)


# re.IGNORECASE makes [a-z] match upper-case letters as well.
p = re.compile('[a-z]+', flags=re.IGNORECASE)

result = p.search('123 ATGAAA 456')
result


text = "The first full stop is here: ."
# An unescaped "." is a wildcard that matches any character except a newline,
# so this pattern hits the very first character of the text.
p_find_fullstop = re.compile(".")

result = p_find_fullstop.search(text)
print(f"Found {result.group()} at position {result.start()}")


# Use escape character to search
# (raw string: the backslash is passed to the regex engine unchanged)
p_find_fullstop = re.compile(r'\.')

# Search with the pattern compiled just above, not the unrelated `p`.
result = p_find_fullstop.search(text)
print(f"Found {result.group()} at position {result.start()}")


# ^ anchors the match at the start of the string and $ at its end,
# so text before or after "hello" prevents a match.
p_find_hello = re.compile('^hello$')
text = "hello Python"
result = p_find_hello.search(text)
print(result)  # None: " Python" follows "hello"


text = "hello"
result = p_find_hello.search(text)
print(result)  # a match object covering "hello"


# Three alternatives separated by "|":
#   "sal" + optional "t" + "pet" + ("er" or "re"),
#   "nit" + ("er" or "re"),
#   the literal "KNO3".
p_find_salpeter = re.compile('salt?pet(er|re)|nit(er|re)|KNO3')
text = "saltepter or salpeter88 or KNO3 or niter or nitre, \
just too many forms of salpeter!"
# Print every hit of any of the alternatives, in order of appearance.
for result in p_find_salpeter.finditer(text):
    print(result)


text = "Do it   becuase   I say so,     not becuase you want!"


# Spell the word because correctly
p_fix_because = re.compile('becuase')
# sub() returns a new string; the result is discarded here, so `text` is unchanged.
p_fix_because.sub('because', text)
print(text)


# Rebind `text` to the substituted string so the correction is kept.
text = p_fix_because.sub('because', text)
print(text)


# Remove additional spaces
# Raw string keeps `\s` as a regex token rather than an invalid Python string
# escape; `\s+` matches each run of one or more whitespace characters.
p_remove_extra_space = re.compile(r'\s+')
# Every such run is replaced by a single space; the new string is the result.
p_remove_extra_space.sub(' ', text)

# Template: the regular expression to compile goes between the parentheses.
p = re.compile()

# Template: replace the `...` placeholder with the pattern to search for.
pattern = re.compile( ... )
match = pattern.search('string goes here')
# search() gives None when nothing matches, hence the truthiness test.
if match:
    print('Match found: ', match.group())
else:
    print('No match')

# Open the file for reading and hand each line to do_stuff(), one at a time.
# The `with` header must end with a colon to introduce its block.
with open('myfile.txt', 'r') as fh:
    for line in fh:
        do_stuff(line)

# Collected results and the number of lines processed so far.
information = []
iterations = 0

with open('myfile.txt', 'r') as fh:
    # Counting from 1 leaves `iterations` equal to the number of lines read;
    # for an empty file the loop body never runs and it stays 0.
    for iterations, line in enumerate(fh, start=1):
        information.extend(do_stuff(line))

str     "hello"
int     5
float   5.2
bool    True

iterations = 0
score      = 5.2
# variable = literal

+, -, *,...   # mathematical
and, or, not  # logical 
==, !=        # (in)equality
<, >, <=, >=  # comparison
in            # membership


value = 4
nextvalue = 1
# += adds `value` onto `nextvalue`; `value` itself is not modified.
nextvalue += value
print(f"nextvalue: {nextvalue}, value: {value}")


x = 5
y = 7
z = 0

# `and` binds tighter than `or`, so this reads as: x > 4 or (y == 7 and z > 1).
# With the values above it evaluates to True.
x > 4 or y == 7 and z > 1


# The parentheses force the `or` to be evaluated first; z > 1 is False,
# so the whole expression is False.
(x > 4 or y == 7) and z > 1


mystr = "one"


mystr += " two" # string concatenation
mystr


len(mystr) # get the length


"one" in mystr # membership checking


mystr = "one"
# Strings are immutable: item assignment is not supported and raises TypeError.
mystr[1] = "W"


mystr = "one"
print(mystr)
# Rebinding the name to a different string is allowed; no string object is modified.
mystr = "two"
print(mystr)


mystr = "one"
# id() identifies the object the name currently refers to.
print(f"mystr = {mystr}, address = {id(mystr)}")
mystr = "two"
print(f"mystr = {mystr}, address = {id(mystr)}")

# String-method and regex snippets; `s` and `dnastring` stand for strings
# defined by the reader. All statements sit at the same (top) level: the
# stray leading spaces made the snippet an IndentationError.
s.strip()  # remove unwanted spacing

s.split()  # split line into columns

s.upper(), s.lower()  # change the case

p = re.compile('A.A.A')
p.search(dnastring)

p = re.compile('T')
p.sub('U', dnastring)


import re

# Raw string keeps `\s` as a regex token rather than an invalid Python string escape.
p = re.compile(r'p.*\sp')  # the greedy star!

# `.*` grabs as much as possible, so the hit runs from the first "p" to the
# last "p" that is preceded by whitespace.
p.search('a python programmer writes python code').group()

mylist.append('value')

value in myobj

todolist = ["work", "sleep", "eat", "work"]

# All statements belong at the top level; the extra indentation was an
# IndentationError.
todolist.sort()      # sort in place
todolist.reverse()   # reverse in place
todolist[2]          # third element
todolist[-1]         # last element
todolist[2:6]        # slice; an end index past the list length is allowed


todolist = ["work", "sleep", "eat", "work"]


# sort() reorders the list in place.
todolist.sort()
print(todolist)


# reverse() flips the current order in place.
todolist.reverse()
print(todolist)


# Indexing is zero-based: this is the third element.
todolist[2]


# A negative index counts from the end: this is the last element.
todolist[-1]


# Slice from index 2 through the end of the list.
todolist[2:]

# Two example dictionaries. All statements belong at the top level; the
# extra indentation after the first line was an IndentationError.
mydict = {"a": "alligator", "b": "bear", "c": "cat"}
counter = {"cats": 55, "dogs": 8}

mydict["a"]       # look up the value stored under key "a"
mydict.keys()     # view of all keys
mydict.values()   # view of all values


# Tally how many animals are cats and how many are anything else.
counter = dict.fromkeys(('cats', 'others'), 0)

for animal in ('zebra', 'cat', 'dog', 'cat'):
    # Pick the bucket first, then bump it.
    counter['cats' if animal == 'cat' else 'others'] += 1

counter

myset = {"drama", "sci-fi"}

# Both calls belong at the top level; the extra indentation was an
# IndentationError.
myset.add("comedy")

myset.remove("drama")


# Build a set of five one-character strings.
set1 = {"1", "2", "3", "4", "5"}
set1


# "1" is already a member, so adding it again leaves the set unchanged.
set1.add("1")
set1


set2 = {"3", "6"}
set1 & set2  # elements present in both sets


set1 | set2  # elements present in either set


set1 - set2  # elements of set1 that are not in set2

tup = (max_length, sequence)


tup = (2, 'xy')
# Reading an element by index works as it does for lists.
tup[0]


# Tuples are immutable: item assignment is not supported and raises TypeError.
tup[0] = 2

def find_longest_seq(file):
    """Sketch of a function that returns two values at once.

    Judging by its name it is presumably meant to find the longest sequence
    in `file`; the actual logic is elided in this example.

    Returns:
        A 2-tuple ``(length, sequence)``. NOTE(review): neither name is
        assigned in the visible body — the omitted code is expected to
        set them.
    """
    # some code here...
    return length, sequence

answer = find_longest_seq(filepath)
print('length', answer[0])
print('sequence', answer[1])

answer = find_longest_seq(filepath) # return as a tuple
length, sequence = find_longest_seq(filepath) # return as two variables

if count > 10:
   print('big')
elif count > 5:
   print('medium')
else:
   print('small')


shopping_list = ['bread', 'egg', ' butter', 'milk']
tired         = True

# The list holds exactly four items, so `> 4` is False.
if len(shopping_list) > 4:
    print('Really need to go shopping!')
# `tired` is True, so `not tired` is False as well.
elif not tired:
    print('Not tired? Then go shopping!')
# Neither condition holds, so this branch runs.
else:
    print('Better to stay at home')

information = []
# The `with` header must end with a colon to introduce its block.
with open('myfile.txt', 'r') as fh:
    for line in fh:
        # Comment lines are handled separately from data lines.
        if is_comment(line):
            use_comment(line)
        else:
            information.append(read_data(line))  # read_data returns a list

information = []
with open('myfile.txt', 'r') as fh:
    # Read the first line
    line = fh.readline()

    # Continue to read lines until an empty string is returned
    while line:
        information.append(read_data(line))  # read_data returns a list
        # Read the next line from the same handle opened above (`fh`).
        line = fh.readline()


# For loop example
# You know the number of iterations beforehand
user_input = "thank god it's friday"
for letter in user_input:
    print(letter.upper())


# While loop example
# The number of iterations is unknown beforehand
i = 0
go_on = True
while go_on:
    c = user_input[i]
    print(c.upper())
    i += 1
    if c == 'd':
        go_on = False


# example for break
user_input = "thank god it's friday"
for letter in user_input:
    if letter == 'd':
        break
    print(letter.upper())


# example for continue
user_input = "thank god it's friday"
for letter in user_input:
    if letter == ' ' or letter == '\'':  # Skip spaces and apostrophes
        continue
    print(letter.upper())


# DON'T RUN THIS
# `i` is never incremented inside the loop, so the condition `i < 10`
# stays true forever and the loop never terminates.
i = 0
while i < 10:
    print(user_input[i])

# The `with` statement closes the file automatically when its block ends.
with open(filename, 'r') as fh:
   for line in fh:
       do_stuff(line)

# Explicit close; the file was already closed when the `with` block ended.
fh.close()

def prettyprinter(name, value, delim=":", end=None):
    """Format a name/value pair as a short sentence fragment.

    Args:
        name: Label to describe.
        value: Value to report; non-string values are rendered with ``str()``.
        delim: Separator placed between "is" and the value.
        end: Optional suffix appended to the result when truthy.

    Returns:
        The string ``"The <name> is <delim> <value>"``, followed by ``end``
        if one was given.
    """
    # f-string formatting accepts any object, whereas ``+`` concatenation
    # raised TypeError for non-string arguments such as numbers.
    out = f"The {name} is {delim} {value}"
    if end:
        out += end
    return out

""" This is a doc-string explaining what the purpose of this function/module is """

Introduction to¶

with Application to Bioinformatics¶

- Day 5¶

Day 5¶

Quiz: Review Day 4¶

1. What happens if you declare a variable with the same name inside and outside a function?¶

2. What is the difference between positional arguments and keyword arguments?¶

Arguments can be used in both ways, with or without keyword, if there is no ambiguity¶

3. What will be the output of the following code snippet?¶

4. Why is it beneficial to use docstrings in functions?¶

5. How can you see the documentation of a Python library function in the console?¶

6. Which of these import statements would avoid a name conflict if there’s a local variable math in the same script?¶

7. What will happen if you import the same module multiple times in a Python script?¶

8. If you want to filter rows in df where age is greater than 30, which command would you use?¶

9. If you want to rename multiple columns in a DataFrame df, which method should you use?¶

New topic: Regular Expressions¶

Examples where regex can play a role¶

Regex is not unique for Python and it is supported by¶

Defining a search pattern¶

More common operations - classes of characters¶

More common operations - classes of characters¶

More common operations - classes of characters¶

More common operations - classes of characters¶

More common operations - classes of characters¶

If we want to find a record with at least one sample (having genotype fields):¶

Cheat sheet¶

A playground for regex with detailed explanations of your regex¶

Day 5, Exercise 1 (~30 min)¶

Practicing regular expressions¶

Take a break after the exercise (~10 min)¶

Session 2¶

Regular expressions in Python¶

How to find all occurrences of "color" variations?¶

re.compile¶

Benefits of using re.compile¶

Case insensitiveness¶

How to find a full stop?¶

More operations¶

Substitution¶

Overview¶

Summary¶

Day 5, Exercise 2 (~30 min)¶

Use regular expressions with Python¶

Take a break after the exercise (~10 min)¶

PyQuiz 5.1 (~10 min)¶

Lunch¶

Sum up!

Processing files - looping through the lines¶

Store values¶

Values¶

Assign values¶

Compare and membership¶

Strings¶

String is immutable¶

String manipulation¶

Regular expressions help you find and replace strings.¶

Collections¶

Collections¶

Lists¶

Dictionaries¶

Sets¶

Tuples¶

Tuples in functions¶

Deciding what to do with if else statement¶

Deciding what to do - if statement¶

if x: is equivalent to if bool(x):¶

Program flow - for loops¶

Program flow - while loops¶

Different types of loops¶

Controlling loops¶

File Input/Output¶

Input/Output¶

Code structure¶

Functions¶

Functions - arguments¶

Using your code¶

Documentation and comments¶

Why programming?¶

Why programming?¶

Final advice¶

6. Which of these import statements would avoid a name conflict if there’s a local variable `math` in the same script?¶

8. If you want to filter rows in `df` where `age` is greater than 30, which command would you use?¶

9. If you want to rename multiple columns in a DataFrame `df`, which method should you use?¶

`if x:` is equivalent to `if bool(x):`¶