Lecture 18 - SQL and Python
06 October, 2024
SELECT
, WHERE
and ORDER BY
GROUP BY
and filter groups with HAVING
COUNT
, SUM
, AVG
, MIN
, and MAX
psycopg2
and sqlalchemy
libraries (and pandas
too! 🐼)LIKE
, IN
, BETWEEN
, CASE
, window functions, and string functions like LEFT
, RIGHT
, and LENGTH
COALESCE
psycopg2
and sqlalchemy
librariespsycopg2
is a PostgreSQL adapter for Python (more info here)sqlalchemy
is a SQL for Python, and it is database-agnosticsqlalchemy
generates SQL statements and psycopg2
sends SQL statements to the databasecreate_engine()
creates a connection to the databasepostgresql+psycopg2
is the database driverpostgres:postgres
is the username and passwordlocalhost:5432
is the host and portpostgres
is the database nameexecute()
method of the connection objectcommit()
method is necessary to save the changes, otherwise they will be lostexecute()
method receives a string with the SQL command# Add data to the drivers table
connection.execute(text('''
INSERT INTO drivers (driver_name, team, nationality, victories)
VALUES
('Lewis Hamilton', 'Mercedes', 'British', 103),
('Max Verstappen', 'Red Bull Racing', 'Dutch', 55),
('Charles Leclerc', 'Ferrari', 'Monégasque', 5),
('Fernando Alonso', 'Aston Martin', NULL, NULL)
'''))
connection.commit()
SELECT
command for that, and fetchall()
to retrieve the dataIN
and BETWEEN
operatorsIN
to filter data based on a list of valuesconnection.execute(text('''
SELECT * FROM drivers WHERE team IN ('Ferrari', 'Mercedes');
''')).fetchall()
[(1, 'Lewis Hamilton', 'Mercedes', 'British', 103),
(3, 'Charles Leclerc', 'Ferrari', 'Monégasque', 5)]
BETWEEN
is used to filter data within a rangeconnection.execute(text('''
SELECT * FROM drivers WHERE victories BETWEEN 50 AND 100;
''')).fetchall()
[(2, 'Max Verstappen', 'Red Bull Racing', 'Dutch', 55)]
NOT IN
and NOT BETWEEN
to exclude valuesLIKE
, ILIKE
, and SIMILAR TO
operatorsLIKE
operator to filter data based on patterns%
is a wildcard that matches any sequence of characters_
is a wildcard that matches any single character[(1, 'Lewis Hamilton', 'Mercedes', 'British', 103)]
[(1, 'Lewis Hamilton', 'Mercedes', 'British', 103)]
NOT LIKE
to exclude patternsILIKE
is the case-insensitive version of LIKE
[(1, 'Lewis Hamilton', 'Mercedes', 'British', 103)]
SIMILAR TO
is another operator that allows for more complex patternsconnection.execute(text('''
SELECT * FROM drivers WHERE driver_name SIMILAR TO 'L[a-z]%';
''')).fetchall()
[(1, 'Lewis Hamilton', 'Mercedes', 'British', 103)]
connection.execute(text('''
SELECT * FROM drivers
WHERE team SIMILAR TO '%(Racing|Martin)';
''')).fetchall()
[(2, 'Max Verstappen', 'Red Bull Racing', 'Dutch', 55),
(4, 'Fernando Alonso', 'Aston Martin', None, None)]
IS NULL
and COALESCE
operatorsIS NULL
to check for missing values[(4, 'Fernando Alonso', 'Aston Martin', None, None)]
COALESCE
COALESCE(column_name, value)
COALESCE
with subqueriesCOALESCE
with subqueries::int
to convert the result to an integerfunction() OVER (PARTITION BY column ORDER BY column)
ROW_NUMBER()
, RANK()
, DENSE_RANK()
, LAG()
, LEAD()
, and many othersconnection.execute(text('''
INSERT INTO drivers (driver_name, team, nationality, victories)
VALUES
('Valtteri Bottas', 'Mercedes', 'Finnish', 10),
('Sergio Perez', 'Red Bull Racing', 'Mexican', 5),
('Lando Norris', 'McLaren', 'British', 2),
('Esteban Ocon', 'Ferrari', 'French', 1)
'''))
connection.execute(text('''
SELECT driver_name, team, victories,
ROUND(AVG(victories) OVER (), 2) AS avg_victories,
ROUND(AVG(victories) OVER (PARTITION BY team), 2) AS avg_victories_team,
RANK() OVER (ORDER BY victories DESC) AS rank
FROM drivers
ORDER BY victories DESC;
''')).fetchall()
[('Fernando Alonso', 'Aston Martin', None, Decimal('25.86'), None, 1),
('Lewis Hamilton', 'Mercedes', 103, Decimal('25.86'), Decimal('56.50'), 2),
('Max Verstappen', 'Red Bull Racing', 55, Decimal('25.86'), Decimal('30.00'), 3),
('Valtteri Bottas', 'Mercedes', 10, Decimal('25.86'), Decimal('56.50'), 4),
('Charles Leclerc', 'Ferrari', 5, Decimal('25.86'), Decimal('3.00'), 5),
('Sergio Perez', 'Red Bull Racing', 5, Decimal('25.86'), Decimal('30.00'), 5),
('Lando Norris', 'McLaren', 2, Decimal('25.86'), Decimal('2.00'), 7),
('Esteban Ocon', 'Ferrari', 1, Decimal('25.86'), Decimal('3.00'), 8)]
GROUP BY
GROUP BY
GROUP BY
reduces the number of rows in the result setconnection.execute(text('''
SELECT
driver_name,
team,
victories,
ROUND(AVG(victories) OVER (PARTITION BY team), 2) AS avg_victories_team,
RANK() OVER (ORDER BY victories DESC) AS rank,
RANK() OVER (PARTITION BY team ORDER BY victories DESC) AS rank_team
FROM drivers
WHERE victories IS NOT NULL
ORDER BY rank ASC;
''')).fetchall()
[('Lewis Hamilton', 'Mercedes', 103, Decimal('56.50'), 1, 1),
('Max Verstappen', 'Red Bull Racing', 55, Decimal('30.00'), 2, 1),
('Valtteri Bottas', 'Mercedes', 10, Decimal('56.50'), 3, 2),
('Charles Leclerc', 'Ferrari', 5, Decimal('3.00'), 4, 1),
('Sergio Perez', 'Red Bull Racing', 5, Decimal('30.00'), 4, 2),
('Lando Norris', 'McLaren', 2, Decimal('2.00'), 6, 1),
('Esteban Ocon', 'Ferrari', 1, Decimal('3.00'), 7, 2)]
driver_name
, nationality
, and victories
from the drivers
tablerank_nationality
that ranks drivers based on the number of victories and their nationalityvictories
columnrank_nationality
LEFT()
, RIGHT()
, LENGTH()
, UPPER()
, LOWER()
, INITCAP()
, TRIM()
, REPLACE()
, and many othersconnection.execute(text('''
SELECT driver_name,
LEFT(driver_name, 5) AS first_name,
RIGHT(driver_name, 8) AS last_name,
LENGTH(driver_name) AS name_length,
UPPER(driver_name) AS upper_name,
LOWER(driver_name) AS lower_name,
INITCAP(driver_name) AS initcap_name
FROM drivers;
''')).fetchall()
[('Lewis Hamilton', 'Lewis', 'Hamilton', 14, 'LEWIS HAMILTON', 'lewis hamilton', 'Lewis Hamilton'),
('Max Verstappen', 'Max V', 'rstappen', 14, 'MAX VERSTAPPEN', 'max verstappen', 'Max Verstappen'),
('Charles Leclerc', 'Charl', ' Leclerc', 15, 'CHARLES LECLERC', 'charles leclerc', 'Charles Leclerc'),
('Fernando Alonso', 'Ferna', 'o Alonso', 15, 'FERNANDO ALONSO', 'fernando alonso', 'Fernando Alonso'),
('Valtteri Bottas', 'Valtt', 'i Bottas', 15, 'VALTTERI BOTTAS', 'valtteri bottas', 'Valtteri Bottas'),
('Sergio Perez', 'Sergi', 'io Perez', 12, 'SERGIO PEREZ', 'sergio perez', 'Sergio Perez'),
('Lando Norris', 'Lando', 'o Norris', 12, 'LANDO NORRIS', 'lando norris', 'Lando Norris'),
('Esteban Ocon', 'Esteb', 'ban Ocon', 12, 'ESTEBAN OCON', 'esteban ocon', 'Esteban Ocon')]
TRIM()
to remove leading and trailing spacesSTRPOS()
returns the position of a substring in a stringREPLACE()
is used to replace a substring with another substringCONCAT()
concatenates stringsconnection.execute(text('''
SELECT driver_name,
TRIM(driver_name) AS trimmed_name,
STRPOS(driver_name, 'a') AS a_position,
REPLACE(driver_name, ' ', '_') AS replaced_name,
CONCAT(driver_name, ' is a great driver!') AS message
FROM drivers;
''')).fetchall()
[('Lewis Hamilton', 'Lewis Hamilton', 8, 'Lewis_Hamilton', 'Lewis Hamilton is a great driver!'),
('Max Verstappen', 'Max Verstappen', 2, 'Max_Verstappen', 'Max Verstappen is a great driver!'),
('Charles Leclerc', 'Charles Leclerc', 3, 'Charles_Leclerc', 'Charles Leclerc is a great driver!'),
('Fernando Alonso', 'Fernando Alonso', 5, 'Fernando_Alonso', 'Fernando Alonso is a great driver!'),
('Valtteri Bottas', 'Valtteri Bottas', 2, 'Valtteri_Bottas', 'Valtteri Bottas is a great driver!'),
('Sergio Perez', 'Sergio Perez', 0, 'Sergio_Perez', 'Sergio Perez is a great driver!'),
('Lando Norris', 'Lando Norris', 2, 'Lando_Norris', 'Lando Norris is a great driver!'),
('Esteban Ocon', 'Esteban Ocon', 6, 'Esteban_Ocon', 'Esteban Ocon is a great driver!')]
CASE
statementIF
statement, but we can use the CASE
statementif
statements in other programming languagesCASE WHEN condition THEN value ELSE value END
connection.execute(text('''
SELECT driver_name,
CASE
WHEN victories > 50 THEN 'Great driver'
ELSE 'Good driver'
END
FROM drivers;
''')).fetchall()
[('Lewis Hamilton', 'Great driver'),
('Max Verstappen', 'Great driver'),
('Charles Leclerc', 'Good driver'),
('Fernando Alonso', 'Good driver'),
('Valtteri Bottas', 'Good driver'),
('Sergio Perez', 'Good driver'),
('Lando Norris', 'Good driver'),
('Esteban Ocon', 'Good driver')]
CASE
with multiple conditionsAS
keyword is used to rename the columnconnection.execute(text('''
SELECT driver_name,
CASE
WHEN victories > 50 THEN 'Great driver'
WHEN victories < 10 THEN 'Average driver'
ELSE 'No data'
END AS driver_status
FROM drivers;
''')).fetchall()
[('Lewis Hamilton', 'Great driver'),
('Max Verstappen', 'Great driver'),
('Charles Leclerc', 'Average driver'),
('Fernando Alonso', 'No data'),
('Valtteri Bottas', 'No data'),
('Sergio Perez', 'Average driver'),
('Lando Norris', 'Average driver'),
('Esteban Ocon', 'Average driver')]
CASE
can be combined with IN
and BETWEEN
operatorsconnection.execute(text('''
SELECT driver_name,
CASE
WHEN victories IN (5, 54) THEN 'Good driver'
WHEN victories BETWEEN 55 AND 110 THEN 'Great driver'
ELSE 'No data'
END AS driver_status
FROM drivers;
''')).fetchall()
[('Lewis Hamilton', 'Great driver'),
('Max Verstappen', 'Great driver'),
('Charles Leclerc', 'Good driver'),
('Fernando Alonso', 'No data'),
('Valtteri Bottas', 'No data'),
('Sergio Perez', 'Good driver'),
('Lando Norris', 'No data'),
('Esteban Ocon', 'No data')]
CASE
for missing valuesCASE
can also be useful to fill missing values with conditions[(4, 'Fernando Alonso', 'Aston Martin', None, None)]
connection.execute(text('''
SELECT driver_name,
CASE
WHEN nationality IS NULL THEN 'Spanish'
ELSE nationality
END,
CASE
WHEN victories IS NULL THEN 30
ELSE victories
END
FROM drivers
''')).fetchall()
[('Lewis Hamilton', 'British', 103),
('Max Verstappen', 'Dutch', 55),
('Charles Leclerc', 'Monégasque', 5),
('Fernando Alonso', 'Spanish', 30),
('Valtteri Bottas', 'Finnish', 10),
('Sergio Perez', 'Mexican', 5),
('Lando Norris', 'British', 2),
('Esteban Ocon', 'French', 1)]
CASE
statementdriver_level
that classifies drivers as ‘Beginner’, ‘Intermediate’, or ‘Expert’ based on the number of victories
pandas
🐼pandas
pandas
is great! 😎read_sql()
method to read data from a SQL queryimport pandas as pd
# Read data from SQL query
df = pd.read_sql('SELECT * FROM drivers', connection)
df
driver_id | driver_name | team | nationality | victories | |
---|---|---|---|---|---|
0 | 1 | Lewis Hamilton | Mercedes | British | 103.0 |
1 | 2 | Max Verstappen | Red Bull Racing | Dutch | 55.0 |
2 | 3 | Charles Leclerc | Ferrari | Monégasque | 5.0 |
3 | 4 | Fernando Alonso | Aston Martin | None | NaN |
4 | 5 | Valtteri Bottas | Mercedes | Finnish | 10.0 |
5 | 6 | Sergio Perez | Red Bull Racing | Mexican | 5.0 |
6 | 7 | Lando Norris | McLaren | British | 2.0 |
7 | 8 | Esteban Ocon | Ferrari | French | 1.0 |
read_sql()
methoddriver_id | driver_name | team | nationality | victories | |
---|---|---|---|---|---|
0 | 1 | Lewis Hamilton | Mercedes | British | 103 |
sqlalchemy
and use read_sql()
to read the data if that’s what you wantpandas
pandas
method to manipulate the dataavg_victories | |
---|---|
team | |
Ferrari | 3.0 |
McLaren | 2.0 |
Mercedes | 56.5 |
Red Bull Racing | 30.0 |
.query()
to filter datato_sql()
methoddf.to_sql('drivers_copy', connection, if_exists='replace', index=False)
# Check if the data were written correctly
pd.read_sql('SELECT * FROM drivers_copy', connection)
driver_id | driver_name | team | nationality | victories | |
---|---|---|---|---|---|
0 | 1 | Lewis Hamilton | Mercedes | British | 103.0 |
1 | 2 | Max Verstappen | Red Bull Racing | Dutch | 55.0 |
2 | 3 | Charles Leclerc | Ferrari | Monégasque | 5.0 |
3 | 4 | Fernando Alonso | Aston Martin | None | NaN |
4 | 5 | Valtteri Bottas | Mercedes | Finnish | 10.0 |
5 | 6 | Sergio Perez | Red Bull Racing | Mexican | 5.0 |
6 | 7 | Lando Norris | McLaren | British | 2.0 |
7 | 8 | Esteban Ocon | Ferrari | French | 1.0 |
employees
with the following columns:
employee_id
(serial, primary key)employee_name
(varchar(50))department
(varchar(50))salary
(int)pandas
(you can see the example below)pandas
to compute the average salary by departmentpandas
CASE
function to pivot tables, which is similar to the pivot_table()
method in pandas
connection.execute(text('''
DROP TABLE IF EXISTS student_scores;
CREATE TABLE student_scores (
student_name VARCHAR(50),
subject VARCHAR(50),
score INTEGER,
term VARCHAR(10)
);
INSERT INTO student_scores (student_name, subject, score, term)
VALUES
('Alice', 'Maths', 90, 'Q1'),
('Alice', 'Maths', 80, 'Q2'),
('Alice', 'Science', 80, 'Q1'),
('Alice', 'Science', 75, 'Q2'),
('Bob', 'Maths', 80, 'Q1'),
('Bob', 'Maths', 100, 'Q2'),
('Bob', 'Science', 80, 'Q1'),
('Bob', 'Science', 70, 'Q2'),
('Charles', 'Maths', 70, 'Q1'),
('Charles', 'Maths', 75, 'Q2'),
('Charles', 'Science', 90, 'Q1'),
('Charles', 'Science', 85, 'Q2')
'''))
connection.commit()
connection.execute(text('SELECT * FROM student_scores')).fetchall()
[('Alice', 'Maths', 90, 'Q1'),
('Alice', 'Maths', 80, 'Q2'),
('Alice', 'Science', 80, 'Q1'),
('Alice', 'Science', 75, 'Q2'),
('Bob', 'Maths', 80, 'Q1'),
('Bob', 'Maths', 100, 'Q2'),
('Bob', 'Science', 80, 'Q1'),
('Bob', 'Science', 70, 'Q2'),
('Charles', 'Maths', 70, 'Q1'),
('Charles', 'Maths', 75, 'Q2'),
('Charles', 'Science', 90, 'Q1'),
('Charles', 'Science', 85, 'Q2')]
CASE
functiondf2 = pd.read_sql('''
SELECT
student_name,
AVG(CASE WHEN subject = 'Maths' THEN score END) as Maths,
AVG(CASE WHEN subject = 'Science' THEN score END) as Science,
AVG(score) as total
FROM student_scores
GROUP BY student_name
ORDER BY total DESC;
''', connection)
df2
student_name | maths | science | total | |
---|---|---|---|---|
0 | Bob | 90.0 | 75.0 | 82.50 |
1 | Alice | 85.0 | 77.5 | 81.25 |
2 | Charles | 72.5 | 87.5 | 80.00 |
crosstab
function that can be used to pivot tablesCASE
function, but it works finecrosstab(text, text)
and it requires the tablefunc
extension#} eval: true
connection.execute(text('CREATE EXTENSION IF NOT EXISTS tablefunc;'))
connection.commit()
df3 = pd.read_sql('''
DROP TABLE IF EXISTS ct;
SELECT student_name, Maths, Science, (Maths + Science)/2 as Total
FROM crosstab(
'SELECT student_name, subject, AVG(score)
FROM student_scores
GROUP BY student_name, subject
ORDER BY student_name, subject',
'SELECT DISTINCT subject FROM student_scores ORDER BY subject'
) AS ct (
student_name VARCHAR, Maths NUMERIC, Science NUMERIC
);
''', connection)
df3
student_name | maths | science | total | |
---|---|---|---|---|
0 | Alice | 85.0 | 77.5 | 81.25 |
1 | Bob | 90.0 | 75.0 | 82.50 |
2 | Charles | 72.5 | 87.5 | 80.00 |
ct
if it already exists, just to avoid conflicts
DROP TABLE IF EXISTS ct;
crosstab
function to pivot the table
SELECT student_name, subject, AVG(score) FROM student_scores GROUP BY student_name, subject ORDER BY student_name, subject
SELECT DISTINCT subject FROM student_scores ORDER BY subject
crosstab
function requires two arguments
AS
keyword to rename the columns and assigned the result to a new table called ct
AS ct (student_name VARCHAR, Maths NUMERIC, Science NUMERIC)
psycopg2
and sqlalchemy
to connect to a PostgreSQL databaseexecute()
and fetchall()
IN
, BETWEEN
, LIKE
, CASE
, and window functions are very usefulCOALESCE
and pivot tables with the CASE
function or the crosstab
functionpandas
to read data from SQL queries and manipulate itSource: Susan Ibach
[(2, 'Max Verstappen', 'Red Bull Racing', 'Dutch', 55)]
connection.execute(text('''
SELECT * FROM drivers WHERE nationality LIKE '_______';
''')).fetchall()
[(1, 'Lewis Hamilton', 'Mercedes', 'British', 103),
(5, 'Valtteri Bottas', 'Mercedes', 'Finnish', 10),
(6, 'Sergio Perez', 'Red Bull Racing', 'Mexican', 5),
(7, 'Lando Norris', 'McLaren', 'British', 2)]
LENGTH
function[(1, 'Lewis Hamilton', 'Mercedes', 'British', 103),
(5, 'Valtteri Bottas', 'Mercedes', 'Finnish', 10),
(6, 'Sergio Perez', 'Red Bull Racing', 'Mexican', 5),
(7, 'Lando Norris', 'McLaren', 'British', 2)]
connection.execute(text('''
SELECT * FROM drivers WHERE driver_name LIKE 'L%' OR driver_name LIKE 'M%';
''')).fetchall()
[(1, 'Lewis Hamilton', 'Mercedes', 'British', 103),
(2, 'Max Verstappen', 'Red Bull Racing', 'Dutch', 55),
(7, 'Lando Norris', 'McLaren', 'British', 2)]
connection.execute(text('''
SELECT * FROM drivers WHERE victories BETWEEN 1 AND 10;
''')).fetchall()
[(3, 'Charles Leclerc', 'Ferrari', 'Monégasque', 5),
(5, 'Valtteri Bottas', 'Mercedes', 'Finnish', 10),
(6, 'Sergio Perez', 'Red Bull Racing', 'Mexican', 5),
(7, 'Lando Norris', 'McLaren', 'British', 2),
(8, 'Esteban Ocon', 'Ferrari', 'French', 1)]
rank_nationality
that ranks drivers based on the number of victories and their nationalityconnection.execute(text('''
SELECT driver_name, nationality, victories,
RANK() OVER (PARTITION BY nationality ORDER BY victories DESC) AS rank_nationality
FROM drivers
WHERE victories IS NOT NULL
ORDER BY rank_nationality
''')).fetchall()
[('Valtteri Bottas', 'Finnish', 10, 1),
('Charles Leclerc', 'Monégasque', 5, 1),
('Max Verstappen', 'Dutch', 55, 1),
('Lewis Hamilton', 'British', 103, 1),
('Esteban Ocon', 'French', 1, 1),
('Sergio Perez', 'Mexican', 5, 1),
('Lando Norris', 'British', 2, 2)]
driver_level
that classifies drivers as ‘Beginner’, ‘Intermediate’, or ‘Expert’ based on the number of victories
Beginner
if victories < 10Intermediate
if victories between 10 and 50Expert
if victories > 50connection.execute(text('''
SELECT driver_name,
CASE
WHEN victories < 10 THEN 'Beginner'
WHEN victories BETWEEN 10 AND 100 THEN 'Intermediate'
ELSE 'Expert'
END AS driver_level
FROM drivers;
''')).fetchall()
[('Lewis Hamilton', 'Expert'),
('Max Verstappen', 'Intermediate'),
('Charles Leclerc', 'Beginner'),
('Fernando Alonso', 'Expert'),
('Valtteri Bottas', 'Intermediate'),
('Sergio Perez', 'Beginner'),
('Lando Norris', 'Beginner'),
('Esteban Ocon', 'Beginner')]
employees
with the following columns:
employee_id
(serial, primary key)employee_name
(varchar(50))department
(varchar(50))salary
(int)pandas
(you can see the example below)pandas
to compute the average salary by departmentconnection.execute(text('''
DROP TABLE IF EXISTS employees;
CREATE TABLE employees (
employee_id SERIAL PRIMARY KEY,
employee_name VARCHAR(50),
department VARCHAR(50),
salary INT
);
'''))
connection.execute(text('''
INSERT INTO employees (employee_name, department, salary)
VALUES
('Alice', 'HR', 5000),
('Bob', 'IT', 6000),
('Charlie', 'HR', 7000),
('David', 'IT', 8000)
'''))
connection.commit()
# Read data with pandas
pd.read_sql('SELECT * FROM employees', connection)
# Compute average salary by department
pd.read_sql('SELECT department, AVG(salary) FROM employees GROUP BY department', connection)
department | avg | |
---|---|---|
0 | IT | 7000.0 |
1 | HR | 6000.0 |
psycopg2
when trying to drop tablesfrom sqlalchemy import create_engine, text, inspect
import time
engine = create_engine('postgresql+psycopg2://postgres:postgres@localhost:5432/postgres', future=True)
connection = engine.connect()
def clean_database(engine):
with engine.connect() as conn:
try:
# Get inspector to check existing tables
inspector = inspect(engine)
existing_tables = inspector.get_table_names()
if not existing_tables:
print("No tables found in database")
return
print(f"Found {len(existing_tables)} tables: {existing_tables}")
# Kill other connections
conn.execute(text("""
SELECT pg_terminate_backend(pid)
FROM pg_stat_activity
WHERE pid <> pg_backend_pid()
AND datname = current_database()
"""))
conn.execute(text("ROLLBACK"))
conn.execute(text("SET statement_timeout = '30s'"))
# Only drop tables that exist
for table in existing_tables:
try:
conn.execute(text(f"DROP TABLE IF EXISTS {table} CASCADE"))
print(f"Dropped {table}")
conn.commit()
time.sleep(1)
except Exception as e:
print(f"Error with {table}: {str(e)}")
conn.execute(text("ROLLBACK"))
except Exception as e:
print(f"Fatal error: {str(e)}")
conn.execute(text("ROLLBACK"))
# Execute
clean_database(engine)