# tools for handling files
import sys
import os
# pandas/numpy for handling data
import pandas as pd
import numpy as np
from pandas import ExcelWriter
from pandas import ExcelFile
# for reading individual telomere length data from files
from ast import literal_eval
# for grabbing individual cells
import more_itertools
# my module containing functions for handling/visualizing/analyzing telomere length/chr rearrangement data
import telomere_methods_rad_patient as trp
# in case reloading modules is required
import importlib
%load_ext autoreload
%autoreload
...
# Compile the raw teloFISH data: trp builds a dictionary from the raw data directory,
# which is then converted into a single dataframe.
all_patients_dict = trp.generate_dictionary_from_TeloLength_data(
    '../data/raw patient teloFISH data/'
)
all_patients_df = trp.generate_dataframe_from_dict(all_patients_dict)

# the per-cell telomere means are not needed at this time, so that column is dropped
all_patients_df = all_patients_df.drop(columns=['cell data'])
print(all_patients_df.shape)

# convert each 'telo data' entry to a plain list in preparation for saving to csv
all_patients_df['telo data'] = all_patients_df['telo data'].apply(
    lambda telos: telos.tolist()
)
all_patients_df.to_csv(
    '../data/compiled patient data csv files/all_patients_df.csv',
    index=False,
)
# Reshape to long format: the 'Q1', 'Q2-3' and 'Q4' columns are unpivoted into a
# 'relative Q' label column and a 'Q freq counts' value column, while every other
# column is carried along as an identifier.
melted_all_patients_df = all_patients_df.melt(
    id_vars=[
        col for col in all_patients_df.columns
        if col not in ('Q1', 'Q2-3', 'Q4')
    ],
    var_name='relative Q',
    value_name='Q freq counts',
)

# make sure the unpivoted counts are stored as floats
q_freq_counts = melted_all_patients_df['Q freq counts']
melted_all_patients_df['Q freq counts'] = q_freq_counts.astype('float64')
melted_all_patients_df.head(4)

melted_all_patients_df.to_csv(
    '../data/compiled patient data csv files/melted_all_patients_df.csv',
    index=False,
)
# Wide table of 'telo means': one row per 'patient id', one column per 'timepoint'.
pivot_patients_telo_means_df = (
    all_patients_df
    .pivot(index='patient id', columns='timepoint', values='telo means')
    # remove the row labelled 13 from the 'patient id' index
    # (assumes patient ids are stored as integers -- TODO confirm)
    .drop(index=13)
)

# NOTE(review): index=False leaves the 'patient id' index out of the saved file, so the
# rows in the csv are no longer labelled by patient -- confirm this is intended.
pivot_patients_telo_means_df.to_csv(
    '../data/compiled patient data csv files/pivot_patients_telo_means_df.csv',
    index=False,
)
# Spread each row's list of individual telomeres across columns (one value per column).
# The row index is unchanged, which is what allows the merge back onto the patient
# columns below.
explode_telos_raw = all_patients_df['telo data'].apply(pd.Series)
print(explode_telos_raw.shape)
explode_telos_raw.head(4)

exploded_telos_all_patients_df = (
    explode_telos_raw
    # index-on-index merge re-attaches the patient columns to the exploded telomeres
    .merge(all_patients_df, left_index=True, right_index=True)
    .drop(columns=['telo data', 'Q1', 'Q2-3', 'Q4'])
    # long format: one row per individual telomere value, keyed by the id columns
    .melt(
        id_vars=['patient id', 'timepoint', 'telo means'],
        value_name='individual telomeres',
    )
    # the melted column labels are not needed once the data is in long format
    .drop(columns='variable')
    # discard rows containing NaN (presumably padding from shorter telomere lists)
    .dropna()
)
exploded_telos_all_patients_df.head(4)

exploded_telos_all_patients_df.to_csv(
    '../data/compiled patient data csv files/exploded_telos_all_patients_df.csv',
    index=False,
)
# Load the qPCR results from the second sheet (sheet_name=1 is zero-indexed), skipping
# the first row and keeping only the three columns at positions 24-26.
all_qPCR_df = pd.read_excel(
    '../data/qPCR telo data/REVISED Tel +Alb (both plates) 2019-08-05 13 Quantification Cq Results.xlsx',
    sheet_name=1,
    skiprows=1,
    usecols=[24, 25, 26],
)
all_qPCR_df.columns = ['Sample', 'telo means qPCR', 'SEM']
all_qPCR_df['Sample'] = all_qPCR_df['Sample'].astype('str')

# discard rows with missing values, then the rows labelled 24, 47 and 48
# NOTE(review): these hard-coded row labels depend on the spreadsheet layout -- confirm
# they still point at the intended rows if the source file changes.
all_qPCR_df.dropna(axis=0, inplace=True)
all_qPCR_df.drop(index=[24, 47, 48], inplace=True)

# re-format the sample IDs with the trp helper, then derive the 'timepoint' and
# 'patient id' columns from them
all_qPCR_df['Sample'] = all_qPCR_df['Sample'].apply(trp.change_sample_ID)
all_qPCR_df['timepoint'] = all_qPCR_df['Sample'].apply(trp.make_timepoint_col)
all_qPCR_df['patient id'] = (
    all_qPCR_df['Sample'].apply(trp.make_patient_ID).astype('str')
)

# 'Sample' is no longer needed; put the remaining columns in their final order
all_qPCR_df.drop(columns=['Sample'], inplace=True)
cols = ['patient id', 'timepoint', 'telo means qPCR', 'SEM']
all_qPCR_df = all_qPCR_df.reindex(columns=cols)
all_qPCR_df.head(4)

all_qPCR_df.to_csv('../data/qPCR telo data/all_qPCR_df.csv', index=False)
# Build the chromosome aberration dataframe with the trp helper from the dGH scoresheets
# directory and save it to csv without the row index.
all_chr_aberr_df = trp.make_dataframe_chr_aberr_data(
    '../data/dGH scoresheets/'
)
all_chr_aberr_df.to_csv(
    '../data/compiled patient data csv files/all_chr_aberr_df.csv',
    index=False,
)