Introduction

This workbook refers to the manuscript SignalingProfiler 2.0: a network-based approach to bridge multi-omic data to phenotypic hallmarks.

Import SignalingProfiler 2.0 and other libraries

library(SignalingProfiler)
library(tidyverse)

Read input omic data

You can also embed plots, for example:

Perform SignalingProfiler analysis with custom regulons

Step 1) Infer the protein activity using custom regulons

We suggest to create all the regulons and then run the analysis, since the download of regulons requires some minutes. As an alternative, built-in regulons can be used (see SignalingProfiler tutorial).

TFEA analysis

tfea_regulons <- create_tfea_regulons(resources = c('SIGNOR', 'Dorothea'),
                                      organism = 'human')
write_tsv(tfea_regulons, '../custom_regulons/tfea_custom_regulons.tsv')

Run the analysis

tfea_result <- run_footprint_based_analysis(omic_data = tr_df,
                                   analysis = 'tfea',
                                   organism = 'human',
                                   reg_minsize = 10,
                                   exp_sign = FALSE,
                                   integrated_regulons = FALSE,
                                   collectri = FALSE,
                                   hypergeom_corr = TRUE,
                                   correct_proteomics = TRUE,
                                   prot_df = prot_df,
                                   GO_annotation = TRUE,
                                   custom = TRUE,
                                   custom_path = '../custom_regulons/tfea_custom_regulons.tsv' )

KSEA analysis

ksea_regulons <- create_ksea_regulons(resources = c('SIGNOR', 'Omnipath'), 
                                      organism = 'human')
write_tsv(ksea_regulons, '../custom_regulons/ksea_custom_regulons.tsv')

Run the analysis

ksea_result <- run_footprint_based_analysis(omic_data = phospho_df,
                                   analysis = 'ksea',
                                   organism = 'human',
                                   reg_minsize = 5,
                                   exp_sign = FALSE,
                                   integrated_regulons = FALSE,
                                   collectri = FALSE,
                                   hypergeom_corr = TRUE,
                                   correct_proteomics = TRUE,
                                   prot_df = prot_df,
                                   GO_annotation = TRUE,
                                   custom = TRUE,
                                   custom_path = '../custom_regulons/ksea_custom_regulons.tsv' )

PhosphoScore analysis

reg_phosphosites <- get_phosphoscore_info(resources = c('SIGNOR', 'PsP'), 
                      organism = 'human', 
                      psp_reg_site_path = '../../revisions/input/Regulatory_sites_2023-08-24',
                      only_activatory = TRUE)
write_tsv(reg_phosphosites, '../custom_regulons/act_phosphosites.tsv')

Run the PhosphoScore analysis

phosphoscore_df <- phosphoscore_computation(phosphoproteomic_data = phospho_df,
                                            organism = 'human',
                                            activatory = TRUE ,
                                            GO_annotation= TRUE,
                                            custom = TRUE,
                                            custom_path = '../custom_regulons/act_phosphosites.tsv')

Final activity score computation


combined_tfs <- combine_footprint_and_phosphoscore(footprint_output = tfea_result,
                                                   phosphoscore_df =  phosphoscore_df,
                                                   analysis =  'tfea')

combined_kin_phos <- combine_footprint_and_phosphoscore(footprint_output = ksea_result,
                                                        phosphoscore_df =  phosphoscore_df,
                                                        analysis =  'ksea')

toy_other <- phosphoscore_df %>%
  dplyr::filter(mf == 'other') %>%
  dplyr::rename(final_score = phosphoscore) %>%
  dplyr::mutate(method = 'PhosphoScore')

prot_activity_df <- dplyr::bind_rows(combined_tfs, 
                                    combined_kin_phos, 
                                    toy_other) %>%
  select(UNIPROT, gene_name, mf, final_score, method)

prot_activity_df
# # A tibble: 60 × 5
#    UNIPROT       gene_name mf    final_score method
#    <chr>         <chr>     <chr>       <dbl> <chr> 
#  1 P15336        ATF2      tf           4.48 VIPER 
#  2 P18848        ATF4      tf           6.37 VIPER 
#  3 P03372        ESR1      tf          -3.24 VIPER 
#  4 Q08050;Q08050 FOXM1     tf           1.97 VIPER 
#  5 Q12778        FOXO1     tf           6.05 VIPER 
#  6 O43524        FOXO3     tf          14.7  VIPER 
#  7 P98177        FOXO4     tf          19.0  VIPER 
#  8 Q16665        HIF1A     tf          -2.42 VIPER 
#  9 P04198        MYCN      tf          -6.48 VIPER 
# 10 Q12857        NFIA      tf          -5.36 VIPER 
# # ℹ 50 more rows

Step 2) Context-specific network generation using custom PKN

Custom PKN generation

Generate a custom PKN choosing among 3 databases (4 if we consider Ser/Thr Kinome Atlas):

  • SIGNOR

  • PhosphoSitePlus: to use it you have to manually download ‘Regulatory_sites.gz’ and ‘Kinase_Substrate_Dataset.gz’ from here, after registration.

  • OmniPath

You can also specify whether the interactions should be only direct with the direct argument.


create_PKN(database = c('SIGNOR', 'PsP'), #add also 'Omnipath' or 'Ser/Thr_Kinome_Atlas'
           direct = TRUE,
           organism = 'human', 
           psp_reg_site_path = '../../revisions/input/Regulatory_sites_2023-08-24',
           psp_kin_sub_path = '../../revisions/input/Kinase_Substrate_Dataset_2023-08-24', 
           file_path =  '../custom_regulons/custom_PKN.RDS') 
    

The function returns:

  • a list of an igraph object of the PKN;

  • a table of the entities included in the PKN;

  • a table of the interactions composing the PKN;

Naive network generation


# Extract the interactions object and apply SignalingProfiler commands
PKN_object <- readRDS('../custom_regulons/custom_PKN.RDS')
PKN_table <- PKN_object$interactions

PKN_expressed <- preprocess_PKN(omics_data = list(tr_df, prot_df, phospho_df),
                                PKN_table = PKN_table)

# divide proteins according to the molecular function
kin_phos_other <- prot_activity_df %>%
  dplyr::filter(mf %in% c('kin', 'phos', 'other'))
tfs <- prot_activity_df %>%
  dplyr::filter(mf == 'tf')

# create the naïve network
two_layers_toy <- two_layer_naive_network(starts_gn = c('AMPK', 'MTOR'),
                                          intermediate_gn = kin_phos_other$gene_name,
                                          targets_gn = tfs$gene_name,
                                          PKN_table = PKN_expressed, #or PKN_mouse
                                          max_length_1 = 3,
                                          max_length_2 = 4,
                                          rds_path = paste0('../custom_regulons/two_naive.rds'),
                                          sif_path = paste0('../custom_regulons/two_naive.sif'),
                                          connect_all = TRUE)

two_layers_toy

Optimization of the naive network over the protein activity

two_layers_toy <- readRDS('../custom_regulons/two_naive.rds')
receptor_list <- list('MTOR' = -1, 'AMPK' = 1)

carnival_input <- prepare_carnival_input(two_layers_toy,
                                         prot_activity_df,
                                         receptor_list,
                                         organism = 'human')

# Set ILP Solver options
solver = 'cplex'
carnival_options = default_CARNIVAL_options(solver)
carnival_options$timelimit <- 600

# FIRST RUN: RECEPTOR to KIN, PHOS, OTHERS
receptors_df <- carnival_input %>%
  dplyr::filter(mf == 'rec')

target1_df <- carnival_input%>%
  dplyr::filter(mf %in% c('kin', 'phos', 'other'))

naive_network <- readr::read_tsv(paste0('../custom_regulons/two_naive.sif'),
                                 col_names = c('source', 'interaction', 'target'))

output1 <- run_carnival_and_create_graph(source_df = receptors_df,
                                         target_df = target1_df,
                                         naive_network = unique(naive_network),
                                         proteins_df = carnival_input,
                                         organism = 'human',
                                         carnival_options = carnival_options,
                                         files = FALSE,
                                         with_atlas = FALSE)

# SECOND RUN: from KIN, PHOS, OTHERS to TFs
run1_output_nodes <- convert_output_nodes_in_next_input(output1)

source_df <- run1_output_nodes %>%
  dplyr::filter(mf %in% c('kin', 'phos', 'other'))
source_df$UNIPROT <- ''

target2_df <- carnival_input %>%
  dplyr::filter(mf == 'tf')

output2 <- run_carnival_and_create_graph(source_df = source_df,
                                         target_df = target2_df,
                                         naive_network = unique(naive_network),
                                         proteins_df = carnival_input,
                                         organism = 'human',
                                         carnival_options = carnival_options,
                                         files = FALSE,
                                         with_atlas = FALSE)

# UNION OF RUN1 and RUN2 graphs
union <- union_of_graphs(graph_1 = output1$igraph_network,
                         graph_2 = output2$igraph_network,
                         proteins_df = carnival_input,
                         files = TRUE,
                         path_sif = paste0('../custom_regulons/union_optimized.sif'),
                         path_rds = paste0('../custom_regulons/union_optimized.rds'))

# EXPANSION and MAPPING of PHOSPHOPROTEOMICS DATA ON EDGES
union_graph_rds <- readRDS(paste0('../custom_regulons/union_optimized.rds'))
union_output_final <- expand_and_map_edges(optimized_object = union_graph_rds,
                                           organism = 'human',
                                           phospho_df = phospho_df,
                                           files = TRUE,
                                           with_atlas = FALSE,
                                           path_sif = paste0('../custom_regulons/union_validated.sif'),
                                           path_rds = paste0('../custom_regulons/union_validated.rds'))

Step 3) Phenotypic activity inference

pheno_table_distances <- phenoscore_network_preprocessing(proteomics = prot_df,
                                                          phospho = phospho_df)
write_rds(pheno_table_distances, '../custom_regulons/pheno_table_distances.rds')
pheno_table_distances <- readRDS('../custom_regulons/pheno_table_distances.rds')
sp_output <- readRDS('../custom_regulons/union_validated.rds')

toy_phenoscore_output<- phenoscore_computation(proteins_df = sp_output$nodes_df,
                                               desired_phenotypes = NULL,
                                               pheno_distances_table = pheno_table_distances,
                                               sp_graph = sp_output$igraph_network,
                                               # closeness of proteins to phenotypes
                                               path_length = 4,
                                               stat = 'mean',
                                               zscore_threshold = -1.96,
                                               # exclude random phenotypes
                                               n_random = 1000,
                                               pvalue_threshold = 0.05,
                                               # optimized network  specificity
                                               remove_cascade = TRUE,
                                               node_idx = TRUE,
                                               use_carnival_activity = TRUE,
                                               create_pheno_network = TRUE)

write_rds(toy_phenoscore_output, '../custom_regulons/phenoscore.rds')

toy_phenoscore_output <- readRDS( '../custom_regulons/phenoscore.rds')

toy_phenoscore_output$table_phenotypes %>% 
  filter(EndPathways %in% c('fatty acid biosynthesis', 
                            'lipogenesis', 'autophagy', 
                            'apoptosis', 'cell death', 'adipogenesis', 
                            'glycogen synthesis', 
                            'protein synthesis', 
                            'glycolysis', 'proliferation')) %>% 
  arrange(desc(phenoscore))
# # A tibble: 10 × 2
#    EndPathways             phenoscore
#    <chr>                        <dbl>
#  1 fatty acid biosynthesis     10.3  
#  2 lipogenesis                  4.45 
#  3 glycogen synthesis           2.85 
#  4 autophagy                    2.08 
#  5 apoptosis                    0.873
#  6 cell death                   0.707
#  7 proliferation               -0.466
#  8 protein synthesis           -1.70 
#  9 glycolysis                  -1.72 
# 10 adipogenesis                -2.45