import pandas as pd
# Step 1: load the customer history used to train the models.
table = pd.read_csv("clientes.csv")


# Step 2: render the raw table to get a first look at the columns.
display(table)


# DataFrame.info() writes its report to stdout itself and returns None, so
# wrapping it in print() only appended a stray "None" line to the output.
table.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100000 entries, 0 to 99999
Data columns (total 25 columns):
 #   Column                    Non-Null Count   Dtype  
---  ------                    --------------   -----  
 0   id_cliente                100000 non-null  int64  
 1   mes                       100000 non-null  int64  
 2   idade                     100000 non-null  float64
 3   profissao                 100000 non-null  object 
 4   salario_anual             100000 non-null  float64
 5   num_contas                100000 non-null  float64
 6   num_cartoes               100000 non-null  float64
 7   juros_emprestimo          100000 non-null  float64
 8   num_emprestimos           100000 non-null  float64
 9   dias_atraso               100000 non-null  float64
 10  num_pagamentos_atrasados  100000 non-null  float64
 11  num_verificacoes_credito  100000 non-null  float64
 12  mix_credito               100000 non-null  object 
 13  divida_total              100000 non-null  float64
 14  taxa_uso_credito          100000 non-null  float64
 15  idade_historico_credito   100000 non-null  float64
 16  investimento_mensal       100000 non-null  float64
 17  comportamento_pagamento   100000 non-null  object 
 18  saldo_final_mes           100000 non-null  float64
 19  score_credito             100000 non-null  object 
 20  emprestimo_carro          100000 non-null  int64  
 21  emprestimo_casa           100000 non-null  int64  
 22  emprestimo_pessoal        100000 non-null  int64  
 23  emprestimo_credito        100000 non-null  int64  
 24  emprestimo_estudantil     100000 non-null  int64  
dtypes: float64(14), int64(7), object(4)
memory usage: 19.1+ MB
None


# a. Label encoding - replace each text category with an integer code
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

encoder = LabelEncoder()
# The single encoder is refitted on every column in turn, so once the loop
# ends it only remembers the categories of the last column processed.
for text_column in ("profissao", "mix_credito", "comportamento_pagamento"):
    table[text_column] = encoder.fit_transform(table[text_column])

# b. Split into features (x) and target (y), then hold out 30% of the rows
#    for testing. score_credito is the label; id_cliente is left out of the
#    features.
# NOTE(review): the displayed table shows several monthly rows per
# id_cliente, so a random row-level split can place the same customer in
# both sets - confirm whether a grouped split is wanted.
y = table["score_credito"]
x = table.drop(columns=["score_credito", "id_cliente"])
x_training, x_test, y_training, y_test = train_test_split(
    x, y, test_size=0.3, random_state=1
)


# 2 models: Random Forest and KNN
# NOTE: the variable keeps its historical name model_decision_tree, but it
# holds a RandomForestClassifier (an ensemble of decision trees), not a
# single decision tree.

# a. Import
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier

# b. Create
# The forest is seeded like the train/test split so that accuracy figures and
# feature importances are reproducible from one run to the next.
model_decision_tree = RandomForestClassifier(random_state=1)
model_knn = KNeighborsClassifier()

# c. Train
model_decision_tree.fit(x_training, y_training)
# KNN is queried with x_test.to_numpy() further down; fitting it on a NumPy
# array as well keeps fit and predict inputs consistent and avoids the
# "X does not have valid feature names" UserWarning.
model_knn.fit(x_training.to_numpy(), y_training)

KNeighborsClassifier()

KNeighborsClassifier()


from sklearn.metrics import accuracy_score

# Predict the held-out rows with each model. KNN receives a plain NumPy
# array while the forest receives the DataFrame.
prediction_decision_tree = model_decision_tree.predict(x_test)
prediction_knn = model_knn.predict(x_test.to_numpy())

# Print the fraction of correctly classified test rows: forest first, KNN second.
for predicted_labels in (prediction_decision_tree, prediction_knn):
    print(accuracy_score(y_test, predicted_labels))

/opt/homebrew/lib/python3.11/site-packages/sklearn/base.py:464: UserWarning: X does not have valid feature names, but KNeighborsClassifier was fitted with feature names
  warnings.warn(

0.8251666666666667
0.7324


# Step 6: score new customers with the trained forest.
table_new_customers = pd.read_csv("novos_clientes.csv")

# The new rows must use the SAME text -> integer mapping the model was trained
# on. Re-fitting an encoder on this small file would number its categories
# from 0 independently of the training data and feed the model wrong codes.
# LabelEncoder assigns codes by sorted category order, so fitting on the
# original training columns reproduces the training mapping exactly; the new
# rows are then only transformed. A category never seen in training raises
# ValueError instead of silently receiving an arbitrary code.
categorical_columns = ["profissao", "mix_credito", "comportamento_pagamento"]
training_categories = pd.read_csv("clientes.csv", usecols=categorical_columns)
for column in categorical_columns:
    encoder = LabelEncoder().fit(training_categories[column])
    table_new_customers[column] = encoder.transform(table_new_customers[column])

# info() prints its own report and returns None, so it is not wrapped in print().
table_new_customers.info()
display(table_new_customers)

new_prediction = model_decision_tree.predict(table_new_customers)
print(new_prediction)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 23 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   mes                       3 non-null      int64  
 1   idade                     3 non-null      float64
 2   profissao                 3 non-null      int64  
 3   salario_anual             3 non-null      float64
 4   num_contas                3 non-null      float64
 5   num_cartoes               3 non-null      float64
 6   juros_emprestimo          3 non-null      float64
 7   num_emprestimos           3 non-null      float64
 8   dias_atraso               3 non-null      float64
 9   num_pagamentos_atrasados  3 non-null      float64
 10  num_verificacoes_credito  3 non-null      float64
 11  mix_credito               3 non-null      int64  
 12  divida_total              3 non-null      float64
 13  taxa_uso_credito          3 non-null      float64
 14  idade_historico_credito   3 non-null      float64
 15  investimento_mensal       3 non-null      float64
 16  comportamento_pagamento   3 non-null      int64  
 17  saldo_final_mes           3 non-null      float64
 18  emprestimo_carro          3 non-null      int64  
 19  emprestimo_casa           3 non-null      int64  
 20  emprestimo_pessoal        3 non-null      int64  
 21  emprestimo_credito        3 non-null      int64  
 22  emprestimo_estudantil     3 non-null      int64  
dtypes: float64(14), int64(9)
memory usage: 684.0 bytes
None

['Poor' 'Good' 'Standard']


# Step 7: show how much each feature contributes to the forest's decisions,
# expressed as a percentage (feature_importances_ scaled by 100).
columns = x_test.columns.tolist()
importance = pd.DataFrame(model_decision_tree.feature_importances_, index=columns) * 100
print(importance)

                                  0
mes                        3.935890
idade                      4.204221
profissao                  3.292680
salario_anual              5.113708
num_contas                 3.655706
num_cartoes                4.556644
juros_emprestimo           8.370750
num_emprestimos            3.367472
dias_atraso                5.974806
num_pagamentos_atrasados   4.569275
num_verificacoes_credito   4.724786
mix_credito                8.581199
divida_total              10.875573
taxa_uso_credito           5.075785
idade_historico_credito    7.497293
investimento_mensal        4.854108
comportamento_pagamento    2.330494
saldo_final_mes            5.463939
emprestimo_carro           0.717342
emprestimo_casa            0.729737
emprestimo_pessoal         0.697791
emprestimo_credito         0.712744
emprestimo_estudantil      0.698057


# Render the training table first, then the new-customer table.
for frame in (table, table_new_customers):
    display(frame)

	id_cliente	mes	idade	profissao	salario_anual	num_contas	num_cartoes	juros_emprestimo	num_emprestimos	dias_atraso	...	idade_historico_credito	investimento_mensal	comportamento_pagamento	saldo_final_mes	score_credito	emprestimo_carro	emprestimo_casa	emprestimo_pessoal	emprestimo_credito	emprestimo_estudantil
0	3392	1	23.0	cientista	19114.12	3.0	4.0	3.0	4.0	3.0	...	265.0	21.465380	alto_gasto_pagamento_baixos	312.494089	Good	1	1	1	1	0
1	3392	2	23.0	cientista	19114.12	3.0	4.0	3.0	4.0	3.0	...	266.0	21.465380	baixo_gasto_pagamento_alto	284.629162	Good	1	1	1	1	0
2	3392	3	23.0	cientista	19114.12	3.0	4.0	3.0	4.0	3.0	...	267.0	21.465380	baixo_gasto_pagamento_medio	331.209863	Good	1	1	1	1	0
3	3392	4	23.0	cientista	19114.12	3.0	4.0	3.0	4.0	5.0	...	268.0	21.465380	baixo_gasto_pagamento_baixo	223.451310	Good	1	1	1	1	0
4	3392	5	23.0	cientista	19114.12	3.0	4.0	3.0	4.0	6.0	...	269.0	21.465380	alto_gasto_pagamento_medio	341.489231	Good	1	1	1	1	0
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
99995	37932	4	25.0	mecanico	39628.99	4.0	6.0	7.0	2.0	23.0	...	378.0	24.028477	alto_gasto_pagamento_alto	479.866228	Poor	1	0	0	0	1
99996	37932	5	25.0	mecanico	39628.99	4.0	6.0	7.0	2.0	18.0	...	379.0	24.028477	alto_gasto_pagamento_medio	496.651610	Poor	1	0	0	0	1
99997	37932	6	25.0	mecanico	39628.99	4.0	6.0	7.0	2.0	27.0	...	380.0	24.028477	alto_gasto_pagamento_alto	516.809083	Poor	1	0	0	0	1
99998	37932	7	25.0	mecanico	39628.99	4.0	6.0	7.0	2.0	20.0	...	381.0	24.028477	baixo_gasto_pagamento_alto	319.164979	Standard	1	0	0	0	1
99999	37932	8	25.0	mecanico	39628.99	4.0	6.0	7.0	2.0	18.0	...	382.0	24.028477	alto_gasto_pagamento_medio	393.673696	Poor	1	0	0	0	1

	mes	idade	profissao	salario_anual	num_contas	num_cartoes	juros_emprestimo	num_emprestimos	dias_atraso	num_pagamentos_atrasados	...	taxa_uso_credito	idade_historico_credito	investimento_mensal	comportamento_pagamento	saldo_final_mes	emprestimo_carro	emprestimo_casa	emprestimo_credito	emprestimo_estudantil
0	1	31.0	1	19300.340	6.0	7.0	17.0	5.0	52.0	19.0	...	29.934186	218.0	44.50951	1	312.487689	1	1	0	0
1	4	32.0	0	12600.445	5.0	5.0	10.0	3.0	25.0	18.0	...	28.819407	12.0	0.00000	2	300.994163	0	0	0	1
2	2	48.0	1	20787.690	8.0	6.0	14.0	7.0	24.0	14.0	...	34.235853	215.0	0.00000	0	345.081577	0	1	1	0

	id_cliente	mes	idade	profissao	salario_anual	num_contas	num_cartoes	juros_emprestimo	num_emprestimos	dias_atraso	...	idade_historico_credito	investimento_mensal	comportamento_pagamento	saldo_final_mes	score_credito	emprestimo_carro	emprestimo_casa	emprestimo_pessoal	emprestimo_credito	emprestimo_estudantil
0	3392	1	23.0	2	19114.12	3.0	4.0	3.0	4.0	3.0	...	265.0	21.465380	1	312.494089	Good	1	1	1	1	0
1	3392	2	23.0	2	19114.12	3.0	4.0	3.0	4.0	3.0	...	266.0	21.465380	3	284.629162	Good	1	1	1	1	0
2	3392	3	23.0	2	19114.12	3.0	4.0	3.0	4.0	3.0	...	267.0	21.465380	5	331.209863	Good	1	1	1	1	0
3	3392	4	23.0	2	19114.12	3.0	4.0	3.0	4.0	5.0	...	268.0	21.465380	4	223.451310	Good	1	1	1	1	0
4	3392	5	23.0	2	19114.12	3.0	4.0	3.0	4.0	6.0	...	269.0	21.465380	2	341.489231	Good	1	1	1	1	0
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
99995	37932	4	25.0	11	39628.99	4.0	6.0	7.0	2.0	23.0	...	378.0	24.028477	0	479.866228	Poor	1	0	0	0	1
99996	37932	5	25.0	11	39628.99	4.0	6.0	7.0	2.0	18.0	...	379.0	24.028477	2	496.651610	Poor	1	0	0	0	1
99997	37932	6	25.0	11	39628.99	4.0	6.0	7.0	2.0	27.0	...	380.0	24.028477	0	516.809083	Poor	1	0	0	0	1
99998	37932	7	25.0	11	39628.99	4.0	6.0	7.0	2.0	20.0	...	381.0	24.028477	3	319.164979	Standard	1	0	0	0	1
99999	37932	8	25.0	11	39628.99	4.0	6.0	7.0	2.0	18.0	...	382.0	24.028477	2	393.673696	Poor	1	0	0	0	1

	mes	idade	profissao	salario_anual	num_contas	num_cartoes	juros_emprestimo	num_emprestimos	dias_atraso	num_pagamentos_atrasados	...	taxa_uso_credito	idade_historico_credito	investimento_mensal	comportamento_pagamento	saldo_final_mes	emprestimo_carro	emprestimo_casa	emprestimo_credito	emprestimo_estudantil
0	1	31.0	1	19300.340	6.0	7.0	17.0	5.0	52.0	19.0	...	29.934186	218.0	44.50951	1	312.487689	1	1	0	0
1	4	32.0	0	12600.445	5.0	5.0	10.0	3.0	25.0	18.0	...	28.819407	12.0	0.00000	2	300.994163	0	0	0	1
2	2	48.0	1	20787.690	8.0	6.0	14.0	7.0	24.0	14.0	...	34.235853	215.0	0.00000	0	345.081577	0	1	1	0

Python - AI and Predictions¶

Case: Customer Credit Score¶

Project Description and KPIs¶

Data 📊 - Description and modeling¶

Step 1 - Importing the data source¶

Step 2 - Visualizing the database¶

Step 3 - Data Cleaning | error handling¶

Preparing the data source for the AI¶

Step 4 - Create AI models to predict the credit score¶

Step 5 - Calculate the accuracy of the models | Compare them and choose the best¶

Step 6 - Predict the credit score of new customers¶

Step 7 - Main features that determine the score¶

Conclusion & Insights 🎉¶