import numpy as np
import pandas as pd
import plotly.io as pio
import plotly.express as px
pd.options.display.max_columns=200
import ipywidgets as widgets


df= pd.read_csv(r'data\aa_auditopin.csv')
df.head(3)


df_b6= df[df.afk<=6]
df_b6=df_b6[['afn', 'fy']]
df_b6.head()


df_cross= pd.crosstab(df_b6.fy, df_b6.afn).reset_index()
df_cross.head()


df_cross_melted= pd.melt(df_cross, id_vars='fy')
df_cross_melted.rename(columns={'value':'count'}, inplace= True)
df_cross_melted.head()


df_cross_melted['bubble_size']= df_cross_melted['count']
df_cross_melted.loc[df_cross_melted.bubble_size<=300, 'bubble_size']=300
df_cross_melted.head()


df_cross_melted.afn= df_cross_melted.afn.str.replace(' LLP', '')
df_cross_melted.loc[df_cross_melted.afn == 'PricewaterhouseCoopers', 'afn']='PwC'
df_cross_melted.loc[df_cross_melted.afn == 'Ernst & Young', 'afn']='EY'
df_cross_melted.loc[df_cross_melted.afn == 'Deloitte & Touche', 'afn']='Deloitte'


fig=px.scatter(df_cross_melted, x='fy', y='count',
               hover_name='afn',
               color='afn',
               size='bubble_size',
               size_max=30,
               range_x= [2000,2020],
               range_y=[-100,3500],
               height=600,
               width= 1000,
               labels=dict(fy='Fiscal Year', count='# of Clients', afn='')
              )

# global set-up
fig.update_layout(
    font_family="Times New Roman",
    font_size=15)

# title set-up
fig.update_layout(
    title={
        'text': "<b>The Number of Clients for Big 6 Audit Firms</b>",
        'font_size':25,
        'x':0.45,
        'xanchor': 'center',
        'yanchor': 'top'})

# xticks set-up
fig.update_xaxes(
    dtick=1)
fig.show()


fig=px.scatter(df_cross_melted, x='fy', y='count',
               hover_name='afn',
               animation_frame='fy',
               animation_group='afn',
               color='afn',
               size='bubble_size',
               size_max=30,
               range_x= [2000,2020],
               range_y=[-100,4000],
               height=600,
               width= 800,
               labels=dict(fy='Fiscal Year', count='# of Clients', afn='')
              )

# global set-up
fig.update_layout(
    font_family="Times New Roman",
    font_size=15)

# title set-up
fig.update_layout(
    title={
        'text': "<b>The Number of clients for big 6 audit firms</b>",
        'font_size':25,
        'x':0.45,
        'xanchor': 'center',
        'yanchor': 'top'},
    legend=dict(yanchor='top',
               y=0.99,
               xanchor='right',
               x=0.99,
               font_size=13,
               ))

# xticks set-up
fig.update_xaxes(
    dtick=5)
fig.show()


fig.write_html('..\_includes\client-dist-figure.html', auto_play= False)


! jupyter nbconvert audit-analytics-client-distribution.ipynb --to html

[NbConvertApp] Converting notebook audit-analytics-client-distribution.ipynb to html
[NbConvertApp] Writing 4343741 bytes to audit-analytics-client-distribution.html

	audit_op_key	afk	afn	af_ctry	fy	form_fkey	cik	tic	big5
0	66900	5	Arthur Andersen LLP	USA	2000	10-K	20	NaN	1
1	78252	5	Arthur Andersen LLP	USA	2001	10-K405	20	NaN	1
2	3538	4	KPMG LLP	USA	2002	10-K	20	NaN	1

	afn	fy
0	Arthur Andersen LLP	2000
1	Arthur Andersen LLP	2001
2	KPMG LLP	2002
3	Grant Thornton LLP	2003
4	Grant Thornton LLP	2004

afn	fy	Arthur Andersen LLP	Deloitte & Touche LLP	Ernst & Young LLP	Grant Thornton LLP	KPMG LLP	PricewaterhouseCoopers LLP
0	2000	2549	2884	3306	981	1998	3098
1	2001	2357	2724	2662	987	2154	2973
2	2002	196	3177	2863	955	2284	2875
3	2003	0	3232	3079	926	2295	3300
4	2004	0	2698	2785	1164	2175	2714

	fy	afn	count
0	2000	Arthur Andersen LLP	2549
1	2001	Arthur Andersen LLP	2357
2	2002	Arthur Andersen LLP	196
3	2003	Arthur Andersen LLP	0
4	2004	Arthur Andersen LLP	0

	fy	afn	count	bubble_size
0	2000	Arthur Andersen LLP	2549	2549
1	2001	Arthur Andersen LLP	2357	2357
2	2002	Arthur Andersen LLP	196	300
3	2003	Arthur Andersen LLP	0	300
4	2004	Arthur Andersen LLP	0	300

The distribution of clients by auditors¶

Import libraries¶

Keep big 6 firms (incl. Arthur Andersen)¶

Reshape¶

Count obs by each audit firm¶

Generate 'size' varible to use as a bubble size¶

Reduce long names¶

Plot without animnation¶

Plot with animation¶