# Importing Neccessary Libraries
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols
import seaborn as sns
from matplotlib import pyplot as plt
#!pip install pingouin
import pingouin as pg


# Reading the data into pandas dataframe
df = pd.read_excel('Daten.xlsx')


# Viewing the first 5 rows of the the dataset
df.head()


# Checking the datatypes of each column
df.dtypes

Variante                    int64
Wiederholung                int64
Beigabemenge [Vol.-%]       int64
Material                   object
pH                        float64
g KCl / l Substrat        float64
mg NO3-N / l Substrat     float64
mg NH4-N / l Substrat     float64
mg P2O5 / l Substrat      float64
mg K2O / l Substrat       float64
Gesamt-N [% TS]           float64
P [% TS]                  float64
K [% TS]                  float64
Ca [% TS]                 float64
Mg [% TS]                 float64
Frischmasse [g]           float64
Blütenanzahl                int64
Wurzelbonitur [1-5]       float64
Wurzelbonitur [1-5]i      float64
Pflanzenhöhe [cm]           int64
Pflanzenhöhe [cm]i        float64
Pflanzenbreite [cm]       float64
Pflanzenbreite [cm]i        int64
Chlorophyllgehalt         float64
Trockenmasseanteil [%]    float64
dtype: object


# Changing the datatype of columns as per requirements for analysis
df['pH'] = pd.to_numeric(df['pH'],errors = 'coerce')
df['P [% TS]'] = pd.to_numeric(df['P [% TS]'],errors = 'coerce')
df['Beigabemenge [Vol.-%]'] = df['Beigabemenge [Vol.-%]'].astype('string')


# Creating a label column to create boxplot
df['label'] = df['Beigabemenge [Vol.-%]'] + '% ' + df['Material']
# Sorting the dataframe by label
df = df.sort_values(by='label')


# Creating boxplot between varient and pH
sns.set(rc = {'figure.figsize':(30,15)})
ax = sns.boxplot(x="label",y="pH",data=df)
ax.tick_params(axis='x', rotation=45)
ax.set_title('Variente vs pH', fontsize =20)
plt.show()


# Creating boxplot between Variente and g KCl / l Substrat
sns.set(rc = {'figure.figsize':(30,15)})
ax = sns.boxplot(x="label",y="g KCl / l Substrat",data=df)
ax.tick_params(axis='x', rotation=45)
ax.set_title('Variente vs g KCl / l Substrat', fontsize =20)
plt.show()


# Creating boxplot between variante and mg NO3-N / l Substrat
sns.set(rc = {'figure.figsize':(30,15)})
ax = sns.boxplot(x="label",y="mg NO3-N / l Substrat",data=df)
ax.tick_params(axis='x', rotation=45)
ax.set_title('Variente vs mg NO3-N / l Substrat', fontsize =20)
plt.show()


# Creating boxplot between variente and mg NH4-N / l Substrat
sns.set(rc = {'figure.figsize':(30,15)})
ax = sns.boxplot(x="label",y="mg NH4-N / l Substrat",data=df)
ax.tick_params(axis='x', rotation=45)
ax.set_title('Variente vs mg NH4-N / l Substrat', fontsize =20)
plt.show()


# Creating boxplot for Variente vs mg P2O5 / l Substrat
sns.set(rc = {'figure.figsize':(30,15)})
ax = sns.boxplot(x="label",y="mg P2O5 / l Substrat",data=df)
ax.tick_params(axis='x', rotation=45)
ax.set_title('Variente vs mg P2O5 / l Substrat', fontsize =20)
plt.show()


# Creating boxplot for Variente vs mg K2O / l Substrat
sns.set(rc = {'figure.figsize':(30,15)})
ax = sns.boxplot(x="label",y="mg K2O / l Substrat",data=df)
ax.tick_params(axis='x', rotation=45)
ax.set_title('Variente vs mg K2O / l Substrat', fontsize =20)
plt.show()


# Creating boxplot for Variente vs P [% TS]
sns.set(rc = {'figure.figsize':(30,15)})
ax = sns.boxplot(x="label",y="P [% TS]",data=df)
ax.tick_params(axis='x', rotation=45)
ax.set_title('Variente vs P [% TS]', fontsize =20)
plt.show()


# Creating boxplot for Variente vs K [% TS]
sns.set(rc = {'figure.figsize':(30,15)})
ax = sns.boxplot(x="label",y="K [% TS]",data=df)
ax.tick_params(axis='x', rotation=45)
ax.set_title('Variente vs K [% TS]', fontsize =20)
plt.show()


# Creating boxplot for Variente vs Ca [% TS]
sns.set(rc = {'figure.figsize':(30,15)})
ax = sns.boxplot(x="label",y="Ca [% TS]",data=df)
ax.tick_params(axis='x', rotation=45)
ax.set_title('Variente vs Ca [% TS]', fontsize =20)
plt.show()


# Creating boxplot for Variente vs Mg [% TS]
sns.set(rc = {'figure.figsize':(30,15)})
ax = sns.boxplot(x="label",y="Mg [% TS]",data=df)
ax.tick_params(axis='x', rotation=45)
ax.set_title('Variente vs Mg [% TS]', fontsize =20)
plt.show()


# List of Dependant Variables to execute the loop
DV = ['pH', 'g KCl / l Substrat', 'mg NO3-N / l Substrat', 'mg NH4-N / l Substrat', 'mg P2O5 / l Substrat', 'mg K2O / l Substrat',
       'Gesamt-N [% TS]', 'P [% TS]', 'K [% TS]', 'Ca [% TS]', 'Mg [% TS]', 'Frischmasse [g]', 'Blütenanzahl', 'Wurzelbonitur [1-5]', 'Wurzelbonitur [1-5]i',
       'Pflanzenhöhe [cm]', 'Pflanzenhöhe [cm]i', 'Pflanzenbreite [cm]',
       'Pflanzenbreite [cm]i', 'Chlorophyllgehalt', 'Trockenmasseanteil [%]']


# Doing Anova Analysis

list_results = []
for dv in DV:
  # run anova and create dv variable to identify dependent variable
  aov = pg.anova(data=df, dv=dv, between='Variante', detailed=True)\
    .assign(dv=dv)
  # append to list of results
  list_results.append(aov)

# concat all results into a DataFrame
df_results = pd.concat(list_results, axis=0)
# Dropping Within Rows
df_results.drop(df_results.index[df_results['Source'] == 'Within'], inplace=True)
# Export to Excel
df_results.to_excel('Anova.xlsx')
df_results


# Doing Post-Hoc Analysis

list_results = []
for dv in DV:
  # run anova and create dv variable to identify dependent variable
  posthocs = pg.pairwise_ttests(data=df, dv=dv, between='Variante')\
    .assign(dv=dv)
  # append to list of results
  list_results.append(posthocs)

# concat all results into a DataFrame
df_results2 = pd.concat(list_results, axis=0)

# Export to Excel
df_results2.to_excel('Post_Hoc.xlsx')
df_results2

C:\Users\shahz\anaconda3\lib\site-packages\pingouin\bayesian.py:145: IntegrationWarning: The integral is probably divergent, or slowly convergent.
  integr = quad(fun, 0, np.inf, args=(t, n, r, df))[0]

	Variante	Wiederholung	Beigabemenge [Vol.-%]	Material	pH	g KCl / l Substrat	mg NO3-N / l Substrat	mg NH4-N / l Substrat	mg P2O5 / l Substrat	mg K2O / l Substrat	...	Frischmasse [g]	Blütenanzahl	Wurzelbonitur [1-5]	Wurzelbonitur [1-5]i	Pflanzenhöhe [cm]	Pflanzenhöhe [cm]i	Pflanzenbreite [cm]	Pflanzenbreite [cm]i	Chlorophyllgehalt	Trockenmasseanteil [%]
0	1	1	0	Torf	5.8	0.26	3.704760	3.280000	14.270560	16.400	...	95.300000	42	4.75	4.500000	13	22.166667	20.714286	46	38.566667	11.235955
1	1	2	0	Torf	5.7	0.33	3.659580	3.240000	16.684848	17.172	...	86.150000	37	4.75	5.000000	11	23.000000	20.785714	44	37.866667	11.475410
2	1	3	0	Torf	5.7	0.27	3.834508	3.643685	15.016605	19.440	...	86.933333	36	4.75	5.000000	13	28.166667	21.357143	41	39.633333	11.111111
3	1	4	0	Torf	5.7	0.31	3.659580	3.240000	12.106260	16.200	...	89.683333	34	4.25	4.666667	14	26.833333	19.928571	43	37.166667	11.949686
4	2	1	30	Holzfasern	6.0	0.46	2.891520	5.016666	18.884092	32.256	...	70.816667	40	5.00	5.000000	12	23.833333	19.357143	36	35.766667	11.206897

Source	SS	DF	MS	F	p-unc	np2	dv
Variante	1.315000e+00	10	0.131500	19.074725	5.706652e-11	0.852512	pH
Variante	3.406064e+00	10	0.340606	17.898105	1.342357e-10	0.844326	g KCl / l Substrat
Variante	2.928979e+03	10	292.897886	2.055398	5.871264e-02	0.383799	mg NO3-N / l Substrat
Variante	8.194135e+03	10	819.413528	29.275912	1.413117e-13	0.898698	mg NH4-N / l Substrat
Variante	7.110291e+04	10	7110.291326	30.411283	8.144041e-14	0.902110	mg P2O5 / l Substrat
Variante	3.600441e+06	10	360044.083265	52.798553	2.140272e-17	0.941175	mg K2O / l Substrat
Variante	2.026364e+00	10	0.202636	4.487919	4.974350e-04	0.576267	Gesamt-N [% TS]
Variante	8.042137e-02	10	0.008042	5.475764	1.039140e-04	0.631156	P [% TS]
Variante	5.384432e+01	10	5.384432	111.753635	5.665210e-22	0.972163	K [% TS]
Variante	2.014378e-01	10	0.020144	4.997903	2.270779e-04	0.609656	Ca [% TS]
Variante	2.027775e-01	10	0.020278	71.291246	5.524975e-19	0.957042	Mg [% TS]
Variante	1.362700e+04	10	1362.699505	39.392323	1.799251e-15	0.922703	Frischmasse [g]
Variante	3.034045e+03	10	303.404545	5.349906	1.134698e-04	0.618493	Blütenanzahl
Variante	3.475000e+01	10	3.475000	20.850000	1.697146e-11	0.863354	Wurzelbonitur [1-5]
Variante	5.315025e+01	10	5.315025	31.492519	4.898803e-14	0.905152	Wurzelbonitur [1-5]i
Variante	8.163636e+01	10	8.163636	8.353488	1.420132e-06	0.716823	Pflanzenhöhe [cm]
Variante	2.370694e+02	10	23.706944	10.234887	1.463084e-07	0.756186	Pflanzenhöhe [cm]i
Variante	3.659474e+02	10	36.594736	31.640511	4.575051e-14	0.905554	Pflanzenbreite [cm]
Variante	1.442227e+03	10	144.222727	28.541829	2.038334e-13	0.896363	Pflanzenbreite [cm]i
Variante	4.438153e+01	10	4.438153	2.537912	2.153880e-02	0.434729	Chlorophyllgehalt
Variante	1.103886e+01	10	1.103886	0.906046	5.387308e-01	0.215415	Trockenmasseanteil [%]

	Contrast	A	B	Paired	Parametric	T	dof	alternative	p-unc	BF10	hedges	dv
0	Variante	1	2	False	True	-3.655631	6.0	two-sided	0.010635	4.984	-2.247758	pH
1	Variante	1	3	False	True	-2.777460	6.0	two-sided	0.032104	2.49	-1.707792	pH
2	Variante	1	4	False	True	-5.656854	6.0	two-sided	0.001311	21.217	-3.478261	pH
3	Variante	1	5	False	True	-7.549834	6.0	two-sided	0.000280	67.374	-4.642208	pH
4	Variante	1	6	False	True	-1.805788	6.0	two-sided	0.120979	1.171	-1.110335	pH
...	...	...	...	...	...	...	...	...	...	...	...	...
50	Variante	8	10	False	True	-1.537128	6.0	two-sided	0.175175	0.967	-0.945142	Trockenmasseanteil [%]
51	Variante	8	11	False	True	-1.054266	6.0	two-sided	0.332353	0.716	-0.648242	Trockenmasseanteil [%]
52	Variante	9	10	False	True	-0.800652	6.0	two-sided	0.453861	0.631	-0.492301	Trockenmasseanteil [%]
53	Variante	9	11	False	True	-0.450991	6.0	two-sided	0.667836	0.556	-0.277303	Trockenmasseanteil [%]
54	Variante	10	11	False	True	0.291481	6.0	two-sided	0.780498	0.537	0.179224	Trockenmasseanteil [%]

Flower Statistical Data Analysis¶