On ajoute aussi des colonnes utiles
import pandas
import matplotlib.pyplot as plt
import matplotlib as mpl
from matplotlib import cm
# Load all trials from the Excel workbook.
df = pandas.read_excel("Donnees_M2_RD.xlsx")

# Signed difference between the two distance columns (A minus B).
df["diff_dist"] = df["Dist_A"] - df["Dist_B"]

# "reussi" is True when Response is 2 and Dist_A is the larger value, or when
# Response is 1 and Dist_A is the smaller one; equal distances give False.
df["reussi"] = (
    ((df["Response"] == 2) & (df["Dist_B"] < df["Dist_A"]))
    | ((df["Response"] == 1) & (df["Dist_B"] > df["Dist_A"]))
)
Obtenir la DataFrame indexée par Subject et Space
# Move the "Subject" and "Space" columns into a two-level row index; df itself
# is not modified, the re-indexed copy is bound to df_ss.
df_ss = df.set_index(keys=["Subject", "Space"])
# Bare expression: displays the result when run as a notebook cell.
df_ss
Name_A | Name_B | Dist_A | Dist_B | Mode | Side | Response | RT | diff_dist | reussi | ||
---|---|---|---|---|---|---|---|---|---|---|---|
Subject | Space | ||||||||||
P_ADI_331 | E | 0 | 2 | 2 | 4 | Dic | D | 2 | 18865 | -2 | False |
E | 1 | 4 | 4 | 1 | Dic | D | 2 | 13157 | 3 | True | |
E | 4 | 3 | 3 | 2 | Dic | D | 1 | 11628 | 1 | False | |
E | 2 | 4 | 4 | 1 | Dic | D | 1 | 10068 | 3 | False | |
E | 1 | 2 | 2 | 4 | Dic | D | 1 | 11801 | -2 | True | |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
P_VAR_330 | I | 0 | 1 | 3 | 5 | Dio | D | 1 | 7626 | -2 | True |
I | 3 | 2 | 5 | 1 | Dio | D | 2 | 6349 | 4 | True | |
I | 2 | 0 | 4 | 2 | Dio | D | 2 | 9031 | 2 | True | |
I | 0 | 2 | 2 | 1 | Dio | D | 2 | 16323 | 1 | True | |
I | 0 | 3 | 5 | 1 | Dio | D | 2 | 10139 | 4 | True |
9594 rows × 10 columns
Obtenir la DataFrame indexée par Subject, Space et l’index original avec un niveau nommé essai (on peut utiliser .rename sur un index)
# Index.rename returns a renamed copy of the current row index (df is left
# untouched), so the original row labels are kept as a third index level named
# "essai" next to the "Subject" and "Space" levels.
df_sse = df.set_index(["Subject", "Space", df.index.rename("essai")])
# Bare expression: displays the result when run as a notebook cell.
df_sse
Name_A | Name_B | Dist_A | Dist_B | Mode | Side | Response | RT | diff_dist | reussi | |||
---|---|---|---|---|---|---|---|---|---|---|---|---|
Subject | Space | essai | ||||||||||
P_ADI_331 | E | 0 | 0 | 2 | 2 | 4 | Dic | D | 2 | 18865 | -2 | False |
1 | 1 | 4 | 4 | 1 | Dic | D | 2 | 13157 | 3 | True | ||
2 | 4 | 3 | 3 | 2 | Dic | D | 1 | 11628 | 1 | False | ||
3 | 2 | 4 | 4 | 1 | Dic | D | 1 | 10068 | 3 | False | ||
4 | 1 | 2 | 2 | 4 | Dic | D | 1 | 11801 | -2 | True | ||
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
P_VAR_330 | I | 9589 | 0 | 1 | 3 | 5 | Dio | D | 1 | 7626 | -2 | True |
9590 | 3 | 2 | 5 | 1 | Dio | D | 2 | 6349 | 4 | True | ||
9591 | 2 | 0 | 4 | 2 | Dio | D | 2 | 9031 | 2 | True | ||
9592 | 0 | 2 | 2 | 1 | Dio | D | 2 | 16323 | 1 | True | ||
9593 | 0 | 3 | 5 | 1 | Dio | D | 2 | 10139 | 4 | True |
9594 rows × 10 columns
Faire une dataframe du temps de réponse moyen par Name_A, Name_B
# Mean RT for every (Name_A, Name_B) pair. Selecting the single "RT" column
# before aggregating yields a Series with a two-level index.
(
    df.groupby(["Name_A", "Name_B"])["RT"]
    .mean()
)
Name_A Name_B
0 1 12232.680585
2 12606.089770
3 11913.616667
4 11866.710417
1 0 11688.033333
2 12291.152083
3 11958.879167
4 11601.102083
2 0 11608.336117
1 12040.025000
3 11978.183716
4 11357.747917
3 0 12121.487500
1 12884.901879
2 12542.287500
4 11793.595833
4 0 12300.860417
1 11875.852083
2 12172.350000
3 12170.338205
Name: RT, dtype: float64
Faire une dataframe du temps de réponse max par sujet et par différence de distance (symbolique)
# Largest RT for every (Subject, diff_dist) pair, returned as a Series with a
# two-level index.
(
    df.groupby(["Subject", "diff_dist"])["RT"]
    .max()
)
Subject diff_dist
P_ADI_331 -4 16642
-3 15652
-2 18865
-1 16347
1 18067
...
P_VAR_330 -1 45627
1 80729
2 41271
3 35480
4 38072
Name: RT, Length: 192, dtype: int64
Faire une dataframe du nombre, puis du taux de bonnes réponses par Mode, Side et Space
# Number of trials per (Mode, Side, Space) group.
total = df.groupby(by=["Mode", "Side", "Space"])["Subject"].count()

# Number of successful trials per group. A group without any successful trial
# is absent from the filtered count, so it is re-aligned on the full set of
# groups with 0 as the fill value; otherwise the count and the rate below would
# be NaN for such a group.
reussi = (
    df[df["reussi"]]
    .groupby(by=["Mode", "Side", "Space"])["Subject"]
    .count()
    .reindex(index=total.index, fill_value=0)
)

# Summary table: total trials, successful trials and success rate per group.
df_mss = pandas.DataFrame(total).rename(columns={"Subject": "total"})
df_mss["reussis"] = reussi
df_mss["taux_reussis"] = df_mss["reussis"] / df_mss["total"]
# Bare expression: displays the result when run as a notebook cell.
df_mss
total | reussis | taux_reussis | |||
---|---|---|---|---|---|
Mode | Side | Space | |||
Dic | D | E | 1199 | 905 | 0.754796 |
I | 1198 | 1037 | 0.865609 | ||
G | E | 1199 | 919 | 0.766472 | |
I | 1199 | 1040 | 0.867389 | ||
Dio | D | E | 1200 | 824 | 0.686667 |
I | 1200 | 940 | 0.783333 | ||
G | E | 1199 | 898 | 0.748957 | |
I | 1200 | 1083 | 0.902500 |
Faire un scatter plot du taux de réussite en fonction de la différence en mètres d’une part et de l’arrondi du temps de réaction en dixièmes de seconde d’autre part.
Intégration de la différence de distance en mètres à partir du code de la séance 5
# Lookup tables mapping a distance code (1..5, used as the row index) to a
# distance in metres, one table per value of the "Space" column.
dist_i_m = pandas.DataFrame(
    { "Dist_I_m": [ 0.2, 0.3, 0.4, 0.6, 0.8 ] },
    index = [1,2,3,4,5]
)
dist_e_m = pandas.DataFrame(
    { "Dist_E_m": [ 2, 3, 4, 6, 8 ] },
    index = [1,2,3,4,5]
)

# Space "E": join the table once on Dist_A and once on Dist_B, renaming the
# joined column after each merge so the two results do not collide.
df_e = df[df['Space'] == 'E']
df1 = pandas.merge(df_e, dist_e_m, left_on='Dist_A', right_index=True)
df2 = df1.rename(columns={ 'Dist_E_m': 'Dist_A_m' })
df3 = pandas.merge(df2, dist_e_m, left_on='Dist_B', right_index=True)
df_e_m = df3.rename(columns={ 'Dist_E_m': 'Dist_B_m' })

# Space "I": same two-step join with the other lookup table.
df_i = df[df['Space'] == 'I']
df1 = pandas.merge(df_i, dist_i_m, left_on='Dist_A', right_index=True)
df2 = df1.rename(columns={ 'Dist_I_m': 'Dist_A_m' })
df3 = pandas.merge(df2, dist_i_m, left_on='Dist_B', right_index=True)
df_i_m = df3.rename(columns={ 'Dist_I_m': 'Dist_B_m' })

# Stack both spaces back into a single table.
df_m = pandas.concat([df_e_m,df_i_m])

# New line adding the distance difference in metres. The table values have one
# decimal place, so the difference is rounded to one decimal: without it,
# binary floating point makes e.g. 0.3 - 0.2 and 0.4 - 0.3 two distinct values,
# which would split identical differences into separate groups when
# diff_dist_m is used as a grouping key.
df_m["diff_dist_m"] = (df_m["Dist_A_m"] - df_m["Dist_B_m"]).round(1)
# Bare expression: displays the result when run as a notebook cell.
df_m
Subject | Name_A | Name_B | Dist_A | Dist_B | Mode | Space | Side | Response | RT | diff_dist | reussi | Dist_A_m | Dist_B_m | diff_dist_m | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | P_ADI_331 | 0 | 2 | 2 | 4 | Dic | E | D | 2 | 18865 | -2 | False | 3.0 | 6.0 | -3.0 |
4 | P_ADI_331 | 1 | 2 | 2 | 4 | Dic | E | D | 1 | 11801 | -2 | True | 3.0 | 6.0 | -3.0 |
7 | P_ADI_331 | 0 | 3 | 2 | 4 | Dic | E | D | 1 | 13237 | -2 | True | 3.0 | 6.0 | -3.0 |
36 | P_ADI_331 | 3 | 1 | 2 | 4 | Dic | E | D | 1 | 9523 | -2 | True | 3.0 | 6.0 | -3.0 |
51 | P_ADI_331 | 2 | 4 | 2 | 4 | Dio | E | G | 1 | 11331 | -2 | True | 3.0 | 6.0 | -3.0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
9498 | P_VAR_330 | 4 | 2 | 4 | 3 | Dic | I | G | 2 | 4811 | 1 | True | 0.6 | 0.4 | 0.2 |
9503 | P_VAR_330 | 3 | 0 | 4 | 3 | Dic | I | G | 2 | 6628 | 1 | True | 0.6 | 0.4 | 0.2 |
9526 | P_VAR_330 | 0 | 1 | 4 | 3 | Dic | I | G | 2 | 5458 | 1 | True | 0.6 | 0.4 | 0.2 |
9535 | P_VAR_330 | 2 | 4 | 4 | 3 | Dic | I | G | 1 | 8956 | 1 | False | 0.6 | 0.4 | 0.2 |
9546 | P_VAR_330 | 2 | 3 | 4 | 3 | Dio | I | D | 2 | 13525 | 1 | True | 0.6 | 0.4 | 0.2 |
9594 rows × 15 columns
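On ajoute l’arrondi au 1/10 de seconde de RT (remarque : la division entière par 1000 utilisée ci-dessous tronque RT au lieu de l’arrondir ; l’unité obtenue dépend de celle de RT, à vérifier).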
# Floor-divide RT by 1000: each value is truncated to a whole number of
# 1000-unit steps (18865 -> 18); it is not rounded to the nearest step.
# NOTE(review): the exercise text asks for RT rounded to tenths of a second;
# whether this matches depends on the unit of RT, which is not visible here —
# confirm.
df_m["RT_round"] = df_m["RT"].floordiv(1000)
# Bare expression: displays the result when run as a notebook cell.
df_m
Subject | Name_A | Name_B | Dist_A | Dist_B | Mode | Space | Side | Response | RT | diff_dist | reussi | Dist_A_m | Dist_B_m | diff_dist_m | RT_round | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | P_ADI_331 | 0 | 2 | 2 | 4 | Dic | E | D | 2 | 18865 | -2 | False | 3.0 | 6.0 | -3.0 | 18 |
4 | P_ADI_331 | 1 | 2 | 2 | 4 | Dic | E | D | 1 | 11801 | -2 | True | 3.0 | 6.0 | -3.0 | 11 |
7 | P_ADI_331 | 0 | 3 | 2 | 4 | Dic | E | D | 1 | 13237 | -2 | True | 3.0 | 6.0 | -3.0 | 13 |
36 | P_ADI_331 | 3 | 1 | 2 | 4 | Dic | E | D | 1 | 9523 | -2 | True | 3.0 | 6.0 | -3.0 | 9 |
51 | P_ADI_331 | 2 | 4 | 2 | 4 | Dio | E | G | 1 | 11331 | -2 | True | 3.0 | 6.0 | -3.0 | 11 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
9498 | P_VAR_330 | 4 | 2 | 4 | 3 | Dic | I | G | 2 | 4811 | 1 | True | 0.6 | 0.4 | 0.2 | 4 |
9503 | P_VAR_330 | 3 | 0 | 4 | 3 | Dic | I | G | 2 | 6628 | 1 | True | 0.6 | 0.4 | 0.2 | 6 |
9526 | P_VAR_330 | 0 | 1 | 4 | 3 | Dic | I | G | 2 | 5458 | 1 | True | 0.6 | 0.4 | 0.2 | 5 |
9535 | P_VAR_330 | 2 | 4 | 4 | 3 | Dic | I | G | 1 | 8956 | 1 | False | 0.6 | 0.4 | 0.2 | 8 |
9546 | P_VAR_330 | 2 | 3 | 4 | 3 | Dio | I | D | 2 | 13525 | 1 | True | 0.6 | 0.4 | 0.2 | 13 |
9594 rows × 16 columns
Calcul du taux de réussite
# Number of trials for every (diff_dist_m, RT_round) combination.
total_essais = (
    df_m.groupby(["diff_dist_m", "RT_round"])["RT"]
    .count()
    .rename("total")
)
df_scatter = pandas.DataFrame(total_essais)

# Number of successful trials for the same combinations; combinations without
# any success are added back with a count of 0 so the rate is 0 rather than NaN.
essais_reussis = (
    df_m[df_m["reussi"]]
    .groupby(["diff_dist_m", "RT_round"])["RT"]
    .count()
    .rename("reussis")
    .reindex(index=df_scatter.index, fill_value=0)
)
df_scatter["reussis"] = essais_reussis

# Success rate = successful trials / all trials, per combination.
df_scatter["taux"] = df_scatter["reussis"] / df_scatter["total"]
# One 3D axes: x = metric distance difference, y = RT_round, z = success rate.
fig, ax = plt.subplots(subplot_kw=dict(projection="3d"))
# The index levels of df_scatter carry the names of the two grouping columns.
ax.scatter(
    df_scatter.index.get_level_values("diff_dist_m"),
    df_scatter.index.get_level_values("RT_round"),
    df_scatter["taux"],
)
<mpl_toolkits.mplot3d.art3d.Path3DCollection at 0x7fe6ca63a1f0>