Notebook Séance 2
UE8: Introduction à l’analyse de données
# Print a fixed greeting to standard output.
print("Bonjour")
Bonjour
# Multiplication binds tighter than addition, so this is 5 + (3 * 12) = 41.
sous_total = 5 + 3 * 12
# A bare name on the last line of a notebook cell displays its value.
sous_total
41
Un morceau de code pour illustrer l’usage des variables
# Unit prices in euros: sweet, croissant, loaf of bread.
prix_bombon, prix_croissant, prix_pain = 0.10, 1.0, 1.2

# Bill for 8 sweets, 5 croissants and 2 loaves. The terms are added left to
# right, i.e. in the same order as a step-by-step running total.
total = prix_bombon * 8 + prix_croissant * 5 + prix_pain * 2

print(f"Je dois {total} euros")
Je dois 8.2 euros
import pandas
# Load the spreadsheet into a DataFrame. The path is relative, so the file is
# looked up from the current working directory; no sheet is named, so pandas'
# default sheet selection applies.
df = pandas.read_excel("Donnees_M2_RD.xlsx")
# Display the frame (the notebook shows a truncated preview of large tables).
df
| | Subject | Name_A | Name_B | Dist_A | Dist_B | Mode | Space | Side | Response | RT |
---|---|---|---|---|---|---|---|---|---|---|
0 | P_ADI_331 | 0 | 2 | 2 | 4 | Dic | E | D | 2 | 18865 |
1 | P_ADI_331 | 1 | 4 | 4 | 1 | Dic | E | D | 2 | 13157 |
2 | P_ADI_331 | 4 | 3 | 3 | 2 | Dic | E | D | 1 | 11628 |
3 | P_ADI_331 | 2 | 4 | 4 | 1 | Dic | E | D | 1 | 10068 |
4 | P_ADI_331 | 1 | 2 | 2 | 4 | Dic | E | D | 1 | 11801 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
9589 | P_VAR_330 | 0 | 1 | 3 | 5 | Dio | I | D | 1 | 7626 |
9590 | P_VAR_330 | 3 | 2 | 5 | 1 | Dio | I | D | 2 | 6349 |
9591 | P_VAR_330 | 2 | 0 | 4 | 2 | Dio | I | D | 2 | 9031 |
9592 | P_VAR_330 | 0 | 2 | 2 | 1 | Dio | I | D | 2 | 16323 |
9593 | P_VAR_330 | 0 | 3 | 5 | 1 | Dio | I | D | 2 | 10139 |
9594 rows × 10 columns
# Extract the 'RT' column as a Series; it keeps the frame's row index.
# NOTE(review): RT presumably stands for response/reaction time — confirm the
# meaning and the unit against the data description.
rt = df['RT']
rt
0 18865
1 13157
2 11628
3 10068
4 11801
...
9589 7626
9590 6349
9591 9031
9592 16323
9593 10139
Name: RT, Length: 9594, dtype: int64
# Extract the 'Subject' column as a Series.
subjects = df['Subject']
# Show each distinct value once. The result is only displayed, not stored;
# the index shown for a value is the row where it first appears.
subjects.drop_duplicates()
0 P_ADI_331
400 P_ALM_345
800 P_AMY_346
1200 P_BAM_347
1600 P_BEH_340
2000 P_BLC_325
2399 P_BLR_321
2798 P_BOA_321
3197 P_BOC_342
3597 P_CAR_327
3995 P_CAV_333
4395 P_CON_336
4795 P_GAM_338
5195 P_GHM_334
5595 P_GRC_341
5995 P_GRF_322
6394 P_LAC_354
6794 P_LEG_335
7194 P_MOE_339
7594 P_ROS_336
7994 P_SOA_337
8394 P_TAI_343
8794 P_VAL_329
9194 P_VAR_330
Name: Subject, dtype: object
# Smallest value in the RT Series.
rt.min()
2703
# Column-wise minimum over the whole frame. For text columns (Subject, Mode,
# Space, Side) this is the smallest string in sort order, not a numeric
# minimum, which is why the result has dtype object.
df.min()
Subject P_ADI_331
Name_A 0
Name_B 0
Dist_A 1
Dist_B 1
Mode Dic
Space E
Side D
Response 1
RT 2703
dtype: object
# Decile probabilities 0.1, 0.2, ..., 0.9.
qs = [tenth / 10 for tenth in range(1, 10)]
print(qs)
# RT value at each probability listed in qs. The results are floats even
# though RT holds integers, because pandas interpolates between neighbouring
# observations when a quantile falls between two of them.
rt.quantile(qs)
[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
0.1 6223.0
0.2 7427.2
0.3 8370.9
0.4 9315.0
0.5 10262.5
0.6 11304.6
0.7 12761.3
0.8 15054.2
0.9 19872.0
Name: RT, dtype: float64
# Display the RT Series again.
rt
0 18865
1 13157
2 11628
3 10068
4 11801
...
9589 7626
9590 6349
9591 9031
9592 16323
9593 10139
Name: RT, Length: 9594, dtype: int64
# Boolean mask: True for every row whose RT is strictly below 11000.
filtre = rt.lt(11000)
# Indexing the frame with the mask keeps only the rows flagged True.
df.loc[filtre]
| | Subject | Name_A | Name_B | Dist_A | Dist_B | Mode | Space | Side | Response | RT |
---|---|---|---|---|---|---|---|---|---|---|
3 | P_ADI_331 | 2 | 4 | 4 | 1 | Dic | E | D | 1 | 10068 |
9 | P_ADI_331 | 2 | 1 | 4 | 2 | Dic | E | D | 2 | 10973 |
15 | P_ADI_331 | 1 | 3 | 4 | 3 | Dic | E | D | 2 | 10828 |
17 | P_ADI_331 | 3 | 0 | 3 | 4 | Dic | E | D | 1 | 10438 |
18 | P_ADI_331 | 4 | 2 | 3 | 2 | Dic | E | D | 2 | 10932 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
9588 | P_VAR_330 | 0 | 3 | 3 | 1 | Dio | I | D | 2 | 6153 |
9589 | P_VAR_330 | 0 | 1 | 3 | 5 | Dio | I | D | 1 | 7626 |
9590 | P_VAR_330 | 3 | 2 | 5 | 1 | Dio | I | D | 2 | 6349 |
9591 | P_VAR_330 | 2 | 0 | 4 | 2 | Dio | I | D | 2 | 9031 |
9593 | P_VAR_330 | 0 | 3 | 5 | 1 | Dio | I | D | 2 | 10139 |
5505 rows × 10 columns
# Keep the rows whose RT is strictly below 11000, building the boolean mask
# inline instead of storing it in a variable first.
df[(df['RT'] < 11000)]
| | Subject | Name_A | Name_B | Dist_A | Dist_B | Mode | Space | Side | Response | RT |
---|---|---|---|---|---|---|---|---|---|---|
3 | P_ADI_331 | 2 | 4 | 4 | 1 | Dic | E | D | 1 | 10068 |
9 | P_ADI_331 | 2 | 1 | 4 | 2 | Dic | E | D | 2 | 10973 |
15 | P_ADI_331 | 1 | 3 | 4 | 3 | Dic | E | D | 2 | 10828 |
17 | P_ADI_331 | 3 | 0 | 3 | 4 | Dic | E | D | 1 | 10438 |
18 | P_ADI_331 | 4 | 2 | 3 | 2 | Dic | E | D | 2 | 10932 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
9588 | P_VAR_330 | 0 | 3 | 3 | 1 | Dio | I | D | 2 | 6153 |
9589 | P_VAR_330 | 0 | 1 | 3 | 5 | Dio | I | D | 1 | 7626 |
9590 | P_VAR_330 | 3 | 2 | 5 | 1 | Dio | I | D | 2 | 6349 |
9591 | P_VAR_330 | 2 | 0 | 4 | 2 | Dio | I | D | 2 | 9031 |
9593 | P_VAR_330 | 0 | 3 | 5 | 1 | Dio | I | D | 2 | 10139 |
5505 rows × 10 columns
# Display the full frame: filtering with df[...] returns a new frame, so df
# itself still holds every row.
df
| | Subject | Name_A | Name_B | Dist_A | Dist_B | Mode | Space | Side | Response | RT |
---|---|---|---|---|---|---|---|---|---|---|
0 | P_ADI_331 | 0 | 2 | 2 | 4 | Dic | E | D | 2 | 18865 |
1 | P_ADI_331 | 1 | 4 | 4 | 1 | Dic | E | D | 2 | 13157 |
2 | P_ADI_331 | 4 | 3 | 3 | 2 | Dic | E | D | 1 | 11628 |
3 | P_ADI_331 | 2 | 4 | 4 | 1 | Dic | E | D | 1 | 10068 |
4 | P_ADI_331 | 1 | 2 | 2 | 4 | Dic | E | D | 1 | 11801 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
9589 | P_VAR_330 | 0 | 1 | 3 | 5 | Dio | I | D | 1 | 7626 |
9590 | P_VAR_330 | 3 | 2 | 5 | 1 | Dio | I | D | 2 | 6349 |
9591 | P_VAR_330 | 2 | 0 | 4 | 2 | Dio | I | D | 2 | 9031 |
9592 | P_VAR_330 | 0 | 2 | 2 | 1 | Dio | I | D | 2 | 16323 |
9593 | P_VAR_330 | 0 | 3 | 5 | 1 | Dio | I | D | 2 | 10139 |
9594 rows × 10 columns
# Rows where Name_A equals 0 AND RT is strictly above 14000. The two
# element-wise tests are combined with `&`, the element-wise "and".
df[df['Name_A'].eq(0) & df['RT'].gt(14000)]
| | Subject | Name_A | Name_B | Dist_A | Dist_B | Mode | Space | Side | Response | RT |
---|---|---|---|---|---|---|---|---|---|---|
0 | P_ADI_331 | 0 | 2 | 2 | 4 | Dic | E | D | 2 | 18865 |
13 | P_ADI_331 | 0 | 3 | 4 | 3 | Dic | E | D | 2 | 14330 |
45 | P_ADI_331 | 0 | 1 | 1 | 2 | Dic | E | D | 2 | 16246 |
118 | P_ADI_331 | 0 | 3 | 4 | 5 | Dio | E | D | 2 | 14368 |
372 | P_ADI_331 | 0 | 2 | 3 | 2 | Dic | E | G | 2 | 14043 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
9549 | P_VAR_330 | 0 | 3 | 5 | 4 | Dio | I | D | 2 | 65102 |
9583 | P_VAR_330 | 0 | 4 | 4 | 5 | Dio | I | D | 2 | 45627 |
9585 | P_VAR_330 | 0 | 3 | 2 | 3 | Dio | I | D | 2 | 16671 |
9586 | P_VAR_330 | 0 | 1 | 2 | 3 | Dio | I | D | 1 | 18002 |
9592 | P_VAR_330 | 0 | 2 | 2 | 1 | Dio | I | D | 2 | 16323 |
480 rows × 10 columns
# Mean RT over the rows where Name_A equals 0 and RT is strictly above 14000.
# Each comparison needs its own parentheses because `&` binds tighter than
# `==` and `>`.
# A single .loc[rows, column] lookup picks out just the RT values, instead of
# first building a filtered copy of every column and then indexing it again.
df.loc[(df['Name_A'] == 0) & (df['RT'] > 14000), 'RT'].mean()
21400.008333333335