File size: 1,496 Bytes
f144dc6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
# Remove Blank Rows
 # - We can use  .dropna()  to remove blank depending on axis=0(rows), axis=1(columns)
import pandas as pd
health_data = pd.read_csv('data.csv', header=0, sep=',')
health_data.dropna(axis=0, inplace=True) # Chú thích: inplace=True => Xóa các ô lỗi theo hàng ; inplace=False => giữ nguyên các ô lỗi theo hàng
print(health_data)

# Data Types
 # - We can use the  info()  function to list the data types within our dataset:
import pandas as pd
health_data = pd.read_csv("data.csv", header=0, sep=",")
print(health_data.info())

 # - We can use the astype() function to convert the data into float64.
import pandas as pd
health_data = pd.read_csv('data.csv', header=0, sep=',')
health_data['Hours_Work'] = health_data['Hours_Work'].astype(float)
health_data['Hours_Sleep'] = health_data['Hours_Sleep'].astype(float)
print(health_data.info())

# Analyze the data
 # - We can use the  describe()  function in Python to summarize data:
import pandas as pd
health_data = pd.read_csv('data.csv',header=0, sep=',')
pd.set_option('display.max_columns', None) # Có thể dùng thêm  pd.set_option('display.max_rows', None)
print(health_data.describe())
    # Count - Counts the number of observations
    # Mean - The average value
    # Std - Standard deviation (explained in the statistics chapter)
    # Max - The highest value
    # Min - The lowest value
    # 25%, 50% and 75% are percentiles (explained in the statistics chapter)