# os is used to get and change the working directory
import os
# Show the directory the script was started from.
print(os.getcwd())
# Folder that holds the CSV data; kept in one named place so it is easy to
# find and change on another machine.
DATA_DIR = "/Users/loanrobinson/Documents/Desktop/dataset"
# Switch into the data folder so the relative glob patterns used later
# resolve against it.
os.chdir(DATA_DIR)
# Confirm the new working directory.
path = os.getcwd()
print("My current working directory is %s" % path)
# List every entry in the working directory.
print(os.listdir())
# List only the CSV files (glob matches file names against a shell-style pattern)
import glob
# Collect the CSV file names once, then show them one per line and as a list.
csv_files = glob.glob("*.csv")
for name in csv_files:
    print(name)
print(csv_files)
# Keep only the CSV files whose name starts with "Lab". glob returns matches
# in arbitrary order, so sort them to get a stable, predictable sequence.
files = sorted(glob.glob("Lab*.csv"))
print(files)
# Read all of the Lab*.csv files in the directory (pandas does the parsing)
import pandas as pd
# Base name of each file, i.e. the file name without its final extension.
# os.path.splitext removes only the last ".ext", so a name with extra dots
# ("Lab6.v2.csv" -> "Lab6.v2") is not cut short the way splitting on the
# first "." would cut it ("Lab6").
list_names = [os.path.splitext(f)[0] for f in files]
print(list_names)
# Join the names into one comma-separated string; printing this shows the
# names without the list brackets and quotes.
names = ",".join(list_names)
print(names)
# The code below indexes dfList[1] and unpacks into exactly two names, so
# check the file count first and fail with a readable message rather than an
# IndexError or "too many values to unpack" further down.
if len(files) != 2:
    raise ValueError(
        "Expected exactly 2 'Lab*.csv' files, found %d: %s" % (len(files), files)
    )
# Load each matched CSV file into its own DataFrame, in the same order as `files`.
dfList = [pd.read_csv(f) for f in files]
# Preview the first 4 rows of each frame.
print(dfList[0].head(4))
print(dfList[1].head(4))
# Descriptive statistics, column labels and type of the first frame.
print(dfList[0].describe())
print(dfList[0].columns)
# A bare expression shows nothing when the file runs as a script, so print it.
print(type(dfList[0]))
# Bind the two frames to named variables. They follow the sorted order of
# `files` (presumably Lab6ex1.csv then Lab6ex2.csv -- confirm against the folder).
# Each frame is passed through pd.DataFrame as before, so the named frames
# are separate objects from the entries in dfList.
Lab6ex1, Lab6ex2 = [pd.DataFrame(df) for df in dfList]
print(Lab6ex2)
# First 5 rows and first 2 columns; iloc slices are positional and exclude
# the end position. Printed explicitly for the same reason as the type above.
print(Lab6ex2.iloc[0:5, 0:2])