import os

# Resolve the current working directory and show what it contains.
wd = os.getcwd()

# Keep the listing in a name and print it: a bare `os.listdir(wd)` expression
# only displays its value in an interactive session and is discarded in a script.
entries = os.listdir(wd)
print(entries)
import pickle

# Load the pickled object stored in 'data.pkl' (opened in binary mode).
# SECURITY NOTE: pickle.load can execute arbitrary code while deserialising;
# only use it on files from a trusted source.
with open('data.pkl', 'rb') as file:
    d = pickle.load(file)

# Show the loaded object and its concrete type.
print(d)
print(type(d))
import pandas as pd

# Open the Excel workbook once; individual sheets are parsed from `xl` below.
file = 'battledeath.xlsx'
xl = pd.ExcelFile(file)

# List the sheet names available in the workbook.
print(xl.sheet_names)

# Load the sheet named '2004' into a DataFrame and preview it.
df1 = xl.parse('2004')
print(df1.head())

# Load the sheet named '2002' into a DataFrame and preview it.
df2 = xl.parse('2002')
print(df2.head())
# Output:
#   War(country)      2004
# 0  Afghanistan  9.451028
# 1      Albania  0.130354
# 2      Algeria  3.407277
# 3      Andorra  0.000000
# 4       Angola  2.597931
#   War, age-adjusted mortality due to       2002
# 0                        Afghanistan  36.083990
# 1                            Albania   0.128908
# 2                            Algeria  18.314120
# 3                            Andorra   0.000000
# 4                             Angola  18.964560
# Re-parse sheets from the already-open workbook `xl`, customising the import.

# Sheet '2002': skip spreadsheet row 0 and assign explicit column names.
# NOTE(review): with the default header handling, the row following the skipped
# one is presumably consumed as the header before `names` replaces it, so the
# first data row may not appear in the result -- confirm this is intended.
df1 = xl.parse(
    '2002',
    skiprows=[0],
    names=['Country', 'AAM due to War (2002)'],
)
print(df1.head())

# Sheet at index 1: keep only the first column, skip row 0, and rename it.
# `usecols=[0]` replaces the old `parse_cols=0` keyword, which current pandas
# no longer accepts; a list is used because an integer `usecols` is also gone.
df2 = xl.parse(
    1,
    usecols=[0],
    skiprows=[0],
    names=['Country'],
)
print(df2.head())
# Output:
#                Country  AAM due to War (2002)
# 0              Albania               0.128908
# 1              Algeria              18.314120
# 2              Andorra               0.000000
# 3               Angola              18.964560
# 4  Antigua and Barbuda               0.000000
#                Country
# 0              Albania
# 1              Algeria
# 2              Andorra
# 3               Angola
# 4  Antigua and Barbuda
# 2.1 Importing SAS/Stata files using pandas
# pyplot is imported here because this section calls `plt` before any other
# import of it in the file.
import matplotlib.pyplot as plt
from sas7bdat import SAS7BDAT

# Read the SAS dataset into a DataFrame; the context manager closes the reader.
with SAS7BDAT('sales.sas7bdat') as file:
    df_sas = file.to_data_frame()

# Preview the first rows.
print(df_sas.head())

# Histogram of column 'P' (same call as pd.DataFrame.hist(df_sas[['P']]),
# written as a bound method).
df_sas[['P']].hist()
plt.ylabel('count')
plt.show()
# Using read_stata to import Stata files
# pyplot is imported here because this section calls `plt` before any other
# import of it in the file.
import matplotlib.pyplot as plt

# Load the Stata file into a DataFrame (`pd` is pandas, imported earlier).
df = pd.read_stata('disarea.dta')

# Preview the first rows.
print(df.head())

# Histogram of column 'disa10' (same call as pd.DataFrame.hist(df[['disa10']]),
# written as a bound method), with labelled axes.
df[['disa10']].hist()
plt.xlabel('Extent of disease')
plt.ylabel('Number of countries')
plt.show()
# Using h5py to import HDF5 files
import h5py
# pyplot is imported here because this section calls `plt` before any other
# import of it in the file.
import matplotlib.pyplot as plt
import numpy as np

file = 'LIGO_data.hdf5'

# Open the HDF5 file read-only. The context manager closes the handle once the
# data has been copied into memory; `data` and `group` stay bound afterwards
# but refer to a closed file.
with h5py.File(file, 'r') as data:
    print(type(data))

    # Names of the top-level members of the file.
    for key in data:
        print(key)

    # Names of the members inside the 'strain' group.
    group = data['strain']
    for key in group:
        print(key)

    # Read the whole 'Strain' dataset into a NumPy array.
    # `dataset[()]` replaces `dataset.value`, which was removed in h5py 3.0.
    strain = group['Strain'][()]

# Number of leading samples to plot.
num_samples = 10000

# Time axis on [0, 1) with exactly `num_samples` points. Dividing an integer
# range avoids the length ambiguity of np.arange with a fractional step.
time = np.arange(num_samples) / num_samples

# Plot the first `num_samples` strain values against the time axis.
plt.plot(time, strain[:num_samples])
plt.xlabel('GPS Time (s)')
plt.ylabel('strain')
plt.show()
# Output:
# <class 'h5py._hl.files.File'>
# meta
# quality
# strain
# Loading MATLAB files with scipy.io
import matplotlib.pyplot as plt
import numpy as np
import scipy.io

# Load the MATLAB .mat file; the variables it holds are looked up by name below.
mat = scipy.io.loadmat('albeck_gene_expression.mat')

# Inspect the container type and the variable names stored in the file.
print(type(mat))
print(mat.keys())

# Inspect the type and shape of the 'CYratioCyt' variable.
print(type(mat['CYratioCyt']))
print(np.shape(mat['CYratioCyt']))

# Select row index 25, columns from index 5 onward.
data = mat['CYratioCyt'][25, 5:]

# Plot the selected series on a new figure with labelled axes.
fig = plt.figure()
plt.plot(data)
plt.xlabel('time (min.)')
plt.ylabel('normalized fluorescence (measure of expression)')
plt.show()