Importing Data in Python: Importing data from other file types

xiaoxiao2025-01-06 62

import os
import pickle

import pandas as pd

# Show the contents of the current working directory. A bare
# os.listdir(wd) expression displays nothing outside an interactive
# session, so the listing is printed explicitly.
wd = os.getcwd()
print(os.listdir(wd))

## Loading a pickle file
# Open pickle file and load data: d
# NOTE: pickle.load can execute arbitrary code; only unpickle trusted files.
with open('data.pkl', 'rb') as file:
    d = pickle.load(file)

# Print d
print(d)

# Print datatype of d
print(type(d))

## Listing sheets in Excel files
# Assign spreadsheet filename: file
file = 'battledeath.xlsx'

# Load spreadsheet: xl
xl = pd.ExcelFile(file)

# Print sheet names
print(xl.sheet_names)  # ['2002', '2004']

# Load a sheet into a DataFrame by name: df1
df1 = xl.parse('2004')

# Print the head of the DataFrame df1
print(df1.head())

# Load a sheet into a DataFrame by index: df2
# Index 0 is the first sheet ('2002' in the sheet-name listing above).
df2 = xl.parse(0)

# Print the head of the DataFrame df2
print(df2.head())

  War(country)      2004
0  Afghanistan  9.451028
1      Albania  0.130354
2      Algeria  3.407277
3      Andorra  0.000000
4       Angola  2.597931
  War, age-adjusted mortality due to       2002
0                        Afghanistan  36.083990
1                            Albania   0.128908
2                            Algeria  18.314120
3                            Andorra   0.000000
4                             Angola  18.964560

# Parse the first sheet and rename the columns: df1
# NOTE: skiprows=[0] skips the sheet's own header row, so the next row is
# consumed as the header and then replaced by `names`; the first country
# row therefore does not appear in the result.
df1 = xl.parse('2002', skiprows=[0], names=['Country', 'AAM due to War (2002)'])

# Print the head of the DataFrame df1
print(df1.head())

# Parse the first column of the second sheet and rename the column: df2
# `parse_cols` was removed from pandas; `usecols=[0]` selects the same
# single (first) column.
df2 = xl.parse(1, usecols=[0], skiprows=[0], names=['Country'])

# Print the head of the DataFrame df2
print(df2.head())

               Country  AAM due to War (2002)
0              Albania               0.128908
1              Algeria              18.314120
2              Andorra               0.000000
3               Angola              18.964560
4  Antigua and Barbuda               0.000000
               Country
0              Albania
1              Algeria
2              Andorra
3               Angola
4  Antigua and Barbuda

2.1 Importing SAS/Stata files using pandas

# Import sas7bdat package, plus pyplot for the histogram below
from sas7bdat import SAS7BDAT
import matplotlib.pyplot as plt

# Save file to a DataFrame: df_sas
with SAS7BDAT('sales.sas7bdat') as file:
    df_sas = file.to_data_frame()

# Print head of DataFrame
print(df_sas.head())

# Plot histogram of DataFrame features
# Calling .hist() on the DataFrame is equivalent to the unbound
# pd.DataFrame.hist(...) form and needs no `pd` name in scope.
df_sas[['P']].hist()
plt.ylabel('count')
plt.show()

Using read_stata to import Stata files

# Read the Stata dataset 'disarea.dta' into a pandas DataFrame: df
df = pd.read_stata('disarea.dta')

# Show the first rows of df
print(df.head())

# Draw a histogram of the 'disa10' column and label both axes
df[['disa10']].hist()
plt.xlabel('Extent of disease')
plt.ylabel('Number of countries')
plt.show()

Using h5py to import HDF5 files

# Import packages
import numpy as np
import h5py
import matplotlib.pyplot as plt

# Assign filename: file
file = 'LIGO_data.hdf5'

# Load file: data
# The with-block closes the HDF5 file once everything needed has been read.
with h5py.File(file, 'r') as data:
    # Print the datatype of the loaded file
    print(type(data))

    # Print the keys of the file
    for key in data:
        print(key)

    # Get the HDF5 group: group
    group = data['strain']

    # Check out keys of group
    for key in group:
        print(key)

    # Set variable equal to time series data: strain
    # Dataset.value was removed in h5py 3.0; indexing with [()] reads the
    # whole dataset into a NumPy array, so `strain` remains usable after
    # the file is closed.
    strain = group['Strain'][()]

# Set number of time points to sample: num_samples
num_samples = 10000

# Set time vector
time = np.arange(0, 1, 1/num_samples)

# Plot data
plt.plot(time, strain[:num_samples])
plt.xlabel('GPS Time (s)')
plt.ylabel('strain')
plt.show()

<class 'h5py._hl.files.File'>
meta
quality
strain

Using scipy.io to load MATLAB files

# Packages: scipy.io reads the .mat file, pyplot draws the figure
import scipy.io
import matplotlib.pyplot as plt
import numpy as np

# Read the MATLAB file into mat
mat = scipy.io.loadmat('albeck_gene_expression.mat')

# Report what kind of object loadmat returned
print(type(mat))  # dict

# List the variable names stored in the file
print(mat.keys())

# Inspect the entry stored under 'CYratioCyt': its type, then its shape
print(type(mat['CYratioCyt']))
print(mat['CYratioCyt'].shape)

# Take index 25 along the first axis, from position 5 onward along the
# second axis, and plot that slice on a new figure
data = mat['CYratioCyt'][25, 5:]
fig = plt.figure()
plt.plot(data)
plt.xlabel('time (min.)')
plt.ylabel('normalized fluorescence (measure of expression)')
plt.show()

最新回复(0)