Source code for pystoned.dataset

import pandas as pd
import numpy as np
import os

# Directory that contains this module; the loaders below read their CSV files
# from the "data" folder underneath it.
file_path = os.path.split(__file__)[0]


class production_data:
    """Container for the example datasets provided by pyStoNED."""

    def __init__(self, dmu, x, y, b=None, z=None):
        """Store the components of one dataset on the instance.

        Args:
            dmu (String): decision making unit.
            x (Numbers): input variables.
            y (Numbers): output variables.
            b (Numbers, optional): bad output variables. Defaults to None.
            z (Numbers, optional): contextual variables. Defaults to None.
        """
        # The unit labels are exposed under a longer attribute name than the
        # constructor argument.
        self.decision_making_unit = dmu
        self.x = x
        self.y = y
        self.b = b
        self.z = z
def load_GHG_abatement_cost(year=None, x_select=['HRSN', 'CPNK'], y_select=['VALK'], b_select=['GHG']):
    """Load the OECD GHG emissions example data.

    Reads ``data/abatementCost.csv`` from the package directory. When ``year``
    is given, only the rows whose ``Year`` column equals it are kept.
    Otherwise all rows are kept and each ``Country`` label gets its year
    appended.

    Args:
        year (Numbers, optional): year to keep. Defaults to None (all years).
        x_select (list, optional): input variables. Defaults to ['HRSN', 'CPNK'].
        y_select (list, optional): output variable. Defaults to ['VALK'].
        b_select (list, optional): bad output variable. Defaults to ['GHG'].
            Pass None to load no bad outputs.

    Returns:
        production_data: the selected columns, with ``decision_making_unit``
        taken from the ``Country`` column, ``x``/``y`` stacked column-wise in
        the order given, and ``b`` set to None when ``b_select`` is None.
    """
    dataframe = pd.read_csv(file_path + "/data/abatementCost.csv")

    if year is not None:
        dataframe = dataframe[dataframe['Year'] == year]
    else:
        # All years are pooled: suffix the label with the year (presumably so
        # that the same country in different years stays distinguishable).
        dataframe['Country'] = dataframe['Country'] + \
            dataframe['Year'].apply(str)

    dmu = np.asanyarray(dataframe['Country']).T
    x = np.column_stack(
        [np.asanyarray(dataframe[selected]).T for selected in x_select])
    y = np.column_stack(
        [np.asanyarray(dataframe[selected]).T for selected in y_select])

    # Bind b up front: previously it was only assigned inside the branch, so
    # b_select=None raised UnboundLocalError at the return statement.
    b = None
    if b_select is not None:
        b = np.column_stack(
            [np.asanyarray(dataframe[selected]).T for selected in b_select])

    return production_data(dmu, x, y, b)
def load_Finnish_electricity_firm(x_select=['Energy', 'Length', 'Customers'], y_select=['OPEX', 'CAPEX', 'TOTEX'], z_select=['PerUndGr']):
    """Load the Finnish electricity firm example data.

    Reads ``data/electricityFirms.csv`` from the package directory. The
    decision making units are labelled with the row index of the loaded table.

    Args:
        x_select (list, optional): input variables. Defaults to ['Energy', 'Length', 'Customers'].
        y_select (list, optional): output variable. Defaults to ['OPEX', 'CAPEX', 'TOTEX'].
        z_select (list, optional): contextual variable. Defaults to ['PerUndGr'].
            Pass None to load no contextual variables.

    Returns:
        production_data: the selected columns, with ``x``/``y`` stacked
        column-wise in the order given, ``z`` set to None when ``z_select`` is
        None, and ``b`` left at its default of None.
    """
    dataframe = pd.read_csv(file_path + "/data/electricityFirms.csv")

    dmu = np.asanyarray(dataframe.index.tolist()).T
    x = np.column_stack(
        [np.asanyarray(dataframe[selected]).T for selected in x_select])
    y = np.column_stack(
        [np.asanyarray(dataframe[selected]).T for selected in y_select])

    # Bind z up front: previously it was only assigned inside the branch, so
    # z_select=None raised UnboundLocalError at the return statement.
    z = None
    if z_select is not None:
        z = np.column_stack(
            [np.asanyarray(dataframe[selected]).T for selected in z_select])

    return production_data(dmu, x, y, z=z)
def load_Tim_Coelli_frontier(x_select=['capital', 'labour'], y_select=['output']):
    """Load the Tim Coelli 4.1 example data.

    Reads ``data/41Firm.csv`` from the package directory and labels the
    decision making units with its ``firm`` column.

    Args:
        x_select (list, optional): input variables. Defaults to ['capital', 'labour'].
        y_select (list, optional): output variable. Defaults to ['output'].

    Returns:
        production_data: the selected inputs and outputs, each stacked
        column-wise in the order given; ``b`` and ``z`` stay None.
    """
    frame = pd.read_csv(file_path+"/data/41Firm.csv")

    def stack(columns):
        # One column of the result per requested CSV column.
        return np.column_stack(
            [np.asanyarray(frame[name]).T for name in columns])

    firms = np.asanyarray(frame['firm']).T
    inputs = stack(x_select)
    outputs = stack(y_select)
    return production_data(firms, inputs, outputs)
def load_Philipines_rice_production(year=None, x_select=['AREA', 'LABOR', 'NPK', 'OTHER', 'AREAP', 'LABORP', 'NPKP', 'OTHERP'], y_select=['PROD', 'PRICE']):
    """Load the Philipines rice production example data.

    Reads ``data/riceProduction.csv`` from the package directory. When
    ``year`` is given, only the rows whose ``YEARDUM`` equals ``year - 1989``
    are kept and the units are labelled with the raw ``FMERCODE`` values.
    Otherwise all rows are kept and each label becomes
    ``"<FMERCODE>: <YEARDUM + 1989>"``.

    Args:
        year (Numbers, optional): calendar year to keep. Defaults to None (all years).
        x_select (list, optional): input variables. Defaults to ['AREA', 'LABOR', 'NPK', 'OTHER', 'AREAP', 'LABORP', 'NPKP', 'OTHERP'].
        y_select (list, optional): output variable. Defaults to ['PROD', 'PRICE'].

    Returns:
        production_data: the selected inputs and outputs, each stacked
        column-wise in the order given; ``b`` and ``z`` stay None.
    """
    dataframe = pd.read_csv(file_path + "/data/riceProduction.csv")

    # Identity test against the None singleton instead of "!= None".
    if year is not None:
        # YEARDUM is stored as an offset; the code maps calendar years onto it
        # by subtracting 1989.
        dataframe = dataframe[dataframe['YEARDUM'] == year - 1989]
    else:
        dataframe['FMERCODE'] = dataframe['FMERCODE'].apply(
            str) + ": " + (dataframe['YEARDUM'].apply(int) + 1989).apply(str)

    dmu = np.asanyarray(dataframe['FMERCODE']).T
    x = np.column_stack(
        [np.asanyarray(dataframe[selected]).T for selected in x_select])
    y = np.column_stack(
        [np.asanyarray(dataframe[selected]).T for selected in y_select])

    return production_data(dmu, x, y)