Extraer datos de potencia instalada

La ejecución de este código utiliza los datos de los informes mensuales de datos operativos eléctricos de la Compañía Administradora del Mercado Mayorista Eléctrico (CAMMESA) de Argentina, y los agrupa en un único archivo CSV.

Dentro de los mismos consta un resumen mensual que incluye detalles de potencia instalada (MW), por central, agente, región, tipo de máquina, fuente de generación y tecnología.

In [1]:
# Cargar librerías
import zipfile
import numpy as np
import pandas as pd
from datetime import date, datetime
from dateutil.rrule import rrule, MONTHLY
from os import remove, path
In [2]:
# Definir fechas
sdate = date(2016, 1, 1)   # fecha de inicio (año, mes, día)
edate = date(2016, 6, 1)   # fecha de fin (año, mes, día)
In [3]:
# Definir valores a extraer
filePath = './CAMMESA'
fileName = 'BASE_INFORME_MENSUAL_'
sheetName = 'POTENCIA INSTALADA'
headerRow = 22 #should be one less than the row in excel
columns = 'A:J'
columnNames = ['PERIODO', 'CENTRAL', 'AGENTE', 'AGENTE_DESCRIPCION', 
               'REGION', 'CATEGORIA_REGION', 'TIPO_MAQUINA', 'FUENTE_GENERACION',
               'TECNOLOGIA', 'POTENCIA_INSTALADA']

outFile = 'PotenciaInstalada.csv' #nombre del archivo de destino
In [4]:
#Funciones de fecha
def pad_date(month_num):
    month = str(month_num)
    if len(month) == 1:
        month = '0' + month
    return month

def month_iter(start_month, start_year, end_month, end_year):
    start = datetime(start_year, start_month, 1)
    end = datetime(end_year, end_month, 1)

    return ((pad_date(d.month), pad_date(d.year)) for d in rrule(MONTHLY, dtstart=start, until=end))
In [5]:
# Descomprimir datos
for (month,year) in month_iter(sdate.month, sdate.year, edate.month, edate.year):
    nameComponents = [year, month]  
    inFile = filePath + '/' + fileName + '-'.join(nameComponents) + '.zip'

    if path.isfile(inFile):
        print('Extracting ' + inFile)
        try:
            with zipfile.ZipFile(inFile) as zf:
                zf.extractall(filePath)
        except:
            print('ERROR: ' + inFile)
In [6]:
# Borrar archivos zip
for (month,year) in month_iter(sdate.month, sdate.year, edate.month, edate.year):
    nameComponents = [year, month]  
    inFile = filePath + '/' + fileName + '-'.join(nameComponents) + '.zip'
    
    if path.isfile(inFile):
        print('Deleting ' + inFile)
        remove(inFile)
    else:
        print('ERROR: ' + inFile + " does not exist")
ERROR: ./CAMMESA/BASE_INFORME_MENSUAL_2016-01.zip does not exist
ERROR: ./CAMMESA/BASE_INFORME_MENSUAL_2016-02.zip does not exist
ERROR: ./CAMMESA/BASE_INFORME_MENSUAL_2016-03.zip does not exist
ERROR: ./CAMMESA/BASE_INFORME_MENSUAL_2016-04.zip does not exist
ERROR: ./CAMMESA/BASE_INFORME_MENSUAL_2016-05.zip does not exist
ERROR: ./CAMMESA/BASE_INFORME_MENSUAL_2016-06.zip does not exist
In [7]:
# Unificar datos en un DataFrame
out = pd.DataFrame()

for (month,year) in month_iter(sdate.month, sdate.year, edate.month, edate.year):
    nameComponents = [year, month]  
    inFile = filePath + '/' + fileName + '-'.join(nameComponents) + '.xlsx'
    
    if path.isfile(inFile):
        print('Concatenating ' + inFile)
        df = pd.read_excel(inFile, sheet_name = sheetName, header = headerRow, usecols = columns)
        df.columns = columnNames
        out = pd.concat([out, df])
    else:
        print('ERROR: ' + inFile + " does not exist")
Concatenating ./CAMMESA/BASE_INFORME_MENSUAL_2016-01.xlsx
Concatenating ./CAMMESA/BASE_INFORME_MENSUAL_2016-02.xlsx
Concatenating ./CAMMESA/BASE_INFORME_MENSUAL_2016-03.xlsx
Concatenating ./CAMMESA/BASE_INFORME_MENSUAL_2016-04.xlsx
Concatenating ./CAMMESA/BASE_INFORME_MENSUAL_2016-05.xlsx
Concatenating ./CAMMESA/BASE_INFORME_MENSUAL_2016-06.xlsx
In [8]:
out.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 1707 entries, 0 to 281
Data columns (total 10 columns):
 #   Column              Non-Null Count  Dtype         
---  ------              --------------  -----         
 0   PERIODO             1707 non-null   datetime64[ns]
 1   CENTRAL             1707 non-null   object        
 2   AGENTE              1707 non-null   object        
 3   AGENTE_DESCRIPCION  1707 non-null   object        
 4   REGION              1707 non-null   object        
 5   CATEGORIA_REGION    1707 non-null   object        
 6   TIPO_MAQUINA        1707 non-null   object        
 7   FUENTE_GENERACION   1707 non-null   object        
 8   TECNOLOGIA          1707 non-null   object        
 9   POTENCIA_INSTALADA  1707 non-null   float64       
dtypes: datetime64[ns](1), float64(1), object(8)
memory usage: 146.7+ KB
In [9]:
#Exporta la tabla
out.to_csv(outFile, index = False)