La ejecución de este código utiliza los datos de los informes mensuales de datos operativos eléctricos de la Compañía Administradora del Mercado Mayorista Eléctrico (CAMMESA) de Argentina, y los agrupa en un único archivo CSV y filtra duplicados.
Dentro de los mismos consta un resumen anual del balance (MWh) entre oferta y demanda, por tipo de fuente.
# Cargar librerías
import zipfile
import numpy as np
import pandas as pd
from datetime import date, datetime
from dateutil.rrule import rrule, MONTHLY
from os import remove, path
# Definir fechas
sdate = date(2016, 1, 1) # fecha de inicio (año, mes, día)
edate = date(2016, 5, 1) # fecha de fin (año, mes, día)
# Definir valores a extraer
filePath = './CAMMESA'
fileName = 'BASE_INFORME_MENSUAL_'
sheetName = 'BALANCE'
headerRow = 23 #should be one less than the row in excel
columns = 'A:E'
columnNames = ['AÑO', 'MES', 'BALANCE', 'TIPO', 'ENERGIA']
outFile = 'Balance.csv' #nombre del archivo de destino
#Funciones de fecha
def pad_date(month_num):
month = str(month_num)
if len(month) == 1:
month = '0' + month
return month
def month_iter(start_month, start_year, end_month, end_year):
start = datetime(start_year, start_month, 1)
end = datetime(end_year, end_month, 1)
return ((pad_date(d.month), pad_date(d.year)) for d in rrule(MONTHLY, dtstart=start, until=end))
# Descomprimir datos
for (month, year) in month_iter(sdate.month, sdate.year, edate.month, edate.year):
nameComponents = [year, month]
inFile = filePath + '/' + fileName + '-'.join(nameComponents) + '.zip'
if path.isfile(inFile):
print('Extracting ' + inFile)
with zipfile.ZipFile(inFile) as zf:
zf.extractall(filePath)
else:
print('ERROR: ' + inFile + " does not exist")
# Borrar archivos zip
for (month, year) in month_iter(sdate.month, sdate.year, edate.month, edate.year):
nameComponents = [year, month]
inFile = filePath + '/' + fileName + '-'.join(nameComponents) + '.zip'
if path.isfile(inFile):
print('Deleting ' + inFile)
remove(inFile)
else:
print('ERROR: ' + inFile + " does not exist")
# Unificar datos en un DataFrame
out = pd.DataFrame()
for (month,year) in month_iter(sdate.month, sdate.year, edate.month, edate.year):
nameComponents = [year, month]
inFile = filePath + '/' + fileName + '-'.join(nameComponents) + '.xlsx'
if path.isfile(inFile):
print('Concatenating ' + inFile)
df = pd.read_excel(inFile, sheet_name = sheetName, header = headerRow, usecols = columns)
df.columns = columnNames
out = pd.concat([out, df])
else:
print('ERROR: ' + inFile + " does not exist")
out.info()
#Drop duplicated rows
df = out.drop_duplicates(subset = columnNames, inplace = False)
df.reset_index(drop= True, inplace = True)
df.info()
#Exporta la tabla
df.to_csv(outFile, index = False)