#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Jun 13 14:04:47 2024
@author: Nikhil Kapila
"""
import pandas as pd
import os
import requests
from requests.exceptions import RequestException
import time
# Predefined ComStock floor-area bins, formatted 'lower_upper' (square feet,
# presumably — TODO confirm units against the metadata CSV).
size_bins = ['1001_5000', '5001_10000', '10001_25000', '25001_50000',
'50001_100000', '100001_200000', '200001_500000']
def fetch_data(path: str = 'datasets/comstock/GA_metadata.csv') -> pd.DataFrame:
    """Load the ComStock GA building metadata table.

    Parameters
    ----------
    path : str
        CSV file to read. Defaults to the GA metadata export this project
        ships with (previously hard-coded; now a parameter so other state
        exports can be loaded through the same function).

    Returns
    -------
    pd.DataFrame
        The raw metadata table, one row per building.
    """
    return pd.read_csv(path)
def parse_floor_area_category(floor_area_category):
    """Parse a size-bin label such as '1001_5000' into (lower, upper) ints.

    A label with no underscore is treated as a degenerate bin where
    lower == upper.  Returns (None, None) when the label is not numeric.
    """
    try:
        if "_" not in floor_area_category:
            value = int(floor_area_category)
            return value, value
        lower, upper = floor_area_category.split("_")[:2]
        return int(lower), int(upper)
    except ValueError:
        return None, None
def calculate_midpoint(floor_area_category):
    """Return the arithmetic midpoint of a size bin, or None if unparseable."""
    bounds = parse_floor_area_category(floor_area_category)
    if None in bounds:
        return None
    return sum(bounds) / 2
def find_closest_floor_area_category(input_floor_area, predefined_bins):
    """Rank predefined bins by how close their midpoints are to the input's.

    Returns a new list, closest bin first.
    """
    target = calculate_midpoint(input_floor_area)

    def distance(category):
        return abs(calculate_midpoint(category) - target)

    return sorted(predefined_bins, key=distance)
def find_id(floor_area: str = '5001_10000', in_vintage: str = '1980 to 1989', building_type: str = 'Office') -> int:
    """Pick a representative ComStock building id for the given filters.

    Walks the floor-area bins from closest to farthest; within the first
    bin that has matching buildings, returns the one with the lowest
    weighted total site energy consumption.  Falls back to an extreme-sized
    building when no bin matches.

    NOTE(review): `in_vintage` is currently unused — the vintage condition
    is commented out below.  Confirm whether that filter should be active.
    """
    # Normalize both lodging labels to the single ComStock group name.
    if building_type == 'Lodging / Residential' or building_type == 'Lodging':
        building_type = 'Lodging'
    print(building_type)
    filter_on = fetch_data()
    # Bins ordered by |midpoint distance| from the requested floor area.
    closest_bins = find_closest_floor_area_category(floor_area, size_bins)
    print(f"Input floor area: {floor_area}, Closest bins: {closest_bins}")
    for closest_category in closest_bins:
        conditions = {
            'in.floor_area_category': [closest_category],
            'in.comstock_building_type_group': [building_type],
            # 'in.vintage': [in_vintage],
            # 'in.cluster_name': [cluster_name]
        }
        # Builds a pandas query like: `col` in ['value'] & `col2` in ['value2']
        query_string = ' & '.join([f"`{col}` in {values}" for col, values in conditions.items()])
        filtered_bldgs = filter_on.query(query_string)
        if not filtered_bldgs.empty:
            # Among the matches, prefer the lowest weighted total site energy use.
            filtered_bldgs1 = filtered_bldgs.sort_values(
                by='calc.weighted.site_energy.total.energy_consumption..tbtu', ascending=True)
            return filtered_bldgs1.iloc[0]['bldg_id']
    # Fallback: If no match is found, sort all buildings by size and find the closest one
    filter_on['midpoint'] = filter_on['in.floor_area_category'].apply(calculate_midpoint)
    input_midpoint = calculate_midpoint(floor_area)
    if input_midpoint is not None:
        if input_midpoint <= filter_on['midpoint'].min():
            # Requested size is below every bin: take the smallest building.
            closest_bldg = filter_on.sort_values(by='midpoint', ascending=True).iloc[0]
        else:
            # Otherwise take the largest building.  NOTE(review): this picks an
            # extreme rather than the nearest midpoint — confirm intent.
            closest_bldg = filter_on.sort_values(by='midpoint', ascending=False).iloc[0]
        return closest_bldg['bldg_id']
    raise ValueError("No buildings match the given criteria.")
# =============================================================================
# # Most affordable upgrades that can be suggested:
# 0 is Baseline
# "28": "Package 1, Wall & Roof Insulation + New Windows"
# "29": "Package 2, LED Lighting + Variable Speed HP RTU or HP Boilers"
# "31": "Package 4, Package 1 + Package 2"
# https://oedi-data-lake.s3.amazonaws.com/nrel-pds-building-stock/end-use-load-profiles-for-us-building-stock/2024/comstock_amy2018_release_1/timeseries_individual_buildings/by_state/upgrade=1/state=GA/100004-1.parquet
# =============================================================================
# fetch the building url from NREL
def fetch_building_urls(b_id: str, state: str = 'GA') -> dict:
    """Build OEDI S3 URLs for a building's baseline and upgrade parquet files.

    Upgrade 0 is the baseline; 28/29/31 are the affordable upgrade packages
    listed above.  Returns a dict mapping upgrade id -> download URL.
    """
    # upgrade_list = [21, 11, 26, 10, 19]
    base = ("https://oedi-data-lake.s3.amazonaws.com/nrel-pds-building-stock/"
            "end-use-load-profiles-for-us-building-stock/2024/"
            "comstock_amy2018_release_1/timeseries_individual_buildings/by_state")
    return {
        upgrade: f"{base}/upgrade={upgrade}/state={state}/{b_id}-{upgrade}.parquet"
        for upgrade in [0, 28, 29, 31]
    }
def get_datasets_from_comstock(b_id: str, url_dict: dict, max_attempts: int = 3,
                               output_col: str = 'out.site_energy.total.energy_consumption') -> dict:
    """Download (or reuse cached) ComStock parquet files for one building.

    Parameters
    ----------
    b_id : str
        Building id; also names the local cache folder.
    url_dict : dict
        Mapping of upgrade id -> parquet URL (see fetch_building_urls).
    max_attempts : int
        Retry count forwarded to file_downloader (previously declared but
        never passed through).
    output_col : str
        Column to keep from each parquet file.  BUG FIX: this parameter was
        previously ignored — the column name was hard-coded.

    Returns
    -------
    dict
        upgrade id -> single-column DataFrame of ``output_col``, indexed by
        timestamp and resampled to hourly sums.  Upgrades whose download
        fails are omitted (previously this raised a KeyError).
    """
    pd_dict = {}
    folder_path = f'datasets/comstock/downloaded/{b_id}/'
    os.makedirs(folder_path, exist_ok=True)
    for upgrade_id, url in url_dict.items():
        file_path = folder_path + f"{b_id}_{upgrade_id}.parquet"
        # Reuse a cached file when present; otherwise download it.
        if os.path.exists(file_path):
            print(f'File already exists at {file_path}. Skipping download.\n')
        else:
            print(f'Downloading {url}\n')
            if not file_downloader(url, upgrade_id, file_path, max_attempts):
                print('Failed to download after multiple attempts')
                continue  # skip this upgrade instead of crashing below
        df = pd.read_parquet(file_path)
        df.set_index('timestamp', inplace=True)
        df = df.resample('h').sum()
        pd_dict[upgrade_id] = pd.DataFrame(df[output_col])
        print('File loaded into dict.')
    return pd_dict
def file_downloader(url: str, upgrade_id: str, file_path: str, max_attempts: int = 3,
                    timeout: float = 60) -> bool:
    """Download ``url`` to ``file_path`` with retries.

    Parameters
    ----------
    url : str
        Source URL.
    upgrade_id : str
        Upgrade id, used only in log messages.
    file_path : str
        Destination path for the downloaded bytes.
    max_attempts : int
        Number of attempts before giving up.
    timeout : float
        Per-request timeout in seconds.  BUG FIX: requests.get previously
        had no timeout, so a stalled connection could hang forever.

    Returns
    -------
    bool
        True on success, False after ``max_attempts`` failures.
    """
    for attempt in range(max_attempts):
        try:
            response = requests.get(url, timeout=timeout)
            response.raise_for_status()
            with open(file_path, 'wb') as file:
                file.write(response.content)
            print(f'File saved at {file_path}.')
            return True
        except RequestException as e:
            # Log the actual error (previously captured but never shown),
            # then back off briefly before retrying.
            print(f'Attempt {attempt+1} to download upgrade-ID {upgrade_id} failed: {e}\n')
            time.sleep(2)
    print('Failed to download after multiple attempts')
    return False
# data = fetch_data()
# b_id = find_id()
# dict = fetch_building_urls(b_id)
# print(dict)
# pd_dict = get_datasets_from_comstock(b_id, dict)
# lighting_upgrade = pd_dict[21]
# print(lighting_upgrade.columns)
# print(lighting_upgrade["out.site_energy.total.energy_consumption"].sum())
# out.site_energy.total.energy_consumption in kWh
# out.site_energy.total.energy_consumption_intensity in kwh_per_ft^2 (kwh/square foot)
|