#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Jun 13 14:04:47 2024

@author: Nikhil Kapila

Helpers to pick a representative ComStock (GA) building from the local
metadata CSV and to download its timeseries parquet files (baseline plus
selected upgrade packages) from the NREL OEDI data lake.
"""

import os
import time

import pandas as pd

# Predefined floor-area bins as they appear in the ComStock metadata
# column 'in.floor_area_category' ("<lower>_<upper>" in square feet).
size_bins = ['1001_5000', '5001_10000', '10001_25000', '25001_50000',
             '50001_100000', '100001_200000', '200001_500000']


def fetch_data() -> pd.DataFrame:
    """Load the Georgia ComStock building metadata table."""
    # Earlier iterations used other extracts:
    # buildings = pd.read_csv('datasets/comstock/GA_filtered_building_list.csv')
    # buildings = pd.read_csv('datasets/comstock/GA_baseline_basic_metadata_and_annual_results.csv')
    return pd.read_csv('datasets/comstock/GA_metadata.csv')


def parse_floor_area_category(floor_area_category):
    """Parse a '<lower>_<upper>' bin label into (lower, upper) ints.

    A label with no underscore is treated as a point value (lower == upper).
    Returns (None, None) when the label cannot be parsed — TypeError covers
    non-string input such as None/NaN coming from the CSV.
    """
    try:
        if "_" in floor_area_category:
            lower, upper = floor_area_category.split("_")
            return int(lower), int(upper)
        return int(floor_area_category), int(floor_area_category)
    except (TypeError, ValueError):
        return None, None


def calculate_midpoint(floor_area_category):
    """Return the midpoint of a floor-area bin, or None if unparseable."""
    lower, upper = parse_floor_area_category(floor_area_category)
    if lower is None or upper is None:
        return None
    return (lower + upper) / 2


def find_closest_floor_area_category(input_floor_area, predefined_bins):
    """Return predefined_bins sorted by midpoint distance to the input bin."""
    input_midpoint = calculate_midpoint(input_floor_area)
    return sorted(predefined_bins,
                  key=lambda b: abs(calculate_midpoint(b) - input_midpoint))


def find_id(floor_area: str = '5001_10000',
            in_vintage: str = '1980 to 1989',
            building_type: str = 'Office') -> int:
    """Pick a representative bldg_id for the given floor area and type.

    Tries the floor-area bins closest to *floor_area* first; within the
    first bin that matches *building_type*, the building with the lowest
    weighted total site energy is returned.  If no bin/type combination
    matches, falls back to the building whose floor-area midpoint is
    closest to the requested one.

    *in_vintage* is currently unused (the vintage filter is commented out
    below) but kept for interface compatibility.

    Raises ValueError when the requested floor area cannot be parsed and
    no filtered match exists.
    """
    # Normalise the two lodging spellings used by callers.
    if building_type in ('Lodging / Residential', 'Lodging'):
        building_type = 'Lodging'
    print(building_type)

    filter_on = fetch_data()
    closest_bins = find_closest_floor_area_category(floor_area, size_bins)
    print(f"Input floor area: {floor_area}, Closest bins: {closest_bins}")

    for closest_category in closest_bins:
        conditions = {
            'in.floor_area_category': [closest_category],
            'in.comstock_building_type_group': [building_type],
            # 'in.vintage': [in_vintage],
            # 'in.cluster_name': [cluster_name]
        }
        query_string = ' & '.join(f"`{col}` in {values}"
                                  for col, values in conditions.items())
        filtered_bldgs = filter_on.query(query_string)
        if not filtered_bldgs.empty:
            filtered_bldgs1 = filtered_bldgs.sort_values(
                by='calc.weighted.site_energy.total.energy_consumption..tbtu',
                ascending=True)
            return filtered_bldgs1.iloc[0]['bldg_id']

    # Fallback: no bin/type match — pick the building whose floor-area
    # midpoint is closest to the requested one.  (The previous version
    # returned the largest building whenever the input midpoint exceeded
    # the smallest one, which could be arbitrarily far from the request.)
    filter_on['midpoint'] = filter_on['in.floor_area_category'].apply(calculate_midpoint)
    input_midpoint = calculate_midpoint(floor_area)
    if input_midpoint is not None:
        # idxmin ignores NaN midpoints from unparseable categories.
        closest_idx = (filter_on['midpoint'] - input_midpoint).abs().idxmin()
        return filter_on.loc[closest_idx, 'bldg_id']

    raise ValueError("No buildings match the given criteria.")


# =============================================================================
# Most affordable upgrades that can be suggested:
#   0 is Baseline
#   "28": "Package 1, Wall & Roof Insulation + New Windows"
#   "29": "Package 2, LED Lighting + Variable Speed HP RTU or HP Boilers"
#   "31": "Package 4, Package 1 + Package 2"
# Example object:
# https://oedi-data-lake.s3.amazonaws.com/nrel-pds-building-stock/end-use-load-profiles-for-us-building-stock/2024/comstock_amy2018_release_1/timeseries_individual_buildings/by_state/upgrade=1/state=GA/100004-1.parquet
# =============================================================================


def fetch_building_urls(b_id: str, state: str = 'GA') -> dict:
    """Build {upgrade_id: OEDI parquet URL} for one building.

    Covers the baseline run (0) and the affordable upgrade packages
    listed in the comment block above.
    """
    # upgrade_list = [21, 11, 26, 10, 19]
    upgrade_list = [0, 28, 29, 31]
    url_dict = {}
    for upgrade_id in upgrade_list:
        url_dict[upgrade_id] = (
            "https://oedi-data-lake.s3.amazonaws.com/nrel-pds-building-stock/"
            "end-use-load-profiles-for-us-building-stock/2024/"
            "comstock_amy2018_release_1/timeseries_individual_buildings/"
            f"by_state/upgrade={upgrade_id}/state={state}/{b_id}-{upgrade_id}.parquet"
        )
    return url_dict


def _load_hourly(file_path: str) -> pd.DataFrame:
    """Read one ComStock parquet file, index by timestamp, resample hourly."""
    df = pd.read_parquet(file_path)
    df.set_index('timestamp', inplace=True)
    return df.resample('h').sum()


def get_datasets_from_comstock(b_id: str, url_dict: dict, max_attempts: int = 3,
                               output_col: str = 'out.site_energy.total.energy_consumption') -> dict:
    """Download (or reuse cached) parquet files and return hourly totals.

    Returns {upgrade_id: single-column DataFrame of *output_col*}.
    Upgrades whose download failed are omitted from the result — the
    previous version fell through to the column selection and crashed
    with a KeyError in that case.
    """
    pd_dict = {}
    folder_path = f'datasets/comstock/downloaded/{b_id}/'
    if not os.path.exists(folder_path):
        os.makedirs(folder_path)

    for k, v in url_dict.items():
        # file_path = folder_path + f"buildingid_{b_id}_upgrade_{k}_data.parquet"
        file_path = folder_path + f"{b_id}_{k}.parquet"
        if os.path.exists(file_path):
            # Cached copy — skip the network round-trip.
            print(f'File already exists at {file_path}. Skipping download.\n')
            pd_dict[k] = _load_hourly(file_path)
        else:
            print(f'Downloading {v}\n')
            if file_downloader(v, k, file_path, max_attempts):
                pd_dict[k] = _load_hourly(file_path)
                print(f'File loaded into dict.')
            else:
                print(f'Failed to download after multiple attempts')
                continue  # nothing to post-process for this upgrade
        # Keep only the requested output column.  (output_col was
        # previously declared but ignored in favour of a hard-coded name.)
        pd_dict[k] = pd.DataFrame(pd_dict[k][output_col])
    return pd_dict


def file_downloader(url: str, upgrade_id: str, file_path: str,
                    max_attempts: int = 3) -> bool:
    """Download *url* to *file_path*, retrying up to *max_attempts* times.

    Returns True on success, False after exhausting all attempts.
    """
    # Imported locally so the pure bin-matching helpers above remain
    # usable in environments without requests installed.
    import requests
    from requests.exceptions import RequestException

    for attempt in range(max_attempts):
        try:
            response = requests.get(url)
            response.raise_for_status()
            with open(file_path, 'wb') as file:
                file.write(response.content)
            print(f'File saved at {file_path}.')
            return True
        except RequestException:
            print(f'Attempt {attempt+1} to download upgrade-ID {upgrade_id} failed.\n')
            time.sleep(2)  # brief back-off before retrying
    print(f'Failed to download after multiple attempts')
    return False


# Example usage:
# data = fetch_data()
# b_id = find_id()
# dict = fetch_building_urls(b_id)
# print(dict)
# pd_dict = get_datasets_from_comstock(b_id, dict)
# lighting_upgrade = pd_dict[21]
# print(lighting_upgrade.columns)
# print(lighting_upgrade["out.site_energy.total.energy_consumption"].sum())

# out.site_energy.total.energy_consumption in kWh
# out.site_energy.total.energy_consumption_intensity in kwh_per_ft^2 (kwh/square foot)