File size: 6,790 Bytes
83c588b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Jun 13 14:04:47 2024

@author: Nikhil Kapila
"""

import pandas as pd
import os
import requests
from requests.exceptions import RequestException
import time

# Predefined ComStock floor-area buckets (square feet), encoded "lower_upper".
# These labels match the 'in.floor_area_category' metadata column values.
size_bins = ['1001_5000', '5001_10000', '10001_25000', '25001_50000',
             '50001_100000', '100001_200000', '200001_500000']

def fetch_data()->pd.DataFrame:
    """Load the Georgia ComStock building metadata CSV into a DataFrame."""
    # Earlier iterations read other exports of the same dataset:
    # buildings = pd.read_csv('datasets/comstock/GA_filtered_building_list.csv')
    # buildings = pd.read_csv('datasets/comstock/GA_baseline_basic_metadata_and_annual_results.csv')
    return pd.read_csv('datasets/comstock/GA_metadata.csv')

def parse_floor_area_category(floor_area_category):
    """Parse a 'lower_upper' floor-area label into (lower, upper) ints.

    A label without an underscore is treated as a single value and is
    returned as both bounds.  Non-numeric input yields (None, None).
    """
    try:
        if "_" not in floor_area_category:
            single = int(floor_area_category)
            return single, single
        pieces = floor_area_category.split("_")
        return int(pieces[0]), int(pieces[1])
    except ValueError:
        # Label was not numeric — signal "unparseable" to callers.
        return None, None

def calculate_midpoint(floor_area_category):
    """Return the midpoint of a floor-area bucket, or None if unparseable."""
    bounds = parse_floor_area_category(floor_area_category)
    if None in bounds:
        return None
    low, high = bounds
    return (low + high) / 2

def find_closest_floor_area_category(input_floor_area, predefined_bins):
    """Rank *predefined_bins* by closeness of their midpoints to the
    midpoint of *input_floor_area*.

    Parameters
    ----------
    input_floor_area : str
        Floor-area label of the form 'lower_upper'.
    predefined_bins : iterable of str
        Candidate bucket labels (e.g. the module-level ``size_bins``).

    Returns
    -------
    list
        Bins sorted from closest to farthest.  If *input_floor_area*
        cannot be parsed, the bins are returned in their original order.
    """
    input_midpoint = calculate_midpoint(input_floor_area)
    if input_midpoint is None:
        # Bug fix: the original raised TypeError (None minus float) inside
        # the sort key when the input label was unparseable.
        return list(predefined_bins)
    return sorted(predefined_bins,
                  key=lambda bin: abs(calculate_midpoint(bin) - input_midpoint))

def find_id(floor_area: str = '5001_10000', in_vintage: str = '1980 to 1989', building_type: str = 'Office') -> int:
    """Select a representative ComStock building id for the given criteria.

    Walks the predefined size bins from closest to farthest from
    *floor_area*, filtering the GA metadata by bin and *building_type*,
    and returns the bldg_id with the lowest weighted total site energy
    consumption among the first non-empty match.

    Parameters
    ----------
    floor_area : str
        Floor-area bucket label of the form 'lower_upper' (square feet).
    in_vintage : str
        Currently unused — the vintage filter is commented out below.
    building_type : str
        ComStock building type group; 'Lodging / Residential' is
        normalized to 'Lodging'.

    Returns
    -------
    int
        The selected building id.

    Raises
    ------
    ValueError
        If no building matches and *floor_area* cannot be parsed.
    """
    # Normalize the two lodging spellings to the dataset's label.
    if building_type == 'Lodging / Residential' or building_type == 'Lodging':
        building_type = 'Lodging'
    print(building_type)

    filter_on = fetch_data()
    closest_bins = find_closest_floor_area_category(floor_area, size_bins)
    print(f"Input floor area: {floor_area}, Closest bins: {closest_bins}")

    # Try each bin in order of closeness until some building matches.
    for closest_category in closest_bins:
        conditions = {
            'in.floor_area_category': [closest_category],
            'in.comstock_building_type_group': [building_type],
            # 'in.vintage': [in_vintage],
            # 'in.cluster_name': [cluster_name]
        }

        # Backticks let DataFrame.query handle the dotted column names.
        query_string = ' & '.join([f"`{col}` in {values}" for col, values in conditions.items()])
        filtered_bldgs = filter_on.query(query_string)

        if not filtered_bldgs.empty:
            # Prefer the lowest-consumption match within the bin.
            filtered_bldgs1 = filtered_bldgs.sort_values(
                by='calc.weighted.site_energy.total.energy_consumption..tbtu', ascending=True)
            return filtered_bldgs1.iloc[0]['bldg_id']

    # Fallback: If no match is found, sort all buildings by size and find the closest one
    filter_on['midpoint'] = filter_on['in.floor_area_category'].apply(calculate_midpoint)
    input_midpoint = calculate_midpoint(floor_area)

    if input_midpoint is not None:
        # NOTE(review): this picks the smallest building when the request is
        # below the dataset's minimum, otherwise the LARGEST — not strictly
        # the nearest by midpoint. Confirm this extreme-pick is intended.
        if input_midpoint <= filter_on['midpoint'].min():
            closest_bldg = filter_on.sort_values(by='midpoint', ascending=True).iloc[0]
        else:
            closest_bldg = filter_on.sort_values(by='midpoint', ascending=False).iloc[0]

        return closest_bldg['bldg_id']

    raise ValueError("No buildings match the given criteria.")

# =============================================================================
# # Most affordable upgrades that can be suggested:
# 0 is Baseline
# "28": "Package 1, Wall & Roof Insulation + New Windows"
# "29": "Package 2, LED Lighting + Variable Speed HP RTU or HP Boilers"
# "31": "Package 4, Package 1 + Package 2"
# https://oedi-data-lake.s3.amazonaws.com/nrel-pds-building-stock/end-use-load-profiles-for-us-building-stock/2024/comstock_amy2018_release_1/timeseries_individual_buildings/by_state/upgrade=1/state=GA/100004-1.parquet
# =============================================================================

# fetch the building url from NREL
def fetch_building_urls(b_id:str, state:str='GA')->dict:
    """Build the OEDI S3 parquet URLs for a building's baseline and upgrades.

    Upgrade 0 is the baseline; 28/29/31 are the affordable upgrade
    packages listed in the comment block above.  Returns {upgrade_id: url}.
    """
    # Previously considered upgrade set: [21, 11, 26, 10, 19]
    base = ("https://oedi-data-lake.s3.amazonaws.com/nrel-pds-building-stock/"
            "end-use-load-profiles-for-us-building-stock/2024/"
            "comstock_amy2018_release_1/timeseries_individual_buildings/by_state")
    return {
        upgrade: f"{base}/upgrade={upgrade}/state={state}/{b_id}-{upgrade}.parquet"
        for upgrade in [0, 28, 29, 31]
    }

def get_datasets_from_comstock(b_id:str, url_dict:dict, max_attempts:int=3, output_col:str='out.site_energy.total.energy_consumption')->dict:
    """Download (or reuse cached) ComStock timeseries parquets and return
    them as hourly-resampled single-column DataFrames.

    Parameters
    ----------
    b_id : str
        Building id; also names the local cache folder.
    url_dict : dict
        {upgrade_id: parquet_url}, as produced by fetch_building_urls.
    max_attempts : int
        Retry budget forwarded to file_downloader.  (Bug fix: the
        original accepted this parameter but never passed it on.)
    output_col : str
        Timeseries column to keep.  (Bug fix: the original hard-coded
        this name on the cache-hit path and ignored the parameter.)

    Returns
    -------
    dict
        {upgrade_id: DataFrame indexed by timestamp, holding *output_col*
        summed per hour}.  Upgrades that failed to download are omitted.
    """
    pd_dict = {}
    folder_path = f'datasets/comstock/downloaded/{b_id}/'
    if not os.path.exists(folder_path): os.makedirs(folder_path)

    for k, v in url_dict.items():
        file_path = folder_path + f"{b_id}_{k}.parquet"

        if os.path.exists(file_path):
            print(f'File already exists at {file_path}. Skipping download.\n')
        else:
            print(f'Downloading {v}\n')
            if not file_downloader(v, k, file_path, max_attempts):
                print(f'Failed to download after multiple attempts')
                continue

        # Bug fix: the original only selected the output column on the
        # cache-hit path, so freshly downloaded upgrades kept every column.
        # Both paths now produce an identical single-column hourly frame.
        df = pd.read_parquet(file_path)
        df.set_index('timestamp', inplace=True)
        df = df.resample('h').sum()
        pd_dict[k] = pd.DataFrame(df[output_col])
        print(f'File loaded into dict.')
    return pd_dict

def file_downloader(url:str, upgrade_id:str, file_path:str, max_attempts:int=3)->bool:
    """Download *url* to *file_path*, retrying on request failures.

    Parameters
    ----------
    url : str
        Source URL of the parquet file.
    upgrade_id : str
        Only used in log messages to identify which upgrade failed.
    file_path : str
        Destination path for the downloaded bytes.
    max_attempts : int
        Number of download attempts before giving up.

    Returns
    -------
    bool
        True if the file was saved, False after all attempts failed.
    """
    for attempt in range(max_attempts):
        try:
            # Timeout added so a stalled connection cannot hang forever.
            response = requests.get(url, timeout=60)
            response.raise_for_status()
            with open(file_path, 'wb') as file:
                file.write(response.content)
            print(f'File saved at {file_path}.')
            return True
        except RequestException:
            print(f'Attempt {attempt+1} to download upgrade-ID {upgrade_id} failed.\n')
            # Only back off when another attempt remains.
            if attempt + 1 < max_attempts:
                time.sleep(2)
    # Bug fix: this message used to sit inside the loop, printing once per
    # failed attempt rather than once after the retry budget was exhausted.
    print(f'Failed to download after multiple attempts')
    return False
    
# data = fetch_data()
# b_id = find_id()
# dict = fetch_building_urls(b_id)
# print(dict)
# pd_dict = get_datasets_from_comstock(b_id, dict)
# lighting_upgrade = pd_dict[21]
# print(lighting_upgrade.columns)
# print(lighting_upgrade["out.site_energy.total.energy_consumption"].sum())

# out.site_energy.total.energy_consumption in kWh
# out.site_energy.total.energy_consumption_intensity in kwh_per_ft^2 (kwh/square foot)