Spaces:
Running
Running
| import requests | |
| import pandas as pd | |
| from datetime import datetime, date, timedelta | |
| import pytz | |
# --- Configuration ---
# Location sent as the latitude/longitude parameters of every API request.
LATITUDE, LONGITUDE = 24.86, 67.01
# Destination CSV file for the assembled hourly records.
HISTORICAL_CSV = "data/last_7_days_hourly_data.csv"
# Timezone name used both to compute "now" and as the API "timezone" parameter.
TIMEZONE = 'Asia/Karachi'
def _fetch_hourly_table(url, params, timeout):
    """Call one API endpoint and return the 'hourly' part of its JSON reply as a DataFrame.

    Raises KeyError when the reply has no 'hourly' key, and whatever
    ``requests`` raises on connection problems or when *timeout* expires.
    """
    # A timeout is essential here: without it a stalled connection blocks forever.
    response = requests.get(url, params=params, timeout=timeout)
    return pd.DataFrame(response.json()['hourly'])


def _fetch_weather_and_air_quality(weather_url, latitude, longitude, start_date, end_date, timeout=30):
    """Fetch hourly weather and air-quality data for a date range, merged on 'time'.

    *weather_url* selects the weather endpoint (archive or forecast); the
    air-quality endpoint is the same for both windows. The returned frame has
    a naive datetime 'time' column.
    """
    window = {
        "latitude": latitude,
        "longitude": longitude,
        "start_date": start_date.strftime("%Y-%m-%d"),
        "end_date": end_date.strftime("%Y-%m-%d"),
        "timezone": TIMEZONE,
    }
    weather = _fetch_hourly_table(
        weather_url,
        {**window, "hourly": "temperature_2m,relative_humidity_2m,wind_speed_10m"},
        timeout,
    )
    air_quality = _fetch_hourly_table(
        "https://air-quality-api.open-meteo.com/v1/air-quality",
        {**window, "hourly": "pm10,pm2_5,carbon_monoxide,nitrogen_dioxide,us_aqi"},
        timeout,
    )
    merged = pd.merge(weather, air_quality, on='time')
    merged['time'] = pd.to_datetime(merged['time'])
    return merged


def get_complete_past_week_hourly_data(latitude, longitude, filename):
    """
    Assemble the last 7 days of hourly weather and air-quality data and save it as CSV.

    Two date windows are requested and stitched together: an older window
    (8 to 2 days ago) from the archive endpoint and a recent window
    (2 days ago to today) from the forecast endpoint. Overlapping hours are
    de-duplicated in favour of the recent window. Rows later than "now" or
    older than 7 days are discarded, as are rows with any missing value.

    Args:
        latitude: Latitude sent to the API requests.
        longitude: Longitude sent to the API requests.
        filename: Destination handed to ``DataFrame.to_csv``. When it is a
            path, missing parent directories are created first.

    Returns:
        None. Progress is printed; if both fetches fail nothing is written.
    """
    import os  # local import: keeps this block self-contained

    print("--- Starting full historical data assembly ---")

    # Compute "today" in the target timezone so the script behaves the same
    # on servers running in another timezone (e.g. UTC-based CI runners).
    karachi_now = datetime.now(pytz.timezone(TIMEZONE))
    today_in_karachi = karachi_now.date()

    # --- Step 1: Fetch HISTORICAL data (Archive API) ---
    hist_start_date = today_in_karachi - timedelta(days=8)
    hist_end_date = today_in_karachi - timedelta(days=2)
    print(f"Fetching historical archive from {hist_start_date} to {hist_end_date}...")
    try:
        df_historical = _fetch_weather_and_air_quality(
            "https://archive-api.open-meteo.com/v1/archive",
            latitude, longitude, hist_start_date, hist_end_date,
        )
        print(f"-> OK: Fetched {len(df_historical)} records from archive.")
    except KeyError:
        # The reply carried no 'hourly' payload; treated as a non-fatal gap.
        print("!!! WARNING: Historical data not available in the requested range (this is normal). Proceeding with recent data.")
        df_historical = pd.DataFrame()
    except Exception as e:
        print(f"!!! WARNING: Could not fetch historical data. Reason: {e}")
        df_historical = pd.DataFrame()

    # --- Step 2: Fetch RECENT data (Forecast API) ---
    recent_start_date = today_in_karachi - timedelta(days=2)
    recent_end_date = today_in_karachi
    print(f"Fetching recent measured data from {recent_start_date} to {recent_end_date}...")
    try:
        df_recent = _fetch_weather_and_air_quality(
            "https://api.open-meteo.com/v1/forecast",
            latitude, longitude, recent_start_date, recent_end_date,
        )
        print(f"-> OK: Fetched {len(df_recent)} recent records.")
    except Exception as e:
        print(f"!!! WARNING: Could not fetch recent data. Reason: {e}")
        df_recent = pd.DataFrame()

    # --- Step 3: Combine, De-duplicate, and Filter ---
    print("Combining and cleaning final dataset...")
    # Only concatenate frames that actually hold rows; empty placeholders
    # add nothing and make pandas guess dtypes.
    frames = [frame for frame in (df_historical, df_recent) if not frame.empty]
    if not frames:
        print("!!! FATAL: Both historical and recent data fetches failed. Cannot proceed.")
        return

    # Recent data is concatenated last, so keep='last' prefers it on overlap.
    df_combined = pd.concat(frames)
    df_combined = df_combined.drop_duplicates(subset='time', keep='last').sort_values(by='time')

    # Make the timestamps timezone-aware so they compare against karachi_now.
    df_combined['time'] = df_combined['time'].dt.tz_localize(TIMEZONE, ambiguous='infer')
    seven_days_ago = karachi_now - timedelta(days=7)
    in_window = (df_combined['time'] <= karachi_now) & (df_combined['time'] >= seven_days_ago)
    df_final_week = df_combined[in_window]

    # --- Step 4: Final Rename and Save ---
    df_final = df_final_week.rename(columns={
        'time': 'timestamp',
        'temperature_2m': 'temperature',
        'relative_humidity_2m': 'humidity',
        'wind_speed_10m': 'wind_speed',
        'pm2_5': 'pm25',
        'us_aqi': 'aqi',
    }).dropna()

    # to_csv does not create directories; make sure the target folder exists.
    if isinstance(filename, (str, os.PathLike)):
        parent_dir = os.path.dirname(os.fspath(filename))
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)
    df_final.to_csv(filename, index=False)

    print("\n DONE ")
    print(f"Saved {len(df_final)} hourly records to '{filename}', covering a complete and up-to-date 7-day period.")
# Script entry point: build the 7-day CSV for the configured location.
if __name__ == "__main__":
    get_complete_past_week_hourly_data(
        latitude=LATITUDE,
        longitude=LONGITUDE,
        filename=HISTORICAL_CSV,
    )