Spaces:

Qar-Raz
/

AQI_Predictor_Qamar

Running

AQI_Predictor_Qamar / fetch_current_data.py

github-actions[bot]

Automated backend deployment for 2026-04-03

334c1ea about 23 hours ago

5.39 kB

	import requests
	import pandas as pd
	from datetime import datetime, date, timedelta
	import pytz

	# --- Configuration ---
	# Coordinates (decimal degrees) sent to the Open-Meteo APIs; presumably Karachi, matching TIMEZONE below.
	LATITUDE = 24.86
	LONGITUDE = 67.01
	# Path of the CSV file that the script entry point passes as the output filename.
	HISTORICAL_CSV = "data/last_7_days_hourly_data.csv"
	TIMEZONE = 'Asia/Karachi' # IANA zone name; used for the local "now" and as the APIs' "timezone" parameter

	def get_complete_past_week_hourly_data(latitude, longitude, filename):
	    """
	    Assemble the last seven days of hourly weather and air-quality data and save it as CSV.

	    Two date ranges are fetched and stitched together:
	      * the archive endpoint, from 8 days ago to 2 days ago;
	      * the forecast endpoint, from 2 days ago to today.
	    Where both ranges contain the same timestamp, the row from the recent
	    range wins. Rows later than "now" or older than seven days are dropped,
	    columns are renamed, rows with any missing value are removed, and the
	    result is written to ``filename``.

	    Args:
	        latitude: Latitude sent to the APIs.
	        longitude: Longitude sent to the APIs.
	        filename: Destination passed to ``DataFrame.to_csv``.

	    Returns:
	        None. If both fetches fail, a fatal message is printed and nothing
	        is written.
	    """
	    print("--- Starting full historical data assembly ---")

	    # Anchor all dates on the target timezone rather than the host clock, so the
	    # script behaves the same on a UTC server (e.g. a CI runner).
	    karachi_now = datetime.now(pytz.timezone(TIMEZONE))
	    today_in_karachi = karachi_now.date()

	    # --- Step 1: Fetch HISTORICAL data (Archive API) ---
	    hist_start_date = today_in_karachi - timedelta(days=8)
	    hist_end_date = today_in_karachi - timedelta(days=2)

	    print(f"Fetching historical archive from {hist_start_date} to {hist_end_date}...")
	    try:
	        df_historical = _fetch_merged_hourly(
	            "https://archive-api.open-meteo.com/v1/archive",
	            latitude, longitude, hist_start_date, hist_end_date,
	        )
	        print(f"-> OK: Fetched {len(df_historical)} records from archive.")
	    except KeyError:
	        # A response without an 'hourly' section is treated as "no archive data yet".
	        print("!!! WARNING: Historical data not available in the requested range (this is normal). Proceeding with recent data.")
	        df_historical = pd.DataFrame()
	    except Exception as e:
	        # Best effort: the recent range alone may still yield a usable file.
	        print(f"!!! WARNING: Could not fetch historical data. Reason: {e}")
	        df_historical = pd.DataFrame()

	    # --- Step 2: Fetch RECENT data (Forecast API) ---
	    recent_start_date = today_in_karachi - timedelta(days=2)
	    recent_end_date = today_in_karachi

	    print(f"Fetching recent measured data from {recent_start_date} to {recent_end_date}...")
	    try:
	        df_recent = _fetch_merged_hourly(
	            "https://api.open-meteo.com/v1/forecast",
	            latitude, longitude, recent_start_date, recent_end_date,
	        )
	        print(f"-> OK: Fetched {len(df_recent)} recent records.")
	    except Exception as e:
	        print(f"!!! WARNING: Could not fetch recent data. Reason: {e}")
	        df_recent = pd.DataFrame()

	    # --- Step 3: Combine, De-duplicate, and Filter ---
	    print("Combining and cleaning final dataset...")
	    # Skip empty placeholders so they never take part in the concatenation.
	    frames = [frame for frame in (df_historical, df_recent) if not frame.empty]
	    if not frames:
	        print("!!! FATAL: Both historical and recent data fetches failed. Cannot proceed.")
	        return

	    # Historical rows come first, so keep='last' prefers the recent row on overlap.
	    df_combined = (
	        pd.concat(frames)
	        .drop_duplicates(subset='time', keep='last')
	        .sort_values(by='time')
	    )

	    # The parsed timestamps are naive; attach the zone so they can be compared
	    # with the timezone-aware "now".
	    df_combined['time'] = df_combined['time'].dt.tz_localize(TIMEZONE, ambiguous='infer')

	    # Keep only the window [now - 7 days, now]; later rows in today's range are dropped.
	    seven_days_ago = karachi_now - timedelta(days=7)
	    in_window = (df_combined['time'] >= seven_days_ago) & (df_combined['time'] <= karachi_now)
	    df_final_week = df_combined[in_window]

	    # --- Step 4: Final Rename and Save ---
	    df_final = df_final_week.rename({
	        'time': 'timestamp',
	        'temperature_2m': 'temperature',
	        'relative_humidity_2m': 'humidity',
	        'wind_speed_10m': 'wind_speed',
	        'pm2_5': 'pm25',
	        'us_aqi': 'aqi'
	    }, axis='columns').dropna()

	    df_final.to_csv(filename, index=False)

	    print("\n DONE ")
	    print(f"Saved {len(df_final)} hourly records to '{filename}', covering a complete and up-to-date 7-day period.")


	def _fetch_hourly_frame(url, params, timeout=30):
	    """Return the 'hourly' section of a JSON API response as a DataFrame.

	    ``timeout`` (seconds) bounds the HTTP call; requests applies no timeout
	    unless one is given. Raises KeyError if the response has no 'hourly'
	    key; request and JSON-decoding errors propagate unchanged.
	    """
	    response = requests.get(url, params=params, timeout=timeout)
	    return pd.DataFrame(response.json()['hourly'])


	def _fetch_merged_hourly(weather_url, latitude, longitude, start_date, end_date):
	    """Fetch hourly weather and air-quality data for a date range, joined on 'time'.

	    ``weather_url`` selects the weather endpoint; air quality always comes
	    from the same air-quality endpoint. The returned frame's 'time' column
	    is parsed to (naive) datetimes.
	    """
	    shared_params = {
	        "latitude": latitude,
	        "longitude": longitude,
	        "start_date": start_date.strftime("%Y-%m-%d"),
	        "end_date": end_date.strftime("%Y-%m-%d"),
	    }
	    weather_params = {**shared_params, "hourly": "temperature_2m,relative_humidity_2m,wind_speed_10m", "timezone": TIMEZONE}
	    aq_params = {**shared_params, "hourly": "pm10,pm2_5,carbon_monoxide,nitrogen_dioxide,us_aqi", "timezone": TIMEZONE}

	    df_weather = _fetch_hourly_frame(weather_url, weather_params)
	    df_aq = _fetch_hourly_frame("https://air-quality-api.open-meteo.com/v1/air-quality", aq_params)

	    df_merged = pd.merge(df_weather, df_aq, on='time')
	    df_merged['time'] = pd.to_datetime(df_merged['time'])
	    return df_merged

	# Script entry point: build the 7-day CSV for the configured location.
	if __name__ == "__main__":
	    get_complete_past_week_hourly_data(LATITUDE, LONGITUDE, HISTORICAL_CSV)