Spaces:

OpenHandsCommunity
/

evaluation

Running

App Files Files Community

evaluation / utils /__init__.py

xingyaoww

fix metadata parsing

00e53ee over 1 year ago

raw

history blame

6.12 kB

	import re
	import os
	import json

	import pandas as pd
	import streamlit as st
	from glob import glob
	from pandas.api.types import (
	is_categorical_dtype,
	is_datetime64_any_dtype,
	is_numeric_dtype,
	is_object_dtype,
	)


	def parse_filepath(filepath: str):
	splited = (
	filepath.removeprefix('outputs/')
	.removesuffix('output.jsonl')
	.strip('/')
	.split('/')
	)

	metadata_path = os.path.join(os.path.dirname(filepath), 'metadata.json')
	if os.path.exists(metadata_path):
	with open(metadata_path, 'r') as f:
	metadata = json.load(f)
	else:
	output_path = os.path.join(os.path.dirname(filepath), 'output.jsonl')
	with open(output_path, 'r') as f:
	for line in f.readlines():
	metadata = json.loads(line)
	break
	try:
	benchmark = splited[0]
	agent_name = splited[1]
	# gpt-4-turbo-2024-04-09_maxiter_50(optional)_N_XXX
	# use regex to match the model name & maxiter
	matched = re.match(r'(.+)_maxiter_(\d+)(_.+)?', splited[2])
	model_name = matched.group(1)
	maxiter = matched.group(2)
	note = ''
	if matched.group(3):
	note += matched.group(3).removeprefix('_N_')
	if len(splited) != 3:
	assert len(splited) == 4
	# subset = splited[3]
	note += '_subset_' + splited[3]
	return {
	'benchmark': benchmark,
	'agent_name': agent_name,
	'model_name': model_name,
	'maxiter': maxiter,
	'note': note,
	'filepath': filepath,
	**metadata,
	}
	except Exception as e:
	# st.write([filepath, e, splited])
	st.write(f"Failed to parse filepath: {filepath}. Error: {e}")
	return None



	def filter_dataframe(df: pd.DataFrame) -> pd.DataFrame:
	"""
	Adds a UI on top of a dataframe to let viewers filter columns

	Args:
	df (pd.DataFrame): Original dataframe

	Returns:
	pd.DataFrame: Filtered dataframe
	"""
	modify = st.checkbox('Add filters')

	if not modify:
	return df

	df = df.copy()

	# Try to convert datetimes into a standard format (datetime, no timezone)
	for col in df.columns:
	if is_object_dtype(df[col]):
	try:
	df[col] = pd.to_datetime(df[col])
	except Exception:
	pass

	if is_datetime64_any_dtype(df[col]):
	df[col] = df[col].dt.tz_localize(None)

	modification_container = st.container()

	with modification_container:
	to_filter_columns = st.multiselect('Filter dataframe on', df.columns)
	for column in to_filter_columns:
	left, right = st.columns((1, 20))
	# Treat columns with < 10 unique values as categorical
	if is_categorical_dtype(df[column]) or df[column].nunique() < 10:
	user_cat_input = right.multiselect(
	f'Values for {column}',
	df[column].unique(),
	default=list(df[column].unique()),
	)
	df = df[df[column].isin(user_cat_input)]
	elif is_numeric_dtype(df[column]):
	_min = float(df[column].min())
	_max = float(df[column].max())
	step = (_max - _min) / 100
	user_num_input = right.slider(
	f'Values for {column}',
	min_value=_min,
	max_value=_max,
	value=(_min, _max),
	step=step,
	)
	df = df[df[column].between(*user_num_input)]
	elif is_datetime64_any_dtype(df[column]):
	user_date_input = right.date_input(
	f'Values for {column}',
	value=(
	df[column].min(),
	df[column].max(),
	),
	)
	if len(user_date_input) == 2:
	user_date_input = tuple(map(pd.to_datetime, user_date_input))
	start_date, end_date = user_date_input
	df = df.loc[df[column].between(start_date, end_date)]
	else:
	user_text_input = right.text_input(
	f'Substring or regex in {column}',
	)
	if user_text_input:
	df = df[df[column].astype(str).str.contains(user_text_input)]

	return df


	def dataframe_with_selections(
	df,
	selected_values=None,
	selected_col='filepath',
	):
	# https://docs.streamlit.io/knowledge-base/using-streamlit/how-to-get-row-selections
	df_with_selections = df.copy()
	df_with_selections.insert(0, 'Select', False)

	# Set the initial state of "Select" column based on query parameters
	if selected_values:
	df_with_selections.loc[
	df_with_selections[selected_col].isin(selected_values), 'Select'
	] = True

	# Get dataframe row-selections from user with st.data_editor
	edited_df = st.data_editor(
	df_with_selections,
	hide_index=True,
	column_config={'Select': st.column_config.CheckboxColumn(required=True)},
	disabled=df.columns,
	)

	# Filter the dataframe using the temporary column, then drop the column
	selected_rows = edited_df[edited_df.Select]
	return selected_rows.drop('Select', axis=1)


	def load_filepaths():
	glob_pattern = 'outputs/**/output.jsonl'
	filepaths = list(set(glob(glob_pattern, recursive=True)))
	# filter out gpqa for now
	def _keep_fn(fp):
	return 'gpqa' not in fp
	filepaths = [fp for fp in filepaths if _keep_fn(fp)]
	filepaths = pd.DataFrame(list(filter(lambda fp: fp is not None, map(parse_filepath, filepaths))))
	filepaths = filepaths.sort_values(
	[
	'benchmark',
	'agent_name',
	'model_name',
	'maxiter',
	]
	)
	st.write(f'Matching glob pattern: `{glob_pattern}`. {len(filepaths)} files found.')
	return filepaths