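"""Dense-captioning evaluation for ActivityNet Captions.

Loads a JSON submission and one or more ground-truth annotation files, pairs each
predicted caption with a reference sentence, and reports METEOR by default, or the
full set of coco-caption metrics (BLEU-1..4, METEOR, ROUGE_L, CIDEr, SPICE) plus
proposal Precision/Recall when run with --verbose.

A sketch of the expected submission layout, inferred from the fields this script
reads ('version' and 'external_data' are only checked for presence):

    {
      "version": "...",
      "external_data": {...},
      "results": {
        "video_id": [
          {"sentence": "a man is ...", "timestamp": [0.0, 12.3]},
          ...
        ]
      }
    }
"""
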
import argparse
import string
import json
import sys
sys.path.insert(0, './coco-caption')  # local checkout of the coco-caption toolkit

from pycocoevalcap.tokenizer.ptbtokenizer import PTBTokenizer
from pycocoevalcap.bleu.bleu import Bleu
from pycocoevalcap.meteor.meteor import Meteor
from pycocoevalcap.rouge.rouge import Rouge
from pycocoevalcap.cider.cider import Cider
from pycocoevalcap.spice.spice import Spice
import numpy as np


def remove_nonascii(text):
    # Replace non-ASCII characters with spaces so they do not break the PTB tokenizer.
    return ''.join([i if ord(i) < 128 else ' ' for i in text])


class ANETcaptions(object):
    PREDICTION_FIELDS = ['results', 'version', 'external_data']

    def __init__(self, ground_truth_filenames=None, prediction_filename=None,
                 tious=None, max_proposals=1000,
                 prediction_fields=PREDICTION_FIELDS, verbose=False):
        # Basic argument validation.
        if not tious:
            raise IOError('Please input a valid tIoU.')
        if not ground_truth_filenames:
            raise IOError('Please input a valid ground truth file.')
        if not prediction_filename:
            raise IOError('Please input a valid prediction file.')

        self.verbose = verbose
        self.tious = tious
        self.max_proposals = max_proposals
        self.pred_fields = prediction_fields
        self.ground_truths = self.import_ground_truths(ground_truth_filenames)
        self.prediction = self.import_prediction(prediction_filename)
        self.tokenizer = PTBTokenizer()

        # In verbose mode compute the full set of captioning metrics;
        # otherwise only METEOR, which is the cheapest to run.
        if self.verbose:
            self.scorers = [
                (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
                (Meteor(), "METEOR"),
                (Rouge(), "ROUGE_L"),
                (Cider(), "CIDEr"),
                (Spice(), "SPICE")
            ]
        else:
            self.scorers = [(Meteor(), "METEOR")]

    def import_prediction(self, prediction_filename):
        if self.verbose:
            print "| Loading submission..."
        submission = json.load(open(prediction_filename))
        if not all([field in submission.keys() for field in self.pred_fields]):
            raise IOError('Please input a valid prediction file containing the fields: %s.'
                          % ', '.join(self.pred_fields))
        # Keep at most max_proposals proposals per video.
        results = {}
        len_captions = 0
        for vid_id in submission['results']:
            results[vid_id] = submission['results'][vid_id][:self.max_proposals]
            len_captions += len(results[vid_id])
        print 'len of results: %d' % len(results)
        print 'len of captions: %d' % len_captions
        return results

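    # Each ground-truth file is assumed to map video id -> annotation dict with
    # parallel 'timestamps' and 'sentences' lists (as in the ActivityNet Captions
    # val_1.json / val_2.json files); this is inferred from how entries are read below.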
    def import_ground_truths(self, filenames):
        gts = []
        self.n_ref_vids = set()
        for filename in filenames:
            gt = json.load(open(filename))
            self.n_ref_vids.update(gt.keys())
            gts.append(gt)
        if self.verbose:
            print "| Loading GT. #files: %d, #videos: %d" % (len(filenames), len(self.n_ref_vids))
        return gts

    def iou(self, interval_1, interval_2):
        # Temporal IoU between two [start, end] intervals.
        start_i, end_i = interval_1[0], interval_1[1]
        start, end = interval_2[0], interval_2[1]
        intersection = max(0, min(end, end_i) - max(start, start_i))
        # The union equals the overall span when the intervals overlap and the sum of
        # their lengths when they are disjoint; taking the min handles both cases.
        union = min(max(end, end_i) - min(start, start_i), end - start + end_i - start_i)
        iou = float(intersection) / (union + 1e-8)
        return iou

    def check_gt_exists(self, vid_id):
        for gt in self.ground_truths:
            if vid_id in gt:
                return True
        return False

    def get_gt_vid_ids(self):
        vid_ids = set([])
        for gt in self.ground_truths:
            vid_ids |= set(gt.keys())
        return list(vid_ids)

    def evaluate(self):
        # Language metrics for every requested tIoU threshold.
        self.scores = {}
        for tiou in self.tious:
            scores = self.evaluate_tiou(tiou)
            for metric, score in scores.items():
                if metric not in self.scores:
                    self.scores[metric] = []
                self.scores[metric].append(score)
        # Proposal precision/recall are only computed in verbose mode.
        if self.verbose:
            self.scores['Recall'] = []
            self.scores['Precision'] = []
            for tiou in self.tious:
                precision, recall = self.evaluate_detection(tiou)
                self.scores['Recall'].append(recall)
                self.scores['Precision'].append(precision)

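    # evaluate_detection measures how well the predicted proposals cover the reference
    # segments at the given tIoU threshold: recall is the fraction of reference segments
    # matched by some proposal, precision the fraction of proposals matching some segment.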
    def evaluate_detection(self, tiou):
        gt_vid_ids = self.get_gt_vid_ids()
        # Each video keeps its best recall/precision over the reference annotation files.
        recall = [0] * len(gt_vid_ids)
        precision = [0] * len(gt_vid_ids)
        for vid_i, vid_id in enumerate(gt_vid_ids):
            best_recall = 0
            best_precision = 0
            for gt in self.ground_truths:
                if vid_id not in gt:
                    continue
                refs = gt[vid_id]
                ref_set_covered = set([])
                pred_set_covered = set([])
                if vid_id in self.prediction and len(self.prediction[vid_id]) > 0:
                    for pred_i, pred in enumerate(self.prediction[vid_id]):
                        pred_timestamp = pred['timestamp']
                        for ref_i, ref_timestamp in enumerate(refs['timestamps']):
                            if self.iou(pred_timestamp, ref_timestamp) > tiou:
                                ref_set_covered.add(ref_i)
                                pred_set_covered.add(pred_i)

                    new_precision = float(len(pred_set_covered)) / (pred_i + 1)
                    best_precision = max(best_precision, new_precision)
                new_recall = float(len(ref_set_covered)) / len(refs['timestamps'])
                best_recall = max(best_recall, new_recall)
            recall[vid_i] = best_recall
            precision[vid_i] = best_precision
        return sum(precision) / len(precision), sum(recall) / len(recall)

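    # evaluate_tiou builds (prediction, reference) caption pairs and scores them with the
    # selected metrics. Note that the tIoU check inside is bypassed ('True or ...'), so
    # predictions are paired with reference sentences by index (paragraph-style
    # evaluation) rather than by temporal overlap.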
    def evaluate_tiou(self, tiou):
        res = {}
        gts = {}
        gt_vid_ids = self.get_gt_vid_ids()

        # unique index assigned to every (prediction, reference) caption pair
        unique_index = 0

        # video id -> list of pair indices belonging to that video
        vid2capid = {}

        cur_res = {}
        cur_gts = {}

        for vid_id in gt_vid_ids:

            vid2capid[vid_id] = []

            # Videos without predictions keep an empty index list; they score 0 below.
            if vid_id not in self.prediction:
                pass

            else:
                for i, pred in enumerate(self.prediction[vid_id]):
                    has_added = False
                    for gt in self.ground_truths:
                        if vid_id not in gt:
                            print('skipped')
                            continue
                        gt_captions = gt[vid_id]
                        for caption_idx, caption_timestamp in enumerate(gt_captions['timestamps']):
                            # The tIoU check is bypassed here, so the i-th prediction is
                            # paired with the i-th reference sentence of this GT file.
                            if True or self.iou(pred['timestamp'], caption_timestamp) >= tiou:
                                gt_caption = gt_captions['sentences'][i]
                                cur_res[unique_index] = [{'caption': remove_nonascii(pred['sentence'])}]
                                cur_gts[unique_index] = [{'caption': remove_nonascii(gt_caption)}]
                                vid2capid[vid_id].append(unique_index)
                                unique_index += 1
                                has_added = True
                                break

                    # If nothing was matched, pair the prediction with a dummy reference so it
                    # still contributes a (near-zero) score instead of being silently dropped.
                    if not has_added:
                        cur_res[unique_index] = [{'caption': remove_nonascii(pred['sentence'])}]
                        cur_gts[unique_index] = [{'caption': 'abc123!@#'}]
                        vid2capid[vid_id].append(unique_index)
                        unique_index += 1

        output = {}

        # Tokenize all pairs in one batch (the PTB tokenizer is expensive to start),
        # then split the tokenized captions back out per video.
        tokenize_res = self.tokenizer.tokenize(cur_res)
        tokenize_gts = self.tokenizer.tokenize(cur_gts)

        for vid in vid2capid.keys():
            res[vid] = {index: tokenize_res[index] for index in vid2capid[vid]}
            gts[vid] = {index: tokenize_gts[index] for index in vid2capid[vid]}

        for scorer, method in self.scorers:
            if self.verbose:
                print 'computing %s score...' % (scorer.method())

            all_scores = {}

            if method == "SPICE":
                # SPICE is scored once over the whole corpus rather than per video.
                print("getting spice score...")
                score, scores = scorer.compute_score(tokenize_gts, tokenize_res)
                all_scores[0] = score
            else:
                # Score each video separately; videos without caption pairs score 0.
                for i, vid_id in enumerate(gt_vid_ids):
                    if len(res[vid_id]) == 0 or len(gts[vid_id]) == 0:
                        if type(method) == list:
                            score = [0] * len(method)
                        else:
                            score = 0
                    else:
                        score, scores = scorer.compute_score(gts[vid_id], res[vid_id])
                    all_scores[vid_id] = score

            # Average the per-video scores for this metric.
            if type(method) == list:
                scores = np.mean(all_scores.values(), axis=0)
                for m in xrange(len(method)):
                    output[method[m]] = scores[m]
                    if self.verbose:
                        print "Calculated tIoU: %1.1f, %s: %0.3f" % (tiou, method[m], output[method[m]])
            else:
                output[method] = np.mean(all_scores.values())
                if self.verbose:
                    print "Calculated tIoU: %1.1f, %s: %0.3f" % (tiou, method, output[method])
        return output

def main(args):
    # Run the evaluator, print per-tIoU scores (verbose mode) and the averages,
    # and dump the averaged metrics to the output JSON file.
    evaluator = ANETcaptions(ground_truth_filenames=args.references,
                             prediction_filename=args.submission,
                             tious=args.tious,
                             max_proposals=args.max_proposals_per_video,
                             verbose=args.verbose)
    evaluator.evaluate()

    if args.verbose:
        for i, tiou in enumerate(args.tious):
            print '-' * 80
            print "tIoU: ", tiou
            print '-' * 80
            for metric in evaluator.scores:
                score = evaluator.scores[metric][i]
                print '| %s: %2.4f' % (metric, 100 * score)

    print '-' * 80
    print "Average across all tIoUs"
    print '-' * 80
    output = {}
    for metric in evaluator.scores:
        score = evaluator.scores[metric]
        output[metric] = 100 * sum(score) / float(len(score))
        print '| %s: %2.4f' % (metric, output[metric])
    json.dump(output, open(args.output, 'w'))
    print(output)

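
# Example invocation (file names are illustrative and depend on your setup):
#   python evaluate.py -s sample_submission.json -r data/val_1.json data/val_2.json \
#       -o result.json --tious 0.3 0.5 0.7 0.9 -v
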
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Evaluate the results stored in a submission file.')
    parser.add_argument('-s', '--submission', type=str, default='sample_submission.json',
                        help='sample submission file for the ActivityNet Captions Challenge.')
    parser.add_argument('-r', '--references', type=str, nargs='+', default=['data/val_1.json'],
                        help='reference files with ground truth captions to compare results against '
                             '(space-separated list).')
    parser.add_argument('-o', '--output', type=str, default='result.json',
                        help='output file with the final language metrics.')
    parser.add_argument('--tious', type=float, nargs='+', default=[0.3],
                        help='choose the tIoUs to average over.')
    parser.add_argument('-ppv', '--max-proposals-per-video', type=int, default=1000,
                        help='maximum proposals per video.')
    parser.add_argument('-v', '--verbose', action='store_true',
                        help='print intermediate steps.')
    args = parser.parse_args()

    main(args)
|