[ { "checkpoint": "checkpoint-18779", "id_acc": 0.9630347222222222, "ood_acc": 0.25220703125, "total_acc": 0.588914884868421, "id_avg_loss": 6.54602900314331, "ood_avg_loss": 5.825457085227966, "total_avg_loss": 6.166780625293129, "id_avg_resp_len": 201.21272916666666, "ood_avg_resp_len": 284.85050390625, "count_id": 1152000, "count_ood": 1280000, "count_total": 2432000, "template_metrics": { "crazy_zootopia": { "count": 809728, "correct": 477128, "answer_accuracy": 0.5892447834334492, "avg_response_len": 235.475637498024, "resp_tokens_sum": 190671217, "pass_at_k": { "pass@1": 0.5892447834334492, "pass@2": 0.6242494417489666, "pass@4": 0.6517067209785661, "pass@8": 0.6752650274111465, "pass@16": 0.6966098480320178, "pass@32": 0.7161446042348242, "pass@64": 0.7340453705919688, "pass@128": 0.7510275055327221 }, "per_op_pass_at_k": { "10": { "pass@1": 0.8994976032448377, "pass@2": 0.9366747090795067, "pass@4": 0.954640098361598, "pass@8": 0.9671935395338649, "pass@16": 0.9780409476989385, "pass@32": 0.9873672705205916, "pass@64": 0.9931124421272916, "pass@128": 0.9941002949852508 }, "2": { "pass@1": 0.9997322819314641, "pass@2": 0.9999915679348491, "pass@4": 0.9999999897793149, "pass@8": 1.0, "pass@16": 1.0, "pass@32": 1.0, "pass@64": 1.0, "pass@128": 1.0 }, "3": { "pass@1": 0.9875525611620795, "pass@2": 0.9937483445303281, "pass@4": 0.9960841566249173, "pass@8": 0.9973945614382809, "pass@16": 0.9984052318072489, "pass@32": 0.9992931754002767, "pass@64": 0.9999118688143109, "pass@128": 1.0 }, "4": { "pass@1": 0.996337890625, "pass@2": 0.9982602577509845, "pass@4": 0.9992694307742784, "pass@8": 0.9998455780548049, "pass@16": 0.9999940908757206, "pass@32": 0.9999999967265157, "pass@64": 1.0, "pass@128": 1.0 }, "5": { "pass@1": 0.9481150793650793, "pass@2": 0.9641533089613797, "pass@4": 0.9757381440415184, "pass@8": 0.9837977641761237, "pass@16": 0.9891567790393069, "pass@32": 0.9924331098404072, "pass@64": 0.9948057827917357, "pass@128": 0.9968253968253968 }, "6": { "pass@1": 0.9665746631736527, "pass@2": 0.9810815131547932, "pass@4": 0.9889092237347574, "pass@8": 0.992817054998713, "pass@16": 0.9942118482404367, "pass@32": 0.9947566055669427, "pass@64": 0.9955089818359227, "pass@128": 0.9970059880239521 }, "7": { "pass@1": 0.9575397559171598, "pass@2": 0.9706686361412663, "pass@4": 0.9781720887884576, "pass@8": 0.9836457175352227, "pass@16": 0.9877211930993773, "pass@32": 0.991030454111008, "pass@64": 0.9934560055493304, "pass@128": 0.9940828402366864 }, "8": { "pass@1": 0.9534755608974359, "pass@2": 0.9758759684786995, "pass@4": 0.9847553208493165, "pass@8": 0.9897007291187125, "pass@16": 0.9935694000922215, "pass@32": 0.9965151373391712, "pass@64": 0.998300039303945, "pass@128": 1.0 }, "9": { "pass@1": 0.9408450704225352, "pass@2": 0.9764160751913055, "pass@4": 0.9897490211610868, "pass@8": 0.9954928597779902, "pass@16": 0.9986146363237696, "pass@32": 0.9998472651017836, "pass@64": 0.9999990904114461, "pass@128": 1.0 }, "11": { "pass@1": 0.7521689093484419, "pass@2": 0.8267172826838576, "pass@4": 0.8749963240253604, "pass@8": 0.9072539747181511, "pass@16": 0.9305891455160197, "pass@32": 0.9482171914771189, "pass@64": 0.9598296757598728, "pass@128": 0.9660056657223796 }, "12": { "pass@1": 0.4268626412429379, "pass@2": 0.5226548250589441, "pass@4": 0.5947451512946471, "pass@8": 0.6499540700619184, "pass@16": 0.697671407935311, "pass@32": 0.7391764349175001, "pass@64": 0.7728700105439386, "pass@128": 0.8022598870056498 }, "13": { "pass@1": 0.226048197492163, "pass@2": 0.2745025516500875, "pass@4": 0.32170087379359735, "pass@8": 0.36726090847720233, "pass@16": 0.4118837384913241, "pass@32": 0.4577964268716136, "pass@64": 0.5060004900301981, "pass@128": 0.554858934169279 }, "14": { "pass@1": 0.1970404984423676, "pass@2": 0.2447659871955257, "pass@4": 0.2905214605183697, "pass@8": 0.33432116638118264, "pass@16": 0.37689919457954874, "pass@32": 0.41849874775311363, "pass@64": 0.46201871752633594, "pass@128": 0.5109034267912772 }, "15": { "pass@1": 0.190774024566474, "pass@2": 0.24494681910245306, "pass@4": 0.29680882700356104, "pass@8": 0.3440605324830605, "pass@16": 0.38846742937996726, "pass@32": 0.43008799604881653, "pass@64": 0.4654618905817254, "pass@128": 0.4913294797687861 }, "16": { "pass@1": 0.1630796370967742, "pass@2": 0.1964297053594107, "pass@4": 0.23033826426938572, "pass@8": 0.2660781043146322, "pass@16": 0.3019958633560044, "pass@32": 0.33569879011745574, "pass@64": 0.3686862876847773, "pass@128": 0.4064516129032258 }, "17": { "pass@1": 0.1676300578034682, "pass@2": 0.20699551397296437, "pass@4": 0.2408987041359715, "pass@8": 0.271017564290959, "pass@16": 0.29876652985892266, "pass@32": 0.3264747152113181, "pass@64": 0.3549548508714773, "pass@128": 0.38439306358381503 }, "18": { "pass@1": 0.1616517857142857, "pass@2": 0.19595648200224966, "pass@4": 0.23106420322459695, "pass@8": 0.26888622832851616, "pass@16": 0.3071732341417159, "pass@32": 0.3420263056744111, "pass@64": 0.3714714335108666, "pass@128": 0.39714285714285713 }, "19": { "pass@1": 0.15040822072072071, "pass@2": 0.1915057281691139, "pass@4": 0.2273034600967671, "pass@8": 0.2598221642368353, "pass@16": 0.2905735616301428, "pass@32": 0.32042698971733125, "pass@64": 0.35124146512270843, "pass@128": 0.3843843843843844 }, "20": { "pass@1": 0.15052552552552553, "pass@2": 0.18885223806483653, "pass@4": 0.22942468621602477, "pass@8": 0.26979999128432536, "pass@16": 0.30689533475643643, "pass@32": 0.3397581402934893, "pass@64": 0.370991289408613, "pass@128": 0.4024024024024024 } } }, "teachers_in_school": { "count": 821120, "correct": 483803, "answer_accuracy": 0.5891988990646921, "avg_response_len": 242.341766124318, "resp_tokens_sum": 198991671, "pass_at_k": { "pass@1": 0.5891988990646921, "pass@2": 0.6227068003142299, "pass@4": 0.650107249088997, "pass@8": 0.6741994105674554, "pass@16": 0.6956359816626693, "pass@32": 0.7149534992963424, "pass@64": 0.7325456550204769, "pass@128": 0.7480904130943102 }, "per_op_pass_at_k": { "10": { "pass@1": 0.8878930214723927, "pass@2": 0.9240016333751996, "pass@4": 0.9427751138086263, "pass@8": 0.9559837013558444, "pass@16": 0.9669589357176731, "pass@32": 0.9757400698048759, "pass@64": 0.9820426961144421, "pass@128": 0.9877300613496932 }, "2": { "pass@1": 0.9987177051671733, "pass@2": 0.9996395064978579, "pass@4": 0.9999613177152248, "pass@8": 0.9999996232076699, "pass@16": 0.9999999999864305, "pass@32": 1.0, "pass@64": 1.0, "pass@128": 1.0 }, "3": { "pass@1": 0.9907670454545454, "pass@2": 0.9960163892865667, "pass@4": 0.9985778993534897, "pass@8": 0.9996915957072215, "pass@16": 0.9999815253902985, "pass@32": 0.99999995938087, "pass@64": 0.9999999999999987, "pass@128": 1.0 }, "4": { "pass@1": 0.9982664571005917, "pass@2": 0.9998132687648512, "pass@4": 0.999995557138198, "pass@8": 0.9999999967656203, "pass@16": 0.9999999999999998, "pass@32": 1.0, "pass@64": 1.0, "pass@128": 1.0 }, "5": { "pass@1": 0.9600317028985508, "pass@2": 0.9691975493552438, "pass@4": 0.9763000869456536, "pass@8": 0.9828931121195829, "pass@16": 0.989036439654314, "pass@32": 0.9944838684919953, "pass@64": 0.9981653755234783, "pass@128": 1.0 }, "6": { "pass@1": 0.9823379297994269, "pass@2": 0.9910194058389551, "pass@4": 0.9946701218221643, "pass@8": 0.9965445299840145, "pass@16": 0.9976712739922713, "pass@32": 0.9983904225804993, "pass@64": 0.9992893080034889, "pass@128": 1.0 }, "7": { "pass@1": 0.9681855130057804, "pass@2": 0.9832421146056164, "pass@4": 0.9914642009163593, "pass@8": 0.9964758591421767, "pass@16": 0.9990374180123244, "pass@32": 0.9999126056145744, "pass@64": 0.9999997855847184, "pass@128": 1.0 }, "8": { "pass@1": 0.9455765845070423, "pass@2": 0.9712158284351782, "pass@4": 0.9835400283591312, "pass@8": 0.9910390348371692, "pass@16": 0.9955726997113149, "pass@32": 0.9980539040490879, "pass@64": 0.9995741614459047, "pass@128": 1.0 }, "9": { "pass@1": 0.9390437874251497, "pass@2": 0.9670264539818006, "pass@4": 0.9783135702722786, "pass@8": 0.9840906736702117, "pass@16": 0.9879928605789021, "pass@32": 0.9908513468886352, "pass@64": 0.9930496212972972, "pass@128": 0.9940119760479041 }, "11": { "pass@1": 0.7309864457831325, "pass@2": 0.811214839673655, "pass@4": 0.8645121111555325, "pass@8": 0.9001346785530842, "pass@16": 0.9235781504819824, "pass@32": 0.939017807747773, "pass@64": 0.9485292552001391, "pass@128": 0.9548192771084337 }, "12": { "pass@1": 0.439042907523511, "pass@2": 0.5317378835188704, "pass@4": 0.6075961422729685, "pass@8": 0.6683062360710545, "pass@16": 0.7177272091161964, "pass@32": 0.7598458853246168, "pass@64": 0.7942021269976, "pass@128": 0.8213166144200627 }, "13": { "pass@1": 0.19227065826330533, "pass@2": 0.23474754074858276, "pass@4": 0.27791213361985206, "pass@8": 0.32123267772057174, "pass@16": 0.36547236780648024, "pass@32": 0.412591392141311, "pass@64": 0.46083134462740805, "pass@128": 0.5042016806722689 }, "14": { "pass@1": 0.20837902046783627, "pass@2": 0.2522699670764838, "pass@4": 0.2964683594923442, "pass@8": 0.3416618793696011, "pass@16": 0.3841336779597966, "pass@32": 0.42375806001618826, "pass@64": 0.4632207639904677, "pass@128": 0.5029239766081871 }, "15": { "pass@1": 0.19093276515151514, "pass@2": 0.24199400501073715, "pass@4": 0.2891956014020975, "pass@8": 0.3326345282383242, "pass@16": 0.37366558323041027, "pass@32": 0.4128573661812689, "pass@64": 0.4483166035224402, "pass@128": 0.4727272727272727 }, "16": { "pass@1": 0.15642806267806267, "pass@2": 0.1904278691926329, "pass@4": 0.22132350069489173, "pass@8": 0.251268169779741, "pass@16": 0.28180434335829546, "pass@32": 0.3122157586188942, "pass@64": 0.34211985927431315, "pass@128": 0.3732193732193732 }, "17": { "pass@1": 0.16779891304347827, "pass@2": 0.21377104526336374, "pass@4": 0.2616579632708955, "pass@8": 0.3084358591590647, "pass@16": 0.3485515761094292, "pass@32": 0.38192734451252297, "pass@64": 0.4116145438455866, "pass@128": 0.43788819875776397 }, "18": { "pass@1": 0.1518612132352941, "pass@2": 0.1958187384205651, "pass@4": 0.23828893419572547, "pass@8": 0.27971765090438966, "pass@16": 0.31909231139592203, "pass@32": 0.3541919542192856, "pass@64": 0.3879892602530647, "pass@128": 0.4235294117647059 }, "19": { "pass@1": 0.14004371279761904, "pass@2": 0.17627703763592056, "pass@4": 0.21033196631671033, "pass@8": 0.24241868591666452, "pass@16": 0.2725452956766527, "pass@32": 0.3023936104609465, "pass@64": 0.331830773125219, "pass@128": 0.3601190476190476 }, "20": { "pass@1": 0.1367421407185629, "pass@2": 0.17771825934744678, "pass@4": 0.21898993408009634, "pass@8": 0.260066157867715, "pass@16": 0.2989195527821917, "pass@32": 0.3334463816599912, "pass@64": 0.3631967204725436, "pass@128": 0.38622754491017963 } } }, "movie_festival_awards": { "count": 801152, "correct": 471310, "answer_accuracy": 0.5882903618788944, "avg_response_len": 258.0569242790382, "resp_tokens_sum": 206742821, "pass_at_k": { "pass@1": 0.5882903618788944, "pass@2": 0.6212685622467475, "pass@4": 0.6478714293112718, "pass@8": 0.6709216329453926, "pass@16": 0.6916683710373397, "pass@32": 0.7106315691905148, "pass@64": 0.728740000525103, "pass@128": 0.7466048889598977 }, "per_op_pass_at_k": { "10": { "pass@1": 0.8983908582089553, "pass@2": 0.9340132506757551, "pass@4": 0.9530650669599133, "pass@8": 0.9652801856862316, "pass@16": 0.9745693164759084, "pass@32": 0.9818461284408381, "pass@64": 0.9871328155545905, "pass@128": 0.991044776119403 }, "2": { "pass@1": 0.9998660714285714, "pass@2": 0.9999992969628797, "pass@4": 1.0, "pass@8": 1.0, "pass@16": 1.0, "pass@32": 1.0, "pass@64": 1.0, "pass@128": 1.0 }, "3": { "pass@1": 0.9907069970845481, "pass@2": 0.9970669721769471, "pass@4": 0.9993563997831172, "pass@8": 0.9999514691641971, "pass@16": 0.9999996499221584, "pass@32": 0.9999999999946803, "pass@64": 1.0, "pass@128": 1.0 }, "4": { "pass@1": 0.9958196271929824, "pass@2": 0.9977559342911083, "pass@4": 0.9990211264710332, "pass@8": 0.9997663197662907, "pass@16": 0.9999856176620865, "pass@32": 0.9999999740185194, "pass@64": 0.9999999999999997, "pass@128": 1.0 }, "5": { "pass@1": 0.9548943014705882, "pass@2": 0.9685079174386292, "pass@4": 0.9784535587463331, "pass@8": 0.9869293632462349, "pass@16": 0.9934826994856965, "pass@32": 0.9970310861536675, "pass@64": 0.9985066302193607, "pass@128": 1.0 }, "6": { "pass@1": 0.9775483044164038, "pass@2": 0.9891293716932861, "pass@4": 0.9959075131381444, "pass@8": 0.9991092920224423, "pass@16": 0.9999352040645176, "pass@32": 0.9999996662295679, "pass@64": 0.9999999999997393, "pass@128": 1.0 }, "7": { "pass@1": 0.9744115901898734, "pass@2": 0.9867456923402768, "pass@4": 0.9912063912303684, "pass@8": 0.9935151685224703, "pass@16": 0.9954041601504686, "pass@32": 0.9972167179953726, "pass@64": 0.9990265956175474, "pass@128": 1.0 }, "8": { "pass@1": 0.9515531156156156, "pass@2": 0.9743795222387744, "pass@4": 0.9852795146102226, "pass@8": 0.9899431408996546, "pass@16": 0.9925003924523234, "pass@32": 0.9950902478963195, "pass@64": 0.9976578190444356, "pass@128": 1.0 }, "9": { "pass@1": 0.9379521704180064, "pass@2": 0.9720591912043948, "pass@4": 0.9858236963747689, "pass@8": 0.9932814976290268, "pass@16": 0.9968830356374474, "pass@32": 0.9981725276413653, "pass@64": 0.9992024635603814, "pass@128": 1.0 }, "11": { "pass@1": 0.7650545634920635, "pass@2": 0.8431473409573809, "pass@4": 0.8877359086066622, "pass@8": 0.9145899228176874, "pass@16": 0.9334761048558039, "pass@32": 0.9489928847114996, "pass@64": 0.9642010673849086, "pass@128": 0.9809523809523809 }, "12": { "pass@1": 0.4054615825688073, "pass@2": 0.49957183534397626, "pass@4": 0.5768995727598271, "pass@8": 0.639355428961972, "pass@16": 0.6924379381519293, "pass@32": 0.7389465717283092, "pass@64": 0.7833812207530858, "pass@128": 0.8256880733944955 }, "13": { "pass@1": 0.21805073302469136, "pass@2": 0.2614506628511712, "pass@4": 0.30509700407819385, "pass@8": 0.34843945585161107, "pass@16": 0.3892841325907048, "pass@32": 0.42707158587605587, "pass@64": 0.46350207303509766, "pass@128": 0.5 }, "14": { "pass@1": 0.19482566765578635, "pass@2": 0.24658432440010283, "pass@4": 0.2977982871762691, "pass@8": 0.34710213397424194, "pass@16": 0.39305571585018145, "pass@32": 0.4354334888531387, "pass@64": 0.4760201367236571, "pass@128": 0.516320474777448 }, "15": { "pass@1": 0.15263310185185186, "pass@2": 0.20657633724603863, "pass@4": 0.26040626837154607, "pass@8": 0.31066200412971595, "pass@16": 0.3575054534309268, "pass@32": 0.40181191645690056, "pass@64": 0.4460592807353767, "pass@128": 0.49074074074074076 }, "16": { "pass@1": 0.15613477138643067, "pass@2": 0.1863313822497852, "pass@4": 0.21273005393131156, "pass@8": 0.23865739981671225, "pass@16": 0.2676959517806339, "pass@32": 0.3011310763692745, "pass@64": 0.3362407178410369, "pass@128": 0.37168141592920356 }, "17": { "pass@1": 0.1378012048192771, "pass@2": 0.17401619272365054, "pass@4": 0.21124320340981484, "pass@8": 0.24873698272180494, "pass@16": 0.28468715470923933, "pass@32": 0.31684364503562207, "pass@64": 0.3460631347877581, "pass@128": 0.37650602409638556 }, "18": { "pass@1": 0.16940524193548387, "pass@2": 0.20793830962661927, "pass@4": 0.24442847708552565, "pass@8": 0.27696590133200394, "pass@16": 0.3047539165997947, "pass@32": 0.329140512075412, "pass@64": 0.3525837977178592, "pass@128": 0.3774193548387097 }, "19": { "pass@1": 0.13812311178247735, "pass@2": 0.16759305790137258, "pass@4": 0.19758813267676884, "pass@8": 0.2284911534750015, "pass@16": 0.2603002938970115, "pass@32": 0.29249731296624343, "pass@64": 0.3260332407783438, "pass@128": 0.36253776435045315 }, "20": { "pass@1": 0.13072447447447447, "pass@2": 0.16936079780567978, "pass@4": 0.2094049935762534, "pass@8": 0.25098284669882537, "pass@16": 0.291417867530838, "pass@32": 0.32739810777438544, "pass@64": 0.35806392636422313, "pass@128": 0.3813813813813814 } } } } } ]