Anmol-Sharma's picture
Training in progress, epoch 1
9a9ec2c verified
{
"architectures": [
"ModernBertForSequenceClassification"
],
"attention_bias": false,
"attention_dropout": 0.0,
"bos_token_id": 50281,
"classifier_activation": "gelu",
"classifier_bias": false,
"classifier_dropout": 0.0,
"classifier_pooling": "mean",
"cls_token_id": 50281,
"decoder_bias": true,
"deterministic_flash_attn": false,
"dtype": "float32",
"embedding_dropout": 0.0,
"eos_token_id": 50282,
"global_attn_every_n_layers": 3,
"gradient_checkpointing": false,
"hidden_activation": "gelu",
"hidden_size": 768,
"id2label": {
"0": "Nim",
"1": "Vue",
"10": "Stylus",
"100": "Bluespec",
"101": "Apex",
"102": "Hoon",
"103": "TSV",
"104": "VCL",
"105": "Gradle",
"106": "Ioke",
"107": "Lex",
"108": "Visual Basic .NET",
"109": "Gnuplot",
"11": "ColdFusion",
"110": "Common Lisp",
"111": "JQ",
"112": "Dockerfile",
"113": "Fish",
"114": "SQL",
"115": "Lean",
"116": "Kvlang",
"117": "Ada",
"118": "YANG",
"119": "Java",
"12": "AGS Script",
"120": "Haskell",
"121": "Less",
"122": "Vim Script",
"123": "PureScript",
"124": "Scilab",
"125": "RDoc",
"126": "HTML+Razor",
"127": "Ragel in Ruby Host",
"128": "Go",
"129": "EJS",
"13": "Raw token data",
"130": "PostScript",
"131": "GLSL",
"132": "Makefile",
"133": "FreeMarker",
"134": "Io",
"135": "GAS",
"136": "PLpgSQL",
"137": "Unity3D Asset",
"138": "CMake",
"139": "CoffeeScript",
"14": "Csound",
"140": "J",
"141": "HCL",
"142": "Edoid",
"143": "TSX",
"144": "GDScript",
"145": "Thrift",
"146": "DIGITAL Command Language",
"147": "Kotlin",
"148": "Visual Basic",
"149": "Objective-C",
"15": "Sass",
"150": "Smali",
"151": "Fluent",
"152": "Git Config",
"153": "Assembly",
"16": "C",
"17": "Jupyter Notebook",
"18": "CODEOWNERS",
"19": "Rascal",
"2": "Squirrel",
"20": "Blade",
"21": "Eagle",
"22": "Objective-C++",
"23": "PHP",
"24": "Crystal",
"25": "OCaml",
"26": "Scheme",
"27": "Modelica",
"28": "Rust",
"29": "Unix Assembly",
"3": "Gherkin",
"30": "Shell",
"31": "JAR Manifest",
"32": "XML Property List",
"33": "Erlang",
"34": "Prolog",
"35": "ECL",
"36": "Csound Document",
"37": "Perl",
"38": "LookML",
"39": "Swift",
"4": "Logtalk",
"40": "C#",
"41": "Go Module",
"42": "Scala",
"43": "Gettext Catalog",
"44": "FreeBasic",
"45": "Adobe Font Metrics",
"46": "PowerShell",
"47": "Pascal",
"48": "Wavefront Object",
"49": "AsciiDoc",
"5": "Redcode",
"50": "Python",
"51": "Julia",
"52": "POV-Ray SDL",
"53": "Starlark",
"54": "Metal",
"55": "GAP",
"56": "Kit",
"57": "Graphviz (DOT)",
"58": "TypeScript",
"59": "Gerber Image",
"6": "ImageJ Macro",
"60": "Mathematica",
"61": "REALbasic",
"62": "SQF",
"63": "Solidity",
"64": "OpenType Feature File",
"65": "Unknown",
"66": "Protocol Buffer Text Format",
"67": "VHDL",
"68": "Haxe",
"69": "Twig",
"7": "Hack",
"70": "Isabelle",
"71": "Lua",
"72": "Groovy",
"73": "Ignore List",
"74": "LLVM",
"75": "JavaScript",
"76": "MATLAB",
"77": "Dart",
"78": "Inform 7",
"79": "VBScript",
"8": "PlantUML",
"80": "KiCad Layout",
"81": "OpenEdge ABL",
"82": "Turtle",
"83": "Open Policy Agent",
"84": "D",
"85": "Inno Setup",
"86": "ApacheConf",
"87": "C++",
"88": "SCSS",
"89": "Ruby",
"9": "Verilog",
"90": "Chapel",
"91": "OpenStep Property List",
"92": "Fortran Free Form",
"93": "ObjDump",
"94": "G-code",
"95": "PicoLisp",
"96": "XS",
"97": "Vim Snippet",
"98": "R",
"99": "GDB"
},
"initializer_cutoff_factor": 2.0,
"initializer_range": 0.02,
"intermediate_size": 1152,
"label2id": {
"AGS Script": "12",
"Ada": "117",
"Adobe Font Metrics": "45",
"ApacheConf": "86",
"Apex": "101",
"AsciiDoc": "49",
"Assembly": "153",
"Blade": "20",
"Bluespec": "100",
"C": "16",
"C#": "40",
"C++": "87",
"CMake": "138",
"CODEOWNERS": "18",
"Chapel": "90",
"CoffeeScript": "139",
"ColdFusion": "11",
"Common Lisp": "110",
"Crystal": "24",
"Csound": "14",
"Csound Document": "36",
"D": "84",
"DIGITAL Command Language": "146",
"Dart": "77",
"Dockerfile": "112",
"ECL": "35",
"EJS": "129",
"Eagle": "21",
"Edoid": "142",
"Erlang": "33",
"Fish": "113",
"Fluent": "151",
"Fortran Free Form": "92",
"FreeBasic": "44",
"FreeMarker": "133",
"G-code": "94",
"GAP": "55",
"GAS": "135",
"GDB": "99",
"GDScript": "144",
"GLSL": "131",
"Gerber Image": "59",
"Gettext Catalog": "43",
"Gherkin": "3",
"Git Config": "152",
"Gnuplot": "109",
"Go": "128",
"Go Module": "41",
"Gradle": "105",
"Graphviz (DOT)": "57",
"Groovy": "72",
"HCL": "141",
"HTML+Razor": "126",
"Hack": "7",
"Haskell": "120",
"Haxe": "68",
"Hoon": "102",
"Ignore List": "73",
"ImageJ Macro": "6",
"Inform 7": "78",
"Inno Setup": "85",
"Io": "134",
"Ioke": "106",
"Isabelle": "70",
"J": "140",
"JAR Manifest": "31",
"JQ": "111",
"Java": "119",
"JavaScript": "75",
"Julia": "51",
"Jupyter Notebook": "17",
"KiCad Layout": "80",
"Kit": "56",
"Kotlin": "147",
"Kvlang": "116",
"LLVM": "74",
"Lean": "115",
"Less": "121",
"Lex": "107",
"Logtalk": "4",
"LookML": "38",
"Lua": "71",
"MATLAB": "76",
"Makefile": "132",
"Mathematica": "60",
"Metal": "54",
"Modelica": "27",
"Nim": "0",
"OCaml": "25",
"ObjDump": "93",
"Objective-C": "149",
"Objective-C++": "22",
"Open Policy Agent": "83",
"OpenEdge ABL": "81",
"OpenStep Property List": "91",
"OpenType Feature File": "64",
"PHP": "23",
"PLpgSQL": "136",
"POV-Ray SDL": "52",
"Pascal": "47",
"Perl": "37",
"PicoLisp": "95",
"PlantUML": "8",
"PostScript": "130",
"PowerShell": "46",
"Prolog": "34",
"Protocol Buffer Text Format": "66",
"PureScript": "123",
"Python": "50",
"R": "98",
"RDoc": "125",
"REALbasic": "61",
"Ragel in Ruby Host": "127",
"Rascal": "19",
"Raw token data": "13",
"Redcode": "5",
"Ruby": "89",
"Rust": "28",
"SCSS": "88",
"SQF": "62",
"SQL": "114",
"Sass": "15",
"Scala": "42",
"Scheme": "26",
"Scilab": "124",
"Shell": "30",
"Smali": "150",
"Solidity": "63",
"Squirrel": "2",
"Starlark": "53",
"Stylus": "10",
"Swift": "39",
"TSV": "103",
"TSX": "143",
"Thrift": "145",
"Turtle": "82",
"Twig": "69",
"TypeScript": "58",
"Unity3D Asset": "137",
"Unix Assembly": "29",
"Unknown": "65",
"VBScript": "79",
"VCL": "104",
"VHDL": "67",
"Verilog": "9",
"Vim Script": "122",
"Vim Snippet": "97",
"Visual Basic": "148",
"Visual Basic .NET": "108",
"Vue": "1",
"Wavefront Object": "48",
"XML Property List": "32",
"XS": "96",
"YANG": "118"
},
"layer_norm_eps": 1e-05,
"layer_types": [
"full_attention",
"sliding_attention",
"sliding_attention",
"full_attention",
"sliding_attention",
"sliding_attention",
"full_attention",
"sliding_attention",
"sliding_attention",
"full_attention",
"sliding_attention",
"sliding_attention",
"full_attention",
"sliding_attention",
"sliding_attention",
"full_attention",
"sliding_attention",
"sliding_attention",
"full_attention",
"sliding_attention",
"sliding_attention",
"full_attention"
],
"local_attention": 128,
"max_position_embeddings": 8192,
"mlp_bias": false,
"mlp_dropout": 0.0,
"model_type": "modernbert",
"norm_bias": false,
"norm_eps": 1e-05,
"num_attention_heads": 12,
"num_hidden_layers": 22,
"pad_token_id": 50283,
"position_embedding_type": "absolute",
"problem_type": "single_label_classification",
"repad_logits_with_grad": false,
"rope_parameters": {
"full_attention": {
"rope_theta": 160000.0,
"rope_type": "default"
},
"sliding_attention": {
"rope_theta": 10000.0,
"rope_type": "default"
}
},
"sep_token_id": 50282,
"sparse_pred_ignore_index": -100,
"sparse_prediction": false,
"tie_word_embeddings": true,
"transformers_version": "5.0.0",
"use_cache": false,
"vocab_size": 50368
}