slawguy's picture
Training in progress, epoch 1
d64411d verified
{
"architectures": [
"ModernBertForSequenceClassification"
],
"attention_bias": false,
"attention_dropout": 0.0,
"bos_token_id": 50281,
"classifier_activation": "gelu",
"classifier_bias": false,
"classifier_dropout": 0.0,
"classifier_pooling": "mean",
"cls_token_id": 50281,
"decoder_bias": true,
"deterministic_flash_attn": false,
"dtype": "float32",
"embedding_dropout": 0.0,
"eos_token_id": 50282,
"global_attn_every_n_layers": 3,
"gradient_checkpointing": false,
"hidden_activation": "gelu",
"hidden_size": 768,
"id2label": {
"0": "CODEOWNERS",
"1": "LLVM",
"10": "Turtle",
"100": "Visual Basic .NET",
"101": "Dockerfile",
"102": "C",
"103": "Isabelle",
"104": "XML Property List",
"105": "Inform 7",
"106": "OpenStep Property List",
"107": "VHDL",
"108": "Erlang",
"109": "Starlark",
"11": "EJS",
"110": "Unity3D Asset",
"111": "Go",
"112": "Pickle",
"113": "Julia",
"114": "Metal",
"115": "Chapel",
"116": "Kotlin",
"117": "SCSS",
"118": "Sass",
"119": "Kit",
"12": "Haxe",
"120": "Modelica",
"121": "GDB",
"122": "VCL",
"123": "Gradle",
"124": "ApacheConf",
"125": "VBScript",
"126": "Unknown",
"127": "Vim Snippet",
"128": "ColdFusion",
"129": "Kvlang",
"13": "Gherkin",
"130": "R",
"131": "PureScript",
"132": "Blade",
"133": "Twig",
"134": "Dart",
"135": "Stylus",
"136": "Fluent",
"137": "Scilab",
"138": "JAR Manifest",
"139": "YANG",
"14": "Ada",
"140": "PHP",
"141": "Gettext Catalog",
"142": "GAS",
"143": "SQL",
"144": "Vue",
"145": "Logtalk",
"146": "RDoc",
"147": "Verilog",
"148": "Apex",
"149": "PostScript",
"15": "Bluespec",
"150": "Jupyter Notebook",
"151": "Objective-C++",
"152": "Thrift",
"153": "GLSL",
"154": "PLpgSQL",
"16": "JQ",
"17": "Objective-C",
"18": "Adobe Font Metrics",
"19": "Smali",
"2": "HTML+Razor",
"20": "Io",
"21": "Rascal",
"22": "Groovy",
"23": "Squirrel",
"24": "AGS Script",
"25": "Inno Setup",
"26": "Prolog",
"27": "Scala",
"28": "Ignore List",
"29": "TypeScript",
"3": "PowerShell",
"30": "CMake",
"31": "Raw token data",
"32": "MATLAB",
"33": "KiCad Layout",
"34": "OpenEdge ABL",
"35": "Hack",
"36": "Csound Document",
"37": "Lua",
"38": "Lean",
"39": "Git Config",
"4": "Open Policy Agent",
"40": "Java",
"41": "Pascal",
"42": "AsciiDoc",
"43": "Fish",
"44": "ObjDump",
"45": "Lex",
"46": "Common Lisp",
"47": "PicoLisp",
"48": "SQF",
"49": "Assembly",
"5": "Python",
"50": "Perl",
"51": "Redcode",
"52": "Visual Basic",
"53": "Mathematica",
"54": "Protocol Buffer Text Format",
"55": "GDScript",
"56": "OCaml",
"57": "Shell",
"58": "TSV",
"59": "ImageJ Macro",
"6": "REALbasic",
"60": "HCL",
"61": "PlantUML",
"62": "Scheme",
"63": "D",
"64": "Go Module",
"65": "Edoid",
"66": "JavaScript",
"67": "J",
"68": "FreeMarker",
"69": "Gnuplot",
"7": "FreeBasic",
"70": "Graphviz (DOT)",
"71": "LookML",
"72": "XS",
"73": "Eagle",
"74": "Solidity",
"75": "Less",
"76": "Makefile",
"77": "CoffeeScript",
"78": "Csound",
"79": "ECL",
"8": "Ragel in Ruby Host",
"80": "Gerber Image",
"81": "POV-Ray SDL",
"82": "Haskell",
"83": "C++",
"84": "C#",
"85": "GAP",
"86": "DIGITAL Command Language",
"87": "Ioke",
"88": "TSX",
"89": "Rust",
"9": "Vim Script",
"90": "Nim",
"91": "Fortran Free Form",
"92": "Crystal",
"93": "Swift",
"94": "Unix Assembly",
"95": "Ruby",
"96": "Wavefront Object",
"97": "Hoon",
"98": "OpenType Feature File",
"99": "G-code"
},
"initializer_cutoff_factor": 2.0,
"initializer_range": 0.02,
"intermediate_size": 1152,
"label2id": {
"AGS Script": 24,
"Ada": 14,
"Adobe Font Metrics": 18,
"ApacheConf": 124,
"Apex": 148,
"AsciiDoc": 42,
"Assembly": 49,
"Blade": 132,
"Bluespec": 15,
"C": 102,
"C#": 84,
"C++": 83,
"CMake": 30,
"CODEOWNERS": 0,
"Chapel": 115,
"CoffeeScript": 77,
"ColdFusion": 128,
"Common Lisp": 46,
"Crystal": 92,
"Csound": 78,
"Csound Document": 36,
"D": 63,
"DIGITAL Command Language": 86,
"Dart": 134,
"Dockerfile": 101,
"ECL": 79,
"EJS": 11,
"Eagle": 73,
"Edoid": 65,
"Erlang": 108,
"Fish": 43,
"Fluent": 136,
"Fortran Free Form": 91,
"FreeBasic": 7,
"FreeMarker": 68,
"G-code": 99,
"GAP": 85,
"GAS": 142,
"GDB": 121,
"GDScript": 55,
"GLSL": 153,
"Gerber Image": 80,
"Gettext Catalog": 141,
"Gherkin": 13,
"Git Config": 39,
"Gnuplot": 69,
"Go": 111,
"Go Module": 64,
"Gradle": 123,
"Graphviz (DOT)": 70,
"Groovy": 22,
"HCL": 60,
"HTML+Razor": 2,
"Hack": 35,
"Haskell": 82,
"Haxe": 12,
"Hoon": 97,
"Ignore List": 28,
"ImageJ Macro": 59,
"Inform 7": 105,
"Inno Setup": 25,
"Io": 20,
"Ioke": 87,
"Isabelle": 103,
"J": 67,
"JAR Manifest": 138,
"JQ": 16,
"Java": 40,
"JavaScript": 66,
"Julia": 113,
"Jupyter Notebook": 150,
"KiCad Layout": 33,
"Kit": 119,
"Kotlin": 116,
"Kvlang": 129,
"LLVM": 1,
"Lean": 38,
"Less": 75,
"Lex": 45,
"Logtalk": 145,
"LookML": 71,
"Lua": 37,
"MATLAB": 32,
"Makefile": 76,
"Mathematica": 53,
"Metal": 114,
"Modelica": 120,
"Nim": 90,
"OCaml": 56,
"ObjDump": 44,
"Objective-C": 17,
"Objective-C++": 151,
"Open Policy Agent": 4,
"OpenEdge ABL": 34,
"OpenStep Property List": 106,
"OpenType Feature File": 98,
"PHP": 140,
"PLpgSQL": 154,
"POV-Ray SDL": 81,
"Pascal": 41,
"Perl": 50,
"Pickle": 112,
"PicoLisp": 47,
"PlantUML": 61,
"PostScript": 149,
"PowerShell": 3,
"Prolog": 26,
"Protocol Buffer Text Format": 54,
"PureScript": 131,
"Python": 5,
"R": 130,
"RDoc": 146,
"REALbasic": 6,
"Ragel in Ruby Host": 8,
"Rascal": 21,
"Raw token data": 31,
"Redcode": 51,
"Ruby": 95,
"Rust": 89,
"SCSS": 117,
"SQF": 48,
"SQL": 143,
"Sass": 118,
"Scala": 27,
"Scheme": 62,
"Scilab": 137,
"Shell": 57,
"Smali": 19,
"Solidity": 74,
"Squirrel": 23,
"Starlark": 109,
"Stylus": 135,
"Swift": 93,
"TSV": 58,
"TSX": 88,
"Thrift": 152,
"Turtle": 10,
"Twig": 133,
"TypeScript": 29,
"Unity3D Asset": 110,
"Unix Assembly": 94,
"Unknown": 126,
"VBScript": 125,
"VCL": 122,
"VHDL": 107,
"Verilog": 147,
"Vim Script": 9,
"Vim Snippet": 127,
"Visual Basic": 52,
"Visual Basic .NET": 100,
"Vue": 144,
"Wavefront Object": 96,
"XML Property List": 104,
"XS": 72,
"YANG": 139
},
"layer_norm_eps": 1e-05,
"layer_types": [
"full_attention",
"sliding_attention",
"sliding_attention",
"full_attention",
"sliding_attention",
"sliding_attention",
"full_attention",
"sliding_attention",
"sliding_attention",
"full_attention",
"sliding_attention",
"sliding_attention",
"full_attention",
"sliding_attention",
"sliding_attention",
"full_attention",
"sliding_attention",
"sliding_attention",
"full_attention",
"sliding_attention",
"sliding_attention",
"full_attention"
],
"local_attention": 128,
"max_position_embeddings": 8192,
"mlp_bias": false,
"mlp_dropout": 0.0,
"model_type": "modernbert",
"norm_bias": false,
"norm_eps": 1e-05,
"num_attention_heads": 12,
"num_hidden_layers": 22,
"pad_token_id": 50283,
"position_embedding_type": "absolute",
"problem_type": "single_label_classification",
"rope_parameters": {
"full_attention": {
"rope_theta": 160000.0,
"rope_type": "default"
},
"sliding_attention": {
"rope_theta": 10000.0,
"rope_type": "default"
}
},
"sep_token_id": 50282,
"sparse_pred_ignore_index": -100,
"sparse_prediction": false,
"tie_word_embeddings": true,
"transformers_version": "5.6.2",
"use_cache": false,
"vocab_size": 50368
}