Graph Machine Learning
Transformers
Safetensors
English
unicosys_hypergraph
knowledge-graph
hypergraph
legal-evidence
graph-neural-network
unicosys
Instructions to use drzo/unicosys-hypergraph with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use drzo/unicosys-hypergraph with Transformers:
# Load model directly
from transformers import AutoModel
model = AutoModel.from_pretrained("drzo/unicosys-hypergraph", trust_remote_code=True, dtype="auto")
- Notebooks
- Google Colab
- Kaggle
| """ | |
| Unicosys Hypergraph Knowledge Model | |
| A trainable knowledge graph embedding model that encodes the unified | |
| hypergraph (entities, evidence, transactions, communications) as | |
| learned vector representations. | |
| Load with: | |
| from transformers import AutoConfig, AutoModel | |
| config = AutoConfig.from_pretrained("drzo/unicosys-hypergraph", trust_remote_code=True) | |
| model = AutoModel.from_pretrained("drzo/unicosys-hypergraph", trust_remote_code=True) | |
| """ | |
| import json | |
| import math | |
| from typing import Optional | |
| import torch | |
| import torch.nn as nn | |
| import torch.nn.functional as F | |
| from transformers import PreTrainedModel | |
| from .configuration_unicosys import UnicosysConfig | |
| # --------------------------------------------------------------------------- | |
| # Text Encoder (lightweight) | |
| # --------------------------------------------------------------------------- | |
class LightweightTextEncoder(nn.Module):
    """A small transformer encoder for node labels and descriptions.

    Token embeddings and learned position embeddings are summed, passed
    through a stack of ``nn.TransformerEncoderLayer`` blocks, mean-pooled
    over the sequence axis and projected from ``config.text_embed_dim`` to
    ``config.hidden_dim``.
    """

    def __init__(self, config: "UnicosysConfig"):
        super().__init__()
        self.token_embed = nn.Embedding(config.text_vocab_size, config.text_embed_dim)
        self.pos_embed = nn.Embedding(config.text_max_length, config.text_embed_dim)
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=config.text_embed_dim,
            nhead=config.text_num_heads,
            dim_feedforward=config.text_embed_dim * 4,
            dropout=config.gat_dropout,
            batch_first=True,
        )
        self.encoder = nn.TransformerEncoder(
            encoder_layer, num_layers=config.text_num_layers
        )
        self.pool_proj = nn.Linear(config.text_embed_dim, config.hidden_dim)

    def forward(self, input_ids, attention_mask=None):
        """Encode a batch of token sequences into one vector per sequence.

        Args:
            input_ids: integer tensor of shape ``(B, L)``. ``L`` must not
                exceed ``config.text_max_length`` because positions index
                ``pos_embed`` directly.
            attention_mask: optional ``(B, L)`` tensor where ``0`` marks
                padding. When omitted, every position is treated as real.

        Returns:
            Tensor of shape ``(B, config.hidden_dim)``. A row whose mask is
            entirely zero pools to a zero vector and therefore maps to the
            bias of ``pool_proj``.
        """
        batch_size, seq_len = input_ids.shape
        positions = (
            torch.arange(seq_len, device=input_ids.device)
            .unsqueeze(0)
            .expand(batch_size, -1)
        )
        x = self.token_embed(input_ids) + self.pos_embed(positions)

        if attention_mask is None:
            x = self.encoder(x, src_key_padding_mask=None)
            return self.pool_proj(x.mean(dim=1))

        # True marks positions the encoder must ignore.
        key_padding_mask = attention_mask == 0
        # A row with every key masked makes the attention softmax undefined
        # (NaN on many PyTorch versions). Un-mask the first position of such
        # rows so attention stays finite; pooling below still ignores it.
        fully_padded = key_padding_mask.all(dim=1, keepdim=True)
        key_padding_mask[:, :1] &= ~fully_padded

        x = self.encoder(x, src_key_padding_mask=key_padding_mask)

        keep = attention_mask.unsqueeze(-1).float()
        pooled = (x * keep).sum(dim=1) / keep.sum(dim=1).clamp(min=1)
        # The float mask promotes half-precision activations to float32;
        # cast back so the projection sees the dtype of its own weights.
        return self.pool_proj(pooled.to(x.dtype))
| # --------------------------------------------------------------------------- | |
| # Graph Attention Layer | |
| # --------------------------------------------------------------------------- | |
class GraphAttentionLayer(nn.Module):
    """Multi-head graph attention for hypergraph node updates.

    For every edge ``src -> tgt`` the target node attends to the source
    node. Edge-type embeddings are projected and added to the keys, the
    per-edge scores are softmax-normalised over all edges sharing a target,
    and the weighted values are summed into the target. The result goes
    through an output projection, a residual connection and a LayerNorm.
    """

    def __init__(self, config: "UnicosysConfig"):
        super().__init__()
        self.num_heads = config.gat_num_heads
        self.head_dim = config.hidden_dim // config.gat_num_heads
        # Explicit raise instead of assert: asserts vanish under ``python -O``.
        if self.head_dim * self.num_heads != config.hidden_dim:
            raise ValueError(
                "hidden_dim must be divisible by gat_num_heads "
                f"(got hidden_dim={config.hidden_dim}, "
                f"gat_num_heads={config.gat_num_heads})"
            )
        self.q_proj = nn.Linear(config.hidden_dim, config.hidden_dim)
        self.k_proj = nn.Linear(config.hidden_dim, config.hidden_dim)
        self.v_proj = nn.Linear(config.hidden_dim, config.hidden_dim)
        self.edge_proj = nn.Linear(config.node_embed_dim, config.hidden_dim)
        self.out_proj = nn.Linear(config.hidden_dim, config.hidden_dim)
        self.norm = nn.LayerNorm(config.hidden_dim)
        self.dropout = nn.Dropout(config.gat_dropout)

    def forward(self, node_embeds, edge_index, edge_type_embeds):
        """Run one round of attention-weighted message passing.

        Args:
            node_embeds: ``(N, hidden_dim)`` node features.
            edge_index: unpacks into ``(src, tgt)`` integer index tensors
                with one entry per edge (e.g. a ``(2, E)`` tensor).
            edge_type_embeds: ``(E, node_embed_dim)`` per-edge features.

        Returns:
            ``(N, hidden_dim)`` updated node features. Nodes without
            incoming edges receive only the ``out_proj`` bias as message.
        """
        num_nodes = node_embeds.size(0)
        src, tgt = edge_index

        q = self.q_proj(node_embeds[tgt])
        k = self.k_proj(node_embeds[src]) + self.edge_proj(edge_type_embeds)
        v = self.v_proj(node_embeds[src])

        q = q.view(-1, self.num_heads, self.head_dim)
        k = k.view(-1, self.num_heads, self.head_dim)
        v = v.view(-1, self.num_heads, self.head_dim)

        # Per-edge, per-head scaled dot-product score: (E, H).
        scores = (q * k).sum(dim=-1) / math.sqrt(self.head_dim)
        tgt_per_head = tgt.unsqueeze(1).expand_as(scores)

        # Softmax over the edges that share a target node. The shift must be
        # the true per-target maximum: include_self=False keeps the zero
        # initial value out of the reduction, so all-negative scores do not
        # underflow in exp(). The shift is a constant w.r.t. the softmax
        # value, hence it is taken from detached scores. Buffers inherit the
        # dtype/device of their source so non-float32 models work.
        score_max = scores.new_zeros(num_nodes, self.num_heads)
        score_max.scatter_reduce_(
            0, tgt_per_head, scores.detach(), reduce="amax", include_self=False
        )
        weights = torch.exp(scores - score_max[tgt])

        denom = weights.new_zeros(num_nodes, self.num_heads)
        denom.scatter_add_(0, tgt_per_head, weights)
        weights = weights / denom[tgt].clamp(min=1e-8)
        weights = self.dropout(weights)

        # Weight the values and sum them into their target nodes.
        messages = (v * weights.unsqueeze(-1)).reshape(
            -1, self.num_heads * self.head_dim
        )
        aggregated = messages.new_zeros(num_nodes, self.num_heads * self.head_dim)
        aggregated.scatter_add_(
            0, tgt.unsqueeze(1).expand_as(messages), messages
        )

        return self.norm(node_embeds + self.out_proj(aggregated))
| # --------------------------------------------------------------------------- | |
| # Link Prediction Head | |
| # --------------------------------------------------------------------------- | |
class LinkPredictionHead(nn.Module):
    """Scores candidate edges for link prediction training.

    Each candidate is described by its source embedding, its target
    embedding and a learned embedding of its edge type; the three vectors
    are concatenated and fed through a two-layer MLP producing one logit.
    """

    def __init__(self, config: UnicosysConfig):
        super().__init__()
        width = config.hidden_dim
        self.edge_type_embed = nn.Embedding(config.num_edge_types, width)
        # Layer order is kept as-is so parameter names in the state dict
        # (scorer.0.*, scorer.3.*) do not change.
        mlp_layers = [
            nn.Linear(width * 3, width),
            nn.ReLU(),
            nn.Dropout(config.gat_dropout),
            nn.Linear(width, 1),
        ]
        self.scorer = nn.Sequential(*mlp_layers)

    def forward(self, src_embeds, tgt_embeds, edge_type_ids):
        """Return one score per candidate edge.

        Args:
            src_embeds: source-node embeddings, last dim ``hidden_dim``.
            tgt_embeds: target-node embeddings, last dim ``hidden_dim``.
            edge_type_ids: integer edge-type ids, one per candidate.

        Returns:
            Scores with the trailing singleton dimension removed.
        """
        relation = self.edge_type_embed(edge_type_ids)
        features = torch.cat((src_embeds, tgt_embeds, relation), dim=-1)
        logits = self.scorer(features)
        return logits.squeeze(-1)
| # --------------------------------------------------------------------------- | |
| # Main Model | |
| # --------------------------------------------------------------------------- | |
class UnicosysHypergraphModel(PreTrainedModel):
    """
    Unicosys Hypergraph Knowledge Model.

    Encodes the unified hypergraph as trainable embeddings with:
    - Node type + subsystem structural embeddings
    - Text-based semantic embeddings from labels/descriptions
    - Graph attention for relational reasoning
    - Link prediction for discovering missing evidence connections

    Usage:
        from transformers import AutoConfig, AutoModel
        config = AutoConfig.from_pretrained("drzo/unicosys-hypergraph", trust_remote_code=True)
        model = AutoModel.from_pretrained("drzo/unicosys-hypergraph", trust_remote_code=True)
    """

    config_class = UnicosysConfig
    _tied_weights_keys = {}
    supports_gradient_checkpointing = False

    def __init__(self, config: UnicosysConfig):
        super().__init__(config)
        # Structural embeddings
        self.node_type_embed = nn.Embedding(config.num_node_types, config.node_embed_dim)
        self.subsystem_embed = nn.Embedding(config.num_subsystems, config.node_embed_dim)
        self.node_id_embed = nn.Embedding(config.max_nodes, config.node_embed_dim)
        # Project structural features to hidden dim
        self.struct_proj = nn.Linear(config.node_embed_dim * 3, config.hidden_dim)
        # Text encoder for labels
        self.text_encoder = LightweightTextEncoder(config)
        # Combine structural + text
        self.combine_proj = nn.Linear(config.hidden_dim * 2, config.hidden_dim)
        self.combine_norm = nn.LayerNorm(config.hidden_dim)
        # Graph attention layers
        self.gat_layers = nn.ModuleList([
            GraphAttentionLayer(config) for _ in range(config.gat_num_layers)
        ])
        # Edge type embeddings for GAT
        self.edge_type_embed_gat = nn.Embedding(
            config.num_edge_types, config.node_embed_dim
        )
        # Link prediction head
        self.link_predictor = LinkPredictionHead(config)
        # Initialize weights
        self.apply(self._init_weights)
        # Required by transformers >= 5.x for tied weight tracking
        self.post_init()

    def _init_weights(self, module):
        """Xavier-uniform for Linear weights (zero bias), N(0, 0.02) for embeddings."""
        if isinstance(module, nn.Linear):
            nn.init.xavier_uniform_(module.weight)
            if module.bias is not None:
                nn.init.zeros_(module.bias)
        elif isinstance(module, nn.Embedding):
            nn.init.normal_(module.weight, mean=0.0, std=0.02)

    def encode_nodes(
        self,
        node_ids: torch.LongTensor,
        node_type_ids: torch.LongTensor,
        subsystem_ids: torch.LongTensor,
        text_input_ids: Optional[torch.LongTensor] = None,
        text_attention_mask: Optional[torch.LongTensor] = None,
    ) -> torch.Tensor:
        """Encode nodes into dense vectors of shape (N, hidden_dim).

        The node-id, node-type and subsystem embeddings are concatenated and
        projected to ``hidden_dim``. If ``text_input_ids`` is given, the text
        encoder output is concatenated to that structural vector; otherwise
        a zero vector of the same shape takes its place. The pair is then
        projected back to ``hidden_dim`` and layer-normalised.
        """
        struct = torch.cat([
            self.node_id_embed(node_ids),
            self.node_type_embed(node_type_ids),
            self.subsystem_embed(subsystem_ids),
        ], dim=-1)
        struct = self.struct_proj(struct)

        if text_input_ids is not None:
            text = self.text_encoder(text_input_ids, text_attention_mask)
        else:
            # No text available: feed zeros through the text half of the
            # combiner so the same projection serves both cases.
            text = torch.zeros_like(struct)

        combined = torch.cat([struct, text], dim=-1)
        return self.combine_norm(self.combine_proj(combined))

    def forward(
        self,
        node_ids: torch.LongTensor,
        node_type_ids: torch.LongTensor,
        subsystem_ids: torch.LongTensor,
        edge_index: torch.LongTensor,
        edge_type_ids: torch.LongTensor,
        text_input_ids: Optional[torch.LongTensor] = None,
        text_attention_mask: Optional[torch.LongTensor] = None,
        pos_edge_index: Optional[torch.LongTensor] = None,
        pos_edge_types: Optional[torch.LongTensor] = None,
        neg_edge_index: Optional[torch.LongTensor] = None,
        neg_edge_types: Optional[torch.LongTensor] = None,
        labels: Optional[torch.FloatTensor] = None,
    ):
        """
        Forward pass with optional link prediction training.

        Link prediction runs only when both ``pos_edge_index`` and
        ``neg_edge_index`` are given; ``pos_edge_types`` and
        ``neg_edge_types`` are then required as well.

        ``labels`` acts purely as a switch: when it is not None a margin
        ranking loss (margin ``config.margin``) is computed that asks every
        positive score to exceed its paired negative score. The values
        inside ``labels`` are not read. If ``labels`` is given without the
        positive/negative edge indices, no loss is produced.

        Returns dict with:
        - node_embeddings: (N, hidden_dim)
        - loss: scalar (if labels provided together with pos/neg edges)
        - pos_scores: scores for positive edges
        - neg_scores: scores for negative edges

        Raises:
            ValueError: if pos/neg edge indices are given without their
                edge types.
        """
        # 1. Encode all nodes
        node_embeds = self.encode_nodes(
            node_ids, node_type_ids, subsystem_ids,
            text_input_ids, text_attention_mask,
        )

        # 2. Graph attention message passing (edge features are shared by
        #    every layer, so embed them once)
        edge_type_embeds = self.edge_type_embed_gat(edge_type_ids)
        for gat_layer in self.gat_layers:
            node_embeds = gat_layer(node_embeds, edge_index, edge_type_embeds)

        result = {"node_embeddings": node_embeds}

        # 3. Link prediction (if training edges provided)
        if pos_edge_index is not None and neg_edge_index is not None:
            if pos_edge_types is None or neg_edge_types is None:
                # Fail early with a readable message rather than letting the
                # embedding lookup choke on None.
                raise ValueError(
                    "pos_edge_types and neg_edge_types are required when "
                    "pos_edge_index and neg_edge_index are provided"
                )
            pos_src, pos_tgt = pos_edge_index
            neg_src, neg_tgt = neg_edge_index
            pos_scores = self.link_predictor(
                node_embeds[pos_src], node_embeds[pos_tgt], pos_edge_types
            )
            neg_scores = self.link_predictor(
                node_embeds[neg_src], node_embeds[neg_tgt], neg_edge_types
            )
            result["pos_scores"] = pos_scores
            result["neg_scores"] = neg_scores

            if labels is not None:
                # Target of +1 everywhere: positives should outrank negatives.
                loss = F.margin_ranking_loss(
                    pos_scores, neg_scores,
                    torch.ones_like(pos_scores),
                    margin=self.config.margin,
                )
                result["loss"] = loss

        return result

    def get_node_embedding(self, node_idx: int) -> torch.Tensor:
        """Get the embedding for a single node by index.

        This is the raw row of the node-id embedding table (length
        ``config.node_embed_dim``), not the output of ``forward``. A
        detached copy is returned so that in-place edits by the caller
        cannot modify the model's parameters.
        """
        return self.node_id_embed.weight[node_idx].detach().clone()