| | cond_image_size: 512 |
| | isosurface_resolution: 160 |
| | radius: 0.87 |
| |
|
| | camera_embedder_cls: sf3d.models.camera.LinearCameraEmbedder |
| | camera_embedder: |
| | in_channels: 25 |
| | out_channels: 768 |
| | conditions: |
| | - c2w_cond |
| | - intrinsic_normed_cond |
| |
|
| | image_tokenizer_cls: sf3d.models.tokenizers.image.DINOV2SingleImageTokenizer |
| | image_tokenizer: |
| | pretrained_model_name_or_path: "facebook/dinov2-large" |
| | width: 512 |
| | height: 512 |
| | modulation_cond_dim: 768 |
| |
|
| | tokenizer_cls: sf3d.models.tokenizers.triplane.TriplaneLearnablePositionalEmbedding |
| | tokenizer: |
| | plane_size: 96 |
| | num_channels: 1024 |
| |
|
| | backbone_cls: sf3d.models.transformers.backbone.TwoStreamInterleaveTransformer |
| | backbone: |
| | num_attention_heads: 16 |
| | attention_head_dim: 64 |
| | raw_triplane_channels: 1024 |
| | triplane_channels: 1024 |
| | raw_image_channels: 1024 |
| | num_latents: 1792 |
| | num_blocks: 4 |
| | num_basic_blocks: 3 |
| |
|
| | post_processor_cls: sf3d.models.network.PixelShuffleUpsampleNetwork |
| | post_processor: |
| | in_channels: 1024 |
| | out_channels: 40 |
| | scale_factor: 4 |
| | conv_layers: 4 |
| |
|
| |
|
| | decoder_cls: sf3d.models.network.MaterialMLP |
| | decoder: |
| | in_channels: 120 |
| | n_neurons: 64 |
| | activation: silu |
| | heads: |
| | - name: density |
| | out_channels: 1 |
| | out_bias: -1.0 |
| | n_hidden_layers: 2 |
| | output_activation: trunc_exp |
| | - name: features |
| | out_channels: 3 |
| | n_hidden_layers: 3 |
| | output_activation: sigmoid |
| | - name: perturb_normal |
| | out_channels: 3 |
| | n_hidden_layers: 3 |
| | output_activation: normalize_channel_last |
| | - name: vertex_offset |
| | out_channels: 3 |
| | n_hidden_layers: 2 |
| |
|
| | image_estimator_cls: sf3d.models.image_estimator.clip_based_estimator.ClipBasedHeadEstimator |
| | image_estimator: |
| | distribution: beta |
| | distribution_eval: mode |
| | heads: |
| | - name: roughness |
| | out_channels: 1 |
| | n_hidden_layers: 3 |
| | output_activation: linear |
| | add_to_decoder_features: true |
| | output_bias: 1.0 |
| | shape: [-1, 1, 1] |
| | - name: metallic |
| | out_channels: 1 |
| | n_hidden_layers: 3 |
| | output_activation: linear |
| | add_to_decoder_features: true |
| | output_bias: 1.0 |
| | shape: [-1, 1, 1] |
| |
|
| | global_estimator_cls: sf3d.models.global_estimator.multi_head_estimator.MultiHeadEstimator |
| | global_estimator: |
| | triplane_features: 1024 |
| | heads: |
| | - name: sg_amplitudes |
| | out_channels: 24 |
| | n_hidden_layers: 3 |
| | output_activation: softplus |
| | output_bias: 1.0 |
| | shape: [-1, 24, 1] |
| |
|