---
license: mit
language:
- en
library_name: transformers
tags:
- vision
---

# Add Vision Head

```python
print('Add Vision...')

# Build a Seq2Seq (encoder-decoder) model by pairing a pre-trained ViT
# image encoder with the pre-trained language-model decoder.
vision_model = VisionEncoderDecoderModel.from_encoder_decoder_pretrained(
    "google/vit-base-patch16-224-in21k", "LeroyDyer/Mixtral_AI_Tiny"
)

# Attach the combined model to the base LM, then expose its two halves
# as separate attributes for direct access.
LM_MODEL.VisionEncoderDecoder = vision_model
LM_MODEL.Encoder_ImageProcessor = vision_model.encoder
LM_MODEL.Decoder_ImageTokenizer = vision_model.decoder

# Display the updated model (notebook-style expression output).
LM_MODEL
```