from PIL import Image
import torch
import torch.nn as nn
import torch.nn.functional as F
from huggingface_hub import hf_hub_download

from transformers import DepthProConfig, DepthProImageProcessorFast, DepthProForDepthEstimation

# Build a DepthPro backbone (FOV head disabled) and replace its depth head
# with a single-channel segmentation head:
#   3x3 conv -> BatchNorm -> ReLU -> Dropout -> 1x1 conv -> 2x transposed-conv
# producing per-pixel logits at twice the feature-map resolution.
config = DepthProConfig(use_fov_model=False)
model = DepthProForDepthEstimation(config)
features = config.fusion_hidden_size
semantic_classifier_dropout = 0.1
num_labels = 1  # single logit channel -> binary mask after sigmoid+threshold
model.head.head = nn.Sequential(
    nn.Conv2d(features, features, kernel_size=3, padding=1, bias=False),
    nn.BatchNorm2d(features),
    nn.ReLU(),
    nn.Dropout(semantic_classifier_dropout),
    nn.Conv2d(features, features, kernel_size=1),
    nn.ConvTranspose2d(features, num_labels, kernel_size=2, stride=2, padding=0, bias=True),
)

# Download and load the fine-tuned segmentation checkpoint.
# weights_only=True restricts torch.load to tensor data (no arbitrary
# pickle code execution from the downloaded file).
weights_path = hf_hub_download(repo_id="geetu040/DepthPro_Segmentation_Human", filename="model_weights.pth")
model.load_state_dict(torch.load(weights_path, map_location=torch.device('cpu'), weights_only=True))

# Move to GPU when available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# FIX: switch to inference mode. The replacement head contains BatchNorm2d
# and Dropout; torch.no_grad() alone does NOT deactivate them, so without
# eval() predictions would use per-batch statistics and random dropout.
model.eval()

image_processor = DepthProImageProcessorFast()
|
def predict(image):
    """Run person segmentation on *image* and return a binary PIL mask.

    The image is converted to RGB, preprocessed, and passed through the
    global segmentation model. The resulting logits are resized back to
    the input resolution, squashed through a sigmoid, and thresholded at
    0.5, yielding an 8-bit mask with values 0 and 255 (255 presumably
    marks the segmented person, per the checkpoint name — verify).
    """
    image = image.convert("RGB")

    # Preprocess and move every input tensor to the model's device.
    inputs = image_processor(images=image, return_tensors="pt")
    inputs = {name: tensor.to(device) for name, tensor in inputs.items()}

    # Forward pass without building the autograd graph.
    with torch.no_grad():
        logits = model(**inputs)[0]

    # Resize logits to the original image size (F.interpolate's default
    # resampling mode), then binarize: sigmoid -> threshold at 0.5.
    resized = F.interpolate(logits.unsqueeze(0), size=(image.height, image.width))
    mask = (resized.squeeze().sigmoid() > 0.5).float()

    # Map {0, 1} -> {0, 255} and convert to an 8-bit grayscale PIL image.
    mask_bytes = (mask.cpu() * 255).numpy().astype('uint8')
    return Image.fromarray(mask_bytes)
| |
|