Spaces:
Runtime error
Runtime error
added app
Browse files
1.jpg
ADDED
|
2.jpg
ADDED
|
3.jpg
ADDED
|
__pycache__/model.cpython-39.pyc
ADDED
|
Binary file (1.12 kB). View file
|
|
|
app.py
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# AUTOGENERATED! DO NOT EDIT! File to edit: ../app.ipynb.

# %% auto 0
# Public API of this nbdev-exported module (kept in sync with app.ipynb).
__all__ = ['device', 'model', 'MEAN', 'STD', 'transform', 'image', 'label', 'examples', 'intf', 'to_img', 'draw_image_with_bbox',
           'localize_dog']
# %% ../app.ipynb 3
# Run on GPU when one is available; everything below lives on this device.
device = "cuda" if torch.cuda.is_available() else "cpu"

model = Model()
# Bug fix: without map_location, a checkpoint saved on a CUDA machine fails
# to load on a CPU-only host (e.g. a free Hugging Face Space) with
# "Attempting to deserialize object on a CUDA device".
model.load_state_dict(torch.load('model.pt', map_location=device))
model = model.to(device)
model.eval()  # inference mode: freezes batch-norm stats / disables dropout
# %% ../app.ipynb 4
# ImageNet channel statistics — the ResNet backbone was trained with these.
MEAN = [0.485, 0.456, 0.406]
STD = [0.229, 0.224, 0.225]

# %% ../app.ipynb 5
# Preprocessing: fixed 224x224 input, PIL -> tensor, ImageNet normalization.
transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=MEAN, std=STD),
    ]
)
# %% ../app.ipynb 6
def to_img(inp):
    """Undo ImageNet normalization and rescale to the 0-255 pixel range.

    `inp` is expected channels-last (H, W, C) so that MEAN/STD broadcast
    over the colour axis; values are clipped to [0, 1] before scaling.
    """
    denormalized = np.array(STD) * inp + np.array(MEAN)
    return np.clip(denormalized, 0, 1) * 255
def draw_image_with_bbox(im, shape, pred_bbox=None, pred_obj=1):
    """Render a normalized CHW image tensor at its original size, optionally
    drawing the predicted bounding box.

    Args:
        im: normalized image tensor of shape (C, H, W), as produced by
            ``transform``.
        shape: (width, height) to resize the rendered image back to
            (PIL ``Image.size`` order).
        pred_bbox: (xc, yc, w, h) box in relative [0, 1] coordinates, or
            None to draw nothing.
        pred_obj: objectness score; the box is drawn only when it exceeds 0.5.

    Returns:
        A ``PIL.Image`` with the box outlined in red when applicable.
    """
    im = im.numpy().transpose((1, 2, 0))  # CHW -> HWC for cv2 / PIL
    im = cv2.resize(im, dsize=shape)

    image_with_bbox = Image.fromarray(to_img(im).astype(np.uint8))
    image_draw = ImageDraw.Draw(image_with_bbox)

    # Bug fix: the original unpacked pred_bbox unconditionally, so calling
    # with the documented default (pred_bbox=None) raised a TypeError.
    if pred_bbox is not None and pred_obj > 0.5:
        xc, yc, w, h = pred_bbox
        # Convert relative center/size coordinates to absolute top-left + size.
        xmin = (xc - w / 2) * shape[0]
        ymin = (yc - h / 2) * shape[1]
        w = w * shape[0]
        h = h * shape[1]
        xmin, ymin, w, h = map(int, [xmin, ymin, w, h])
        # Clamp to the image bounds so the outline stays visible at the edges.
        image_draw.rectangle(
            (max(xmin, 1), max(ymin, 1),
             min(xmin + w, shape[0] - 1), min(ymin + h, shape[1] - 1)),
            outline='red',
        )

    return image_with_bbox
# %% ../app.ipynb 7
def localize_dog(im):
    """Run the localization model on a PIL image and draw the predicted box.

    Args:
        im: input ``PIL.Image`` of any size/mode.

    Returns:
        A ``PIL.Image`` at the original size, with the predicted bounding box
        drawn when the objectness score exceeds the drawing threshold.
    """
    shape = im.size[:2]  # PIL .size is (width, height)
    im = im.convert('RGB')
    im = transform(im)
    # Bug fix: inference without no_grad builds an autograd graph on every
    # request, steadily leaking memory in a long-running app.
    with torch.no_grad():
        pred_label, pred_bbox = model(im.unsqueeze(0).to(device))
    # Move predictions back to the CPU so drawing also works on a CUDA host.
    prediction = draw_image_with_bbox(
        im, shape, pred_bbox[0].cpu(), pred_label[0].cpu())
    return prediction
# %% ../app.ipynb 9
# Bug fix: gr.inputs.Image / gr.outputs.Image were deprecated in Gradio 3.x
# and removed in 4.x — a Space built against current Gradio crashes at import
# ("Runtime error"). The unified gr.Image component replaces both.
image = gr.Image(type="pil")
label = gr.Image(type="pil")
examples = ['1.jpg', '2.jpg', '3.jpg']

intf = gr.Interface(fn=localize_dog,
                    inputs=image,
                    outputs=label,
                    title='Dog localization',
                    examples=examples)
intf.launch()
model.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7ae751730cd1867ed8266116a6e5ebcb0e0052fb2fed2cd58c2f7e76d05b08f5
|
| 3 |
+
size 46900993
|
model.py
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch.nn as nn
|
| 2 |
+
from torchvision.models import resnet18, ResNet18_Weights
|
| 3 |
+
import torch.nn.functional as F
|
| 4 |
+
import torch
|
| 5 |
+
|
| 6 |
+
class Model(nn.Module):
    """ResNet-18 backbone with two heads: objectness probability and a box.

    ``forward`` returns ``(pred_prob, pred_bbox)``:
      * pred_prob — shape (N, 1), sigmoid probability that an object is
        present in the image.
      * pred_bbox — shape (N, 4), bounding box of the object; meaningful only
        when the probability exceeds a threshold (otherwise any 4 numbers).
    """

    def __init__(self):
        super().__init__()
        # Bug fix: the original passed the enum *class* (weights=ResNet18_Weights);
        # torchvision expects a member — ResNet18_Weights.DEFAULT — and raises
        # a TypeError for the class. (weights=None would also work here if the
        # pretrained download is unwanted, since a checkpoint is loaded anyway.)
        self.feature_extractor = resnet18(weights=ResNet18_Weights.DEFAULT)
        in_channels = self.feature_extractor.fc.in_features
        # Drop the ImageNet classification head; backbone now emits raw features.
        self.feature_extractor.fc = nn.Identity()
        # NOTE(review): two stacked Linear layers with no nonlinearity between
        # them collapse to a single affine map — possibly intentional, but
        # worth confirming against the training notebook.
        self.fc_prob = nn.Sequential(
            nn.Linear(in_channels, 512),
            nn.Linear(512, 1)
        )
        self.fc_bbox = nn.Sequential(
            nn.Linear(in_channels, 512),
            nn.Linear(512, 4)
        )

    def forward(self, x):
        # Run the (expensive) backbone once and share the features between
        # heads — the original called feature_extractor(x) twice per forward.
        features = self.feature_extractor(x)
        pred_prob = torch.sigmoid(self.fc_prob(features))
        pred_bbox = self.fc_bbox(features)
        return (pred_prob, pred_bbox)