Spaces:
Runtime error
Runtime error
added app
Browse files
1.jpg
ADDED
|
2.jpg
ADDED
|
3.jpg
ADDED
|
__pycache__/model.cpython-39.pyc
ADDED
|
Binary file (1.12 kB). View file
|
|
|
app.py
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# AUTOGENERATED! DO NOT EDIT! File to edit: ../app.ipynb.

# %% auto 0
# Public API of this nbdev-exported module (kept in sync with app.ipynb).
__all__ = ['device', 'model', 'MEAN', 'STD', 'transform', 'image', 'label', 'examples', 'intf', 'to_img', 'draw_image_with_bbox',
           'localize_dog']
# %% ../app.ipynb 3
# Run on GPU when one is available; everything below lives on this device.
device = "cuda" if torch.cuda.is_available() else "cpu"

model = Model()
# Bug fix: without map_location, a checkpoint saved on a CUDA machine fails
# to load on a CPU-only host (e.g. a free Hugging Face Space) with
# "Attempting to deserialize object on a CUDA device".
model.load_state_dict(torch.load('model.pt', map_location=device))
model = model.to(device)
model.eval()  # inference mode: freezes batch-norm stats / disables dropout
# %% ../app.ipynb 4
# ImageNet channel statistics — the ResNet backbone was trained with these.
MEAN = [0.485, 0.456, 0.406]
STD = [0.229, 0.224, 0.225]

# %% ../app.ipynb 5
# Preprocessing: fixed 224x224 input, PIL -> tensor, ImageNet normalization.
transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=MEAN, std=STD),
    ]
)
# %% ../app.ipynb 6
def to_img(inp):
    """Undo ImageNet normalization and rescale to the 0-255 pixel range.

    `inp` is expected channels-last (H, W, C) so that MEAN/STD broadcast
    over the colour axis; values are clipped to [0, 1] before scaling.
    """
    denormalized = np.array(STD) * inp + np.array(MEAN)
    return np.clip(denormalized, 0, 1) * 255
def draw_image_with_bbox(im, shape, pred_bbox=None, pred_obj=1):
    """Render a normalized CHW image tensor at its original size, optionally
    drawing the predicted bounding box.

    Args:
        im: normalized image tensor of shape (C, H, W), as produced by
            ``transform``.
        shape: (width, height) to resize the rendered image back to
            (PIL ``Image.size`` order).
        pred_bbox: (xc, yc, w, h) box in relative [0, 1] coordinates, or
            None to draw nothing.
        pred_obj: objectness score; the box is drawn only when it exceeds 0.5.

    Returns:
        A ``PIL.Image`` with the box outlined in red when applicable.
    """
    im = im.numpy().transpose((1, 2, 0))  # CHW -> HWC for cv2 / PIL
    im = cv2.resize(im, dsize=shape)

    image_with_bbox = Image.fromarray(to_img(im).astype(np.uint8))
    image_draw = ImageDraw.Draw(image_with_bbox)

    # Bug fix: the original unpacked pred_bbox unconditionally, so calling
    # with the documented default (pred_bbox=None) raised a TypeError.
    if pred_bbox is not None and pred_obj > 0.5:
        xc, yc, w, h = pred_bbox
        # Convert relative center/size coordinates to absolute top-left + size.
        xmin = (xc - w / 2) * shape[0]
        ymin = (yc - h / 2) * shape[1]
        w = w * shape[0]
        h = h * shape[1]
        xmin, ymin, w, h = map(int, [xmin, ymin, w, h])
        # Clamp to the image bounds so the outline stays visible at the edges.
        image_draw.rectangle(
            (max(xmin, 1), max(ymin, 1),
             min(xmin + w, shape[0] - 1), min(ymin + h, shape[1] - 1)),
            outline='red',
        )

    return image_with_bbox
# %% ../app.ipynb 7
def localize_dog(im):
    """Run the localization model on a PIL image and draw the predicted box.

    Args:
        im: input ``PIL.Image`` of any size/mode.

    Returns:
        A ``PIL.Image`` at the original size, with the predicted bounding box
        drawn when the objectness score exceeds the drawing threshold.
    """
    shape = im.size[:2]  # PIL .size is (width, height)
    im = im.convert('RGB')
    im = transform(im)
    # Bug fix: inference without no_grad builds an autograd graph on every
    # request, steadily leaking memory in a long-running app.
    with torch.no_grad():
        pred_label, pred_bbox = model(im.unsqueeze(0).to(device))
    # Move predictions back to the CPU so drawing also works on a CUDA host.
    prediction = draw_image_with_bbox(
        im, shape, pred_bbox[0].cpu(), pred_label[0].cpu())
    return prediction
# %% ../app.ipynb 9
# Bug fix: gr.inputs.Image / gr.outputs.Image were deprecated in Gradio 3.x
# and removed in 4.x — a Space built against current Gradio crashes at import
# ("Runtime error"). The unified gr.Image component replaces both.
image = gr.Image(type="pil")
label = gr.Image(type="pil")
examples = ['1.jpg', '2.jpg', '3.jpg']

intf = gr.Interface(fn=localize_dog,
                    inputs=image,
                    outputs=label,
                    title='Dog localization',
                    examples=examples)
intf.launch()
model.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7ae751730cd1867ed8266116a6e5ebcb0e0052fb2fed2cd58c2f7e76d05b08f5
|
| 3 |
+
size 46900993
|
model.py
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch.nn as nn
|
| 2 |
+
from torchvision.models import resnet18, ResNet18_Weights
|
| 3 |
+
import torch.nn.functional as F
|
| 4 |
+
import torch
|
| 5 |
+
|
| 6 |
+
class Model(nn.Module):
    """ResNet-18 backbone with two heads: objectness probability and a box.

    ``forward`` returns ``(pred_prob, pred_bbox)``:
      * pred_prob — shape (N, 1), sigmoid probability that an object is
        present in the image.
      * pred_bbox — shape (N, 4), bounding box of the object; meaningful only
        when the probability exceeds a threshold (otherwise any 4 numbers).
    """

    def __init__(self):
        super().__init__()
        # Bug fix: the original passed the enum *class* (weights=ResNet18_Weights);
        # torchvision expects a member — ResNet18_Weights.DEFAULT — and raises
        # a TypeError for the class. (weights=None would also work here if the
        # pretrained download is unwanted, since a checkpoint is loaded anyway.)
        self.feature_extractor = resnet18(weights=ResNet18_Weights.DEFAULT)
        in_channels = self.feature_extractor.fc.in_features
        # Drop the ImageNet classification head; backbone now emits raw features.
        self.feature_extractor.fc = nn.Identity()
        # NOTE(review): two stacked Linear layers with no nonlinearity between
        # them collapse to a single affine map — possibly intentional, but
        # worth confirming against the training notebook.
        self.fc_prob = nn.Sequential(
            nn.Linear(in_channels, 512),
            nn.Linear(512, 1)
        )
        self.fc_bbox = nn.Sequential(
            nn.Linear(in_channels, 512),
            nn.Linear(512, 4)
        )

    def forward(self, x):
        # Run the (expensive) backbone once and share the features between
        # heads — the original called feature_extractor(x) twice per forward.
        features = self.feature_extractor(x)
        pred_prob = torch.sigmoid(self.fc_prob(features))
        pred_bbox = self.fc_bbox(features)
        return (pred_prob, pred_bbox)