Abhishek Gola committed
Commit 339a69e · 1 Parent(s): 89138dc
Added samples
Changed files:
- .gitattributes +5 -0
- app.py +42 -8
- examples/car.mp4 +3 -0
- examples/desert_car.mp4 +3 -0
.gitattributes
CHANGED
@@ -14,6 +14,11 @@
 *.npy filter=lfs diff=lfs merge=lfs -text
 *.npz filter=lfs diff=lfs merge=lfs -text
 *.onnx filter=lfs diff=lfs merge=lfs -text
+*.mp4 filter=lfs diff=lfs merge=lfs -text
+*.jpg filter=lfs diff=lfs merge=lfs -text
+*.gif filter=lfs diff=lfs merge=lfs -text
+*.png filter=lfs diff=lfs merge=lfs -text
+*.webp filter=lfs diff=lfs merge=lfs -text
 *.ot filter=lfs diff=lfs merge=lfs -text
 *.parquet filter=lfs diff=lfs merge=lfs -text
 *.pb filter=lfs diff=lfs merge=lfs -text
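The five new attribute lines route the added media types (mp4, jpg, gif, png, webp) through Git LFS, so the repository history only stores small pointer files while the binary payloads live in LFS storage. As a rough illustration only, the sketch below checks which of the commit's files fall under the new rules; fnmatch merely approximates these simple "*.ext" gitattributes patterns (real gitattributes matching has extra path rules), and is_lfs_tracked is a hypothetical helper, not part of this repo:

from fnmatch import fnmatch

# Patterns added to .gitattributes in this commit
LFS_PATTERNS = ["*.mp4", "*.jpg", "*.gif", "*.png", "*.webp"]

def is_lfs_tracked(path: str) -> bool:
    # Match on the file name only, mirroring the simple "*.ext" patterns
    name = path.rsplit("/", 1)[-1]
    return any(fnmatch(name, pattern) for pattern in LFS_PATTERNS)

print(is_lfs_tracked("examples/car.mp4"))         # True
print(is_lfs_tracked("examples/desert_car.mp4"))  # True
print(is_lfs_tracked("app.py"))                   # False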
app.py
CHANGED
@@ -15,6 +15,9 @@ MODEL_PATH = hf_hub_download(
 backend_id = cv.dnn.DNN_BACKEND_OPENCV
 target_id = cv.dnn.DNN_TARGET_CPU
 
+car_on_road_video = "examples/car.mp4"
+car_in_desert_video = "examples/desert_car.mp4"
+
 # Global state
 state = {
     "points": [],
@@ -23,6 +26,12 @@ state = {
     "first_frame": None
 }
 
+#Example bounding boxes
+bbox_dict = {
+    "car.mp4": "(152, 356, 332, 104)",
+    "desert_car.mp4": "(758, 452, 119, 65)",
+}
+
 def load_first_frame(video_path):
     """Load video, grab first frame, reset state."""
     state["video_path"] = video_path
@@ -32,11 +41,8 @@ def load_first_frame(video_path):
     if not has_frame:
         return None
     state["first_frame"] = frame.copy()
-    state["points"].clear()
-    state["bbox"] = None
     return cv.cvtColor(frame, cv.COLOR_BGR2RGB)
 
-
 def select_point(img, evt: gr.SelectData):
     """Accumulate up to 4 clicks, draw polygon + bounding box."""
     if state["first_frame"] is None:
@@ -64,7 +70,6 @@ def select_point(img, evt: gr.SelectData):
 
     return cv.cvtColor(vis, cv.COLOR_BGR2RGB)
 
-
 def clear_points():
     """Reset selected points only."""
     state["points"].clear()
@@ -73,7 +78,6 @@ def clear_points():
         return None
     return cv.cvtColor(state["first_frame"], cv.COLOR_BGR2RGB)
 
-
 def clear_all():
     """Reset everything."""
     state["points"].clear()
@@ -82,7 +86,6 @@ def clear_all():
     state["first_frame"] = None
     return None, None, None
 
-
 def track_video():
     """Init VitTrack and process entire video, return output path."""
     if state["video_path"] is None or state["bbox"] is None:
@@ -145,8 +148,21 @@ def track_video():
     writer.release()
     return out_path
 
+def example_pipeline(video_path):
+    clear_all()
+
+    filename = video_path.split('/')[-1]
+    state["video_path"] = video_path
+    state["bbox"] = eval(bbox_dict[filename])
+
+    return track_video()
+
+with gr.Blocks(css='''.example * {
+    font-style: italic;
+    font-size: 18px !important;
+    color: #0ea5e9 !important;
+}''') as demo:
 
-with gr.Blocks() as demo:
     gr.Markdown("## VitTrack: Interactive Video Object Tracking")
     gr.Markdown(
         """
@@ -166,7 +182,7 @@ with gr.Blocks() as demo:
     )
 
     with gr.Row():
-        video_in = gr.
+        video_in = gr.Video(label="Upload Video")
         first_frame = gr.Image(label="First Frame", interactive=True)
         output_video = gr.Video(label="Tracking Result")
 
@@ -175,6 +191,24 @@ with gr.Blocks() as demo:
         clear_pts_btn = gr.Button("Clear Points")
         clear_all_btn = gr.Button("Clear All")
 
+    gr.Markdown("Click any row to load an example.", elem_classes=["example"])
+
+    examples = [
+        [car_on_road_video],
+        [car_in_desert_video],
+    ]
+
+    gr.Examples(
+        examples=examples,
+        inputs=[video_in],
+        outputs=[output_video],
+        fn=example_pipeline,
+        cache_examples=False,
+        run_on_click=True
+    )
+
+    gr.Markdown("Example videos credit: https://pixabay.com/")
+
     video_in.change(fn=load_first_frame, inputs=video_in, outputs=first_frame)
     first_frame.select(fn=select_point, inputs=first_frame, outputs=first_frame)
     clear_pts_btn.click(fn=clear_points, outputs=first_frame)
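The new example_pipeline resets the shared state, keys into bbox_dict by file name, and evaluates the stored "(x, y, w, h)" string before delegating to track_video. A minimal hardened variant is sketched below, assuming the state, bbox_dict, clear_all and track_video helpers behave as defined in app.py; the switch to os.path.basename and ast.literal_eval is a suggestion, not part of this commit:

import os
import ast

def example_pipeline(video_path):
    # Reset any state left over from a manual upload run
    clear_all()
    # basename is the portable way to get the file name used as the dict key
    filename = os.path.basename(video_path)
    state["video_path"] = video_path
    # literal_eval parses the "(x, y, w, h)" string without executing arbitrary
    # code, unlike eval()
    state["bbox"] = ast.literal_eval(bbox_dict[filename])
    return track_video()

With run_on_click=True and cache_examples=False, gr.Examples calls this function directly when a row is clicked, so the clear_all() at the top keeps points or frames from a previous manual selection from leaking into the example run.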
examples/car.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:40b4545bba02d4f35238c8ea4382bc96e7e17d192bd115c7c0f6fe781d5717a3
+size 2088343
examples/desert_car.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5e8c2492b28d60539b5d61dec2a36a94f61a2f04707eab6f2ab6830d672c0f50
+size 3971502
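Both example videos are committed as Git LFS pointer files in the format shown above: a version line, an oid sha256 line, and a size line. If a checkout ever leaves the pointer text in place instead of the real MP4 bytes, cv.VideoCapture cannot open the examples; a small startup check along these lines makes that failure obvious (is_lfs_pointer is a hypothetical helper, not part of app.py, and it assumes the files exist on disk):

# Detect files that are still git-lfs pointers rather than real payloads.
def is_lfs_pointer(path: str) -> bool:
    with open(path, "rb") as f:
        head = f.read(64)
    return head.startswith(b"version https://git-lfs.github.com/spec/v1")

for video in ("examples/car.mp4", "examples/desert_car.mp4"):
    status = "still an LFS pointer" if is_lfs_pointer(video) else "real video payload"
    print(f"{video}: {status}")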