Push model using huggingface_hub.
Browse files- README.md +4 -41
- config.json +165 -0
- model.safetensors +3 -0
README.md
CHANGED
@@ -2,46 +2,9 @@
|
|
2 |
tags:
|
3 |
- model_hub_mixin
|
4 |
- pytorch_model_hub_mixin
|
5 |
-
license: cc-by-nc-4.0
|
6 |
-
language:
|
7 |
-
- en
|
8 |
-
pipeline_tag: other
|
9 |
---
|
10 |
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
<a href="https://uniflowmatch.github.io/"><img src="https://img.shields.io/badge/Project_Page-green" alt="Project Page"></a>
|
16 |
-
<a href='https://huggingface.co/spaces/infinity1096/UFM'><img src='https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Demo-blue'></a>
|
17 |
-
|
18 |
-
|
19 |
-
**Carnegie Mellon University**
|
20 |
-
|
21 |
-
[Yuchen Zhang](https://infinity1096.github.io/), [Nikhil Keetha](https://nik-v9.github.io/), [Chenwei Lyu](https://www.linkedin.com/in/chenwei-lyu/), [Bhuvan Jhamb](https://www.linkedin.com/in/bhuvanjhamb/), [Yutian Chen](https://www.yutianchen.blog/about/)
|
22 |
-
[Yuheng Qiu](https://haleqiu.github.io), [Jay Karhade](https://jaykarhade.github.io/), [Shreyas Jha](https://www.linkedin.com/in/shreyasjha/), [Yaoyu Hu](http://www.huyaoyu.com/)
|
23 |
-
[Deva Ramanan](https://www.cs.cmu.edu/~deva/), [Sebastian Scherer](https://theairlab.org/team/sebastian/), [Wenshan Wang](http://www.wangwenshan.com/)
|
24 |
-
</div>
|
25 |
-
|
26 |
-
## Overview
|
27 |
-
|
28 |
-
UFM (UniFlowMatch) is a simple, end-to-end trained transformer model that directly regresses a pixel displacement image and applies to both optical flow and wide-baseline matching tasks.
|
29 |
-
|
30 |
-
This model repository contains the refine variant of the model.
|
31 |
-
|
32 |
-
## Quick Start
|
33 |
-
|
34 |
-
Check out our [GitHub repo](https://github.com/UniFlowMatch/UFM) and the Hugging Face [demo](https://huggingface.co/spaces/infinity1096/UFM).
|
35 |
-
|
36 |
-
|
37 |
-
## Citation
|
38 |
-
If you find our repository useful, please consider giving it a star ⭐ and citing our paper in your work:
|
39 |
-
|
40 |
-
```bibtex
|
41 |
-
@inproceedings{zhang2025ufm,
|
42 |
-
title={UFM: A Simple Path towards Unified Dense Correspondence with Flow},
|
43 |
-
author={Zhang, Yuchen and Keetha, Nikhil and Lyu, Chenwei and Jhamb, Bhuvan and Chen, Yutian and Qiu, Yuheng and Karhade, Jay and Jha, Shreyas and Hu, Yaoyu and Ramanan, Deva and Scherer, Sebastian and Wang, Wenshan},
|
44 |
-
booktitle={TBD},
|
45 |
-
year={2025}
|
46 |
-
}
|
47 |
-
```
|
|
|
2 |
tags:
|
3 |
- model_hub_mixin
|
4 |
- pytorch_model_hub_mixin
|
|
|
|
|
|
|
|
|
5 |
---
|
6 |
|
7 |
+
This model has been pushed to the Hub using the [PyTorchModelHubMixin](https://huggingface.co/docs/huggingface_hub/package_reference/mixins#huggingface_hub.PyTorchModelHubMixin) integration:
|
8 |
+
- Code: [More Information Needed]
|
9 |
+
- Paper: [More Information Needed]
|
10 |
+
- Docs: [More Information Needed]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
config.json
ADDED
@@ -0,0 +1,165 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"adaptors_kwargs": {
|
3 |
+
"flow": {
|
4 |
+
"class": "FlowAdaptor",
|
5 |
+
"kwargs": {
|
6 |
+
"base_shape": [
|
7 |
+
224,
|
8 |
+
224
|
9 |
+
],
|
10 |
+
"flow_mean": [
|
11 |
+
0.0,
|
12 |
+
0.0
|
13 |
+
],
|
14 |
+
"flow_std": [
|
15 |
+
25,
|
16 |
+
25
|
17 |
+
],
|
18 |
+
"name": "flow",
|
19 |
+
"output_normalized_coordinate": false,
|
20 |
+
"scale_strategy": "scale_both"
|
21 |
+
}
|
22 |
+
}
|
23 |
+
},
|
24 |
+
"classification_head_kwargs": {
|
25 |
+
"bias": true,
|
26 |
+
"input_feature_dim": "1024 + 768",
|
27 |
+
"mlp_ratio": 2,
|
28 |
+
"output_dim": 16,
|
29 |
+
"patch_size": 14,
|
30 |
+
"pretrained_checkpoint_path": null
|
31 |
+
},
|
32 |
+
"classification_head_type": "patch_mlp",
|
33 |
+
"detach_uncertainty_head": true,
|
34 |
+
"encoder_kwargs": {
|
35 |
+
"data_norm_type": "dinov2",
|
36 |
+
"indices": [
|
37 |
+
6,
|
38 |
+
23
|
39 |
+
],
|
40 |
+
"name": "dinov2_encoder",
|
41 |
+
"patch_size": 14,
|
42 |
+
"size": "large",
|
43 |
+
"with_registers": false
|
44 |
+
},
|
45 |
+
"encoder_str": "dinov2",
|
46 |
+
"feature_combine_method": "conv",
|
47 |
+
"feature_head_kwargs": {
|
48 |
+
"dpt_feature": {
|
49 |
+
"feature_dim": 256,
|
50 |
+
"hooks": [
|
51 |
+
0,
|
52 |
+
1,
|
53 |
+
2,
|
54 |
+
3
|
55 |
+
],
|
56 |
+
"input_feature_dims": [
|
57 |
+
1024,
|
58 |
+
768,
|
59 |
+
768,
|
60 |
+
768
|
61 |
+
],
|
62 |
+
"layer_dims": [
|
63 |
+
96,
|
64 |
+
192,
|
65 |
+
384,
|
66 |
+
768
|
67 |
+
],
|
68 |
+
"output_width_ratio": 1,
|
69 |
+
"patch_size": 14,
|
70 |
+
"pretrained_checkpoint_path": null,
|
71 |
+
"use_bn": false
|
72 |
+
},
|
73 |
+
"dpt_processor": {
|
74 |
+
"hidden_dims": [
|
75 |
+
128,
|
76 |
+
128
|
77 |
+
],
|
78 |
+
"input_feature_dim": 256,
|
79 |
+
"output_dim": 2,
|
80 |
+
"pretrained_checkpoint_path": null
|
81 |
+
}
|
82 |
+
},
|
83 |
+
"head_type": "dpt",
|
84 |
+
"inference_resolution": [
|
85 |
+
560,
|
86 |
+
420
|
87 |
+
],
|
88 |
+
"info_sharing_and_head_structure": "dual+single",
|
89 |
+
"info_sharing_kwargs": {
|
90 |
+
"attn_drop": 0.0,
|
91 |
+
"custom_positional_encoding": null,
|
92 |
+
"depth": 12,
|
93 |
+
"dim": 768,
|
94 |
+
"drop_path": 0.0,
|
95 |
+
"gradient_checkpointing": false,
|
96 |
+
"indices": [
|
97 |
+
5,
|
98 |
+
8
|
99 |
+
],
|
100 |
+
"init_values": null,
|
101 |
+
"input_embed_dim": 1024,
|
102 |
+
"max_num_views": 2,
|
103 |
+
"mlp_ratio": 4.0,
|
104 |
+
"name": "global_attention",
|
105 |
+
"norm_intermediate": true,
|
106 |
+
"num_heads": 12,
|
107 |
+
"pretrained_checkpoint_path": null,
|
108 |
+
"proj_drop": 0.0,
|
109 |
+
"qk_norm": false,
|
110 |
+
"qkv_bias": true,
|
111 |
+
"size": "base",
|
112 |
+
"use_rand_idx_pe_for_non_reference_views": false
|
113 |
+
},
|
114 |
+
"info_sharing_str": "global_attention",
|
115 |
+
"pretrained_backbone_checkpoint_path": null,
|
116 |
+
"pretrained_checkpoint_path": null,
|
117 |
+
"refinement_range": 7,
|
118 |
+
"temperature": 4,
|
119 |
+
"uncertainty_adaptors_kwargs": {
|
120 |
+
"non_occluded_mask": {
|
121 |
+
"class": "MaskAdaptor",
|
122 |
+
"kwargs": {
|
123 |
+
"name": "non_occluded_mask"
|
124 |
+
}
|
125 |
+
}
|
126 |
+
},
|
127 |
+
"uncertainty_head_kwargs": {
|
128 |
+
"dpt_feature": {
|
129 |
+
"feature_dim": 256,
|
130 |
+
"hooks": [
|
131 |
+
0,
|
132 |
+
1,
|
133 |
+
2,
|
134 |
+
3
|
135 |
+
],
|
136 |
+
"input_feature_dims": [
|
137 |
+
1024,
|
138 |
+
768,
|
139 |
+
768,
|
140 |
+
768
|
141 |
+
],
|
142 |
+
"layer_dims": [
|
143 |
+
96,
|
144 |
+
192,
|
145 |
+
384,
|
146 |
+
768
|
147 |
+
],
|
148 |
+
"output_width_ratio": 1,
|
149 |
+
"patch_size": 14,
|
150 |
+
"pretrained_checkpoint_path": null,
|
151 |
+
"use_bn": false
|
152 |
+
},
|
153 |
+
"dpt_processor": {
|
154 |
+
"hidden_dims": [
|
155 |
+
128,
|
156 |
+
128
|
157 |
+
],
|
158 |
+
"input_feature_dim": 256,
|
159 |
+
"output_dim": 1,
|
160 |
+
"pretrained_checkpoint_path": null
|
161 |
+
}
|
162 |
+
},
|
163 |
+
"uncertainty_head_type": "dpt",
|
164 |
+
"use_unet_feature": true
|
165 |
+
}
|
model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ccddd553551a6dc8298bef0f2e9227a70d9ebd4b1d3537bc8b132dd673c625d5
|
3 |
+
size 1911127408
|