Kwai-Keye committed on
Commit
a56823f
·
verified ·
1 Parent(s): 03ea770

Add model files

Browse files
image_processing_keye.py CHANGED
@@ -128,8 +128,8 @@ def smart_resize(
128
  height: int,
129
  width: int,
130
  factor: int = 28,
131
- min_pixels: int = 56 * 56,
132
- max_pixels: int = 14 * 14 * 4096,
133
  ):
134
  """Rescales the image so that the following conditions are met:
135
 
@@ -193,9 +193,9 @@ class SiglipImageProcessor(BaseImageProcessor):
193
  Standard deviation to use if normalizing the image. This is a float or list of floats for each channel in the image.
194
  do_convert_rgb (`bool`, *optional*, defaults to `True`):
195
  Whether to convert the image to RGB.
196
- min_pixels (`int`, *optional*, defaults to `56 * 56`):
197
  The min pixels of the image to resize the image.
198
- max_pixels (`int`, *optional*, defaults to `28 * 28 * 1280`):
199
  The max pixels of the image to resize the image.
200
  patch_size (`int`, *optional*, defaults to 14):
201
  The spacial patch size of the vision encoder.
@@ -222,8 +222,8 @@ class SiglipImageProcessor(BaseImageProcessor):
222
  image_mean: Optional[Union[float, List[float]]] = None,
223
  image_std: Optional[Union[float, List[float]]] = None,
224
  do_convert_rgb: bool = True,
225
- min_pixels: int = 56 * 56,
226
- max_pixels: int = 28 * 28 * 1280,
227
  patch_size: int = 14,
228
  temporal_patch_size: int = 1,
229
  merge_size: int = 2,
 
128
  height: int,
129
  width: int,
130
  factor: int = 28,
131
+ min_pixels: int = 28 * 28 * 130,
132
+ max_pixels: int = 28 * 28 * 1670,
133
  ):
134
  """Rescales the image so that the following conditions are met:
135
 
 
193
  Standard deviation to use if normalizing the image. This is a float or list of floats for each channel in the image.
194
  do_convert_rgb (`bool`, *optional*, defaults to `True`):
195
  Whether to convert the image to RGB.
196
+ min_pixels (`int`, *optional*, defaults to `28 * 28 * 130`):
197
  The min pixels of the image to resize the image.
198
+ max_pixels (`int`, *optional*, defaults to `28 * 28 * 1670`):
199
  The max pixels of the image to resize the image.
200
  patch_size (`int`, *optional*, defaults to 14):
201
  The spacial patch size of the vision encoder.
 
222
  image_mean: Optional[Union[float, List[float]]] = None,
223
  image_std: Optional[Union[float, List[float]]] = None,
224
  do_convert_rgb: bool = True,
225
+ min_pixels: int = 28 * 28 * 130,
226
+ max_pixels: int = 28 * 28 * 1670,
227
  patch_size: int = 14,
228
  temporal_patch_size: int = 1,
229
  merge_size: int = 2,
preprocessor_config.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "min_pixels": 3136,
3
- "max_pixels": 1003520,
4
  "patch_size": 14,
5
  "temporal_patch_size": 1,
6
  "merge_size": 2,
 
1
  {
2
+ "min_pixels": 101920,
3
+ "max_pixels": 1309280,
4
  "patch_size": 14,
5
  "temporal_patch_size": 1,
6
  "merge_size": 2,