yifehuang97 committed on
Commit
97e2fd4
·
verified ·
1 Parent(s): 27de4bf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -34
app.py CHANGED
@@ -210,42 +210,43 @@ def count_objects(image, pos_caption, neg_caption, box_threshold, point_radius,
210
  use_neg = bool(neg_caption and neg_caption.strip() and neg_caption != '.')
211
 
212
 
213
- if use_neg:
214
  # print('neg_caption: ', neg_caption)
215
- neg_inputs = processor(
216
- images=image,
217
- text=neg_caption,
218
- return_tensors="pt",
219
- padding=True
220
- )
221
- neg_inputs = {k: v.to(device) for k, v in neg_inputs.items()}
222
- neg_inputs['pixel_values'] = neg_inputs['pixel_values'].to(torch.bfloat16)
223
-
224
- # Add negative inputs to positive inputs dict
225
- pos_inputs['neg_token_type_ids'] = neg_inputs['token_type_ids']
226
- pos_inputs['neg_attention_mask'] = neg_inputs['attention_mask']
227
- pos_inputs['neg_pixel_mask'] = neg_inputs['pixel_mask']
228
- pos_inputs['neg_pixel_values'] = neg_inputs['pixel_values']
229
- pos_inputs['neg_input_ids'] = neg_inputs['input_ids']
230
- pos_inputs['use_neg'] = True
231
- else:
232
  neg_caption = "None."
233
- neg_inputs = processor(
234
- images=image,
235
- text=neg_caption,
236
- return_tensors="pt",
237
- padding=True
238
- )
239
- neg_inputs = {k: v.to(device) for k, v in neg_inputs.items()}
240
- neg_inputs['pixel_values'] = neg_inputs['pixel_values'].to(torch.bfloat16)
241
-
242
- # Add negative inputs to positive inputs dict
243
- pos_inputs['neg_token_type_ids'] = neg_inputs['token_type_ids']
244
- pos_inputs['neg_attention_mask'] = neg_inputs['attention_mask']
245
- pos_inputs['neg_pixel_mask'] = neg_inputs['pixel_mask']
246
- pos_inputs['neg_pixel_values'] = neg_inputs['pixel_values']
247
- pos_inputs['neg_input_ids'] = neg_inputs['input_ids']
248
- pos_inputs['use_neg'] = False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
249
 
250
  # Run inference
251
  with torch.no_grad():
 
210
  use_neg = bool(neg_caption and neg_caption.strip() and neg_caption != '.')
211
 
212
 
213
+ if not use_neg:
214
  # print('neg_caption: ', neg_caption)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
215
  neg_caption = "None."
216
+ neg_inputs = processor(
217
+ images=image,
218
+ text=neg_caption,
219
+ return_tensors="pt",
220
+ padding=True
221
+ )
222
+ neg_inputs = {k: v.to(device) for k, v in neg_inputs.items()}
223
+ neg_inputs['pixel_values'] = neg_inputs['pixel_values'].to(torch.bfloat16)
224
+
225
+ # Add negative inputs to positive inputs dict
226
+ pos_inputs['neg_token_type_ids'] = neg_inputs['token_type_ids']
227
+ pos_inputs['neg_attention_mask'] = neg_inputs['attention_mask']
228
+ pos_inputs['neg_pixel_mask'] = neg_inputs['pixel_mask']
229
+ pos_inputs['neg_pixel_values'] = neg_inputs['pixel_values']
230
+ pos_inputs['neg_input_ids'] = neg_inputs['input_ids']
231
+ pos_inputs['use_neg'] = True
232
+ # else:
233
+ # neg_caption = "None."
234
+ # neg_inputs = processor(
235
+ # images=image,
236
+ # text=neg_caption,
237
+ # return_tensors="pt",
238
+ # padding=True
239
+ # )
240
+ # neg_inputs = {k: v.to(device) for k, v in neg_inputs.items()}
241
+ # neg_inputs['pixel_values'] = neg_inputs['pixel_values'].to(torch.bfloat16)
242
+
243
+ # # Add negative inputs to positive inputs dict
244
+ # pos_inputs['neg_token_type_ids'] = neg_inputs['token_type_ids']
245
+ # pos_inputs['neg_attention_mask'] = neg_inputs['attention_mask']
246
+ # pos_inputs['neg_pixel_mask'] = neg_inputs['pixel_mask']
247
+ # pos_inputs['neg_pixel_values'] = neg_inputs['pixel_values']
248
+ # pos_inputs['neg_input_ids'] = neg_inputs['input_ids']
249
+ # pos_inputs['use_neg'] = False
250
 
251
  # Run inference
252
  with torch.no_grad():