| | |
| | import os |
| | import glob |
| | import json |
| | import tqdm |
| | import natsort |
| | import random |
| |
|
| | from PIL import Image |
| |
|
| | import numpy as np |
| |
|
| | import torch |
| | from torch.utils.data import Dataset, DataLoader |
| |
|
| | import clip |
| |
|
| | from torchvision import models |
| |
|
| | from config import config |
| |
|
| |
|
class loading_img(Dataset):
    """Dataset that yields one transformed image per file path.

    Args:
        img_list: Sequence of image file paths; item ``idx`` is read from
            ``img_list[idx]``.
        transform: Optional callable applied to the opened PIL image. When
            omitted, the module-level ``preprocess`` callable is looked up at
            item-access time (the behaviour this class had before the
            parameter existed). The callable must finish reading the image
            before it returns, because the file is closed right afterwards.
    """

    def __init__(self, img_list, transform=None):
        self.img_list = img_list
        self.transform = transform

    def __len__(self):
        """Return the number of image paths held by the dataset."""
        return len(self.img_list)

    def __getitem__(self, idx):
        """Open the image at position ``idx`` and return its transformed form."""
        # Resolve the fallback lazily so the global only has to exist once
        # items are actually requested, not when the dataset is constructed.
        transform = preprocess if self.transform is None else self.transform
        # The context manager releases the file handle deterministically
        # instead of leaving it to garbage collection.
        with Image.open(self.img_list[idx]) as img:
            return transform(img)
| |
|
| |
|
| | |
def _mean_abs_distance_matrix(featuremap):
    """Return the square matrix of mean absolute differences between rows.

    Row ``i`` holds, for every row ``j`` of ``featuremap``, the mean over the
    last dimension of ``|featuremap[j] - featuremap[i]|``.
    """
    frame_num = featuremap.shape[0]
    # Built one row at a time: broadcasting every pair at once would
    # materialise a (frames, frames, features) tensor.
    # abs(x) replaces sqrt(x ** 2): same value, without squaring first.
    rows = [torch.mean(torch.abs(featuremap - feat), dim=-1) for feat in featuremap]
    return torch.concat(rows).reshape(frame_num, frame_num)


def _group_frames(dist, divlam, num_picks):
    """Greedily group frame indices by distance to successive anchor frames.

    Returns ``(picked, groups)``: ``groups`` holds one list of frame indices
    per iteration (anchor last); ``picked`` is the concatenation of each
    group, sorted and randomly sub-sampled to at most ``num_picks`` entries.
    """
    remaining = list(range(dist.shape[0]))
    picked = []
    groups = []
    step = 0

    # Stops once five or fewer frames are left; those frames are not emitted.
    while len(remaining) > 5:
        anchor = remaining.pop(0)

        weights = dist[anchor, remaining].softmax(dim=-1)
        mu, std = torch.mean(weights), torch.std(weights)
        # The std multiplier exp(1 - step / divlam) shrinks as step grows, so
        # the cut-off moves towards the mean on later iterations.
        threshold = mu - std * np.exp(1 - step / divlam)
        positions = torch.where(weights < threshold)[0].tolist()

        group = [remaining[pos] for pos in positions]
        group.append(anchor)
        groups.append(group)

        if len(group) > num_picks:
            chosen = sorted(random.sample(group, num_picks))
        else:
            chosen = sorted(group)
        picked.extend(chosen)

        step += 1

        # Frames assigned to this group are not considered again.
        taken = set(positions)
        remaining = [idx for pos, idx in enumerate(remaining) if pos not in taken]

    return picked, groups


def select_frames(folder_list, preprocess, resnet18_pretrained, *,
                  batch_size=64, num_workers=16, num_picks=18):
    """Select representative frames from a folder of ``*.jpg`` images.

    The images of a folder are naturally sorted, loaded through
    ``loading_img``, embedded with ``resnet18_pretrained`` and grouped by
    mean absolute feature distance to successive anchor frames.

    Args:
        folder_list: Iterable of folder paths. The visible caller passes the
            result of a glob on one directory, i.e. zero or one folder. If
            several are given, results are reset per folder and only those of
            the last folder are returned.
        preprocess: Kept for interface compatibility; this function does not
            read it (``loading_img`` is constructed from the path list only).
        resnet18_pretrained: Callable mapping the stacked image batch to one
            feature row per frame; assumed to return a 2-D
            ``(frames, features)`` tensor.
        batch_size: Batch size used when loading the images.
        num_workers: Number of ``DataLoader`` worker processes.
        num_picks: Maximum number of frames kept per group; larger groups are
            randomly sub-sampled with the ``random`` module.

    Returns:
        ``(out_frames, output_results)``: ``out_frames`` is a list of image
        paths (``str``); ``output_results`` is a list of groups, each a list
        of ``int`` indices into the sorted image list with the anchor index
        last. Both are empty when ``folder_list`` is empty or the folder
        contains no ``*.jpg`` file.
    """
    out_frames, output_results = [], []

    for folder in folder_list:
        out_frames, output_results = [], []

        img_list = natsort.natsorted(glob.glob(f"{folder}/*.jpg"))
        if not img_list:
            # Nothing to embed; torch.concat would raise on an empty list.
            continue

        img_loader = DataLoader(
            loading_img(img_list),
            batch_size=batch_size,
            shuffle=False,
            num_workers=num_workers,
        )
        # All frames are embedded in one forward pass, as before.
        img_feats = torch.concat(list(img_loader), dim=0).to(device)

        with torch.no_grad():
            featuremap = resnet18_pretrained(img_feats)
            dist = _mean_abs_distance_matrix(featuremap)

        picked, output_results = _group_frames(dist, config.divlam, num_picks)
        out_frames = [img_list[idx] for idx in picked]

    return out_frames, output_results
| |
|
| |
|
| | |
def _load_jsonl(path):
    """Read a JSON-lines file and return its decoded records as a list."""
    with open(os.path.expanduser(path), "r") as handle:
        # Blank lines (e.g. a trailing newline) carry no record; skip them.
        return [json.loads(line) for line in handle if line.strip()]


# Fixed seed so the random sub-sampling in select_frames is repeatable.
random.seed(10)

device = "cuda" if torch.cuda.is_available() else "cpu"

# ResNet-18 with its pooling layer and classifier replaced by identity
# modules, switched to eval mode for inference.
resnet18_pretrained = models.resnet18(pretrained=True).to(device)
resnet18_pretrained.fc = torch.nn.Identity()
resnet18_pretrained.avgpool = torch.nn.Identity()
resnet18_pretrained.eval()

# `preprocess` is the image transform read by loading_img.__getitem__.
model, preprocess = clip.load("ViT-B/32", device=device)

# Both inputs are JSON-lines files: one JSON record per line.
questions = _load_jsonl(config.q_path)
objs_acts = _load_jsonl(config.f_path)

answer_path = os.path.expanduser(config.a_path)
answer_dir = os.path.dirname(answer_path)
if answer_dir:
    # A bare file name has no directory part; os.makedirs("") would raise.
    os.makedirs(answer_dir, exist_ok=True)
# Left open on purpose: the question loop below writes one line per record.
ans_file = open(answer_path, "w")
| |
|
# One entry per answered question: the index groups returned by select_frames.
output_results = []
for question in tqdm.tqdm(questions):
    # Stays True when no objs_acts record shares this question's q_uid.
    test_token = True

    for objs_act in objs_acts:
        if objs_act['q_uid'] == question['q_uid']:
            # NOTE(review): 'Object' is filled from the "Activity" field, the
            # same value as 'Activity' -- confirm whether objs_act["Object"]
            # was intended.
            question['Object'] = question['Activity'] = objs_act["Activity"]

            # The glob acts as an existence check for this question's frame
            # directory: it yields the directory path or an empty list.
            frame_dir_pattern = f"{config.img_folder}/{question['q_uid']}/"
            folder_list = glob.glob(frame_dir_pattern)
            out_frames, output_result = select_frames(
                folder_list, preprocess, resnet18_pretrained
            )
            output_results.append(output_result)
            question['filepath'] = out_frames

            # One JSON record per line in the answer file.
            ans_file.write(f"{json.dumps(question)}\n")
            test_token = False
            # Only the first matching objs_acts record is used.
            break