from skimage import img_as_uint
from skimage.filters import gaussian
from skimage.segmentation import clear_border
from scipy.ndimage import gaussian_filter
import gradio as gr
from transformers import DPTFeatureExtractor, DPTForDepthEstimation
import torch
import numpy as np
from PIL import Image
import open3d as o3d
from pathlib import Path
import os
import cv2
from rembg import remove
# Initialize the DPT-Large model and feature extractor for monocular depth
# estimation (weights are fetched from the Hugging Face hub on first run,
# so module import requires network access or a local cache).
feature_extractor = DPTFeatureExtractor.from_pretrained("Intel/dpt-large")
model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large")
def process_image(image_path, depth_map_path=None):
    """Produce a depth map and a 3D GLTF mesh from an input image.

    Parameters:
        image_path: filesystem path to the input RGB image (string from
            the Gradio ``filepath`` widget).
        depth_map_path: optional path to a user-supplied depth map, either
            a ``.npy`` array or a grayscale image file. When ``None`` or
            empty, depth is predicted with the DPT model instead.

    Returns:
        ``[depth_preview, gltf_path, gltf_path]`` — the blended depth map
        as a PIL image, plus the mesh path twice (once for the Model3D
        viewer, once for the file-download component).
    """
    image_path = Path(image_path)
    image_raw = Image.open(image_path).convert("RGB")
    # Resize to a fixed 2048-px width, preserving the aspect ratio.
    image = image_raw.resize(
        (2048, int(2048 * image_raw.size[1] / image_raw.size[0])),
        Image.Resampling.LANCZOS,
    )
    # Remove background using rembg; re-wrap as RGB (drops any alpha
    # channel rembg adds).
    foreground = remove(image_raw)
    foreground = Image.fromarray(np.array(foreground)).convert("RGB")
    # Check if a user-provided depth map is available.
    if depth_map_path:
        if depth_map_path.endswith('.npy'):
            depth_image = np.load(depth_map_path)
            # Invert depth only for npy files — presumably the stored
            # arrays use the opposite near/far convention and are already
            # scaled to [0, 1]; TODO confirm against their producer.
            depth_image = 1 - depth_image
        else:
            depth_image_raw = Image.open(depth_map_path).convert("L")
            depth_image = depth_image_raw.resize(image.size, Image.Resampling.NEAREST)
            depth_image = np.array(depth_image)
        # Normalize depth image to the [0, 1] range.
        # NOTE(review): a constant-valued depth map makes (max - min) zero
        # and yields NaNs here.
        depth_image = (depth_image - np.min(depth_image)) / (np.max(depth_image) - np.min(depth_image))
        depth_image = np.clip(depth_image, 0, 1)
        depth_image = img_as_uint(depth_image)  # Changed to uint16
    else:
        # Generate the depth map using the DPT model on the extracted
        # foreground.
        encoding = feature_extractor(foreground, return_tensors="pt")
        with torch.no_grad():
            outputs = model(**encoding)
            predicted_depth = outputs.predicted_depth
        # Upsample the raw prediction to the resized image dimensions;
        # PIL size is (w, h) while interpolate expects (h, w), hence [::-1].
        prediction = torch.nn.functional.interpolate(
            predicted_depth.unsqueeze(1),
            size=image.size[::-1],
            mode="bicubic",
            align_corners=False,
        ).squeeze()
        depth_image = prediction.cpu().numpy()
        # Normalize depth image to [0, 1] range before converting.
        depth_image = (depth_image - np.min(depth_image)) / (np.max(depth_image) - np.min(depth_image))
        depth_image = np.clip(depth_image, 0, 1)
        depth_image = img_as_uint(depth_image)  # Changed to uint16
    # Resize and apply a light Gaussian blur for smoother depth transitions.
    depth_image = cv2.resize(depth_image, (image.size[0], image.size[1]), interpolation=cv2.INTER_CUBIC)
    mask_blurred = cv2.GaussianBlur(depth_image, (3, 3), 0.8)  # Lighter blur for transition
    # Ensure matching size and a single channel before blending.
    if len(depth_image.shape) == 3:
        depth_image = cv2.cvtColor(depth_image, cv2.COLOR_BGR2GRAY)
    if len(mask_blurred.shape) == 3:
        mask_blurred = cv2.cvtColor(mask_blurred, cv2.COLOR_BGR2GRAY)
    mask_blurred_resized = cv2.resize(mask_blurred, (depth_image.shape[1], depth_image.shape[0]), interpolation=cv2.INTER_LINEAR)
    # Blend the depth map with its blurred copy (90/10) to soften edges.
    blended_depth_image = cv2.addWeighted(depth_image, 0.9, mask_blurred_resized, 0.1, 0)
    # Inpaint the background where the main subject was removed.
    # NOTE(review): background_inpainted is computed and passed on, but
    # create_3d_obj never reads its `background` parameter.
    background = remove(image_raw, only_mask=True)
    background = cv2.cvtColor(np.array(background), cv2.COLOR_GRAY2BGR)
    background_inpainted = cv2.inpaint(np.array(image_raw), background[:, :, 0], inpaintRadius=3, flags=cv2.INPAINT_TELEA)
    try:
        gltf_path = create_3d_obj(np.array(foreground), blended_depth_image, background_inpainted, image_path)
        img = Image.fromarray(blended_depth_image)
        return [img, gltf_path, gltf_path]
    except Exception as e:
        # Poisson reconstruction can fail at the default octree depth;
        # retry once with a shallower (depth=9) reconstruction.
        print(f"Error with default depth: {str(e)}. Retrying with a shallower depth.")
        gltf_path = create_3d_obj(np.array(foreground), blended_depth_image, background_inpainted, image_path, depth=9)
        img = Image.fromarray(blended_depth_image)
        return [img, gltf_path, gltf_path]
import cv2 # Make sure OpenCV is installed
def create_3d_obj(foreground, depth_image, background, image_path, depth=10):
    """Reconstruct a GLTF mesh from a color image and its depth map.

    The depth map is resized to match the color image, fused into an RGBD
    pair, back-projected into a point cloud through a pinhole camera model,
    meshed with Poisson surface reconstruction, simplified, smoothed,
    cropped, and written to ``./<image stem>.gltf``.

    Parameters:
        foreground: RGB image array of shape (H, W, 3).
        depth_image: 2D depth array; cast to uint16 for Open3D.
        background: accepted for interface compatibility but not used
            by this function.
        image_path: pathlib.Path of the source image; its stem names the
            output file.
        depth: base octree depth for Poisson reconstruction.

    Returns:
        Path string of the written ``.gltf`` file.

    Raises:
        ValueError: if the depth array is not a valid 2D image.
        RuntimeError: if the point cloud is too sparse or normals cannot
            be estimated.
    """
    if depth_image.ndim != 2:
        raise ValueError("Depth image should be a 2D array, but got: {}".format(depth_image.shape))
    height_px, width_px = foreground.shape[0], foreground.shape[1]
    # Open3D wants a 16-bit depth buffer at the color image's resolution.
    depth_u16 = depth_image.astype(np.uint16)
    depth_resized = cv2.resize(depth_u16, (width_px, height_px), interpolation=cv2.INTER_LINEAR)
    if depth_resized.ndim != 2 or depth_resized.shape[0] == 0 or depth_resized.shape[1] == 0:
        raise ValueError(f"Resized depth image is not valid. Shape: {depth_resized.shape}")
    depth_o3d = o3d.geometry.Image(depth_resized)
    color_o3d = o3d.geometry.Image(foreground)
    rgbd = o3d.geometry.RGBDImage.create_from_color_and_depth(
        color_o3d, depth_o3d, convert_rgb_to_intensity=False
    )
    # Pinhole camera: 1500-px focal length, principal point at the center.
    intrinsics = o3d.camera.PinholeCameraIntrinsic()
    intrinsics.set_intrinsics(width_px, height_px, 1500, 1500, width_px / 2, height_px / 2)
    # Back-project the RGBD pair into a colored point cloud.
    pcd = o3d.geometry.PointCloud.create_from_rgbd_image(rgbd, intrinsics)
    if len(pcd.points) < 100:
        raise RuntimeError("Insufficient points in the point cloud for normals estimation.")
    # Poisson reconstruction needs oriented normals on every point.
    if not pcd.has_normals():
        pcd.estimate_normals(search_param=o3d.geometry.KDTreeSearchParamHybrid(radius=0.25, max_nn=50))
    if not pcd.has_normals():
        raise RuntimeError("Failed to estimate normals for the point cloud.")
    pcd.orient_normals_towards_camera_location(camera_location=np.array([0.0, 0.0, 1500.0]))
    # Deepen the octree for larger images so fine detail survives.
    poisson_depth = depth + int(min(width_px, height_px) / 500)
    raw_mesh, _densities = o3d.geometry.TriangleMesh.create_from_point_cloud_poisson(
        pcd, depth=poisson_depth, width=0, scale=1.2, linear_fit=True
    )
    # Simplify via vertex clustering at ~1/400 of the bounding-box extent,
    # then apply a few passes of simple Laplacian-style smoothing.
    cluster_size = max(raw_mesh.get_max_bound() - raw_mesh.get_min_bound()) / 400
    simplified = raw_mesh.simplify_vertex_clustering(
        voxel_size=cluster_size,
        contraction=o3d.geometry.SimplificationContraction.Average,
    )
    smoothed = simplified.filter_smooth_simple(number_of_iterations=3)
    # Trim the mesh back to the extent of the source points.
    cropped = smoothed.crop(pcd.get_axis_aligned_bounding_box())
    # Write the result next to the working directory, named after the image.
    gltf_path = f"./{image_path.stem}.gltf"
    o3d.io.write_triangle_mesh(gltf_path, cropped, write_triangle_uvs=True)
    return gltf_path
# Gradio Interface
title = "Depth Estimation & 3D Reconstruction Demo"
description = "Upload an image and optionally a depth map (in .npy or image format) to generate a 3D model. If no depth map is provided, the DPT model will generate it."
# One example row per file in examples/ — NOTE(review): each row supplies
# only the image input; the optional depth-map input is left unset, and a
# missing examples/ directory raises at import time.
examples = [["examples/" + img] for img in os.listdir("examples/")]
iface = gr.Interface(
    fn=process_image,
    inputs=[
        gr.Image(type="filepath", label="Input Image"),
        gr.File(type="filepath", label="Input Depth Map (optional)"),
    ],
    outputs=[
        # Depth-map preview, interactive 3D viewer, and raw GLTF download —
        # matching the three elements process_image returns.
        gr.Image(label="Predicted Depth", type="pil"),
        gr.Model3D(label="3D Mesh Reconstruction", clear_color=[1.0, 1.0, 1.0, 1.0]),
        gr.File(label="3D gLTF File"),
    ],
    title=title,
    description=description,
    examples=examples,
    allow_flagging="never",
    cache_examples=False,
)
# share=True opens a public tunnel; show_api exposes the REST endpoint.
iface.launch(debug=True, show_api=True, share=True)