1. git clone
git clone https://github.com/IDEA-Research/GroundingDINO.git
也可手动下载:https://github.com/IDEA-Research/GroundingDINO
2. 安装所需的依赖项
pip install -e .
我用该方法老是报错或者直接卡掉,选用第二种方法:
python setup.py install
3. 下载预训练权重
wget https://github.com/IDEA-Research/GroundingDINO/releases/download/v0.1.0-alpha/groundingdino_swint_ogc.pth
4. 创建一个新的python文件
命名为:grounding_dino_demo.py 放在GroundingDINO目录下,其代码如下:
from groundingdino.util.inference import load_model, load_image, predict, annotate, Model
import cv2
CONFIG_PATH = "groundingdino/config/GroundingDINO_SwinT_OGC.py" #源码自带的配置文件
CHECKPOINT_PATH = "./weights/groundingdino_swint_ogc.pth" #下载的权重文件
DEVICE = "cuda" #可以选择cpu/cuda
IMAGE_PATH = "/home/code/wll/code/GroundingDINO-main/asset/cat_dog.jpeg" #用户设置的需要读取image的路径
TEXT_PROMPT = "dog" #给出的文本提示
BOX_TRESHOLD = 0.35 #源码给定的边界框判定阈值
TEXT_TRESHOLD = 0.25 #源码给定的文本端获取关键属性阈值
image_source, image = load_image(IMAGE_PATH)
model = load_model(CONFIG_PATH, CHECKPOINT_PATH)
boxes, logits, phrases = predict(
model=model,
image=image,
caption=TEXT_PROMPT,
box_threshold=BOX_TRESHOLD,
text_threshold=TEXT_TRESHOLD,
device=DEVICE,
)
annotated_frame = annotate(image_source=image_source, boxes=boxes, logits=logits, phrases=phrases)
cv2.imwrite("./result_image/dog.jpg", annotated_frame)
5. 运行grounding_dino_demo.py
会报错:
原因:huggingface 貌似被墙了,所以不能在线下载bert预训练模型,只能离线下载
解决:
(1)下载betr-base-uncased模型
需下载的文件如下:
(2)修改groundingdino/util/get_tokenlizer.py
from transformers import AutoTokenizer, BertModel, BertTokenizer, RobertaModel, RobertaTokenizerFast
import os
# 获取指定文本编码器类型的分词器
def get_tokenlizer(text_encoder_type, local_files_only=True):
# 检查text_encoder_type是否为字符串
if not isinstance(text_encoder_type, str):
# 如果不是字符串,尝试从对象中获取text_encoder_type属性
if hasattr(text_encoder_type, "text_encoder_type"):
text_encoder_type = text_encoder_type.text_encoder_type
# 如果字典中存在"text_encoder_type"键,则从字典中获取
elif text_encoder_type.get("text_encoder_type", False):
text_encoder_type = text_encoder_type.get("text_encoder_type")
# 如果是目录且存在,则保持不变
elif os.path.isdir(text_encoder_type) and os.path.exists(text_encoder_type):
pass
else:
# 如果无法确定text_encoder_type类型,则引发错误
raise ValueError(
"Unknown type of text_encoder_type: {}".format(type(text_encoder_type))
)
print("final text_encoder_type: {}".format(text_encoder_type))
# 使用transformers库中的AutoTokenizer根据text_encoder_type加载分词器
tokenizer = AutoTokenizer.from_pretrained(text_encoder_type, local_files_only=local_files_only)
return tokenizer
# 获取指定预训练语言模型的模型实例
def get_pretrained_language_model(text_encoder_type, local_files_only=True):
# 根据text_encoder_type选择合适的预训练语言模型
if text_encoder_type == "bert-base-uncased" or (
os.path.isdir(text_encoder_type) and os.path.exists(text_encoder_type)):
# 如果是BERT模型,则使用BertModel加载
return BertModel.from_pretrained(text_encoder_type, local_files_only=local_files_only)
elif text_encoder_type == "roberta-base":
# 如果是RoBERTa模型,则使用RobertaModel加载
return RobertaModel.from_pretrained(text_encoder_type, local_files_only=local_files_only)
# 如果text_encoder_type不是已知的模型类型,则引发错误
raise ValueError("Unknown text_encoder_type {}".format(text_encoder_type))
再次运行grounding_dino_demo.py,得到如下检测效果: