Bootstrap

【布隆过滤器】的三种实现方式


Alt

1. Redis+自己实现

  • 引入依赖
        <dependency>
            <groupId>redis.clients</groupId>
            <artifactId>jedis</artifactId>
            <version>3.6.3</version>
        </dependency>

        <!--引入Redis-->
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-redis</artifactId>
            <version>1.4.2.RELEASE</version>
        </dependency>

  • 添加配置
@Configuration
@PropertySource("classpath:application.properties")
public class RedisConfig {
    @Value("${redis.host}")
    private String host;

    @Value("${redis.port}")
    private int port;

    @Value("${redis.password}")
    private String passWord;

    @Value("${redis.timeout}")
    private int timeout;

    @Value("${redis.maxIdle}")
    private int maxIdle;

    @Value("${redis.maxWaitMillis}")
    private int maxWaitMillis;

    @Value("${redis.blockWhenExhausted}")
    private Boolean blockWhenExhausted;

    @Value("${redis.JmxEnabled}")
    private Boolean JmxEnabled;

    @Bean
    public JedisPool jedisPoolFactory() {
        JedisPoolConfig jedisPoolConfig = new JedisPoolConfig();
        jedisPoolConfig.setMaxIdle(maxIdle);
        jedisPoolConfig.setMaxWaitMillis(maxWaitMillis);
        // 连接耗尽时是否阻塞, false报异常,true阻塞直到超时, 默认true
        jedisPoolConfig.setBlockWhenExhausted(blockWhenExhausted);
        // 是否启用pool的jmx管理功能, 默认true
        jedisPoolConfig.setJmxEnabled(JmxEnabled);
        JedisPool jedisPool = new JedisPool(jedisPoolConfig, host, port, timeout,passWord);
        return jedisPool;
    }
}
  • 布隆过滤器工具类
/*仿Google的布隆过滤器实现,基于redis支持分布式*/
public class RedisBloomFilter {

    public final static String RS_BF_NS = "rbf:";
    private int numApproxElements; /*预估元素数量*/
    private double fpp; /*可接受的最大误差*/
    private int numHashFunctions; /*自动计算的hash函数个数*/
    private int bitmapLength; /*自动计算的最优Bitmap长度*/

    @Autowired
    private JedisPool jedisPool;

    /**
     * 构造布隆过滤器
     * @param numApproxElements 预估元素数量
     * @param fpp 可接受的最大误差
     * @return
     */
    public RedisBloomFilter init(int numApproxElements,double fpp){
        this.numApproxElements = numApproxElements;
        this.fpp = fpp;
        /*位数组的长度*/
        this.bitmapLength = (int) (-numApproxElements*Math.log(fpp)/(Math.log(2)*Math.log(2)));
        //this.bitmapLength=128;
        /*算hash函数个数*/
        this.numHashFunctions = Math.max(1, (int) Math.round((double) bitmapLength / numApproxElements * Math.log(2)));
        //this.numHashFunctions=2;
        return  this;
    }

    /**
     * 计算一个元素值哈希后映射到Bitmap的哪些bit上
     * 用两个hash函数来模拟多个hash函数的情况
     *     * @param element 元素值
     * @return bit下标的数组
     */
    private long[] getBitIndices(String element){
        long[] indices = new long[numHashFunctions];
        /*会把传入的字符串转为一个128位的hash值,并且转化为一个byte数组*/
        byte[] bytes = Hashing.murmur3_128().
                hashObject(element, Funnels.stringFunnel(Charset.forName("UTF-8"))).
                asBytes();

        long hash1 = Longs.fromBytes(bytes[7],bytes[6],bytes[5],bytes[4],bytes[3],bytes[2],bytes[1],bytes[0]);
        long hash2 = Longs.fromBytes(bytes[15],bytes[14],bytes[13],bytes[12],bytes[11],bytes[10],bytes[9],bytes[8]);

        /*用这两个hash值来模拟多个函数产生的值*/
        long combinedHash = hash1;
        for(int i=0;i<numHashFunctions;i++){
            //数组下标
            indices[i]=(combinedHash&Long.MAX_VALUE) % bitmapLength;
            combinedHash = combinedHash + hash2;
        }

        System.out.print(element+"数组下标");
        for(long index:indices){
            System.out.print(index+",");
        }
        System.out.println(" ");
        return indices;
    }

    /**
     * 插入元素
     *
     * @param key       原始Redis键,会自动加上前缀
     * @param element   元素值,字符串类型
     * @param expireSec 过期时间(秒)
     */
    public void insert(String key, String element, int expireSec) {
        if (key == null || element == null) {
            throw new RuntimeException("键值均不能为空");
        }
        String actualKey = RS_BF_NS.concat(key);

        try (Jedis jedis = jedisPool.getResource()) {
            try (Pipeline pipeline = jedis.pipelined()) {
                for (long index : getBitIndices(element)) {
                    pipeline.setbit(actualKey, index, true);
                }
                pipeline.syncAndReturnAll();
            } catch (Exception ex) {
                ex.printStackTrace();
            }
            jedis.expire(actualKey, expireSec);
        }
    }

    /**
     * 检查元素在集合中是否(可能)存在
     *
     * @param key     原始Redis键,会自动加上前缀
     * @param element 元素值,字符串类型
     */
    public boolean mayExist(String key, String element) {
        if (key == null || element == null) {
            throw new RuntimeException("键值均不能为空");
        }
        String actualKey = RS_BF_NS.concat(key);
        boolean result = false;

        try (Jedis jedis = jedisPool.getResource()) {
            try (Pipeline pipeline = jedis.pipelined()) {
                for (long index : getBitIndices(element)) {
                    pipeline.getbit(actualKey, index);
                }
                result = !pipeline.syncAndReturnAll().contains(false);
            } catch (Exception ex) {
                ex.printStackTrace();
            }
        }
        return result;
    }

    @Override
    public String toString() {
        return "RedisBloomFilter{" +
                "numApproxElements=" + numApproxElements +
                ", fpp=" + fpp +
                ", numHashFunctions=" + numHashFunctions +
                ", bitmapLength=" + bitmapLength +
                '}';
    }
}

  • 使用
    @Autowired
    private RedisBloomFilter redisBloomFilter;
    //时效一天
	private static final int DAY_SEC = 60 * 60 * 24;
    
    @Test
    public void testInsert() throws Exception {
        redisBloomFilter.insert("bloom:name", "ybh", DAY_SEC);
    }


	@Test
    public void testMayExist() throws Exception {
        System.out.println(redisBloomFilter.mayExist("bloom:name", "ybh"));
    }

2. Redis+Redisson

  • 引入依赖
        <dependency>
            <groupId>org.redisson</groupId>
            <artifactId>redisson</artifactId>
            <version>3.12.3</version>
        </dependency>
  • 使用
public static void main(String[] args) {
        Config config = new Config();
        config.useSingleServer().setAddress("redis://127.0.0.1:6379");

        //构造Redisson
        RedissonClient redisson = Redisson.create(config);

        RBloomFilter<String> bloomFilter = redisson.getBloomFilter("phoneList");
        //初始化布隆过滤器:预计元素为100000000L,误差率为3%
        bloomFilter.tryInit(100000000L,0.03);
        //将号码10081~10086插入到布隆过滤器中
        bloomFilter.add("10081");
        bloomFilter.add("10082");
        bloomFilter.add("10083");
        bloomFilter.add("10084");
        bloomFilter.add("10085");
        bloomFilter.add("10086");

        //判断下面号码是否在布隆过滤器中
        System.out.println("123456:BF--"+bloomFilter.contains("123456"));//false
        System.out.println("10086:BF--"+bloomFilter.contains("10086"));//true
        System.out.println("10084:BF--"+bloomFilter.contains("10084"));//true

    }

3. 单机实现

  • 引入依赖

Guava 是一套来自 Google 的核心 Java 库,其中包括新的集合类型(如 multimap 和 multiset)、不可变的集合、图库,以及并发、I/O、散列、缓存、基元、字符串等实用工具!它被广泛用于 Google 内部的大多数 Java 项目,也被许多其他公司广泛使用。它被广泛用于 Google 内部的大多数 Java 项目,也被许多其他公司广泛使用。

	    <dependency>
            <groupId>com.google.guava</groupId>
            <artifactId>guava</artifactId>
            <version>30.1.1-jre</version>
        </dependency>
  • 使用

/*单机下无Redis的布隆过滤器:使用Google的Guava的BloomFilter*/
public class GuavaBF {
    public static void main(String[] args) {
        long expectedInsertions = 100000;
        double fpp = 0.00005;

        BloomFilter<String> bloomFilter = BloomFilter.create(Funnels.stringFunnel(Charsets.UTF_8), expectedInsertions, fpp);

        bloomFilter.put("10081");
        bloomFilter.put("10082");
        bloomFilter.put("10083");
        bloomFilter.put("10084");
        bloomFilter.put("10085");
        bloomFilter.put("10086");

        System.out.println("123456:BF--"+bloomFilter.mightContain("123456"));//false
        System.out.println("10086:BF--"+bloomFilter.mightContain("10086"));//true
        System.out.println("10084:BF--"+bloomFilter.mightContain("10084"));//true

    }
}
;