介紹
本文全部代碼地址
布隆過濾器是一種高效的數(shù)據(jù)結(jié)構(gòu),用于判斷一個元素是否存在于一個集合中.它的主要優(yōu)點是速度快,空間占用少,因此在需要快速判斷某個元素是否在集合中的場合得到廣泛引用.
布隆過濾器就是一個大型的位數(shù)組和幾個不一樣的無偏hash函數(shù).所謂無偏就是能夠把元素的hash值算的比較均勻.當(dāng)布隆過濾器說某個值存在時,這個值可能不存在;當(dāng)它說某個值不存在時,那就肯定不存在.
向布隆過濾器中添加key時,會使用多個hash函數(shù)對key進行hash算得一個整數(shù)索引值然后對應(yīng)位數(shù)數(shù)組長度進行取模運算得到一個位置,每個hash函數(shù)都會算得一個不同的位置.再把位數(shù)組的這幾個位置都置為1就完成了add操作.
向布隆過濾器詢問key是否存在時,跟add一樣,也會把hash的幾個位置都算出來,看看數(shù)組中這幾個位置是否都為1,只要有一個位為0,那么就說明布隆過濾器中這個key不存在.如果都是1,這并不能說明這個key就一定存在,只是極有可能存在,因為這些位置被置為1可能是因為其他的key存在所致.如果這個位數(shù)組長度比較大,存在概率就會很大,如果這個位數(shù)組長度比較小,存在的概率就會降低.
這種方法適用于數(shù)據(jù)命中不高、數(shù)據(jù)相對固定、實時性低(通常是數(shù)據(jù)集較大) 的應(yīng)用場景,代碼維護較為復(fù)雜,但是緩存空間占用很少.
實現(xiàn)
初始化數(shù)據(jù)
DROP TABLE IF EXISTS `user`;
CREATE TABLE `user` (
`id` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL,
`name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL,
`address` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL,
PRIMARY KEY (`id`) USING BTREE
) ENGINE = InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = Dynamic;
INSERT INTO `user` VALUES ('be079b29ddc111eda9b20242ac110003', '張三', '北京市海淀區(qū)xx街道123號');
INSERT INTO `user` VALUES ('be079b53ddc111eda9b20242ac110003', '李四', '上海市徐匯區(qū)xx路456號');
INSERT INTO `user` VALUES ('be079b95ddc111eda9b20242ac110003', '王五', '廣州市天河區(qū)xx街道789號');
INSERT INTO `user` VALUES ('be079ba4ddc111eda9b20242ac110003', '趙六', '深圳市南山區(qū)xx路321號');
INSERT INTO `user` VALUES ('be079bb8ddc111eda9b20242ac110003', '周七', '成都市高新區(qū)xx街道654號');
INSERT INTO `user` VALUES ('be079bc5ddc111eda9b20242ac110003', '黃八', '武漢市江漢區(qū)xx街道234號');
INSERT INTO `user` VALUES ('be079bd4ddc111eda9b20242ac110003', '羅九', '南京市秦淮區(qū)xx路567號');
INSERT INTO `user` VALUES ('be079be2ddc111eda9b20242ac110003', '錢十', '重慶市渝北區(qū)xx街道890號');
INSERT INTO `user` VALUES ('be079befddc111eda9b20242ac110003', '周十一', '長沙市岳麓區(qū)xx路432號');
INSERT INTO `user` VALUES ('be079bfbddc111eda9b20242ac110003', '吳十二', '西安市雁塔區(qū)xx街道765號');
代碼實現(xiàn)
這里只展示關(guān)于布隆過濾器的核心代碼
public class BloomFilterHelper<T> {
private int numHashFunctions;
private int bitSize;
private Funnel<T> funnel;
public BloomFilterHelper(Funnel<T> funnel, int expectedInsertions, double fpp) {
Preconditions.checkArgument(funnel != null, "funnel不能為空");
this.funnel = funnel;
// 計算bit數(shù)組長度
bitSize = optimalNumOfBits(expectedInsertions, fpp);
// 計算hash方法執(zhí)行次數(shù)
numHashFunctions = optimalNumOfHashFunctions(expectedInsertions, bitSize);
}
public int[] murmurHashOffset(T value) {
int[] offset = new int[numHashFunctions];
long hash64 = Hashing.murmur3_128().hashObject(value, funnel).asLong();
int hash1 = (int) hash64;
int hash2 = (int) (hash64 >>> 32);
for (int i = 1; i <= numHashFunctions; i++) {
int nextHash = hash1 + i * hash2;
if (nextHash < 0) {
nextHash = ~nextHash;
}
offset[i - 1] = nextHash % bitSize;
}
return offset;
}
/**
* 計算bit數(shù)組長度
*/
private int optimalNumOfBits(long n, double p) {
if (p == 0) {
// 設(shè)定最小期望長度
p = Double.MIN_VALUE;
}
return (int) (-n * Math.log(p) / (Math.log(2) * Math.log(2)));
}
/**
* 計算hash方法執(zhí)行次數(shù)
*/
private int optimalNumOfHashFunctions(long n, long m) {
return Math.max(1, (int) Math.round((double) m / n * Math.log(2)));
}
}
@Slf4j
@Configuration
public class BloomFilterConfig implements InitializingBean {
@Autowired
private StringRedisTemplate template;
@Autowired
private UserService userService;
public static final String BLOOM_REDIS_PREFIX = "bloom_user";
@Bean
public BloomFilterHelper<String> initBloomFilterHelper() {
return new BloomFilterHelper<>((Funnel<String>) (from, into) -> into.putString(from, Charsets.UTF_8)
.putString(from, Charsets.UTF_8), 1000000, 0.01);
}
/**
* 布隆過濾器bean注入
*
* @return
*/
@Bean
public BloomRedisService bloomRedisService() {
BloomRedisService bloomRedisService = new BloomRedisService();
bloomRedisService.setBloomFilterHelper(initBloomFilterHelper());
bloomRedisService.setRedisTemplate(template);
return bloomRedisService;
}
/**
* 初始化方法,將數(shù)據(jù)庫中的id加入到布隆過濾器
* 也可以不必實現(xiàn){@link InitializingBean}使用{@link javax.annotation.PostConstruct}注解
*
* @throws Exception
*/
@Override
public void afterPropertiesSet() throws Exception {
List<String> idList = userService.getAllUserId();
log.info("加載用戶id到布隆過濾器當(dāng)中,size:{}", idList.size());
if (!CollectionUtils.isEmpty(idList)) {
idList.forEach(item -> {
bloomRedisService().addByBloomFilter(BLOOM_REDIS_PREFIX, item);
});
}
}
}
public class BloomRedisService {
private StringRedisTemplate redisTemplate;
private BloomFilterHelper bloomFilterHelper;
public void setBloomFilterHelper(BloomFilterHelper bloomFilterHelper) {
this.bloomFilterHelper = bloomFilterHelper;
}
public void setRedisTemplate(StringRedisTemplate redisTemplate) {
this.redisTemplate = redisTemplate;
}
/**
* 根據(jù)給定的布隆過濾器添加值
*/
public <T> void addByBloomFilter(String key, T value) {
Preconditions.checkArgument(bloomFilterHelper != null, "bloomFilterHelper不能為空");
int[] offset = bloomFilterHelper.murmurHashOffset(value);
for (int i : offset) {
redisTemplate.opsForValue().setBit(key, i, true);
}
}
/**
* 根據(jù)給定的布隆過濾器判斷值是否存在
*/
public <T> boolean includeByBloomFilter(String key, T value) {
Preconditions.checkArgument(bloomFilterHelper != null, "bloomFilterHelper不能為空");
int[] offset = bloomFilterHelper.murmurHashOffset(value);
for (int i : offset) {
if (!redisTemplate.opsForValue().getBit(key, i)) {
return false;
}
}
return true;
}
}
@Configuration
public class InterceptorConfiguration implements WebMvcConfigurer {
@Override
public void addInterceptors(InterceptorRegistry registry) {
//注冊攔截器
registry.addInterceptor(authInterceptorHandler())
.addPathPatterns("/user/get/{id}");
}
@Bean
public BloomFilterInterceptor authInterceptorHandler(){
return new BloomFilterInterceptor();
}
}
@Slf4j
public class BloomFilterInterceptor implements HandlerInterceptor {
@Autowired
private BloomRedisService bloomRedisService;
@Override
public boolean preHandle(HttpServletRequest request, HttpServletResponse response, Object handler) throws Exception {
String currentUrl = request.getRequestURI();
PathMatcher matcher = new AntPathMatcher();
//解析出pathvariable
Map<String, String> pathVariable = matcher.extractUriTemplateVariables("/user/get/{id}", currentUrl);
//布隆過濾器存儲在redis中
String id = pathVariable.get("id");
if (bloomRedisService.includeByBloomFilter(BloomFilterConfig.BLOOM_REDIS_PREFIX, id)) {
log.info("{}極有可能存在,繼續(xù)向下執(zhí)行;", id);
return true;
}
/*
* 不在本地布隆過濾器當(dāng)中,直接返回驗證失敗
* 設(shè)置響應(yīng)頭
*/
log.info("{}不存在,直接返回失敗;", id);
response.setHeader(HttpHeaders.CONTENT_TYPE, MediaType.APPLICATION_JSON_VALUE);
response.setCharacterEncoding(StandardCharsets.UTF_8.toString());
response.setStatus(HttpStatus.NOT_FOUND.value());
Result res = new Result(HttpStatus.NOT_FOUND.value(), "用戶不存在!", null);
String result = new ObjectMapper().writeValueAsString(res);
response.getWriter().print(result);
return false;
}
}
測試
存在的數(shù)據(jù)
文章來源:http://www.zghlxwxcb.cn/news/detail-421097.html
不存在的數(shù)據(jù)
文章來源地址http://www.zghlxwxcb.cn/news/detail-421097.html
到了這里,關(guān)于布隆過濾器詳解的文章就介紹完了。如果您還想了解更多內(nèi)容,請在右上角搜索TOY模板網(wǎng)以前的文章或繼續(xù)瀏覽下面的相關(guān)文章,希望大家以后多多支持TOY模板網(wǎng)!