問題描述
- 情況1:FineBI導(dǎo)入表名中文亂碼,字段內(nèi)容正常
- 情況2:FineBI導(dǎo)入表字段中文亂碼,表名內(nèi)容正常
情況一的解決
- 使用navcat等工具連接node1 mysql數(shù)據(jù)庫,執(zhí)行下列代碼,修改相關(guān)字符集格式
- 執(zhí)行的時(shí)機(jī)準(zhǔn)備數(shù)據(jù)表階段和清洗數(shù)據(jù)階段都可,需在完成需求生成結(jié)果表之前
-- 在Hive的MySQL元數(shù)據(jù)庫中執(zhí)行
use hive;
-- 1.修改字段注釋字符集
alter table COLUMNS_V2 modify column COMMENT varchar(256) character set utf8;
-- 2.修改表注釋字符集
alter table TABLE_PARAMS modify column PARAM_VALUE varchar(4000) character set utf8;
-- 3.修改分區(qū)表參數(shù),以支持分區(qū)鍵能夠用中文表示
alter table PARTITION_PARAMS modify column PARAM_VALUE varchar(4000) character set utf8;
alter table PARTITION_KEYS modify column PKEY_COMMENT varchar(4000) character set utf8;
-- 4.修改索引注解
alter table INDEX_PARAMS modify column PARAM_VALUE varchar(4000) character set utf8;
情況二的解決
- 如果出現(xiàn)字段中文亂碼,但是通過dataGrip等工具查看表數(shù)據(jù)中文正常顯示,那么就是FineBI連接hive時(shí)設(shè)置編碼utf-8導(dǎo)致出現(xiàn)的問題!
-
設(shè)置連接信息編碼為自動(dòng)即可
ETL數(shù)據(jù)清洗知識
- ETL:
- E,Extract,抽取
- T,Transform,轉(zhuǎn)換
- L,Load,加載
- 從A抽取數(shù)據(jù)(E),進(jìn)行數(shù)據(jù)轉(zhuǎn)換過濾(T),將結(jié)果加載到B(L),就是ETL
- 針對大數(shù)據(jù)中的TEL數(shù)據(jù)清洗,可以利用分布式計(jì)算框架、并行處理、數(shù)據(jù)采樣、數(shù)據(jù)質(zhì)量檢查等方法,確保數(shù)據(jù)的質(zhì)量和準(zhǔn)確性。為了滿足實(shí)時(shí)需求,還可以使用流式處理框架。重要的是根據(jù)具體的需求和數(shù)據(jù)特點(diǎn)選擇合適的方法和技術(shù)。
社交案例參考代碼
-- 創(chuàng)建數(shù)據(jù)庫
create database db_msg;
-- 選擇數(shù)據(jù)庫
use db_msg;
-- 如果表已存在就刪除
drop table if exists db_msg.tb_msg_source ;
-- 建表
create table db_msg.tb_msg_source(
msg_time string comment "消息發(fā)送時(shí)間",
sender_name string comment "發(fā)送人昵稱",
sender_account string comment "發(fā)送人賬號",
sender_sex string comment "發(fā)送人性別",
sender_ip string comment "發(fā)送人ip地址",
sender_os string comment "發(fā)送人操作系統(tǒng)",
sender_phonetype string comment "發(fā)送人手機(jī)型號",
sender_network string comment "發(fā)送人網(wǎng)絡(luò)類型",
sender_gps string comment "發(fā)送人的GPS定位",
receiver_name string comment "接收人昵稱",
receiver_ip string comment "接收人IP",
receiver_account string comment "接收人賬號",
receiver_os string comment "接收人操作系統(tǒng)",
receiver_phonetype string comment "接收人手機(jī)型號",
receiver_network string comment "接收人網(wǎng)絡(luò)類型",
receiver_gps string comment "接收人的GPS定位",
receiver_sex string comment "接收人性別",
msg_type string comment "消息類型",
distance string comment "雙方距離",
message string comment "消息內(nèi)容"
);
-- 上傳數(shù)據(jù)到HDFS(Linux命令)
--hadoop fs -mkdir -p /chatdemo/data
--hadoop fs -put chat_data-30W.csv /chatdemo/data/
-- 加載數(shù)據(jù)到表中,基于HDFS加載
load data inpath '/chatdemo/data/chat_data-30W.csv' into table tb_msg_source;
-- 驗(yàn)證數(shù)據(jù)加載
select * from tb_msg_source tablesample(100 rows);
-- 驗(yàn)證一下表的數(shù)量
select count(*) from tb_msg_source;
--問題1:當(dāng)前數(shù)據(jù)中,有一些數(shù)據(jù)的字段為空,不是合法數(shù)據(jù)
select *
from tb_msg_source
where length(sender_gps)=0;
--問題2∶需求中,需要統(tǒng)計(jì)每天、每個(gè)小時(shí)的消息量,但是數(shù)據(jù)中沒有天和小時(shí)字段,只有整體時(shí)間字段,不好處理
select msg_time from tb_msg_source limit 10;
--問題3:需求中,需要對經(jīng)度和維度構(gòu)建地區(qū)的可視化地圖,但是數(shù)據(jù)中GPS經(jīng)緯度為一個(gè)字段,不好處理
select sender_gps from tb_msg_source limit 10;
--需求
--需求1:對字段為空的不合法數(shù)據(jù)進(jìn)行過濾 where
--需求2:通過時(shí)間字段構(gòu)建天和小時(shí)字段 date hour
--需求3:從GPS的經(jīng)緯度中提取經(jīng)度和緯度 split()
--需求4:將ETL以后的結(jié)果保存在一張新的Hive表中
drop table if exists db_msg.tb_msg_etl;
--ETL清洗轉(zhuǎn)換(Extract 抽取, Transform 轉(zhuǎn)換,Load 加載)
create table db_msg.tb_msg_etl(
msg_time string comment "消息發(fā)送時(shí)間",
sender_name string comment "發(fā)送人昵稱",
sender_account string comment "發(fā)送人賬號",
sender_sex string comment "發(fā)送人性別",
sender_ip string comment "發(fā)送人ip地址",
sender_os string comment "發(fā)送人操作系統(tǒng)",
sender_phonetype string comment "發(fā)送人手機(jī)型號",
sender_network string comment "發(fā)送人網(wǎng)絡(luò)類型",
sender_gps string comment "發(fā)送人的GPS定位",
receiver_name string comment "接收人昵稱",
receiver_ip string comment "接收人IP",
receiver_account string comment "接收人賬號",
receiver_os string comment "接收人操作系統(tǒng)",
receiver_phonetype string comment "接收人手機(jī)型號",
receiver_network string comment "接收人網(wǎng)絡(luò)類型",
receiver_gps string comment "接收人的GPS定位",
receiver_sex string comment "接收人性別",
msg_type string comment "消息類型",
distance string comment "雙方距離",
message string comment "消息內(nèi)容",
msg_day string comment "消息日",
msg_hour string comment "消息小時(shí)",
sender_lng double comment "經(jīng)度",
sender_lat double comment "緯度"
);
INSERT OVERWRITE TABLE db_msg.tb_msg_etl
SELECT
*,
DATE(msg_time) AS msg_day,
HOUR(msg_time) AS msg_hour,
SPLIT(sender_gps, ',')[0] AS sender_lng,
SPLIT(sender_gps, ',')[1] AS sender_lat
FROM db_msg.tb_msg_source
WHERE LENGTH(sender_gps) > 0;
--需求
-- 1.統(tǒng)計(jì)今日總消息量
create table if not exists tb_rs_total_msg_cnt
comment '每日消總量' AS
select msg_day,count(*) AS total_msg_cnt
from tb_msg_etl group by msg_day;
-- 2.統(tǒng)計(jì)今日每小時(shí)消息量、發(fā)送和接收用戶數(shù)
create table if not exists tb_rs_hours_msg_cnt
comment "每小時(shí)消息量趨勢" AS
select
msg_hour,
count(*) as total_msg_cnt,
count(DISTINCT sender_account) as sender_usr_cnt,
count(DISTINCT receiver_account) as receiver_usr_cnt
from tb_msg_etl group by msg_hour;
-- 3.統(tǒng)計(jì)今日各地區(qū)發(fā)送消息數(shù)據(jù)量
create table if not exists tb_rs_loc_cnt
comment "今日各地區(qū)發(fā)送消息總量" AS
select
msg_day,
sender_lng,
sender_lat,
count(*) as total_msg_cnt
from tb_msg_etl
group by msg_day,sender_lng,sender_lat;
-- 4.統(tǒng)計(jì)今日發(fā)送消息和接收消息的用戶數(shù)
create table if not exists tb_rs_usr_cnt
comment "今日發(fā)送消息和接收消息的用戶數(shù)" AS
select
msg_day,
count(distinct sender_account) as sender_user_cnt,
count(distinct receiver_account) as receiver_user_cnt
from tb_msg_etl
group by msg_day;
-- 5.統(tǒng)計(jì)今日發(fā)送消息最多的Top10用戶
create table if not exists tb_rs_user_sender_msg_top10
comment "今日發(fā)送消息最多的Top10用戶" AS
select
sender_name,
count(*) as sender_msg_cnt
from tb_msg_etl
group by sender_name
order by sender_msg_cnt desc
limit 10;
-- 6.統(tǒng)計(jì)今日接收消息最多的Top10用戶
create table if not exists tb_rs_user_receiver_msg_top10
comment "今日接收消息最多的Top10用戶" AS
select
receiver_name,
count(*) as receiver_msg_cnt
from tb_msg_etl
group by receiver_name
order by receiver_msg_cnt desc
limit 10;
-- 7.統(tǒng)計(jì)發(fā)送人的手機(jī)型號分布情況
create table if not exists tb_rs_sender_phone_type
comment '發(fā)送人手機(jī)型號' as
select
sender_phonetype,
count(*) as cnt
from tb_msg_etl
group by sender_phonetype;
-- 8.統(tǒng)計(jì)發(fā)送人的設(shè)備操作系統(tǒng)分布情況
create table if not exists tb_rs_sender_phone_os
comment '發(fā)送人手機(jī)操作系統(tǒng)' as
select
sender_os,
count(*) as cnt
from tb_msg_etl
group by sender_os;
結(jié)果展示
文章來源地址http://www.zghlxwxcb.cn/news/detail-705715.html
文章來源:http://www.zghlxwxcb.cn/news/detail-705715.html
到了這里,關(guān)于關(guān)于黑馬hive課程案例FineBI中文亂碼的解決的文章就介紹完了。如果您還想了解更多內(nèi)容,請?jiān)谟疑辖撬阉鱐OY模板網(wǎng)以前的文章或繼續(xù)瀏覽下面的相關(guān)文章,希望大家以后多多支持TOY模板網(wǎng)!