복붙노트

[HADOOP] Hive에서 JSON 처리 시 오류

HADOOP

Hive에서 JSON 처리 시 오류

어째서인지 이 JSON을 Hive 테이블로 가져올 수가 없습니다. 모든 데이터가 NULL로 나오거나, 아예 조회(SELECT)가 되지 않습니다. DDL에 정의한 것과 동일한 필드만 있으면 되고, 필드 안에 중첩된 구조가 있다면 파싱하지 않고 그냥 문자열로 가져오려고 합니다.

거의 동작한 것은 hive-hcatalog-core-1.1.0-cdh5.10.0.jar 하나뿐이었습니다. 일부 데이터가 비어 있어서인지 LIMIT을 붙이면 조회가 되지만, LIMIT을 제거하면 다음과 같은 오류가 반환됩니다: org.apache.hadoop.hive.serde2.SerDeException: java.io.IOException: Field name expected

테이블 생성문은 다음과 같습니다:

-- Register the HCatalog jar named in the question before creating the table.
ADD JAR hive-hcatalog-core-1.1.0-cdh5.10.0.jar;

-- External table over the JSON documents, read with the HCatalog JsonSerDe.
-- All 46 columns are declared STRING, including columns whose values in the
-- sample document are JSON objects or arrays (e.g. fetcherResult, hFrequency).
CREATE EXTERNAL TABLE tabless (
    `dt` STRING,
    `hGeoLocation` STRING,
    `loginId` STRING,
    `hSearchFunnel` STRING,
    `timeseries` STRING,
    `locale` STRING,
    `fetcherResult` STRING,
    `searchType` STRING,
    `isBackDate` STRING,
    `hId` STRING,
    `hFrequency` STRING,
    `currency` STRING,
    `userType` STRING,
    `isSNA` STRING,
    `isBinding` STRING,
    `nodeId` STRING,
    `_id` STRING,
    `adjustedResult` STRING,
    `chosenProviderSell` STRING,
    `ChosenInventoryBeforeAdjusted` STRING,
    `PricingRules` STRING,
    `cInDate` STRING,
    `cOutDate` STRING,
    `machineId` STRING,
    `interface` STRING,
    `pricingSpec` STRING,
    `elapsedTime` STRING,
    `ChosenInventoryAfterAdjusted` STRING,
    `chosenProviderBase` STRING,
    `fFrequency` STRING,
    `kafkaPT` STRING,
    `kafkaST` STRING,
    `cookieId` STRING,
    `sessionId` STRING,
    `pricingSpecAbPriceAdjustment` STRING,
    `searchId` STRING,
    `prevSearchId` STRING,
    `competitorRequest` STRING,
    `CPricingRule` STRING,
    `CStatisticChosenMethod` STRING,
    `ChosenCId` STRING,
    `ChosenCPricingRule` STRING,
    `chosenCPriceType` STRING,
    `CPriceDiff` STRING,
    `competitorResponse` STRING,
    `searchRateType` STRING
)
COMMENT 'somecomment'
ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe'
LOCATION 'someremotelocation';

제 JSON은 아래와 같은 형태이며, 이런 데이터가 대량으로 있습니다. 필요하면 온라인 JSON 파서로 확인해 보시기 바랍니다:

{"ChosenCId":null,"ChosenCPricingRule":null,"ChosenInventoryAfterAdjusted":[{"hRoomId":1086174,"BASEFARE":22150,"SELLFARE":25000},{"hRoomId":103270,"BASEFARE":249,"SELLFARE":2800},{"hRoomId":103272,"BASEFARE":2470,"SELLFARE":200},{"hRoomId":100273,"BASEFARE":3050,"SELLFARE":3500},{"hRoomId":10376,"BASEFARE":3050,"SELLFARE":3500},{"hRoomId":10375,"BASEFARE":3050,"SELLFARE":3500},{"hRoomId":10374,"BASEFARE":367,"SELLFARE":4250},{"hRoomId":1069,"BASEFARE":430,"SELLFARE":500},{"hRoomId":108634,"BASEFARE":44700,"SELLFARE":5000},{"hRoomId":10270,"BASEFARE":400,"SELLFARE":570},{"hRoomId":102,"BASEFARE":400,"SELLFARE":5700},{"hRoomId":1026,"BASEFARE":610,"SELLFARE":70},{"hRoomId":1033,"BASEFARE":610,"SELLFARE":70},{"hRoomId":1075,"BASEFARE":60,"SELLFARE":0},{"hRoomId":1074,"BASEFARE":730,"SELLFARE":80},{"hRoomId":1039,"BASEFARE":870,"SELLFARE":10},{"hRoomId":1269,"BASEFARE":800,"SELLFARE":10000},{"hRoomId":10271,"BASEFARE":9500,"SELLFARE":1100},{"hRoomId":1039,"BASEFARE":17000,"SELLFARE":2000},{"hRoomId":1271,"BASEFARE":1900,"SELLFARE":200}],"ChosenInventoryBeforeAdjusted":[{"hRoomId":1084,"BASEFARE":220,"SELLFARE":2000},{"hRoomId":10320,"BASEFARE":250,"SELLFARE":280},{"hRoomId":10372,"BASEFARE":240,"SELLFARE":200},{"hRoomId":103273,"BASEFARE":3850,"SELLFARE":300},{"hRoomId":1076,"BASEFARE":350,"SELLFARE":300},{"hRoomId":10275,"BASEFARE":380,"SELLFARE":350},{"hRoomId":1074,"BASEFARE":360,"SELLFARE":420},{"hRoomId":1069,"BASEFARE":430,"SELLFARE":500},{"hRoomId":1084,"BASEFARE":440,"SELLFARE":50},{"hRoomId":10370,"BASEFARE":490,"SELLFARE":500},{"hRoomId":1032,"BASEFARE":400,"SELLFARE":500},{"hRoomId":1036,"BASEFARE":610,"SELLFARE":710},{"hRoomId":1073,"BASEFARE":610,"SELLFARE":710},{"hRoomId":1035,"BASEFARE":61,"SELLFARE":710},{"hRoomId":1034,"BASEFARE":730,"SELLFARE":80},{"hRoomId":1029,"BASEFARE":800,"SELLFARE":100},{"hRoomId":10269,"BASEFARE":800,"SELLFARE":100},{"hRoomId":101,"BASEFARE":9500,"SELLFARE":100},{"hRoomId":109,"BASEFARE":1700,"SELLFARE":200},{"hRoomId":1071,"B
ASEFARE":1900,"SELLFARE":20}],"CPriceDiff":0.0,"CPricingRule":{},"CStatisticChosenMethod":"none","CookieID":"1547597","FTA":[{"hRoomId":1074,"BASEFARE":220,"SELLFARE":20},{"hRoomId":10370,"BASEFARE":2450,"SELLFARE":200},{"hRoomId":1072,"BASEFARE":240,"SELLFARE":28},{"hRoomId":1033,"BASEFARE":37,"SELLFARE":35},{"hRoomId":1036,"BASEFARE":300,"SELLFARE":350},{"hRoomId":105,"BASEFARE":30,"SELLFARE":350},{"hRoomId":1074,"BASEFARE":30,"SELLFARE":420},{"hRoomId":109,"BASEFARE":430,"SELLFARE":00},{"hRoomId":10874,"BASEFARE":440,"SELLFARE":500},{"hRoomId":10370,"BASEFARE":4900,"SELLFARE":570},{"hRoomId":103,"BASEFARE":490,"SELLFARE":5700},{"hRoomId":10376,"BASEFARE":6100,"SELLFARE":70},{"hRoomId":10273,"BASEFARE":600,"SELLFARE":700},{"hRoomId":175,"BASEFARE":60,"SELLFARE":70},{"hRoomId":104,"BASEFARE":730,"SELLFARE":80},{"hRoomId":1069,"BASEFARE":80,"SELLFARE":100},{"hRoomId":109,"BASEFARE":80,"SELLFARE":10},{"hRoomId":171,"BASEFARE":950,"SELLFARE":110},{"hRoomId":10,"BASEFARE":170,"SELLFARE":20},{"hRoomId":101,"BASEFARE":100,"SELLFARE":200}],"PricingRules":{"t_l":22000900002,"hbeds":2200000002,"t_p":2200000002,"t_m":22000000002,"e_private":22000900002,"t":22000000002,"e":222,"hbeds_ratebinding":220000002,"t_budgetrooms":22000},"SessionID":"d586280d34","_id":154766,"adjustedResult":{"CheapestBase":{"t":{"BASEFARE":22,"SELLFARE":25},"e":{"BASEFARE":26,"SELLFARE":28}},"CheapestSell":{"t":{"BASEFARE":22,"SELLFARE":25},"e":{"BASEFARE":26,"SELLFARE":28}}},"cInDate":"01-01-2012","cOutDate":"12-12-2017","chosenProviderBase":"t","chosenProviderSell":"t","currency":"SGD","dt":147591430,"elapsedTime":5,"fetcherResult":{"CheapestBase":{"t":{"BASEFARE":20,"SELLFARE":25},"e":{"BASEFARE":20,"SELLFARE":28}},"CheapestSell":{"t":{"BASEFARE":22,"SELLFARE":25},"e":{"BASEFARE":20,"SELLFARE":20}}},"fFrequency":["NONE"],"hFrequency":[],"hGeoLocation":"SINGAPORE","hId":200344,"interface":["MOBILE_APPS_ANDROID"],"isBackDate":false,"isBinding":false,"isSNA":false,"locale":"id_ID","loginId":"","machi
neId":"416","nodeId":"hivv2","pricingSpec":{"isB":false,"searchDate":14700,"hTransactionFrequencyStatus":"","userLocale":"id_ID","isBackDate":false,"currency":"VND","hTransactionFrequency":0,"roomCount":"1","hUserType":"NON_LOGGED_IN_USER","lengthOfStay":"1","fTransactionRecency":0,"userGeoCountry":"Australia","abPriceAdjustment":"treatmentGroup","searchTime":530,"bookingWindowInDays":1,"roomNight":"1","hSearchFunnel":"LOWER_FUNNEL","cInDate":147000,"cOutDate":1476032400000,"searchDay":"6","fTransactionFrequency":0,"fTransactionFrequencyStatus":"NONE","cInDay":"7","hGrouping":"1,93,41,122","hIds":"2000000369344","hTransactionRecency":0,"clientType":"MOBILE"},"searchType":"hRoomSearch","timeSeries":1475919104430,"timeseries":1475919104430,"userType":["NON"],"kafkaPT":1475919104430,"kafkaST":1475919656986}

원인이나 해결 방법을 아시는 분 계신가요?

해결법

  1. ==============================

    1.

    -- Single-column table: each row is held in one STRING column, json_doc.
    -- NOTE(review): 'serialization.last.column.takes.rest' is presumably set so
    -- that a field delimiter inside a document cannot cut json_doc short (confirm).
    CREATE EXTERNAL TABLE tabless (
        json_doc STRING
    )
    ROW FORMAT DELIMITED
    TBLPROPERTIES ('serialization.last.column.takes.rest' = 'true');
    
    -- Extract the 46 listed keys from each json_doc with json_tuple and expose
    -- every value under a column alias spelled the same as its key.
    -- NOTE(review): the sample document spells two keys "CookieID" and "SessionID",
    -- while this query asks for 'cookieId' and 'sessionId'; confirm the intended spelling.
    SELECT
        json_tuple(
            json_doc,
            'dt', 'hGeoLocation', 'loginId', 'hSearchFunnel',
            'timeseries', 'locale', 'fetcherResult', 'searchType',
            'isBackDate', 'hId', 'hFrequency', 'currency',
            'userType', 'isSNA', 'isBinding', 'nodeId',
            '_id', 'adjustedResult', 'chosenProviderSell', 'ChosenInventoryBeforeAdjusted',
            'PricingRules', 'cInDate', 'cOutDate', 'machineId',
            'interface', 'pricingSpec', 'elapsedTime', 'ChosenInventoryAfterAdjusted',
            'chosenProviderBase', 'fFrequency', 'kafkaPT', 'kafkaST',
            'cookieId', 'sessionId', 'pricingSpecAbPriceAdjustment', 'searchId',
            'prevSearchId', 'competitorRequest', 'CPricingRule', 'CStatisticChosenMethod',
            'ChosenCId', 'ChosenCPricingRule', 'chosenCPriceType', 'CPriceDiff',
            'competitorResponse', 'searchRateType'
        ) AS (
            `dt`, `hGeoLocation`, `loginId`, `hSearchFunnel`,
            `timeseries`, `locale`, `fetcherResult`, `searchType`,
            `isBackDate`, `hId`, `hFrequency`, `currency`,
            `userType`, `isSNA`, `isBinding`, `nodeId`,
            `_id`, `adjustedResult`, `chosenProviderSell`, `ChosenInventoryBeforeAdjusted`,
            `PricingRules`, `cInDate`, `cOutDate`, `machineId`,
            `interface`, `pricingSpec`, `elapsedTime`, `ChosenInventoryAfterAdjusted`,
            `chosenProviderBase`, `fFrequency`, `kafkaPT`, `kafkaST`,
            `cookieId`, `sessionId`, `pricingSpecAbPriceAdjustment`, `searchId`,
            `prevSearchId`, `competitorRequest`, `CPricingRule`, `CStatisticChosenMethod`,
            `ChosenCId`, `ChosenCPricingRule`, `chosenCPriceType`, `CPriceDiff`,
            `competitorResponse`, `searchRateType`
        )
    FROM tabless;
    
    +-----------+--------------+---------+---------------+---------------+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-------------+------------+--------+------------+----------+----------+-------+-----------+--------+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------+------------+-----------+-------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------+------------+---------------+---------------+----------+-----------+------------------------------+----------+--------------+-------------------+--------------+------------------------+-----------+--------------------+------------------+------------+--------------------+----------------+
    |    dt     | hgeolocation | loginid | hsearchfunnel |  timeseries   | locale |                                                                                fetcherresult                                                                                | searchtype  | isbackdate |  hid   | hfrequency | currency | usertype | issna | isbinding | nodeid |  _id   |                                                                               adjustedresult                                                                                | chosenprovidersell |                                                                                                                                                                                                                                                                                                                                                                                                                                                                            choseninventorybeforeadjusted                                                                                                                                                                                                                                                                                                                                                                                                                                                                            |                                                                                 pricingrules                                                                                  |  cindate   |  coutdate  | machineid |        interface        |                                                                                                                                                                                                                                                         
                                                            pricingspec                                                                                                                                                                                                                                                                                                                     | elapsedtime |                                                                                                                                                                                                                                                                                                                                                                                                                                                                                          choseninventoryafteradjusted                                                                                                                                                                                                                                                                                                                                                                                                                                                                                           | chosenproviderbase | ffrequency |    kafkapt    |    kafkast    | cookieid | sessionid | pricingspecabpriceadjustment | searchid | prevsearchid | competitorrequest | cpricingrule | cstatisticchosenmethod | chosencid | chosencpricingrule | chosencpricetype | cpricediff | competitorresponse | searchratetype |
    +-----------+--------------+---------+---------------+---------------+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-------------+------------+--------+------------+----------+----------+-------+-----------+--------+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------+------------+-----------+-------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------+------------+---------------+---------------+----------+-----------+------------------------------+----------+--------------+-------------------+--------------+------------------------+-----------+--------------------+------------------+------------+--------------------+----------------+
    | 147591430 | SINGAPORE    |         | (null)        | 1475919104430 | id_ID  | {"CheapestBase":{"t":{"BASEFARE":20,"SELLFARE":25},"e":{"BASEFARE":20,"SELLFARE":28}},"CheapestSell":{"t":{"BASEFARE":22,"SELLFARE":25},"e":{"BASEFARE":20,"SELLFARE":20}}} | hRoomSearch | false      | 200344 | []         | SGD      | ["NON"]  | false | false     | hivv2  | 154766 | {"CheapestBase":{"t":{"BASEFARE":22,"SELLFARE":25},"e":{"BASEFARE":26,"SELLFARE":28}},"CheapestSell":{"t":{"BASEFARE":22,"SELLFARE":25},"e":{"BASEFARE":26,"SELLFARE":28}}} | t                  | [{"hRoomId":1084,"BASEFARE":220,"SELLFARE":2000},{"hRoomId":10320,"BASEFARE":250,"SELLFARE":280},{"hRoomId":10372,"BASEFARE":240,"SELLFARE":200},{"hRoomId":103273,"BASEFARE":3850,"SELLFARE":300},{"hRoomId":1076,"BASEFARE":350,"SELLFARE":300},{"hRoomId":10275,"BASEFARE":380,"SELLFARE":350},{"hRoomId":1074,"BASEFARE":360,"SELLFARE":420},{"hRoomId":1069,"BASEFARE":430,"SELLFARE":500},{"hRoomId":1084,"BASEFARE":440,"SELLFARE":50},{"hRoomId":10370,"BASEFARE":490,"SELLFARE":500},{"hRoomId":1032,"BASEFARE":400,"SELLFARE":500},{"hRoomId":1036,"BASEFARE":610,"SELLFARE":710},{"hRoomId":1073,"BASEFARE":610,"SELLFARE":710},{"hRoomId":1035,"BASEFARE":61,"SELLFARE":710},{"hRoomId":1034,"BASEFARE":730,"SELLFARE":80},{"hRoomId":1029,"BASEFARE":800,"SELLFARE":100},{"hRoomId":10269,"BASEFARE":800,"SELLFARE":100},{"hRoomId":101,"BASEFARE":9500,"SELLFARE":100},{"hRoomId":109,"BASEFARE":1700,"SELLFARE":200},{"hRoomId":1071,"BASEFARE":1900,"SELLFARE":20}] | {"t_l":22000900002,"hbeds":2200000002,"t_p":2200000002,"t_m":22000000002,"e_private":22000900002,"t":22000000002,"e":222,"hbeds_ratebinding":220000002,"t_budgetrooms":22000} | 01-01-2012 | 12-12-2017 |       416 | ["MOBILE_APPS_ANDROID"] | 
{"isB":false,"searchDate":14700,"hTransactionFrequencyStatus":"","userLocale":"id_ID","isBackDate":false,"currency":"VND","hTransactionFrequency":10,"roomCount":"1","hUserType":"NON_LOGGED_IN_USER","lengthOfStay":"1","fTransactionRecency":10,"userGeoCountry":"Australia","abPriceAdjustment":"treatmentGroup","searchTime":530,"bookingWindowInDays":1,"roomNight":"1","hSearchFunnel":"LOWER_FUNNEL","cInDate":147000,"cOutDate":1476032400000,"searchDay":"6","fTransactionFrequency":10,"fTransactionFrequencyStatus":"NONE","cInDay":"7","hGrouping":"1,93,41,122","hIds":"2000000369344","hTransactionRecency":10,"clientType":"MOBILE"} |           5 | [{"hRoomId":1086174,"BASEFARE":22150,"SELLFARE":25000},{"hRoomId":103270,"BASEFARE":249,"SELLFARE":2800},{"hRoomId":103272,"BASEFARE":2470,"SELLFARE":200},{"hRoomId":100273,"BASEFARE":3050,"SELLFARE":3500},{"hRoomId":10376,"BASEFARE":3050,"SELLFARE":3500},{"hRoomId":10375,"BASEFARE":3050,"SELLFARE":3500},{"hRoomId":10374,"BASEFARE":367,"SELLFARE":4250},{"hRoomId":1069,"BASEFARE":430,"SELLFARE":500},{"hRoomId":108634,"BASEFARE":44700,"SELLFARE":5000},{"hRoomId":10270,"BASEFARE":400,"SELLFARE":570},{"hRoomId":102,"BASEFARE":400,"SELLFARE":5700},{"hRoomId":1026,"BASEFARE":610,"SELLFARE":70},{"hRoomId":1033,"BASEFARE":610,"SELLFARE":70},{"hRoomId":1075,"BASEFARE":60,"SELLFARE":10},{"hRoomId":1074,"BASEFARE":730,"SELLFARE":80},{"hRoomId":1039,"BASEFARE":870,"SELLFARE":10},{"hRoomId":1269,"BASEFARE":800,"SELLFARE":10000},{"hRoomId":10271,"BASEFARE":9500,"SELLFARE":1100},{"hRoomId":1039,"BASEFARE":17000,"SELLFARE":2000},{"hRoomId":1271,"BASEFARE":1900,"SELLFARE":200}] | t                  | ["NONE"]   | 1475919104430 | 1475919656986 | (null)   | (null)    | (null)                       | (null)   | (null)       | (null)            | {}           | none                   | (null)    | (null)             | (null)           | 10.0       | (null)             | (null)         |
    +-----------+--------------+---------+---------------+---------------+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-------------+------------+--------+------------+----------+----------+-------+-----------+--------+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------+------------+-----------+-------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------+------------+---------------+---------------+----------+-----------+------------------------------+----------+--------------+-------------------+--------------+------------------------+-----------+--------------------+------------------+------------+--------------------+----------------+
    
  2. ==============================

    2. 문제는 다음과 같습니다

    문제는 다음과 같습니다

    DDL 문제

    ChosenInventoryBeforeAdjusted와 ChosenInventoryAfterAdjusted는 문자열(STRING)로 정의할 수 없습니다. 실제 데이터 구조에 맞는 복합 타입으로 정의해야 합니다 -

    array<struct<hRoomId:int,BASEFARE:int,SELLFARE:int>>
    

    데이터 문제

    숫자 값은 0으로 시작할 수 없습니다. 예를 들면 다음과 같습니다 (SELLFARE):

    "hRoomId": 1075,
    "BASEFARE": 60,
    "SELLFARE": 000
    
  3. from https://stackoverflow.com/questions/44240611/hive-from-json-error by cc-by-sa and MIT license