如何将JSON字符串拆分为单独的字段
我有一个包含两列的表:
-- Source table: one row per upload; rawData holds a JSON array of customer objects as text.
CREATE TABLE customerData
(
    id      BIGINT IDENTITY(1, 1) NOT NULL, -- surrogate key, auto-numbered from 1
    rawData VARCHAR(MAX)                    -- raw JSON text
)
此处rawData将以字符串形式保存json格式的数据,例如以下将是该列中的数据:
-- Sample data: two rows, each holding a JSON array with two customer objects.
-- Every customer carries the same eight attributes, matching the result tables shown further down.
-- id is an IDENTITY column, so only rawData is supplied.
INSERT INTO customerData (rawData)
VALUES
    ('[{"customerName":"K C Nalina","attendance":"P","collectedAmount":"757","isOverdrafted":false,"loanDisbProduct":null,"paidBy":"Y","customerNumber":"1917889","totalDue":"757"},{"customerName":"Mahalakshmi","attendance":"P","collectedAmount":"881","isOverdrafted":false,"loanDisbProduct":"Emergency Loan","paidBy":"Y","customerNumber":"430833","totalDue":"757"}]'),
    ('[{"customerName":"John","attendance":"P","collectedAmount":"700","isOverdrafted":false,"loanDisbProduct":null,"paidBy":"Y","customerNumber":"192222","totalDue":"788"},{"customerName":"weldon","attendance":"P","collectedAmount":"771","isOverdrafted":false,"loanDisbProduct":"Emergency Loan","paidBy":"Y","customerNumber":"435874","totalDue":"757"}]')
预期结果:
我需要在每行的单独字段中显示这些customerName,customerNumber,loanDisbProduct。 还要注意,在很多情况下,rawData内每行的客户详细信息会超过两个。
我不知道如何在rawData列中切碎数据。
我正在使用SQL Server 2012,它不支持JSON数据,因此我必须操纵字符串并获取字段。
解决方法
感谢Red-Gate的博客文章。首先定义如下视图(我将使用此视图在函数内部生成一个新的uniqueidentifier):
-- Exposes NEWID() through a view so that a user-defined function can obtain a fresh
-- UNIQUEIDENTIFIER (SQL Server rejects a direct NEWID() call inside a function body).
-- Schema-qualified because dbo.parseJSON reads it as dbo.getNewID.
CREATE VIEW dbo.getNewID
AS
SELECT NEWID() AS new_id
然后创建如下函数(此函数与Red-Gate博客文章中的函数基本相同,但我对其做了一些修改,并在结果中加入了Identifier列):
/*
  dbo.parseJSON
  Shreds a JSON document into a hierarchy table with one row per element (value, object or array).

  Parameter:
    @JSON  the JSON text to parse.

  Returns (@hierarchy): Element_ID, SequenceNo, Parent_ID, Object_ID, Name, StringValue,
  ValueType ('object','array','string','boolean','null','real','int') and Identifier.
  All elements parsed out of the same object/array share one Identifier value, so the
  members of one JSON object can be regrouped with GROUP BY Identifier.

  Depends on the view dbo.getNewID (SELECT NEWID() AS new_id), which is how a new
  UNIQUEIDENTIFIER is obtained from inside the function.

  Approach: (1) every quoted string is moved into @Strings and replaced in the text by a
  token '@stringN'; (2) the innermost object/array is located repeatedly, its members are
  written to @hierarchy, and it is replaced in the text by '@objectN' / '@arrayN'.
*/
CREATE FUNCTION dbo.parseJSON( @JSON NVARCHAR(MAX))
RETURNS @hierarchy TABLE
(
    Element_ID INT IDENTITY(1,1) NOT NULL, /* internal surrogate primary key gives the order of parsing and the list order */
    SequenceNo [int] NULL, /* the place in the sequence for the element */
    Parent_ID INT NULL, /* if the element has a parent then it is in this column. The document is the ultimate parent, so you can get the structure from recursing from the document */
    Object_ID INT NULL, /* each list or object has an object id. This ties all elements to a parent. Lists are treated as objects here */
    Name NVARCHAR(2000) NULL, /* the Name of the object */
    StringValue NVARCHAR(MAX) NOT NULL, /* the string representation of the value of the element. */
    ValueType VARCHAR(10) NOT NULL, /* the declared type of the value represented as a string in StringValue */
    Identifier UNIQUEIDENTIFIER NOT NULL /* shared by all elements parsed from the same object or array */
)
AS
BEGIN
    DECLARE
        @FirstObject INT, --the index of the first open bracket found in the JSON string
        @OpenDelimiter INT, --the index of the next open bracket found in the JSON string
        @NextOpenDelimiter INT, --the index of subsequent open bracket found in the JSON string
        @NextCloseDelimiter INT, --the index of subsequent close bracket found in the JSON string
        @Type NVARCHAR(10), --whether it denotes an object or an array
        @NextCloseDelimiterChar CHAR(1), --either a '}' or a ']'
        @Contents NVARCHAR(MAX), --the unparsed contents of the bracketed expression
        @Start INT, --index of the start of the token that you are parsing
        @end INT, --index of the end of the token that you are parsing
        @param INT, --the parameter at the end of the next Object/Array token
        @EndOfName INT, --the index of the start of the parameter at end of Object/Array token
        @token NVARCHAR(MAX), --either a string or object (MAX so that long string values are not silently truncated)
        @value NVARCHAR(MAX), -- the value as a string
        @SequenceNo int, -- the sequence number within a list
        @Name NVARCHAR(200), --the Name as a string
        @Parent_ID INT, --the next parent ID to allocate
        @lenJSON INT, --the current length of the JSON String
        @characters NCHAR(36), --used to convert hex to decimal
        @result BIGINT, --the value of the hex symbol being parsed
        @index SMALLINT, --used for parsing the hex value
        @Escape INT, --the index of the next escape character
        @Identifier UNIQUEIDENTIFIER --the identifier stamped on every element of the object/array being parsed

    DECLARE @Strings TABLE /* in this temporary table we keep all strings, even the Names of the elements, since they are 'escaped' in a different way, and may contain, unescaped, brackets denoting objects or lists. These are replaced in the JSON string by tokens representing the string */
    (
        String_ID INT IDENTITY(1,1),
        StringValue NVARCHAR(MAX)
    )

    SELECT --initialise the characters to convert hex to ascii
        @characters='0123456789abcdefghijklmnopqrstuvwxyz',
        @SequenceNo=0, --set the sequence no. to something sensible.
        @Parent_ID=0,
        @Identifier=(SELECT new_id FROM dbo.getNewID)

    /* firstly we process all strings. This is done because [{} and ] aren't escaped in strings, which complicates an iterative parse. */
    WHILE 1=1 --forever until there is nothing more to do
    BEGIN
        SELECT
            @start=PATINDEX('%[^a-zA-Z]["]%',@json collate SQL_Latin1_General_CP850_Bin); --next delimited string
        IF @start=0 BREAK --no more so drop through the WHILE loop
        IF SUBSTRING(@json,@start+1,1)='"'
        BEGIN --Delimited Name
            SET @start=@Start+1; --@start now points at the opening quote
            --the searched text starts AT the opening quote, so @end is the offset of the last character before the closing quote
            SET @end=PATINDEX('%[^\]["]%',RIGHT(@json,LEN(@json+'|')-@start) collate SQL_Latin1_General_CP850_Bin);
        END
        IF @end=0 --either the end or no end delimiter to last string
        BEGIN -- check if ending with a double slash...
            SET @end=PATINDEX('%[\][\]["]%',RIGHT(@json,LEN(@json+'|')-@start) collate SQL_Latin1_General_CP850_Bin);
            IF @end=0 --we really have reached the end
            BEGIN
                BREAK --assume all tokens found
            END
        END
        SELECT @token=SUBSTRING(@json,@start+1,@end-1)
        --now put in the escaped control characters
        SELECT @token=REPLACE(@token,FromString,ToString)
        FROM
            (SELECT '\b',CHAR(08)
             UNION ALL SELECT '\f',CHAR(12)
             UNION ALL SELECT '\n',CHAR(10)
             UNION ALL SELECT '\r',CHAR(13)
             UNION ALL SELECT '\t',CHAR(09)
             UNION ALL SELECT '\"','"'
             UNION ALL SELECT '\/','/'
            ) substitutions(FromString,ToString)
        SELECT @token=Replace(@token,'\\','\')
        SELECT @result=0,@escape=1
        --Begin to take out any hex escape codes
        --NOTE(review): the pattern looks for '\x' followed by four hex digits; standard JSON writes these escapes as '\u' - confirm which form the input uses.
        WHILE @escape>0
        BEGIN
            SELECT @index=0,
                @result=0, --start from zero for every escape sequence, otherwise a second escape adds onto the first one's value
                --find the next hex escape sequence
                @escape=PATINDEX('%\x[0-9a-f][0-9a-f][0-9a-f][0-9a-f]%',@token collate SQL_Latin1_General_CP850_Bin)
            IF @escape>0 --if there is one
            BEGIN
                WHILE @index<4 --there are always four digits to a \x sequence
                BEGIN
                    SELECT --determine its value, working from the least significant digit
                        @result=@result+POWER(16,@index)
                            *(CHARINDEX(SUBSTRING(@token,@escape+2+3-@index,1),@characters)-1),
                        @index=@index+1 ;
                END
                -- and replace the hex sequence by its unicode value
                SELECT @token=STUFF(@token,@escape,6,NCHAR(@result))
            END
        END
        --now store the string away
        INSERT INTO @Strings (StringValue) SELECT @token
        -- and replace the string (including both quotes) with a token
        SELECT @JSON=STUFF(@json,@start,@end+1,'@string'+CONVERT(NCHAR(5),@@identity))
    END

    -- all strings are now removed. Now we find the first leaf.
    WHILE 1=1 --forever until there is nothing more to do
    BEGIN
        --a new parent id and a new identifier for every object/array that gets parsed
        SELECT @Parent_ID=@Parent_ID+1,@Identifier=(SELECT new_id FROM dbo.getNewID)
        --find the first object or list by looking for the open bracket
        SELECT @FirstObject=PATINDEX('%[{[[]%',@json collate SQL_Latin1_General_CP850_Bin) --object or array
        IF @FirstObject = 0 BREAK
        IF (SUBSTRING(@json,@FirstObject,1)='{')
            SELECT @NextCloseDelimiterChar='}',@type='object'
        ELSE
            SELECT @NextCloseDelimiterChar=']',@type='array'
        SELECT @OpenDelimiter=@firstObject
        WHILE 1=1 --find the innermost object or list...
        BEGIN
            SELECT
                @lenJSON=LEN(@JSON+'|')-1
            --find the matching close-delimiter proceeding after the open-delimiter
            SELECT
                @NextCloseDelimiter=CHARINDEX(@NextCloseDelimiterChar,@json,@OpenDelimiter+1)
            --is there an intervening open-delimiter of either type
            SELECT @NextOpenDelimiter=PATINDEX('%[{[[]%',RIGHT(@json,@lenJSON-@OpenDelimiter) collate SQL_Latin1_General_CP850_Bin) --object
            IF @NextOpenDelimiter=0
                BREAK
            SELECT @NextOpenDelimiter=@NextOpenDelimiter+@OpenDelimiter
            IF @NextCloseDelimiter<@NextOpenDelimiter
                BREAK
            IF SUBSTRING(@json,@NextOpenDelimiter,1)='{'
                SELECT @NextCloseDelimiterChar='}',@type='object'
            ELSE
                SELECT @NextCloseDelimiterChar=']',@type='array'
            SELECT @OpenDelimiter=@NextOpenDelimiter
        END
        ---and parse out the list or Name/value pairs
        SELECT
            @contents=SUBSTRING(@json,@OpenDelimiter+1,@NextCloseDelimiter-@OpenDelimiter-1)
        --replace the whole bracketed expression by a token such as '@object3'
        SELECT
            @JSON=STUFF(@json,@OpenDelimiter,@NextCloseDelimiter-@OpenDelimiter+1,'@'+@type+CONVERT(NCHAR(5),@Parent_ID))
        WHILE (PATINDEX('%[A-Za-z0-9@+.e]%',@contents collate SQL_Latin1_General_CP850_Bin))<>0
        BEGIN
            IF @Type='object' --it will be a 0-n list containing a string followed by a string,number,boolean,or null
            BEGIN
                SELECT
                    @SequenceNo=0,@end=CHARINDEX(':',' '+@contents) --if there is anything,it will be a string-based Name.
                SELECT @start=PATINDEX('%[^A-Za-z@][@]%',' '+@contents collate SQL_Latin1_General_CP850_Bin) --AAAAAAAA
                SELECT @token=RTrim(Substring(' '+@contents,@start+1,@End-@Start-1)),
                    @endofName=PATINDEX('%[0-9]%',@token collate SQL_Latin1_General_CP850_Bin),
                    @param=RIGHT(@token,LEN(@token)-@endofName+1)
                SELECT
                    @token=LEFT(@token,@endofName-1),
                    @Contents=RIGHT(' '+@contents,LEN(' '+@contents+'|')-@end-1)
                SELECT @Name=StringValue FROM @strings
                WHERE string_id=@param --fetch the Name
            END
            ELSE
                SELECT @Name=null,@SequenceNo=@SequenceNo+1
            SELECT
                @end=CHARINDEX(',',@contents) -- a string-token,object-token,list-token,number,boolean,or null
            IF @end=0
                --HR Engineering notation bugfix start
                IF ISNUMERIC(@contents) = 1
                    SELECT @end = LEN(@contents) + 1
                ELSE
                --HR Engineering notation bugfix end
                    SELECT @end=PATINDEX('%[A-Za-z0-9@+.e][^A-Za-z0-9@+.e]%',@contents+' ' collate SQL_Latin1_General_CP850_Bin) + 1
            SELECT
                @start=PATINDEX('%[^A-Za-z0-9@+.e][A-Za-z0-9@+.e]%',' '+@contents collate SQL_Latin1_General_CP850_Bin)
            --select @start,@end,LEN(@contents+'|'),@contents
            SELECT
                @Value=RTRIM(SUBSTRING(@contents,@start,@End-@Start)),
                @Contents=RIGHT(@contents+' ',LEN(@contents+'|')-@end)
            IF SUBSTRING(@value,1,7)='@object'
                INSERT INTO @hierarchy
                    (Name,SequenceNo,Parent_ID,StringValue,Object_ID,ValueType,Identifier)
                SELECT @Name,@SequenceNo,@Parent_ID,SUBSTRING(@value,8,5),SUBSTRING(@value,8,5),'object',@Identifier
            ELSE
                IF SUBSTRING(@value,1,6)='@array'
                    INSERT INTO @hierarchy
                        (Name,SequenceNo,Parent_ID,StringValue,Object_ID,ValueType,Identifier)
                    SELECT @Name,@SequenceNo,@Parent_ID,SUBSTRING(@value,7,5),SUBSTRING(@value,7,5),'array',@Identifier
                ELSE
                    IF SUBSTRING(@value,1,7)='@string'
                        INSERT INTO @hierarchy
                            (Name,SequenceNo,Parent_ID,StringValue,ValueType,Identifier)
                        SELECT @Name,@SequenceNo,@Parent_ID,StringValue,'string',@Identifier
                        FROM @strings
                        WHERE string_id=SUBSTRING(@value,8,5)
                    ELSE
                        IF @value IN ('true','false')
                            INSERT INTO @hierarchy
                                (Name,SequenceNo,Parent_ID,StringValue,ValueType,Identifier)
                            SELECT @Name,@SequenceNo,@Parent_ID,@value,'boolean',@Identifier
                        ELSE
                            IF @value='null'
                                INSERT INTO @hierarchy
                                    (Name,SequenceNo,Parent_ID,StringValue,ValueType,Identifier)
                                SELECT @Name,@SequenceNo,@Parent_ID,'null','null',@Identifier
                            ELSE
                                IF PATINDEX('%[^0-9]%',@value collate SQL_Latin1_General_CP850_Bin)>0
                                    INSERT INTO @hierarchy
                                        (Name,SequenceNo,Parent_ID,StringValue,ValueType,Identifier)
                                    SELECT @Name,@SequenceNo,@Parent_ID,@value,'real',@Identifier
                                ELSE
                                    INSERT INTO @hierarchy
                                        (Name,SequenceNo,Parent_ID,StringValue,ValueType,Identifier)
                                    SELECT @Name,@SequenceNo,@Parent_ID,@value,'int',@Identifier
            IF @Contents=' ' SELECT @SequenceNo=0
        END
    END
    --finally add the root row that represents the document itself
    INSERT INTO @hierarchy (Name,SequenceNo,Parent_ID,StringValue,Object_ID,ValueType,Identifier)
    SELECT '-',1,NULL,'',@Parent_ID-1,@type,@Identifier
    --
    RETURN
END
最后,如果我们有此表和数据:
-- Test data for the parseJSON approach: two rows, each a JSON array of two customers.
-- Every customer carries the same eight attributes, so the output listed below
-- (e.g. weldon -> Emergency Loan, John -> null) can be reproduced.
DECLARE @customerData TABLE (jsonValue NVARCHAR(MAX))
INSERT INTO @customerData (jsonValue)
VALUES
    ('[{"customerName":"K C Nalina","attendance":"P","collectedAmount":"757","isOverdrafted":false,"loanDisbProduct":null,"paidBy":"Y","customerNumber":"1917889","totalDue":"757"},{"customerName":"Mahalakshmi","attendance":"P","collectedAmount":"881","isOverdrafted":false,"loanDisbProduct":"Emergency Loan","paidBy":"Y","customerNumber":"430833","totalDue":"757"}]'),
    ('[{"customerName":"John","attendance":"P","collectedAmount":"700","isOverdrafted":false,"loanDisbProduct":null,"paidBy":"Y","customerNumber":"192222","totalDue":"788"},{"customerName":"weldon","attendance":"P","collectedAmount":"771","isOverdrafted":false,"loanDisbProduct":"Emergency Loan","paidBy":"Y","customerNumber":"435874","totalDue":"757"}]')
我们可以简单地解析JSON值,如下所示:
-- Parse every stored JSON document, keep only the three attributes of interest,
-- then fold the name/value rows back into one row per customer.
-- Rows that came from the same JSON object share one Identifier, which is the grouping key.
;WITH parsed AS (
    SELECT
        d.Name,
        d.StringValue,
        d.Identifier
    FROM @customerData AS cd
    CROSS APPLY dbo.parseJSON(cd.jsonValue) AS d
    WHERE d.Name IN ('customerName', 'customerNumber', 'loanDisbProduct')
)
SELECT
    -- MAX ignores the NULLs produced for the non-matching attribute rows
    MAX(CASE WHEN p.Name = 'customerName' THEN p.StringValue END) AS CustomerName,
    MAX(CASE WHEN p.Name = 'customerNumber' THEN p.StringValue END) AS CustomerNumber,
    MAX(CASE WHEN p.Name = 'loanDisbProduct' THEN p.StringValue END) AS LoanDisbProduct
FROM parsed AS p
GROUP BY p.Identifier
我们将获得以下输出:
CustomerName | CustomerNumber | LoanDisbProduct
------------------------------------------------------
John | 192222 | null
Mahalakshmi | 430833 | Emergency Loan
K C Nalina | 1917889 | null
weldon | 435874 | Emergency Loan
,
如果您不知道每一行有多少个客户,就不应把每个客户都拆分成单独的字段,而应至少把每个客户拆分成单独的一行。
下面是拆分数据的一个起点;我使用了一个自定义的dbo.STRING_SPLIT函数(原文此处引用了该函数的出处页面):
首先,我按JSON中的{}拆分出每个客户,然后按','拆分出各个属性,再为每个id保留属性名称和值,并对每行中的客户进行编号。
我本可以像拆分'{...}'那样自己拆分',',但是我选择为此使用函数。
这一切都依赖于JSON具有相同的结构。若要更可靠地解析JSON,建议使用SQL Server 2016及以上版本。
-- DROP TABLE IF EXISTS requires SQL Server 2016+; this form also works on SQL Server 2012.
IF OBJECT_ID('tempdb..#customerData') IS NOT NULL
    DROP TABLE #customerData;

CREATE TABLE #customerData
(
    id      BIGINT IDENTITY(1, 1) NOT NULL,
    rawData VARCHAR(MAX)
);

-- Same sample data as in the question: two rows, two customers per row.
INSERT INTO #customerData (rawData)
VALUES
    ('[{"customerName":"K C Nalina","attendance":"P","collectedAmount":"757","isOverdrafted":false,"loanDisbProduct":null,"paidBy":"Y","customerNumber":"1917889","totalDue":"757"},{"customerName":"Mahalakshmi","attendance":"P","collectedAmount":"881","isOverdrafted":false,"loanDisbProduct":"Emergency Loan","paidBy":"Y","customerNumber":"430833","totalDue":"757"}]'),
    ('[{"customerName":"John","attendance":"P","collectedAmount":"700","isOverdrafted":false,"loanDisbProduct":null,"paidBy":"Y","customerNumber":"192222","totalDue":"788"},{"customerName":"weldon","attendance":"P","collectedAmount":"771","isOverdrafted":false,"loanDisbProduct":"Emergency Loan","paidBy":"Y","customerNumber":"435874","totalDue":"757"}]');

-- Recursive CTE: each step cuts the next {...} block off the front of the remaining text.
--   person     = contents of the current {...} block with the brackets removed
--   personrest = everything after that block's closing '}'
--   nr         = position of the customer within its row
WITH cte AS
(
    SELECT
        id,
        REPLACE(REPLACE(REPLACE(REPLACE(
            SUBSTRING(rawData, CHARINDEX('{', rawData), CHARINDEX('}', rawData) - CHARINDEX('{', rawData)),
            '{', ''), '}', ''), '[', ''), ']', '') AS person,
        SUBSTRING(rawData, CHARINDEX('}', rawData) + 1, LEN(rawData)) AS personrest,
        1 AS nr
    FROM #customerData
    UNION ALL
    SELECT
        id,
        REPLACE(REPLACE(REPLACE(REPLACE(
            SUBSTRING(personrest, CHARINDEX('{', personrest), CHARINDEX('}', personrest) - CHARINDEX('{', personrest)),
            '{', ''), '}', ''), '[', ''), ']', '') AS person,
        SUBSTRING(personrest, CHARINDEX('}', personrest) + 1, LEN(personrest)) AS personrest,
        nr + 1
    FROM cte
    WHERE CHARINDEX('}', personrest) > 0
      AND CHARINDEX('{', personrest) > 0
)
-- Split each customer on ',' and each "name:value" pair on the first ':'.
-- NOTE(review): dbo.STRING_SPLIT is a user-defined splitter that is not shown here; it is
-- assumed to take (string, delimiter) and to return a column named [value] - confirm.
SELECT
    a.id,
    a.nr AS CustomerOrder,
    LEFT(b.[value], CHARINDEX(':', b.[value]) - 1) AS Attribute,
    SUBSTRING(b.[value], CHARINDEX(':', b.[value]) + 1, LEN(b.[value])) AS [value]
FROM cte AS a
CROSS APPLY dbo.STRING_SPLIT(REPLACE(a.person, '"', ''), ',') AS b
-- The default recursion limit is 100 levels, i.e. about 100 customers per row; 0 lifts it.
-- The recursion still ends because personrest gets shorter on every step.
OPTION (MAXRECURSION 0);
结果是:
+─────+────────────────+──────────────────+─────────────────+
| id | CustomerOrder | Attribute | value |
+─────+────────────────+──────────────────+─────────────────+
| 1 | 1 | customerName | K C Nalina |
| 1 | 1 | attendance | P |
| 1 | 1 | collectedAmount | 757 |
| 1 | 1 | isOverdrafted | false |
| 1 | 1 | loanDisbProduct | null |
| 1 | 1 | paidBy | Y |
| 1 | 1 | customerNumber | 1917889 |
| 1 | 1 | totalDue | 757 |
| 2 | 1 | customerName | John |
| 2 | 1 | attendance | P |
| 2 | 1 | collectedAmount | 700 |
| 2 | 1 | isOverdrafted | false |
| 2 | 1 | loanDisbProduct | null |
| 2 | 1 | paidBy | Y |
| 2 | 1 | customerNumber | 192222 |
| 2 | 1 | totalDue | 788 |
| 2 | 2 | customerName | weldon |
| 2 | 2 | attendance | P |
| 2 | 2 | collectedAmount | 771 |
| 2 | 2 | isOverdrafted | false |
| 2 | 2 | loanDisbProduct | Emergency Loan |
| 2 | 2 | paidBy | Y |
| 2 | 2 | customerNumber | 435874 |
| 2 | 2 | totalDue | 757 |
| 1 | 2 | customerName | Mahalakshmi |
| 1 | 2 | attendance | P |
| 1 | 2 | collectedAmount | 881 |
| 1 | 2 | isOverdrafted | false |
| 1 | 2 | loanDisbProduct | Emergency Loan |
| 1 | 2 | paidBy | Y |
| 1 | 2 | customerNumber | 430833 |
| 1 | 2 | totalDue | 757 |
+─────+────────────────+──────────────────+─────────────────+
,
最好是升级到v2016+。有了JSON支持,这很容易……
在v2012上,您只能想办法变通。为此,使用其他工具可能是一个更好的选择。但是,如果您必须坚持使用T-SQL,我会尝试将JSON转换为以属性为中心的XML,如下所示:
DECLARE @customerData TABLE (id BIGINT IDENTITY(1, 1) NOT NULL, rawData VARCHAR(MAX));

-- Same sample data as in the question: two rows, two customers per row.
INSERT INTO @customerData (rawData)
VALUES
    ('[{"customerName":"K C Nalina","attendance":"P","collectedAmount":"757","isOverdrafted":false,"loanDisbProduct":null,"paidBy":"Y","customerNumber":"1917889","totalDue":"757"},{"customerName":"Mahalakshmi","attendance":"P","collectedAmount":"881","isOverdrafted":false,"loanDisbProduct":"Emergency Loan","paidBy":"Y","customerNumber":"430833","totalDue":"757"}]'),
    ('[{"customerName":"John","attendance":"P","collectedAmount":"700","isOverdrafted":false,"loanDisbProduct":null,"paidBy":"Y","customerNumber":"192222","totalDue":"788"},{"customerName":"weldon","attendance":"P","collectedAmount":"771","isOverdrafted":false,"loanDisbProduct":"Emergency Loan","paidBy":"Y","customerNumber":"435874","totalDue":"757"}]');

-- Query
-- Step A: turn the unquoted literals false / true / null into quoted placeholders,
--         so that every value in the text is a quoted string.
-- Step B: rewrite the JSON punctuation into attribute-centric XML, one <x ... /> per customer:
--         '[' and ']' are dropped, '},{"' -> ' /><x ', '{"' -> '<x ', '}' -> ' />',
--         '","' -> '" ' and '":"' -> '="'.
-- Plain text replacement: it relies on the flat, all-string structure of the sample data,
-- and the CAST will fail for values containing XML special characters such as & or <.
SELECT
    cd.id,
    B.*
FROM @customerData AS cd
CROSS APPLY (
    SELECT REPLACE(REPLACE(REPLACE(cd.rawData, 'false', '"0"'), 'true', '"1"'), 'null', '"#NULL"')
) AS A(JustStringValues)
CROSS APPLY (
    SELECT CAST(
        REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(
            A.JustStringValues,
            '[', ''),
            ']', ''),
            '},{"', ' /><x '),
            '{"', '<x '),
            '}', ' />'),
            '","', '" '),
            '":"', '="')
        AS XML)
) AS B(SingleRow)
--结果
<x customerName="K C Nalina" attendance="P" collectedAmount="757" isOverdrafted="0" loanDisbProduct="#NULL" paidBy="Y" customerNumber="1917889" totalDue="757" />
<x customerName="Mahalakshmi" attendance="P" collectedAmount="881" isOverdrafted="0" loanDisbProduct="Emergency Loan" paidBy="Y" customerNumber="430833" totalDue="757" />
<x customerName="John" attendance="P" collectedAmount="700" isOverdrafted="0" loanDisbProduct="#NULL" paidBy="Y" customerNumber="192222" totalDue="788" />
<x customerName="weldon" attendance="P" collectedAmount="771" isOverdrafted="0" loanDisbProduct="Emergency Loan" paidBy="Y" customerNumber="435874" totalDue="757" />
简而言之:
- 我们用带引号的占位符替换未加引号的值(false,true,null)
- 我们通过一系列REPLACE替换得到以属性为中心的XML
使用以下查询获取各个值:
-- Reads individual attributes back out of the generated XML, one result row per customer.
-- Step A quotes the bare literals false / true / null; step B rewrites the JSON punctuation
-- into <x ... /> elements; nodes('/x') then yields one node per customer.
SELECT
    cd.id,
    C.OneCustomer.value('@customerName', 'nvarchar(max)') AS CustomerName,
    C.OneCustomer.value('@attendance', 'nvarchar(max)') AS Attendance
    --more attributes
FROM @customerData AS cd
CROSS APPLY (
    SELECT REPLACE(REPLACE(REPLACE(cd.rawData, 'false', '"0"'), 'true', '"1"'), 'null', '"#NULL"')
) AS A(JustStringValues)
CROSS APPLY (
    SELECT CAST(
        REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(
            A.JustStringValues,
            '[', ''),
            ']', ''),
            '},{"', ' /><x '),
            '{"', '<x '),
            '}', ' />'),
            '","', '" '),
            '":"', '="')
        AS XML)
) AS B(SingleRow)
CROSS APPLY B.SingleRow.nodes('/x') AS C(OneCustomer);
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。