Skip to main content
Solved

XML ingestion

  • December 13, 2024
  • 6 replies
  • 70 views

Forum|alt.badge.img
  • Problem Solver
  • 113 replies

Anyone have a suggestion about how do i use this XML to ingest data into ODX?

<?xml version="1.0" encoding="utf-8"?>
<soap:Envelope xmlns:soap="http://www.w3.org/2003/05/soap-envelope" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema">
    <soap:Body>
        <ExportDocsProjectWithTendersResponse xmlns="http://publicprocurement.com">    
    <ExportDocsProjectWithTendersResult>{
  "ProjectBodies": [
    {
      "ExternalId": "666666",
      "DefaultEvaluationModel": "LowestPrice",
      "DocumentNumberingSchema": "Numeric",
      "SectionNumberingSchema": 0,
      "RequirementNumberingSchema": "Alphabetic",
      "Metadata": [
	  {
          "Name": "ProjectName",
          "Value": "Ingesting Data into TX"
        },
        {
          "Name": "Reference",
          "Value": "123456"
        },
        {
          "Name": "ContractType",
          "Value": "XML"
        }
	  ],
	  "DataFields": [
        {
          "ExternalId": "1",
          "Type": "FreeText",
          "Name": "Prosjct Name",
          "Category": "C1",
          "Value": "V1"
        },
        {
          "ExternalId": "2",
          "Type": "FreeText",
          "Name": "Prosjct Name 2",
          "Category": "C2",
          "Value": "V2"
        },
        {
         "ExternalId": 3,
          "Type": "FreeText",
          "Name": "Prosjct Name 3",
          "Category": "C3",
          "Value": "V3"
        },
	      }
	  ]
	 }
	 </ExportDocsProjectWithTendersResult>
        </ExportDocsProjectWithTendersResponse>
    </soap:Body>
</soap:Envelope>

I have come this far:

<api:script xmlns:api="http://apiscript.com/ns?v1" xmlns:xs="http://www.w3.org/2001/XMLSchema">

	<!-- See Column Definitions to specify column behavior and use XPaths to extract column values from XML. -->
	<api:info title="contracts/contract" desc="Generated schema file." xmlns:other="http://apiscript.com/ns?v1">

		<attr name="ExternalId"         			xs:type="string"  		readonly="false"   other:xPath="/Envelope/Body/ExportDocsProjectWithTendersResponse/ExportDocsProjectWithTendersResult" />

	</api:info>

	<api:set attr="DataModel" 				value="RELATIONAL" />
	<api:set attr="URI" 					value="https://my.soap.provider.com/endpoint.asmx?WSDL" />
	<api:set attr="PushAttributes" 			value="true" />
	<api:set attr="EnablePaging" 			value="true" />
	<api:set attr="Header:Name#" 			value="SOAPAction"/>
	<api:set attr="Header:Value#" 			value="http://publicprocurement.com/ExportDocsProjectWithTenders"/>
	<api:set attr="XPath" 					value="/Envelope/Body/ExportDocsProjectWithTendersResponse/ExportDocsProjectWithTendersResult" />

	<!-- The GET method corresponds to SELECT. Here you can override the default processing of the SELECT statement. The results of processing are pushed to the schema's output. See SELECT Execution for more information. -->
	<api:script method="GET">
		<api:set attr="method" value="POST"/>
		<api:set attr="ContentType" value="text/xml"/>
		<api:set attr="AcceptCharset" value="utf-8" />
		<api:set attr="data">
			<?xml version="1.0" encoding="utf-8"?>
				<soap:Envelope xmlns:soap="http://www.w3.org/2003/05/soap-envelope" xmlns:pub="http://publicprocurement.com">
		<soap:Header/>
		<soap:Body>
			<pub:ExportDocsProjectWithTenders>
				<pub:SystemUserID>123456</pub:SystemUserID>
				<pub:ProjectID>666666</pub:ProjectID>
			</pub:ExportDocsProjectWithTenders>
	</soap:Body>
	</soap:Envelope>
		</api:set>
		<api:call op="xmlproviderGet">
			<api:push/>    
		</api:call>
	</api:script>
</api:script>

When i run this. i get everything from ProjectBodies

I have tried this to splitt the result into columns of Metadata and Datafileds but this didnt gave me anythig

<attr name="Metadata"         			xs:type="string"  		readonly="false"   other:xPath="/Envelope/Body/ExportDocsProjectWithTendersResponse/ExportDocsProjectWithTendersResult/Metadata" />
		<attr name="DataFields"         			xs:type="string"  		readonly="false"   other:xPath="/Envelope/Body/ExportDocsProjectWithTendersResponse/ExportDocsProjectWithTendersResult/DataFields" />

 

 

Ideally, i would like to splitt everything under Metadata and DataField into columns. 
I am sort of stuck

Best answer by aftabp

I have managed to solve in two steps.
The first step - extract everything in one column with this code 

<api:script xmlns:api="http://apiscript.com/ns?v1" xmlns:xs="http://www.w3.org/2001/XMLSchema">

	<!-- See Column Definitions to specify column behavior and use XPaths to extract column values from XML. -->
	<api:info title="contract" desc="Generated schema file." xmlns:other="http://apiscript.com/ns?v1">
    <attr name="ExternalId"                 xs:type="string"  readonly="false"              other:xPath="/Envelope/Body/ExportDocsProjectWithTendersResponse/ExportDocsProjectWithTendersResult"/>
	</api:info>

	<api:set attr="DataModel" 				value="RELATIONAL" />
	<api:set attr="URI" 					value="https://my.soap.endpoint/services.asmx.?WSDL" />
	<api:set attr="PushAttributes" 			value="true" />
	<api:set attr="EnablePaging" 			value="true" />
	<api:set attr="Header:Name#" 			value="SOAPAction"/>
	<api:set attr="Header:Value#" 			value="http://publicprocurement.com/ExportDocsProjectWithTenders"/>
	<api:set attr="XPath" 					value="/Envelope/Body" />

	<!-- The GET method corresponds to SELECT. Here you can override the default processing of the SELECT statement. The results of processing are pushed to the schema's output. See SELECT Execution for more information. -->
	<api:script method="GET">
		<api:set attr="method" value="POST"/>
		<api:set attr="ContentType" value="text/xml"/>
		<api:set attr="AcceptCharset" value="utf-8" />
		<api:set attr="data">
			<?xml version="1.0" encoding="utf-8"?>
				<soap:Envelope xmlns:soap="http://www.w3.org/2003/05/soap-envelope" xmlns:pub="http://publicprocurement.com">
		<soap:Header/>
		<soap:Body>
			<pub:ExportDocsProjectWithTenders>
					<pub:SystemUserID>123456</pub:SystemUserID>
					<pub:ProjectID>6666</pub:ProjectID>
				</pub:ExportDocsProjectWithTenders>
	</soap:Body>
	</soap:Envelope>
		</api:set>
		<api:call op="xmlproviderGet">
			<api:push/>    
		</api:call>
	</api:script>
</api:script>

I got this in ny ODX

Then i created a Table insert in DSA. Here is the code

 

SELECT
    'Metadata' AS Source,
    JSON_VALUE(Metadata.[value], '$.Name') AS Name,
    JSON_VALUE(Metadata.[value], '$.Value') AS Value
FROM
    [myTable]
CROSS APPLY OPENJSON([myColumn], '$.ProjectBodies[0].Metadata') AS Metadata
UNION ALL
SELECT
    'DataFields' AS Source,
    JSON_VALUE(DataFields.[value], '$.Name') AS Name,
    JSON_VALUE(DataFields.[value], '$.Value') AS Value
FROM
    [myTable]
CROSS APPLY OPENJSON([myColumn], '$.ProjectBodies[0].DataFields') AS DataFields

As a result i am able to split the data up in columns

 

 

View original
Did this topic help you find an answer to your question?

6 replies

Thomas Lind
Community Manager
Forum|alt.badge.img+5
  • Community Manager
  • 1029 replies
  • December 16, 2024

Hi ​@aftabp 

Is the result in JSON and the input in XML?

Start by generating the RSD file based on the JSON result. Then the fields will have the right XPath and the XPath or JSONPath setup will point to where the data is.

Afterwards you can change the URI and similar to point at the real data. If there is a mix between XML and JSON, it will be hard to get it to read it properly as it requires different contenttype setups.


Forum|alt.badge.img
  • Author
  • Problem Solver
  • 113 replies
  • December 16, 2024

 

@Thomas Lind 
I have already tried before posting the question.

The output in POSTMAN is in XML.
I have converted it into JSON

         [ {"ProjectBodies": [
            {
              "ExternalId": "666666",
              "DefaultEvaluationModel": "LowestPrice",
              "DocumentNumberingSchema": "Numeric",
              "SectionNumberingSchema": 0,
              "RequirementNumberingSchema": "Alphabetic",
              "Metadata": [
                {
                  "Name": "ProjectName",
                  "Value": "Ingesting Data into TX"
                },
                {
                  "Name": "Reference",
                  "Value": "123456"
                },
                {
                  "Name": "ContractType",
                  "Value": "XML"
                }
              ],
              "DataFields": [
                {
                  "ExternalId": "1",
                  "Type": "FreeText",
                  "Name": "Prosjct Name",
                  "Category": "C1",
                  "Value": "V1"
                },
                {
                  "ExternalId": "2",
                  "Type": "FreeText",
                  "Name": "Prosjct Name 2",
                  "Category": "C2",
                  "Value": "V2"
                },
                {
                  "ExternalId": 3,
                  "Type": "FreeText",
                  "Name": "Prosjct Name 3",
                  "Category": "C3",
                  "Value": "V3"
                }
              ]
            }
          ]
		  }
		  ]

Then i got this error in TX

System.Data.CData.REST.RESTDataReader ExecuteDataReader(System.Data.CommandBehavior)
[500] Could not execute the specified command: Unable to retrieve columns for table [JSONData]. : "REST"."JSONData"

 


Thomas Lind
Community Manager
Forum|alt.badge.img+5
  • Community Manager
  • 1029 replies
  • December 16, 2024

Hi ​@aftabp 

Why are you converting it to JSON, if it is XML why not add it like that?

I thought your source returned in JSON, but if it returns data in XML, that is what you should use.

You are able to get the correct result of your SOAP Call in Postman, right?


Forum|alt.badge.img
  • Author
  • Problem Solver
  • 113 replies
  • December 16, 2024

@Thomas Lind 
May be i was unclear 

What i ment was that i converted the XML into JSON from POSTMAN to find out XPath and it turned out that i was right all along about the path. 
The issue is that when i define it in RSD-file, the fields turn out empty 

Here is my RSD

<api:script xmlns:api="http://apiscript.com/ns?v1" xmlns:xs="http://www.w3.org/2001/XMLSchema">

	<!-- See Column Definitions to specify column behavior and use XPaths to extract column values from XML. -->
	<api:info title="contract" desc="Generated schema file." xmlns:other="http://apiscript.com/ns?v1">
    <attr name="ExternalId"                 xs:type="string"  readonly="false"              other:xPath="/Envelope/Body/ExportDocsProjectWithTendersResponse/ExportDocsProjectWithTendersResult/ProjectBodies/ExternalId"                       />
	<attr name="Name"                       xs:type="string"  readonly="false"              other:xPath="/Envelope/Body/ExportDocsProjectWithTendersResponse/ExportDocsProjectWithTendersResult/ProjectBodies/ProjectBodies/Metadata/Name"                    />


	</api:info>

	<api:set attr="DataModel" 				value="RELATIONAL" />
	<api:set attr="URI" 					value="https://my.soap.endpoint.com/extention.asmx?WSDL" />
	<api:set attr="PushAttributes" 			value="true" />
	<api:set attr="EnablePaging" 			value="true" />
	<api:set attr="Header:Name#" 			value="SOAPAction"/>
	<api:set attr="Header:Value#" 			value="http://publicprocurement.com/ExportDocsProjectWithTenders"/>
	<api:set attr="XPath" 					value="/Envelope/Body/ExportDocsProjectWithTendersResponse" />

	<!-- The GET method corresponds to SELECT. Here you can override the default processing of the SELECT statement. The results of processing are pushed to the schema's output. See SELECT Execution for more information. -->
	<api:script method="GET">
		<api:set attr="method" value="POST"/>
		<api:set attr="ContentType" value="text/xml"/>
		<api:set attr="AcceptCharset" value="utf-8" />
		<api:set attr="data">
			<?xml version="1.0" encoding="utf-8"?>
				<soap:Envelope xmlns:soap="http://www.w3.org/2003/05/soap-envelope" xmlns:pub="http://publicprocurement.com">
		<soap:Header/>
		<soap:Body>
			<pub:ExportDocsProjectWithTenders>
					<pub:SystemUserID>123456</pub:SystemUserID>
					<pub:ProjectID>666666</pub:ProjectID>
				</pub:ExportDocsProjectWithTenders>
	</soap:Body>
	</soap:Envelope>
		</api:set>
		<api:call op="xmlproviderGet">
			<api:push/>    
		</api:call>
	</api:script>
</api:script>

This is how table look like:

 


Forum|alt.badge.img
  • Author
  • Problem Solver
  • 113 replies
  • Answer
  • January 6, 2025

I have managed to solve in two steps.
The first step - extract everything in one column with this code 

<api:script xmlns:api="http://apiscript.com/ns?v1" xmlns:xs="http://www.w3.org/2001/XMLSchema">

	<!-- See Column Definitions to specify column behavior and use XPaths to extract column values from XML. -->
	<api:info title="contract" desc="Generated schema file." xmlns:other="http://apiscript.com/ns?v1">
    <attr name="ExternalId"                 xs:type="string"  readonly="false"              other:xPath="/Envelope/Body/ExportDocsProjectWithTendersResponse/ExportDocsProjectWithTendersResult"/>
	</api:info>

	<api:set attr="DataModel" 				value="RELATIONAL" />
	<api:set attr="URI" 					value="https://my.soap.endpoint/services.asmx.?WSDL" />
	<api:set attr="PushAttributes" 			value="true" />
	<api:set attr="EnablePaging" 			value="true" />
	<api:set attr="Header:Name#" 			value="SOAPAction"/>
	<api:set attr="Header:Value#" 			value="http://publicprocurement.com/ExportDocsProjectWithTenders"/>
	<api:set attr="XPath" 					value="/Envelope/Body" />

	<!-- The GET method corresponds to SELECT. Here you can override the default processing of the SELECT statement. The results of processing are pushed to the schema's output. See SELECT Execution for more information. -->
	<api:script method="GET">
		<api:set attr="method" value="POST"/>
		<api:set attr="ContentType" value="text/xml"/>
		<api:set attr="AcceptCharset" value="utf-8" />
		<api:set attr="data">
			<?xml version="1.0" encoding="utf-8"?>
				<soap:Envelope xmlns:soap="http://www.w3.org/2003/05/soap-envelope" xmlns:pub="http://publicprocurement.com">
		<soap:Header/>
		<soap:Body>
			<pub:ExportDocsProjectWithTenders>
					<pub:SystemUserID>123456</pub:SystemUserID>
					<pub:ProjectID>6666</pub:ProjectID>
				</pub:ExportDocsProjectWithTenders>
	</soap:Body>
	</soap:Envelope>
		</api:set>
		<api:call op="xmlproviderGet">
			<api:push/>    
		</api:call>
	</api:script>
</api:script>

I got this in ny ODX

Then i created a Table insert in DSA. Here is the code

 

SELECT
    'Metadata' AS Source,
    JSON_VALUE(Metadata.[value], '$.Name') AS Name,
    JSON_VALUE(Metadata.[value], '$.Value') AS Value
FROM
    [myTable]
CROSS APPLY OPENJSON([myColumn], '$.ProjectBodies[0].Metadata') AS Metadata
UNION ALL
SELECT
    'DataFields' AS Source,
    JSON_VALUE(DataFields.[value], '$.Name') AS Name,
    JSON_VALUE(DataFields.[value], '$.Value') AS Value
FROM
    [myTable]
CROSS APPLY OPENJSON([myColumn], '$.ProjectBodies[0].DataFields') AS DataFields

As a result i am able to split the data up in columns

 

 


Thomas Lind
Community Manager
Forum|alt.badge.img+5
  • Community Manager
  • 1029 replies
  • January 7, 2025

Hi ​@aftabp 

Good that you found a way to handle it.

I did a similar thing for a very large CSV file that somehow got entirely put into one field.

I first convert it to XML and then back to SQL. So this could also be used if the content was XML instead of JSON.

My project below, it is quite old, but it will still work.

It contained multiple delimiters forcing me to use multiple table inserts for it to work.

 

Split1 is used to generate the Group Members All.

declare @text_IN nvarchar(max) = (SELECT [Filecontent] FROM [GroupMembers_BigFileTable])

declare @temptable table (csvRow nvarchar(max))    
declare @DelimiterInit nvarchar(4) = '
'
declare @Delimiter nvarchar(1) = '|'
declare @idx int       
declare @slice nvarchar(max)    

set @text_IN = REPLACE(@text_IN,@DelimiterInit,@Delimiter)

select @idx = 1       
    if len(@text_IN)<1 or @text_IN is null  return       

while @idx!= 0       
begin       
    set @idx = charindex(@Delimiter,@text_IN)       
    if @idx!=0       
        set @slice = left(@text_IN,@idx - 1)       
    else       
        set @slice = @text_IN 

    if(len(@slice)>0)  
        insert into @temptable(csvRow) values(@slice)       

    set @text_IN = right(@text_IN,len(@text_IN) - @idx)       
    if len(@text_IN) = 0 break       
end

;WITH XMLTable (xmlTag)
AS
(
    SELECT CONVERT(XML,'<CSV><champ>' + REPLACE([Data],',', '</champ><champ>') + '</champ></CSV>') AS xmlTag FROM (SELECT ROW_NUMBER()OVER(ORDER BY csvRow ASC) row,REPLACE(csvRow, '"', '') AS [Data] FROM @temptable) AS A
)

SELECT RTRIM(LTRIM(xmlTag.value('/CSV[1]/champ[1]','nvarchar(max)'))) AS [Name],
	RTRIM(LTRIM(xmlTag.value('/CSV[1]/champ[2]','nvarchar(max)'))) AS [Group Members]
FROM XMLTable

Then I use Group Members All and Split2 to generate the content for Security Groups.

WITH cte ([Name],Prod_Attributes)
AS
(
SELECT
    [Name],
    CONVERT(XML,'<Product><Attribute>' 
        + REPLACE(REPLACE(value,' (', '</Attribute><Attribute>'),')', '</Attribute><Attribute>') 
        + '</Attribute></Product>') AS Prod_Attributes
FROM [Group Members All]
CROSS APPLY STRING_SPLIT([Group Members], ';') 
)

SELECT 
    [Name] AS [Group Name],
    LTRIM(Prod_Attributes.value('/Product[1]/Attribute[1]','nvarchar(50)')) AS [Account Name],
    Prod_Attributes.value('/Product[1]/Attribute[2]','nvarchar(100)') AS [Accounts]
FROM cte
WHERE [Name] <> 'Name'
ORDER BY [Name],[Account Name]

Then I use Security Groups and Split2 to generate the content for Security Group Members.

WITH cte ([Group Name], [Account Name],Prod_Attributes)
AS
(
SELECT
	[Group Name],
    [Account Name],
    CONVERT(XML,'<Product><Attribute>' 
        + REPLACE(value,'/', '</Attribute><Attribute>') 
        + '</Attribute></Product>') AS Prod_Attributes
FROM [Security Groups]
CROSS APPLY STRING_SPLIT([Accounts], ';') 
)

SELECT 
    [Group Name],
    [Account Name] as [User Name],
    Prod_Attributes.value('/Product[1]/Attribute[1]','nvarchar(50)') AS [Account Type],
    Prod_Attributes.value('/Product[1]/Attribute[2]','nvarchar(50)') AS [City Code],
    Prod_Attributes.value('/Product[1]/Attribute[3]','nvarchar(50)') AS [Country Code],
    Prod_Attributes.value('/Product[1]/Attribute[4]','nvarchar(50)') AS [Company],
    Prod_Attributes.value('/Product[1]/Attribute[5]','nvarchar(50)') AS [Corporation]
FROM cte

 


Cookie policy

We use cookies to enhance and personalize your experience. If you accept you agree to our full cookie policy. Learn more about our cookies.

 
Cookie settings