TIBCO Spotfire Community

Welcome to TIBCO Spotfire Community Sign in | Join | Help

Your daily commit from subversion using XSL and Spotfire Text Data Format (STDF)

A previous post showed how SQL Server bulk import of XML could be used to import Subversion commits. Sometimes ordinary files are simpler to use; in this article I’ll show how an XML file can be converted to a text file using an XSL transformation. Text files that contain tabular data are usually in comma- or tab-separated formats. However, since text files can be produced and parsed in arbitrary ways, it is sometimes hard to get a correct result, especially for string columns where a field value can include the field separator or span multiple lines. Sometimes a text qualifier (a quote or similar) can be used, but then the text qualifier must be escaped correctly. That's where Spotfire Text Data Format (STDF) can help to overcome some of these obstacles, because it defines how the data should be formatted. From Spotfire Technology Network: The Spotfire Text Data Format (STDF) is a tabular data format, the common file format for Spotfire products. It is strict, unforgiving, easy to parse efficiently, and particularly useful if data is both formatted and parsed by Spotfire products. Otherwise, a more flexible format might be preferable.

First extract the subversion commits using a command line client (can be downloaded from http://subversion.tigris.org/getting.html#binary-packages)

svn log <url> --xml -v > svnlog.xml

Write a small program that performs an XSLT transformation.

using System;
using System.Xml;
using System.Xml.Xsl;

namespace Something
{
    /// <summary>
    /// Command-line tool that applies an XSLT stylesheet to an XML file and
    /// writes the transformation result to an output file.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Entry point. Expects three arguments: the input XML file, the XSL
        /// stylesheet and the output file, in that order.
        /// </summary>
        /// <param name="args">
        /// <c>args[0]</c> = XML input path, <c>args[1]</c> = XSL stylesheet path,
        /// <c>args[2]</c> = output path.
        /// </param>
        static void Main(string[] args)
        {
            // Without this guard a missing argument surfaces as an unhandled
            // IndexOutOfRangeException instead of a readable message.
            if (args == null || args.Length < 3)
            {
                Console.WriteLine("Usage: xslt.exe <xmlFile> <xslFile> <outputFile>");
                Environment.ExitCode = 1;
                return;
            }

            string xmlFile = args[0];
            string xslFile = args[1];
            string outputFile = args[2];

            try
            {
                using (XmlReader src = XmlReader.Create(xmlFile))
                {
                    XslCompiledTransform xslt = new XslCompiledTransform();
                    xslt.Load(xslFile);

                    // The writer must be disposed: that is what flushes buffered
                    // output and releases the file handle. Leaving it open can
                    // produce an empty or truncated output file.
                    using (XmlWriter result = XmlWriter.Create(outputFile, xslt.OutputSettings))
                    {
                        // The resolver is used for documents the stylesheet loads at
                        // run time (the XSLT document() function).
                        xslt.Transform(src, null, result, new XmlUrlResolver());
                    }
                }
            }
            catch (Exception e)
            {
                // Report the failure (including the stack trace) on the console.
                Console.WriteLine("Exception: {0}", e.ToString());
            }
        }
    }
}

Create three XSL documents: one for general-purpose STDF formatting, one that extracts the log entries, and one that extracts the log paths.

stdf.formatters.xsl

<?xml version="1.0" encoding="utf-8"?>
<xsl:stylesheet version="1.0"
                xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
                xmlns:html="http://www.w3.org/1999/xhtml">
  
  <!--
    Formats an integer field.
    Parameter "text": the value to write.
    Output: the value unchanged, or the two characters \? when the value is empty.
  -->
  <xsl:template name="stdf_integerformatter">
    <xsl:param name="text" />

    <xsl:choose>
      <!-- Non-empty value: pass it through as is -->
      <xsl:when test="string-length($text) != 0">
        <xsl:value-of select="$text"/>
      </xsl:when>
      <!-- Empty value: emit the \? marker instead -->
      <xsl:otherwise>
        <xsl:text>\?</xsl:text>
      </xsl:otherwise>
    </xsl:choose>
  </xsl:template>

  <!--
    Formats a date/time field.
    Parameter "text": the timestamp to write.
    Output: the first 23 characters of the timestamp, with every 'T' and 'Z' replaced by a space.
  -->
  <xsl:template name="stdf_datetimeformatter">
    <xsl:param name="text" />
    <!--
      translate() swaps the unsupported 'T' and 'Z' characters for spaces.
      Keeping 23 characters drops the trailing digits of the fractional seconds
      (only 3 are supported); this assumes an input such as
      2008-12-15T06:27:00.123456Z, giving 2008-12-15 06:27:00.123.
    -->
    <xsl:value-of select="substring(translate($text, 'TZ', '  '), 1, 23)"/>
  </xsl:template>

  <!--
    Formats a string field by escaping the characters that would otherwise
    break the semicolon-separated, one-row-per-line layout.
    Parameter "text": the value to write.
    Output: the value with these replacements applied, in this order:
      backslash        becomes \\
      semicolon        becomes \s
      tab              becomes \t
      line feed        becomes \n
      carriage return  becomes \r
    Backslash is handled first so that the backslashes introduced by the
    later replacements are not escaped a second time.
  -->
  <xsl:template name="stdf_stringformatter">
    <xsl:param name="text"/>

    <!--    backslash -->
    <xsl:variable name="a1">
      <xsl:call-template name="replace-string">
        <xsl:with-param name="text" select="$text"/>
        <xsl:with-param name="from" select="'\'"/>
        <xsl:with-param name="to" select="'\\'"/>
      </xsl:call-template>
    </xsl:variable>

    <!--    semicolon (the field separator) -->
    <xsl:variable name="a2">
      <xsl:call-template name="replace-string">
        <xsl:with-param name="text" select="$a1"/>
        <xsl:with-param name="from" select="';'"/>
        <xsl:with-param name="to" select="'\s'"/>
      </xsl:call-template>
    </xsl:variable>

    <!--    tab -->
    <xsl:variable name="a3">
      <xsl:call-template name="replace-string">
        <xsl:with-param name="text" select="$a2"/>
        <xsl:with-param name="from" select="'&#9;'"/>
        <xsl:with-param name="to" select="'\t'"/>
      </xsl:call-template>
    </xsl:variable>

    <!--    line feed (character 10) is the \n escape, not \r -->
    <xsl:variable name="a4">
      <xsl:call-template name="replace-string">
        <xsl:with-param name="text" select="$a3"/>
        <xsl:with-param name="from" select="'&#10;'"/>
        <xsl:with-param name="to" select="'\n'"/>
      </xsl:call-template>
    </xsl:variable>

    <!--    carriage return (character 13) is the \r escape, not \n -->
    <xsl:variable name="a5">
      <xsl:call-template name="replace-string">
        <xsl:with-param name="text" select="$a4"/>
        <xsl:with-param name="from" select="'&#13;'"/>
        <xsl:with-param name="to" select="'\r'"/>
      </xsl:call-template>
    </xsl:variable>

    <xsl:value-of select="$a5"/>
  </xsl:template>

  <!--
    Reusable replace-string function.
    Parameter "text": the string to search in.
    Parameter "from": the substring to look for.
    Parameter "to":   the replacement for every occurrence of "from".
    Output: "text" with all occurrences of "from" replaced by "to". The
    template recurses on the remainder after each match, so text inserted
    by "to" is never searched again. An empty "from" leaves "text" unchanged.
  -->
  <xsl:template name="replace-string">
    <xsl:param name="text"/>
    <xsl:param name="from"/>
    <xsl:param name="to"/>

    <xsl:choose>
      <!--
        The empty-string guard is required: contains() is true for an empty
        search string and substring-after() then returns the whole text, so
        the recursion would never terminate.
      -->
      <xsl:when test="string($from) != '' and contains($text, $from)">
        <xsl:value-of select="substring-before($text, $from)"/>
        <xsl:value-of select="$to"/>
        <xsl:call-template name="replace-string">
          <xsl:with-param name="text" select="substring-after($text, $from)"/>
          <xsl:with-param name="from" select="$from"/>
          <xsl:with-param name="to" select="$to"/>
        </xsl:call-template>
      </xsl:when>
      <xsl:otherwise>
        <xsl:value-of select="$text"/>
      </xsl:otherwise>
    </xsl:choose>
  </xsl:template>
</xsl:stylesheet>

svn.logpaths2stdf.xsl

<?xml version="1.0" encoding="utf-8"?>
<xsl:stylesheet version="1.0"
                xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
                xmlns:html="http://www.w3.org/1999/xhtml">
  <xsl:include href="stdf.formatters.xsl"/>
  <xsl:output method="text" indent="no" encoding="UTF-8"/>
  <!--
    Writes one row per changed path of every log entry:
    revision, action, repository path, copy-from path and copy-from revision.
  -->
  <xsl:template match="/">
    <!-- File preamble: file type line, engine line, column names, column types -->
    <xsl:text>\! filetype=Spotfire.DataFormat.Text; version=1.0;
\* engine=xml2stdf.xsl;
Revision;Action;Repository Path;Copy From Path;Copy From Revision;
Integer;String;String;String;Integer;
</xsl:text>
    <!-- Visit every path in document order; the owning log entry is two levels up -->
    <xsl:for-each select="log/logentry/paths/path">
      <!-- Revision (taken from the enclosing logentry) -->
      <xsl:call-template name="stdf_integerformatter">
        <xsl:with-param name="text" select="../../@revision"/>
      </xsl:call-template>
      <xsl:text>;</xsl:text>
      <!-- Action -->
      <xsl:call-template name="stdf_stringformatter">
        <xsl:with-param name="text" select="@action"/>
      </xsl:call-template>
      <xsl:text>;</xsl:text>
      <!-- Repository Path -->
      <xsl:call-template name="stdf_stringformatter">
        <xsl:with-param name="text" select="text()"/>
      </xsl:call-template>
      <xsl:text>;</xsl:text>
      <!-- Copy From Path -->
      <xsl:call-template name="stdf_stringformatter">
        <xsl:with-param name="text" select="@copyfrom-path"/>
      </xsl:call-template>
      <xsl:text>;</xsl:text>
      <!-- Copy From Revision -->
      <xsl:call-template name="stdf_integerformatter">
        <xsl:with-param name="text" select="@copyfrom-rev"/>
      </xsl:call-template>
      <!-- Final field separator followed by the end-of-row line feed -->
      <xsl:text>;&#10;</xsl:text>
    </xsl:for-each>
  </xsl:template>
</xsl:stylesheet>

 

svn.logentries2stdf.xsl

<?xml version="1.0" encoding="utf-8"?>
<xsl:stylesheet version="1.0"
                xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
                xmlns:html="http://www.w3.org/1999/xhtml">
  <xsl:include href="stdf.formatters.xsl"/>
  <xsl:output method="text" indent="no" encoding="UTF-8"/>
    <!--
      Writes one row per log entry: revision, author, date and commit message.
    -->
    <xsl:template match="/">
      <!-- File preamble: file type line, engine line, column names, column types -->
      <xsl:text>\! filetype=Spotfire.DataFormat.Text; version=1.0;
\* engine=xml2stdf.xsl;
Revision;Author;Date;Message;
Integer;String;DateTime;String;
</xsl:text>
      <xsl:for-each select="log/logentry">
        <!-- Revision -->
        <xsl:call-template name="stdf_integerformatter">
          <xsl:with-param name="text" select="@revision"/>
        </xsl:call-template>
        <xsl:text>;</xsl:text>
        <!-- Author -->
        <xsl:call-template name="stdf_stringformatter">
          <xsl:with-param name="text" select="author"/>
        </xsl:call-template>
        <xsl:text>;</xsl:text>
        <!-- Date -->
        <xsl:call-template name="stdf_datetimeformatter">
          <xsl:with-param name="text" select="date"/>
        </xsl:call-template>
        <xsl:text>;</xsl:text>
        <!-- Message -->
        <xsl:call-template name="stdf_stringformatter">
          <xsl:with-param name="text" select="msg"/>
        </xsl:call-template>
        <!-- Final field separator followed by the end-of-row line feed -->
        <xsl:text>;&#10;</xsl:text>
      </xsl:for-each>
    </xsl:template>
</xsl:stylesheet>

Command file

Create a command file that runs the .NET program to produce the STDF files:

xslt.exe svnlog.xml svn.logentries2stdf.xsl svn.logentries.txt

xslt.exe svnlog.xml svn.logpaths2stdf.xsl svn.logpaths.txt

Now these files can be imported into TIBCO Spotfire for further analysis.

Published Dec 15 2008, 06:27 AM by Daniel Vulcan
Filed under: , ,
Rating:
Comments

About Daniel Vulcan

Daniel Vulcan is a developer with the TIBCO Spotfire and the TIBCO Spotfire Web Player teams.