User-Defined Types

Concepts

In order to create a UDT in U-SQL, there are two things that have to be created:

  • The UDT type itself
  • A formatter that can convert the UDT into a string and can transform a string into an instance of the UDT

Scenario

We will define a UDT - and its corresponding formatter - for a "Bits" UDT. This UDT is a simple bit array with a textual representation that looks like "1100101010".

UDT and formatter skeletons

First let's look at the fundamental structure of the Bits type.

// Skeleton of the UDT: the SqlUserDefinedType attribute names BitFormatter
// as the formatter type associated with Bits.
[SqlUserDefinedType(typeof(BitFormatter))]
public struct Bits
{
}

Notice that the only thing the Bits UDT requires is that it identifies its corresponding formatter.

And here is the skeleton of its corresponding formatter.

// Skeleton of the formatter for the Bits UDT. It implements IFormatter<Bits>,
// which consists of a Serialize/Deserialize pair; the "{ ... }" bodies are
// placeholders that are filled in by the full listing later in this document.
public class BitFormatter : Microsoft.Analytics.Interfaces.IFormatter<Bits>
{
    // Parameterless constructor.
    public BitFormatter()
    { ... }

    // Writes the given Bits instance out through the supplied column writer.
    public void Serialize(
        Bits instance,
        IColumnWriter writer,
        ISerializationContext context)
    { ... }

    // Reads from the supplied column reader and returns the resulting Bits instance.
    public Bits Deserialize(
         IColumnReader reader,
         ISerializationContext context)
    { ... }
}

Full code for the UDT


namespace MyUDTExamples
{
    /// <summary>
    /// A simple bit array UDT whose textual form is a string of '0' and '1'
    /// characters such as "1100101010", most significant bit first.
    /// </summary>
    /// <remarks>
    /// The SqlUserDefinedType attribute names BitFormatter as the formatter
    /// for this type. The text produced by <see cref="ToString"/> can be passed
    /// back to the constructor to obtain an equal bit pattern.
    /// </remarks>
    [SqlUserDefinedType(typeof(BitFormatter))]
    public struct Bits
    {
        // Index 0 holds the least significant bit, i.e. the LAST character of
        // the textual form. The field is null for default(Bits), which the
        // members below treat as an empty bit array.
        System.Collections.BitArray bitarray;

        /// <summary>
        /// Builds the bit array from its textual form.
        /// </summary>
        /// <param name="s">
        /// Bit string, most significant bit first. A character equal to '1'
        /// sets the bit; every other character clears it.
        /// </param>
        /// <exception cref="System.ArgumentNullException"><paramref name="s"/> is null.</exception>
        public Bits(string s)
        {
            if (s == null)
            {
                throw new System.ArgumentNullException("s");
            }

            this.bitarray = new System.Collections.BitArray(s.Length);
            for (int i = 0; i < s.Length; i++)
            {
                // Walk the text from its last character so that bit i has weight 2^i.
                this.bitarray[i] = s[s.Length - i - 1] == '1';
            }
        }

        /// <summary>
        /// Returns the numeric value of the bits, where bit i contributes 2^i.
        /// </summary>
        /// <returns>
        /// The sum of the weights of all set bits; 0 for an empty or default instance.
        /// </returns>
        public int ToInteger()
        {
            if (this.bitarray == null)
            {
                return 0;
            }

            int value = 0;
            for (int i = 0; i < this.bitarray.Length; i++)
            {
                if (this.bitarray[i])
                {
                    // Integer shift instead of Math.Pow: exact and free of
                    // floating point. NOTE(review): only bits 0..30 fit in a
                    // non-negative int; wider inputs cannot be represented.
                    value += 1 << i;
                }
            }
            return value;
        }

        /// <summary>
        /// Returns the textual form, most significant bit first.
        /// </summary>
        /// <returns>
        /// A string of '0'/'1' characters in the same order the constructor
        /// accepts; the empty string for an empty or default instance.
        /// </returns>
        public override string ToString()
        {
            if (this.bitarray == null)
            {
                return string.Empty;
            }

            var sb = new System.Text.StringBuilder(this.bitarray.Length);
            // Emit the highest index first: the constructor stored the last
            // character at index 0, so iterating upwards would reverse the text
            // and change the value after a serialize/deserialize round trip.
            for (int i = this.bitarray.Length - 1; i >= 0; i--)
            {
                sb.Append(this.bitarray[i] ? '1' : '0');
            }
            return sb.ToString();
        }
    }

}

Full code for the UDT's formatter

namespace MyUDTExamples
{

    /// <summary>
    /// Formatter for the Bits UDT: persists an instance as the text returned by
    /// its ToString() and rebuilds an instance by passing the stored text to the
    /// Bits(string) constructor.
    /// </summary>
    public class BitFormatter : Microsoft.Analytics.Interfaces.IFormatter<Bits>
    {
        /// <summary>Creates the formatter; there is no state to initialize.</summary>
        public BitFormatter()
        {
        }

        /// <summary>
        /// Writes the textual form of <paramref name="instance"/> to the
        /// writer's base stream.
        /// </summary>
        /// <param name="instance">The value to persist.</param>
        /// <param name="writer">Column writer whose BaseStream receives the text.</param>
        /// <param name="context">Not used by this implementation.</param>
        public void Serialize(
            Bits instance,
            IColumnWriter writer,
            ISerializationContext context)
        {
            // NOTE(review): disposing the StreamWriter also closes the wrapped
            // stream; confirm the runtime expects BaseStream to be closed here.
            using (var textWriter = new System.IO.StreamWriter(writer.BaseStream))
            {
                textWriter.Write(instance.ToString());
                textWriter.Flush();
            }
        }

        /// <summary>
        /// Reads all text from the reader's base stream and constructs a Bits
        /// value from it.
        /// </summary>
        /// <param name="reader">Column reader whose BaseStream supplies the text.</param>
        /// <param name="context">Not used by this implementation.</param>
        /// <returns>The Bits value built from the stored text.</returns>
        public Bits Deserialize(
            IColumnReader reader,
            ISerializationContext context)
        {
            using (var textReader = new System.IO.StreamReader(reader.BaseStream))
            {
                return new Bits(textReader.ReadToEnd());
            }
        }
    }

}

Using the UDT

// Build a small in-memory rowset. Every VALUES row has two fields, so the
// alias must name two columns; the names are case-sensitive and are the ones
// referenced by the second statement.
@products  = 
    SELECT * FROM 
        (VALUES
            ("Apple", "0000"),
            ("Cherry", "0001"),
            ("Banana", "1001"),
            ("Orange", "0110")
        ) AS 
              D( ProductCode, BitString );

// Keep both source columns and add a Bits UDT instance built from the bit string.
@products = 
    SELECT 
       ProductCode,
       BitString, 
       new MyUDTExamples.Bits(BitString) AS Bits
    FROM @products;

results matching ""

    No results matching ""