Commits

Anonymous committed a010cad

init commit

Files changed (52)

File libML/CsLibrary/CsLibrary.csproj

+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <PropertyGroup>
+    <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
+    <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
+    <ProductVersion>8.0.30703</ProductVersion>
+    <SchemaVersion>2.0</SchemaVersion>
+    <ProjectGuid>{321ADDA5-CADF-4184-8BE8-12789D21C621}</ProjectGuid>
+    <OutputType>Library</OutputType>
+    <AppDesignerFolder>Properties</AppDesignerFolder>
+    <RootNamespace>CsLibrary</RootNamespace>
+    <AssemblyName>CsLibrary</AssemblyName>
+    <TargetFrameworkVersion>v4.0</TargetFrameworkVersion>
+    <FileAlignment>512</FileAlignment>
+  </PropertyGroup>
+  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
+    <DebugSymbols>true</DebugSymbols>
+    <DebugType>full</DebugType>
+    <Optimize>false</Optimize>
+    <OutputPath>bin\Debug\</OutputPath>
+    <DefineConstants>DEBUG;TRACE</DefineConstants>
+    <ErrorReport>prompt</ErrorReport>
+    <WarningLevel>4</WarningLevel>
+  </PropertyGroup>
+  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
+    <DebugType>pdbonly</DebugType>
+    <Optimize>true</Optimize>
+    <OutputPath>bin\Release\</OutputPath>
+    <DefineConstants>TRACE</DefineConstants>
+    <ErrorReport>prompt</ErrorReport>
+    <WarningLevel>4</WarningLevel>
+  </PropertyGroup>
+  <ItemGroup>
+    <Reference Include="System" />
+    <Reference Include="System.Core" />
+    <Reference Include="System.Windows.Forms" />
+    <Reference Include="System.Xml.Linq" />
+    <Reference Include="System.Data.DataSetExtensions" />
+    <Reference Include="Microsoft.CSharp" />
+    <Reference Include="System.Data" />
+    <Reference Include="System.Xml" />
+  </ItemGroup>
+  <ItemGroup>
+    <Compile Include="Stemmer.cs" />
+  </ItemGroup>
+  <ItemGroup>
+    <Folder Include="Properties\" />
+  </ItemGroup>
+  <Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
+  <!-- To modify your build process, add your task inside one of the targets below and uncomment it. 
+       Other similar extension points exist, see Microsoft.Common.targets.
+  <Target Name="BeforeBuild">
+  </Target>
+  <Target Name="AfterBuild">
+  </Target>
+  -->
+</Project>

File libML/CsLibrary/Properties/.svn/entries

+10
+
+dir
+17
+svn+ssh://yinz@lcpu2.cse.ust.hk/csproject/cf/svn/libML/libML/CsLibrary/Properties
+svn+ssh://yinz@lcpu2.cse.ust.hk/csproject/cf/svn/libML
+
+
+
+2010-04-25T16:50:18.844399Z
+8
+yinz
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+9452a81e-f76a-49cc-929b-959964f810e3
+

File libML/CsLibrary/Stemmer.cs

+/*
+ * this code is from http://tartarus.org/~martin/PorterStemmer/
+ */
+
+using System;
+using System.IO;
+using System.Reflection;
+using System.Runtime.InteropServices;
+using System.Windows.Forms;
+
+[assembly: AssemblyTitle("")]
+[assembly: AssemblyDescription("Porter stemmer in CSharp")]
+[assembly: AssemblyConfiguration("")]
+[assembly: AssemblyCompany("")]
+[assembly: AssemblyProduct("")]
+[assembly: AssemblyCopyright("")]
+[assembly: AssemblyTrademark("")]
+[assembly: AssemblyCulture("")]
+[assembly: AssemblyVersion("1.4")]
+[assembly: AssemblyDelaySign(false)]
+[assembly: AssemblyKeyName("")]
+
+namespace libML.Text
+{
+
+    /*
+
+       Porter stemmer in CSharp, based on the Java port. The original paper is in
+
+           Porter, 1980, An algorithm for suffix stripping, Program, Vol. 14,
+           no. 3, pp 130-137,
+
+       See also http://www.tartarus.org/~martin/PorterStemmer
+
+       History:
+
+       Release 1
+
+       Bug 1 (reported by Gonzalo Parra 16/10/99) fixed as marked below.
+       The words 'aed', 'eed', 'oed' leave k at 'a' for step 3, and b[k-1]
+       is then outside the bounds of b.
+
+       Release 2
+
+       Similarly,
+
+       Bug 2 (reported by Steve Dyrdahl 22/2/00) fixed as marked below.
+       'ion' by itself leaves j = -1 in the test for 'ion' in step 5, and
+       b[j] is then outside the bounds of b.
+
+       Release 3
+
+       Considerably revised 4/9/00 in the light of many helpful suggestions
+       from Brian Goetz of Quiotix Corporation (brian@quiotix.com).
+
+       Release 4
+	   
+       This revision allows the Porter Stemmer Algorithm to be exported via the
+       .NET Framework. To facilitate its use via .NET, the following commands need to be
+       issued to the operating system to register the component so that it can be
+       imported into .Net compatible languages, such as Delphi.NET, Visual Basic.NET,
+       Visual C++.NET, etc. 
+	   
+       1. Create a strong name:
+            sn -k Keyfile.snk  
+       2. Compile the C# class, which creates an assembly PorterStemmerAlgorithm.dll
+            csc /t:library PorterStemmerAlgorithm.cs
+       3. Register the dll with the Windows Registry 
+          and so expose the interface to COM Clients via the type library 
+          ( PorterStemmerAlgorithm.tlb will be created)
+            regasm /tlb PorterStemmerAlgorithm.dll
+       4. Load the component in the Global Assembly Cache
+            gacutil -i PorterStemmerAlgorithm.dll
+		
+       Note: You must have the .Net Studio installed.
+	   
+       Once this process is performed you should be able to import the class 
+       via the appropriate mechanism in the language that you are using.
+	   
+       e.g. in Delphi 7 .NET this is simply a matter of selecting:
+            Project | Import Type Library
+       and then selecting "Porter stemmer in CSharp Version 1.4"!
+	   
+       Cheers Leif
+	
+    */
+
+    /**
+      * Stemmer, implementing the Porter Stemming Algorithm
+      *
+      * The Stemmer class transforms a word into its root form.  The input
+      * word can be provided a character at time (by calling add()), or at once
+      * by calling one of the various stem(something) methods.
+      */
+
+    public interface StemmerInterface
+    {
+        string stemTerm(string s);
+    }
+
+
+
+
+    [ClassInterface(ClassInterfaceType.None)]
+    public class PorterStemmer : StemmerInterface
+    {
+        private char[] b;
+        private int i,     /* offset into b */
+            i_end, /* offset to end of stemmed word */
+            j, k;
+        private static int INC = 200;
+        /* unit of size whereby b is increased */
+
+        public PorterStemmer()
+        {
+            b = new char[INC];
+            i = 0;
+            i_end = 0;
+        }
+
+        /* Implementation of the .NET interface - added as part of release 4 (Leif) */
+        public string stemTerm(string s)
+        {
+            setTerm(s);
+            stem();
+            return getTerm();
+        }
+
+        /*
+            SetTerm and GetTerm have simply been added to ease the
+            interface with other languages. They replace the add functions
+            and the ToString function. This was done because the original functions
+            stored all stemmed words (each time a new word was added, the buffer
+            was re-copied, making it quite slow). Now the class interface
+            simply accepts a term and returns its stem,
+            instead of storing all stemmed words.
+            (Leif)
+        */
+
+
+
+        void setTerm(string s)
+        {
+            i = s.Length;
+            char[] new_b = new char[i];
+            for (int c = 0; c < i; c++)
+                new_b[c] = s[c];
+
+            b = new_b;
+
+        }
+
+        public string getTerm()
+        {
+            return new String(b, 0, i_end);
+        }
+
+
+        /* Old interface to the class - left for posterity. However, it is not
+         * used when accessing the class via .NET (Leif)*/
+
+        /**
+         * Add a character to the word being stemmed.  When you are finished
+         * adding characters, you can call stem(void) to stem the word.
+         */
+
+        public void add(char ch)
+        {
+            if (i == b.Length)
+            {
+                char[] new_b = new char[i + INC];
+                for (int c = 0; c < i; c++)
+                    new_b[c] = b[c];
+                b = new_b;
+            }
+            b[i++] = ch;
+        }
+
+
+        /** Adds wLen characters to the word being stemmed contained in a portion
+         * of a char[] array. This is like repeated calls of add(char ch), but
+         * faster.
+         */
+
+        public void add(char[] w, int wLen)
+        {
+            if (i + wLen >= b.Length)
+            {
+                char[] new_b = new char[i + wLen + INC];
+                for (int c = 0; c < i; c++)
+                    new_b[c] = b[c];
+                b = new_b;
+            }
+            for (int c = 0; c < wLen; c++)
+                b[i++] = w[c];
+        }
+
+        /**
+         * After a word has been stemmed, it can be retrieved by ToString(),
+         * or a reference to the internal buffer can be retrieved by getResultBuffer()
+         * and getResultLength() (which is generally more efficient).
+         */
+        public override string ToString()
+        {
+            return new String(b, 0, i_end);
+        }
+
+        /**
+         * Returns the length of the word resulting from the stemming process.
+         */
+        public int getResultLength()
+        {
+            return i_end;
+        }
+
+        /**
+         * Returns a reference to a character buffer containing the results of
+         * the stemming process.  You also need to consult getResultLength()
+         * to determine the length of the result.
+         */
+        public char[] getResultBuffer()
+        {
+            return b;
+        }
+
+        /* cons(i) is true <=> b[i] is a consonant. */
+        private bool cons(int i)
+        {
+            switch (b[i])
+            {
+                case 'a':
+                case 'e':
+                case 'i':
+                case 'o':
+                case 'u': return false;
+                case 'y': return (i == 0) ? true : !cons(i - 1);
+                default: return true;
+            }
+        }
+
+        /* m() measures the number of consonant sequences between 0 and j. if c is
+           a consonant sequence and v a vowel sequence, and <..> indicates arbitrary
+           presence,
+
+              <c><v>       gives 0
+              <c>vc<v>     gives 1
+              <c>vcvc<v>   gives 2
+              <c>vcvcvc<v> gives 3
+              ....
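+
+              For example (from Porter's 1980 paper): TR, EE, TREE, Y, BY
+              give 0; TROUBLE, OATS, TREES, IVY give 1; TROUBLES, PRIVATE,
+              OATEN, ORRERY give 2.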
+        */
+        private int m()
+        {
+            int n = 0;
+            int i = 0;
+            while (true)
+            {
+                if (i > j) return n;
+                if (!cons(i)) break; i++;
+            }
+            i++;
+            while (true)
+            {
+                while (true)
+                {
+                    if (i > j) return n;
+                    if (cons(i)) break;
+                    i++;
+                }
+                i++;
+                n++;
+                while (true)
+                {
+                    if (i > j) return n;
+                    if (!cons(i)) break;
+                    i++;
+                }
+                i++;
+            }
+        }
+
+        /* vowelinstem() is true <=> 0,...j contains a vowel */
+        private bool vowelinstem()
+        {
+            int i;
+            for (i = 0; i <= j; i++)
+                if (!cons(i))
+                    return true;
+            return false;
+        }
+
+        /* doublec(j) is true <=> j,(j-1) contain a double consonant. */
+        private bool doublec(int j)
+        {
+            if (j < 1)
+                return false;
+            if (b[j] != b[j - 1])
+                return false;
+            return cons(j);
+        }
+
+        /* cvc(i) is true <=> i-2,i-1,i has the form consonant - vowel - consonant
+           and also if the second c is not w,x or y. this is used when trying to
+           restore an e at the end of a short word. e.g.
+
+              cav(e), lov(e), hop(e), crim(e), but
+              snow, box, tray.
+
+        */
+        private bool cvc(int i)
+        {
+            if (i < 2 || !cons(i) || cons(i - 1) || !cons(i - 2))
+                return false;
+            int ch = b[i];
+            if (ch == 'w' || ch == 'x' || ch == 'y')
+                return false;
+            return true;
+        }
+
+        private bool ends(String s)
+        {
+            int l = s.Length;
+            int o = k - l + 1;
+            if (o < 0)
+                return false;
+            char[] sc = s.ToCharArray();
+            for (int i = 0; i < l; i++)
+                if (b[o + i] != sc[i])
+                    return false;
+            j = k - l;
+            return true;
+        }
+
+        /* setto(s) sets (j+1),...k to the characters in the string s, readjusting
+           k. */
+        private void setto(String s)
+        {
+            int l = s.Length;
+            int o = j + 1;
+            char[] sc = s.ToCharArray();
+            for (int i = 0; i < l; i++)
+                b[o + i] = sc[i];
+            k = j + l;
+        }
+
+        /* r(s) is used further down. */
+        private void r(String s)
+        {
+            if (m() > 0)
+                setto(s);
+        }
+
+        /* step1() gets rid of plurals and -ed or -ing. e.g.
+               caresses  ->  caress
+               ponies    ->  poni
+               ties      ->  ti
+               caress    ->  caress
+               cats      ->  cat
+
+               feed      ->  feed
+               agreed    ->  agree
+               disabled  ->  disable
+
+               matting   ->  mat
+               mating    ->  mate
+               meeting   ->  meet
+               milling   ->  mill
+               messing   ->  mess
+
+               meetings  ->  meet
+
+        */
+
+        private void step1()
+        {
+            if (b[k] == 's')
+            {
+                if (ends("sses"))
+                    k -= 2;
+                else if (ends("ies"))
+                    setto("i");
+                else if (b[k - 1] != 's')
+                    k--;
+            }
+            if (ends("eed"))
+            {
+                if (m() > 0)
+                    k--;
+            }
+            else if ((ends("ed") || ends("ing")) && vowelinstem())
+            {
+                k = j;
+                if (ends("at"))
+                    setto("ate");
+                else if (ends("bl"))
+                    setto("ble");
+                else if (ends("iz"))
+                    setto("ize");
+                else if (doublec(k))
+                {
+                    k--;
+                    int ch = b[k];
+                    if (ch == 'l' || ch == 's' || ch == 'z')
+                        k++;
+                }
+                else if (m() == 1 && cvc(k)) setto("e");
+            }
+        }
+
+        /* step2() turns terminal y to i when there is another vowel in the stem. */
+        private void step2()
+        {
+            if (ends("y") && vowelinstem())
+                b[k] = 'i';
+        }
+
+        /* step3() maps double suffices to single ones. so -ization ( = -ize plus
+           -ation) maps to -ize etc. note that the string before the suffix must give
+           m() > 0. */
+        private void step3()
+        {
+            if (k == 0)
+                return;
+
+            /* For Bug 1 */
+            switch (b[k - 1])
+            {
+                case 'a':
+                    if (ends("ational")) { r("ate"); break; }
+                    if (ends("tional")) { r("tion"); break; }
+                    break;
+                case 'c':
+                    if (ends("enci")) { r("ence"); break; }
+                    if (ends("anci")) { r("ance"); break; }
+                    break;
+                case 'e':
+                    if (ends("izer")) { r("ize"); break; }
+                    break;
+                case 'l':
+                    if (ends("bli")) { r("ble"); break; }
+                    if (ends("alli")) { r("al"); break; }
+                    if (ends("entli")) { r("ent"); break; }
+                    if (ends("eli")) { r("e"); break; }
+                    if (ends("ousli")) { r("ous"); break; }
+                    break;
+                case 'o':
+                    if (ends("ization")) { r("ize"); break; }
+                    if (ends("ation")) { r("ate"); break; }
+                    if (ends("ator")) { r("ate"); break; }
+                    break;
+                case 's':
+                    if (ends("alism")) { r("al"); break; }
+                    if (ends("iveness")) { r("ive"); break; }
+                    if (ends("fulness")) { r("ful"); break; }
+                    if (ends("ousness")) { r("ous"); break; }
+                    break;
+                case 't':
+                    if (ends("aliti")) { r("al"); break; }
+                    if (ends("iviti")) { r("ive"); break; }
+                    if (ends("biliti")) { r("ble"); break; }
+                    break;
+                case 'g':
+                    if (ends("logi")) { r("log"); break; }
+                    break;
+                default:
+                    break;
+            }
+        }
+
+        /* step4() deals with -ic-, -full, -ness etc. similar strategy to step3. */
+        private void step4()
+        {
+            switch (b[k])
+            {
+                case 'e':
+                    if (ends("icate")) { r("ic"); break; }
+                    if (ends("ative")) { r(""); break; }
+                    if (ends("alize")) { r("al"); break; }
+                    break;
+                case 'i':
+                    if (ends("iciti")) { r("ic"); break; }
+                    break;
+                case 'l':
+                    if (ends("ical")) { r("ic"); break; }
+                    if (ends("ful")) { r(""); break; }
+                    break;
+                case 's':
+                    if (ends("ness")) { r(""); break; }
+                    break;
+            }
+        }
+
+        /* step5() takes off -ant, -ence etc., in context <c>vcvc<v>. */
+        private void step5()
+        {
+            if (k == 0)
+                return;
+
+            /* for Bug 1 */
+            switch (b[k - 1])
+            {
+                case 'a':
+                    if (ends("al")) break; return;
+                case 'c':
+                    if (ends("ance")) break;
+                    if (ends("ence")) break; return;
+                case 'e':
+                    if (ends("er")) break; return;
+                case 'i':
+                    if (ends("ic")) break; return;
+                case 'l':
+                    if (ends("able")) break;
+                    if (ends("ible")) break; return;
+                case 'n':
+                    if (ends("ant")) break;
+                    if (ends("ement")) break;
+                    if (ends("ment")) break;
+                    /* element etc. not stripped before the m */
+                    if (ends("ent")) break; return;
+                case 'o':
+                    if (ends("ion") && j >= 0 && (b[j] == 's' || b[j] == 't')) break;
+                    /* j >= 0 fixes Bug 2 */
+                    if (ends("ou")) break; return;
+                /* takes care of -ous */
+                case 's':
+                    if (ends("ism")) break; return;
+                case 't':
+                    if (ends("ate")) break;
+                    if (ends("iti")) break; return;
+                case 'u':
+                    if (ends("ous")) break; return;
+                case 'v':
+                    if (ends("ive")) break; return;
+                case 'z':
+                    if (ends("ize")) break; return;
+                default:
+                    return;
+            }
+            if (m() > 1)
+                k = j;
+        }
+
+        /* step6() removes a final -e if m() > 1. */
+        private void step6()
+        {
+            j = k;
+
+            if (b[k] == 'e')
+            {
+                int a = m();
+                if (a > 1 || a == 1 && !cvc(k - 1))
+                    k--;
+            }
+            if (b[k] == 'l' && doublec(k) && m() > 1)
+                k--;
+        }
+
+        /** Stem the word placed into the Stemmer buffer through calls to add().
+         * The result can then be retrieved with
+         * getResultLength()/getResultBuffer() or ToString().
+         */
+        public void stem()
+        {
+            k = i - 1;
+            if (k > 1)
+            {
+                step1();
+                step2();
+                step3();
+                step4();
+                step5();
+                step6();
+            }
+            i_end = k + 1;
+            i = 0;
+        }
+
+    }
+}
+
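
A minimal usage sketch for the stemmer above, in F# (illustrative only; as in the .fsx scripts later in this commit, it assumes CsLibrary.dll has been built to its Debug output path):

    #r @"D:\WORK\libML\libML\CsLibrary\bin\Debug\CsLibrary.dll"
    open libML.Text
    let stemmer = PorterStemmer()
    ["running"; "ponies"; "caresses"] |> List.map (fun w -> stemmer.stemTerm w)
    // expected, per the comments in step1 above: ["run"; "poni"; "caress"]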

File libML/libML.sln

+
+Microsoft Visual Studio Solution File, Format Version 11.00
+# Visual Studio 2010
+Project("{F2A71F9B-5D33-465A-A702-920D77279786}") = "libML", "libml\libML.fsproj", "{2A755FAD-14D5-464B-9C56-6DCBC8D33119}"
+EndProject
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "CsLibrary", "CsLibrary\CsLibrary.csproj", "{321ADDA5-CADF-4184-8BE8-12789D21C621}"
+EndProject
+Global
+	GlobalSection(SolutionConfigurationPlatforms) = preSolution
+		Debug|Any CPU = Debug|Any CPU
+		Debug|Mixed Platforms = Debug|Mixed Platforms
+		Debug|Win32 = Debug|Win32
+		Debug|x64 = Debug|x64
+		Debug|x86 = Debug|x86
+		Release|Any CPU = Release|Any CPU
+		Release|Mixed Platforms = Release|Mixed Platforms
+		Release|Win32 = Release|Win32
+		Release|x64 = Release|x64
+		Release|x86 = Release|x86
+	EndGlobalSection
+	GlobalSection(ProjectConfigurationPlatforms) = postSolution
+		{2A755FAD-14D5-464B-9C56-6DCBC8D33119}.Debug|Any CPU.ActiveCfg = Debug|x86
+		{2A755FAD-14D5-464B-9C56-6DCBC8D33119}.Debug|Mixed Platforms.ActiveCfg = Debug|x86
+		{2A755FAD-14D5-464B-9C56-6DCBC8D33119}.Debug|Mixed Platforms.Build.0 = Debug|x86
+		{2A755FAD-14D5-464B-9C56-6DCBC8D33119}.Debug|Win32.ActiveCfg = Debug|x86
+		{2A755FAD-14D5-464B-9C56-6DCBC8D33119}.Debug|x64.ActiveCfg = Debug|x86
+		{2A755FAD-14D5-464B-9C56-6DCBC8D33119}.Debug|x86.ActiveCfg = Debug|x86
+		{2A755FAD-14D5-464B-9C56-6DCBC8D33119}.Debug|x86.Build.0 = Debug|x86
+		{2A755FAD-14D5-464B-9C56-6DCBC8D33119}.Release|Any CPU.ActiveCfg = Release|x86
+		{2A755FAD-14D5-464B-9C56-6DCBC8D33119}.Release|Mixed Platforms.ActiveCfg = Release|x86
+		{2A755FAD-14D5-464B-9C56-6DCBC8D33119}.Release|Mixed Platforms.Build.0 = Release|x86
+		{2A755FAD-14D5-464B-9C56-6DCBC8D33119}.Release|Win32.ActiveCfg = Release|x86
+		{2A755FAD-14D5-464B-9C56-6DCBC8D33119}.Release|x64.ActiveCfg = Release|x86
+		{2A755FAD-14D5-464B-9C56-6DCBC8D33119}.Release|x86.ActiveCfg = Release|x86
+		{2A755FAD-14D5-464B-9C56-6DCBC8D33119}.Release|x86.Build.0 = Release|x86
+		{321ADDA5-CADF-4184-8BE8-12789D21C621}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+		{321ADDA5-CADF-4184-8BE8-12789D21C621}.Debug|Any CPU.Build.0 = Debug|Any CPU
+		{321ADDA5-CADF-4184-8BE8-12789D21C621}.Debug|Mixed Platforms.ActiveCfg = Debug|Any CPU
+		{321ADDA5-CADF-4184-8BE8-12789D21C621}.Debug|Mixed Platforms.Build.0 = Debug|Any CPU
+		{321ADDA5-CADF-4184-8BE8-12789D21C621}.Debug|Win32.ActiveCfg = Debug|Any CPU
+		{321ADDA5-CADF-4184-8BE8-12789D21C621}.Debug|x64.ActiveCfg = Debug|Any CPU
+		{321ADDA5-CADF-4184-8BE8-12789D21C621}.Debug|x86.ActiveCfg = Debug|Any CPU
+		{321ADDA5-CADF-4184-8BE8-12789D21C621}.Release|Any CPU.ActiveCfg = Release|Any CPU
+		{321ADDA5-CADF-4184-8BE8-12789D21C621}.Release|Any CPU.Build.0 = Release|Any CPU
+		{321ADDA5-CADF-4184-8BE8-12789D21C621}.Release|Mixed Platforms.ActiveCfg = Release|Any CPU
+		{321ADDA5-CADF-4184-8BE8-12789D21C621}.Release|Mixed Platforms.Build.0 = Release|Any CPU
+		{321ADDA5-CADF-4184-8BE8-12789D21C621}.Release|Win32.ActiveCfg = Release|Any CPU
+		{321ADDA5-CADF-4184-8BE8-12789D21C621}.Release|x64.ActiveCfg = Release|Any CPU
+		{321ADDA5-CADF-4184-8BE8-12789D21C621}.Release|x86.ActiveCfg = Release|Any CPU
+	EndGlobalSection
+	GlobalSection(SolutionProperties) = preSolution
+		HideSolutionNode = FALSE
+	EndGlobalSection
+EndGlobal

File libML/libml/Script1.fsx

+
+#r "FSharp.PowerPack.dll"
+#r @"FSharp.PowerPack.Compatibility"
+#r @"D:\WORK\MyFSharpExamples\Small\..\..\..\Business\references\books\F# and FP\FSharp-1.9.7.8\source\fsppack\FSharp.PowerPack\math\lapack\obj\Release\FSharp.PowerPack.Math.Providers.dll"
+#load "libMLcommon.fs"
+#load "utility.fs"
+#load "dataset.fs"
+#load "optimization.fs"
+#load "classification.fs"
+
+open Microsoft.FSharp.Math
+open Microsoft.FSharp.Math.Experimental
+open libML
+open libML.Utility
+
+let isSucc = Experimental.LinearAlgebra.Lapack.Start()
+printfn "service = %A" (if isSucc then "Netlib lapack" else "Managed code")
+let solve = LinearAlgebra.SolveLinearSystem
+
+(* sqrt newton
+let next N x = (x + N/x) / 2.0
+let repeat f a0 = a0 |> Seq.unfold (fun x -> let v = f x in Some(v, v))
+*)
+
+
+
+
+let quadfun(a, b, c, d:float) = 
+    { new IFunction with
+        member this.Size = 2
+        member this.Eval(v:vector) = 
+            let vv = v.[0]*v.[0]*a + b*v.[0] + c*v.[1]*v.[1] + d*v.[1]
+            let gg = vector [2.0*a*v.[0] + b ; 2.0*c*v.[1] + d]
+            vv, gg
+    }
+
+
+let f = quadfun(1.,2.,3.,4.)
+let x = vector [1.;2.]
+let v,g = f.Eval x
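+
+// A quick finite-difference check of the analytic gradient above
+// (illustrative sketch; fdGrad is a hypothetical helper, not library code):
+let fdGrad eps (f:IFunction) (x:vector) =
+    Vector.init f.Size (fun i ->
+        let e = Vector.init f.Size (fun j -> if i = j then eps else 0.0)
+        (fst (f.Eval (x + e)) - fst (f.Eval (x - e))) / (2.0 * eps))
+
+fdGrad 1e-6 f x   // should be close to g = [4.0; 16.0] at x = [1.0; 2.0]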
+
+
+
+
+let ret = Optimization.optim f EASY
+
+let cubic (x:float) = x * x * x
+let sqr (x:float) = x * x
+
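+// Safeguarded polynomial line search (in the style of C. T. Kelley's
+// "Iterative Methods for Optimization"): fit a quadratic/cubic model to the
+// sampled step lengths, take the model minimizer lplus, and clip it into
+// the safeguard interval [blow*lamc, bhigh*lamc].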
+let cubicLine q0 qp0 lamc qc blow bhigh lamm qm = 
+    let lleft = lamc * blow
+    let lright = lamc * bhigh
+    let a = matrix [ [sqr lamc; cubic lamc]; [sqr lamm; cubic lamm] ]
+    let b = (vector [qc; qm]) - (vector [q0 + qp0*lamc; q0 + qp0*lamm])
+    let c = solve a b
+    let lplus = (-c.[0] + sqrt(sqr c.[0] - 3.*c.[1]*qp0)) / (3.*c.[1])
+    // clip the model minimizer into the safeguard interval
+    if lplus < lleft then lleft
+    elif lplus > lright then lright
+    else lplus
+
+let sqrLine q0 qp0 lamc qc blow bhigh = 
+    let lleft = lamc * blow
+    let lright = lamc * bhigh
+    let lplus = - qp0 / (2. * lamc * (qc - q0 - qp0))
+    if lplus < lleft then lleft
+    elif lplus > lright then lright
+    else lplus
+
+let steepest (f:IFunction) = 
+    // default parameters
+    let alp = 1e-4
+    let maxIter = 1000
+    let tol = 1e-6
+    let blow, bhigh = 0.1, 0.5   // line-search safeguard bounds
+    let init = Vector.zero (f.Size)
+
+    let rec iterate (xc:vector) iter histVal = 
+        let fc, gc = f.Eval xc
+        if Vector.norm gc < tol || iter > maxIter then
+            xc, List.rev histVal
+        else
+            let mutable lambda = min 1.0 (100. / (1. + Vector.norm gc))
+            let mutable xt = xc - lambda * gc
+            let mutable ft = fst (f.Eval xt)
+            let mutable fgoal = fc - alp * lambda * (gc.Transpose * gc)
+
+            // polynomial line search
+            let q0 = fc
+            let qp0 = - (gc.Transpose * gc)
+            let mutable lamc = lambda
+            let mutable qc = ft
+            let mutable lamm = 0.0
+            let mutable qm = 0.0
+            let mutable iarm = 0
+            while (ft > fgoal) do 
+                iarm <- iarm + 1
+                lambda <-
+                    if iarm = 1 then sqrLine q0 qp0 lamc qc blow bhigh
+                    else cubicLine q0 qp0 lamc qc blow bhigh lamm qm
+                qm <- qc
+                lamm <- lamc
+                lamc <- lambda
+                xt <- xc - lambda * gc
+                ft <- fst (f.Eval xt)
+                qc <- ft
+                fgoal <- fc - alp * lambda * (gc.Transpose * gc)
+            iterate xt (iter + 1) (ft :: histVal)
+
+    iterate init 0 []
+
+
+
+
+
+open System.IO
+open System.Collections.Generic
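+
+// LibSVM text format: one instance per line, "<label> <index>:<value> ...",
+// with 1-based labels/indices (hence the "- 1" below), e.g. "2 1:0.5 3:-1.2".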
+let readLibSvm path = 
+    let lines = File.ReadAllLines path
+    let dim = ref 0
+    let total = ref 0
+    let readInstance (line:string) = 
+        let s = line.Split([|' '|], System.StringSplitOptions.RemoveEmptyEntries)
+        let label =  int s.[0] - 1
+        let idValMap = new Dictionary<int, float>()
+        for i=1 to s.Length-1 do
+            let idval = s.[i].Split ':'
+            //printfn "idval = %A" idval
+            let id, v = int idval.[0] - 1, float idval.[1]
+            idValMap.Add(id, v)
+        label, idValMap, int ((s.[s.Length-1].Split ':').[0]) - 1
+        
+    let datalabel = 
+        [| for line in lines do
+            let label, idVal, maxIdx = readInstance line
+            total := !total + idVal.Count
+            if !dim < maxIdx then
+                dim := maxIdx
+            yield label, idVal
+        |]
+    let n = datalabel.Length
+    if (float (n * !dim) * 0.25 > float !total) then
+        failwith "not support sparse matrix yet"
+
+    datalabel
+
+let path = @"c:\users\yin\desktop\satimage.scale"
+
+
+readLibSvm @"c:\users\yin\desktop\satimage.scale"
+
+
+let a = new Multinomial(10) :> IDistribution
+a.Prob(1.)

File libML/libml/ann.fsx

+
+#r "FSharp.PowerPack.dll"
+#r @"FSharp.PowerPack.Compatibility"
+#r @"D:\WORK\MyFSharpExamples\Small\..\..\..\Business\references\books\F# and FP\FSharp-1.9.7.8\source\fsppack\FSharp.PowerPack\math\lapack\obj\Release\FSharp.PowerPack.Math.Providers.dll"
+#r "System.Windows.Forms.DataVisualization.dll"
+#r @"D:\WORK\libML\libML\CsLibrary\bin\Debug\CsLibrary.dll"
+#r "FSharp.PowerPack.Parallel.Seq.dll"
+
+#r @"D:\WORK\libML\libML\Debug\libsvm.dll"
+
+#load "fatlab.fs"
+#load "matrixExt.fs"
+#load "libMLcommon.fs"
+#load "utility.fs"
+#load "io.fs"
+#load "distribution.fs"
+#load "dataset.fs"
+#load "dimension_reduction.fs"
+#load "plot.fs"
+#load "text.fs"
+#load "optimization.fs"
+
+
+open libML
+open libML.Utility
+
+module ANN = 
+    let sigmoid x = 1.0 / (1.0 + exp(-x))
+
+    /// type: multi-layer perceptron
+    /// ds: the data; supports regression and classification
+    /// alpha: the momentum parameter (not yet used below)
+    /// beta: learning rate
+    /// nhidden: number of hidden units
+    type MultiPerceptions(ds:dataset2, alpha:float, beta:float, nhidden:int) = 
+        let nIn = ds.NFeatures
+        let nOut = if ds.IsRegression then 1 else ds.NLabels //if ds.NLabels = 2 then 1 else ds.NLabels
+        let rand = new System.Random()
+        // Array.init, not Array.create: each unit needs its own weight vector
+        // (Array.create would alias one shared vector across all units)
+        let wH = Array.init nhidden (fun _ -> Vector.init nIn (fun _ -> rand.NextDouble() - 0.5))
+        let oH = Vector.create nhidden 0.0
+        let eH = Vector.create nhidden 0.0
+
+        let wO = Array.init nOut (fun _ -> Vector.init nhidden (fun _ -> rand.NextDouble() - 0.5))
+        let oO = Vector.create nOut 0.0
+        let eO = Vector.create nOut 0.0
+
+        let t = if ds.IsClassification then ds.Labels |> Array.map float else ds.RegValues
+        let tO = Array.init ds.NSamples (fun i->
+            if ds.IsClassification then Array.init nOut (fun j -> if int(t.[i]) = j then 1.0 else 0.0)
+            else Array.init 1 (fun _ -> t.[i]))
+
+        
+
+        let feed(x:vector) = 
+            for j=0 to nhidden-1 do
+                oH.[j] <- sigmoid (Vector.dot x  wH.[j])
+            for j=0 to nOut-1 do
+                oO.[j] <- sigmoid (Vector.dot wO.[j] oH)
+
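+        // Backpropagation with sigmoid units: the output-layer delta is
+        // eO = oO*(1-oO)*(t-oO); the hidden-layer delta is
+        // eH = oH*(1-oH)*sum_k wO_k*eO_k; each weight then moves by
+        // beta * delta * input.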
+        let iter() = 
+            match ds.data with
+                | Dense d ->
+                    let mutable err = 0.
+                    for i=0 to d.Length-1 do
+                        // calculate output
+                        feed(d.[i])
+
+                        // output error
+                        for j=0 to nOut-1 do
+                            eO.[j] <- oO.[j] * (1.0 - oO.[j]) * (tO.[i].[j] - oO.[j] )
+                            err <- err + abs(eO.[j])
+
+                        // hidden error
+                        for j=0 to nhidden-1 do
+                            let mutable a = 0.
+                            for k=0 to nOut-1 do
+                                a <- a + wO.[k].[j] * eO.[k]
+                            eH.[j] <- oH.[j] * (1.0 - oH.[j]) * a
+
+                        // update weight
+                        for j=0 to nhidden-1 do 
+                            for k=0 to nIn-1 do
+                                let delta = beta * eH.[j] * d.[i].[k]
+                                wH.[j].[k] <- wH.[j].[k] + delta
+                        for j=0 to nOut-1 do
+                            for k=0 to nhidden-1 do
+                                let delta = beta * eO.[j] * oH.[k]
+                                wO.[j].[k] <- wO.[j].[k] + delta
+                    printfn "err = %A" err
+
+                | Sparse d -> NYI()
+
+        member this.train(maxiter:int) = 
+            printfn "%A" tO
+            for it=0 to maxiter-1 do
+                iter()
+
+        member this.test() = 
+            match ds.data with
+                | Dense d -> 
+                    if ds.IsRegression then
+                        RegTarget(  Array.init  ds.NSamples (fun i-> 
+                            feed(d.[i])
+                            oO.[0]))
+                    else
+                        ClaTarget( 
+                            Array.init ds.NSamples (fun i->
+                                feed(d.[i])
+                                Array.maxIndex oO.InternalValues),
+                            ds.NLabels)
+                | Sparse d ->
+                    NYI()
+
+
+open libML.Dataset
+open libML.IO
+                
+let iris = Dataset.readArff @"D:\WORK\libML\data\weka\UCI20051003\iris.arff" |> Dataset2.fromDataFrame
+
+
+let a = new ANN.MultiPerceptions(iris, 0.1, 0.005, 1)
+
+a.train(20)
+a.test()
+
+
+
+

File libML/libml/ann2.fsx

+
+open System
+
+module ANN = 
+    let rnd = new System.Random(0xDA22A)
+    let output (s:string) = System.Console.Write(s)
+
+    type Neuron =
+        val mutable output : float      (* output value *)
+        val mutable error : float       (* error value *)
+        val mutable weights : float list        (* weights from each previous layer to this neuron *)
+        val mutable bias : float                (* neuron bias *)
+        val ActivationFunction : float -> bool -> float     (* activation function *)
+    
+        new (inputs, activationFunction) =  {
+          ActivationFunction = activationFunction;
+          weights = List.init inputs (fun _ -> rnd.NextDouble());
+          bias = 0.0;
+          output = 0.0;
+          error = 0.0
+        }
+    
+        member x.Bias = x.bias
+        member x.Output with get() = x.output
+        member x.Weights = x.weights
+    
+        member x.GetOutput inputs = 
+          let preActivation = List.fold2 (fun sum weight input -> sum + (weight * input)) (-x.bias) x.weights inputs in
+            (x.ActivationFunction) preActivation false
+
+    
+        member x.UpdateOutput inputs = 
+          x.output <- x.GetOutput inputs
+    
+        member x.GetError errorInputs = 
+          List.fold2 (fun sum weight errorInput -> sum + (weight * errorInput)) (0.0) x.weights errorInputs
+    
+        member x.UpdateError errorInputs = 
+          x.error <- x.GetError errorInputs
+
+
+    type Layer = 
+        val inputs : int                    (* should be the number of neurons from the previous layer *)
+        val neurons : Neuron list           (* neurons in this layer *)
+        val mutable output : float list     (* outputs of each neuron *)
+        new (inputs, neurons, activationFunction) = {
+            inputs = inputs;
+            neurons = List.init neurons (fun _ -> new Neuron(inputs, activationFunction));
+            output = List.init neurons (fun _ -> 0.0)   (* one output per neuron in this layer *)
+        }
+    
+        member x.Output with get() = x.output
+        member x.GetOutput inputs = List.init (x.neurons.Length) (fun i -> (List.nth x.neurons i).GetOutput(inputs))
+        member x.UpdateOutput inputs = x.output <- x.GetOutput inputs
+    
+        member x.Neuron n = List.nth x.neurons n
+        member x.Neurons with get() = x.neurons
+
+    type ANN =
+        val layers : Layer list
+    
+        new ((layerNeurons:int list), activationFunction) =   {
+            layers = List.init 
+              layerNeurons.Length 
+              (fun i -> match i with
+                        | 0 -> new Layer(List.nth layerNeurons 0, List.nth layerNeurons 0, activationFunction)
+                        | _ -> new Layer(List.nth layerNeurons (i - 1), List.nth layerNeurons i, activationFunction)
+              );
+        }
+    
+        member x.Layer n = List.nth x.layers n
+        member x.Layers = x.layers.Length
+        member x.InputLayer with get() = x.Layer 0
+        member x.OutputLayer with get() = x.Layer (x.layers.Length - 1)
+        member x.Neuron layer neuron = ((x.Layer layer).Neuron neuron)
+        member x.Neurons layer = (List.nth x.layers layer)
+        member x.Output with get() = x.OutputLayer.Output
+    
+        member x.GetOutput (inputs:float list) = 
+          let rec calcLayer (input:float list) (layers:Layer list) = 
+            match layers with
+              | [] -> input
+              | h::tail -> calcLayer (h.GetOutput(input)) tail in
+        
+            calcLayer inputs x.layers
+    
+        member x.UpdateOutput (inputs:float list) = 
+          let rec updateLayer (input:float list) (layers:Layer list) =
+            match layers with
+              | [] -> ()
+              | h::tail -> h.UpdateOutput(input); updateLayer (h.Output) tail in
+          
+            updateLayer inputs x.layers
+          
+        member x.Train (trainingData:(float list * float list) list) learning_rate max_iterations mse_threshold = 
+          let train (input,output) = 
+            let updateWeights layer_n backInput =
+              let layerOutput = (x.Layer layer_n).Output
+              let layerInput = 
+                match layer_n with
+                    | 0 -> input
+                    | _ -> (x.Layer (layer_n - 1)).Output
+              let error = List.map2 (fun z g -> z * (1.0 - z) * g) layerOutput backInput 
+              List.iteri (fun i e -> ((x.Layer layer_n).Neuron i).error <- e) error;
+              let change = List.map (fun e -> learning_rate * e) error 
+          
+              List.iteri 
+                  (
+                    fun i change ->
+                      let weights = ((x.Layer layer_n).Neuron i).Weights 
+                      let newWeights = List.map2 (fun oldW diff -> oldW + diff) weights (List.map (fun x -> x * change) layerInput) 
+                      ((x.Layer layer_n).Neuron i).weights <- newWeights
+                  ) change           
+
+        
+            x.UpdateOutput(input);
+        
+            (* difference between expected and actual *)
+            let error = List.map2 (fun actual expected -> expected - actual) x.Output output 
+        
+            (* half the sum of squared errors, used here as the "MSE" measure *)
+            let MSE = (List.fold (fun mse e -> mse + (e * e)) 0.0 error) / 2.0 
+        
+            (* update output layer *)
+            updateWeights (x.Layers - 1) error;
+        
+            (* update hidden layers + input layer *)
+            let rec processLayer l = 
+                match l >= 0 with
+                      | true ->
+                        let g = List.init (x.Layer l).Neurons.Length (fun i -> List.fold (fun prev (neuron:Neuron) -> prev + ((List.nth (neuron.weights) i) * neuron.error)) 0.0 (x.Layer (l+1)).Neurons) in
+                          updateWeights l g;
+                          processLayer (l - 1)
+                      | _ -> () 
+        
+            processLayer (x.Layers - 2);
+        
+            MSE
+      
+          let mutable i = 0 in
+          let mutable mse = 1.0 in
+          while (i < max_iterations && mse > mse_threshold) do 
+            let (tmp_i, tmp_mse) = 
+                List.fold
+                    (fun (prev_i, _) data -> 
+                        let mse = train data
+                        if ((prev_i + 1) % 500 = 0) then output (sprintf "Iteration %A: MSE %A\n" (prev_i + 1) mse)
+                        (prev_i + 1, mse))   // reuse mse rather than training on the same item twice
+                    (i,0.0) 
+                    trainingData
+            i <- tmp_i
+            mse <- tmp_mse
+
+
+    let NoActivationFunction input _ = input
+
+    let SigmoidActivationFunction input primed = 
+      let alpha = 1.0  
+      match primed with
+      | true -> alpha * input * (1.0 - input)
+      | _ -> 1.0 / (1.0 + (exp (-alpha * input)))
+ 
+
+module ANNTEST = 
+    open ANN
+
+    (* -------------------------------------------------------------------------------------------------------------------------------- *)
+    (* Training Data																													*)
+    (* -------------------------------------------------------------------------------------------------------------------------------- *)
+
+    (* 0-9 Digital Training Data *)
+    let DigitTrainingData = [
+     ([ 0.0;1.0;1.0;1.0;1.0;1.0;0.0;1.0;0.0;0.0;0.0;0.0;0.0;1.0;1.0;0.0;0.0;0.0;0.0;0.0;1.0;1.0;0.0;0.0;0.0;0.0;0.0;1.0;0.0;1.0;1.0;1.0;1.0;1.0;0.0 ], [ 1.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0 ]);
+     ([ 0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;1.0;0.0;0.0;0.0;0.0;1.0;1.0;1.0;1.0;1.0;1.0;1.0;1.0;0.0;0.0;0.0;0.0;0.0;0.0;1.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0 ], [ 0.0;1.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0 ]);
+     ([ 0.0;1.0;0.0;0.0;0.0;0.0;1.0;1.0;0.0;0.0;0.0;0.0;1.0;1.0;1.0;0.0;0.0;0.0;1.0;0.0;1.0;1.0;0.0;0.0;1.0;0.0;0.0;1.0;0.0;1.0;1.0;0.0;0.0;0.0;1.0 ], [ 0.0;0.0;1.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0 ]);
+     ([ 1.0;0.0;0.0;0.0;0.0;1.0;0.0;1.0;0.0;0.0;0.0;0.0;0.0;1.0;1.0;0.0;0.0;1.0;0.0;0.0;1.0;1.0;1.0;1.0;0.0;1.0;0.0;1.0;1.0;0.0;0.0;0.0;1.0;1.0;0.0 ], [ 0.0;0.0;0.0;1.0;0.0;0.0;0.0;0.0;0.0;0.0 ]);
+     ([ 0.0;0.0;0.0;1.0;1.0;0.0;0.0;0.0;0.0;1.0;0.0;1.0;0.0;0.0;0.0;1.0;0.0;0.0;1.0;0.0;0.0;1.0;1.0;1.0;1.0;1.0;1.0;1.0;0.0;0.0;0.0;0.0;1.0;0.0;0.0 ], [ 0.0;0.0;0.0;0.0;1.0;0.0;0.0;0.0;0.0;0.0 ]);
+     ([ 1.0;1.0;1.0;0.0;0.0;1.0;0.0;1.0;0.0;1.0;0.0;0.0;0.0;1.0;1.0;0.0;1.0;0.0;0.0;0.0;1.0;1.0;0.0;1.0;0.0;0.0;0.0;1.0;1.0;0.0;0.0;1.0;1.0;1.0;0.0 ], [ 0.0;0.0;0.0;0.0;0.0;1.0;0.0;0.0;0.0;0.0 ]);
+     ([ 0.0;0.0;1.0;1.0;1.0;1.0;0.0;0.0;1.0;0.0;1.0;0.0;0.0;1.0;1.0;0.0;0.0;1.0;0.0;0.0;1.0;1.0;0.0;0.0;1.0;0.0;0.0;1.0;0.0;0.0;0.0;0.0;1.0;1.0;0.0 ], [ 0.0;0.0;0.0;0.0;0.0;0.0;1.0;0.0;0.0;0.0 ]);
+     ([ 1.0;0.0;0.0;0.0;0.0;0.0;0.0;1.0;0.0;0.0;0.0;0.0;0.0;0.0;1.0;0.0;0.0;1.0;1.0;1.0;1.0;1.0;0.0;1.0;0.0;0.0;0.0;0.0;1.0;1.0;0.0;0.0;0.0;0.0;0.0 ], [ 0.0;0.0;0.0;0.0;0.0;0.0;0.0;1.0;0.0;0.0 ]);
+     ([ 0.0;1.0;1.0;0.0;1.0;1.0;0.0;1.0;0.0;0.0;1.0;0.0;0.0;1.0;1.0;0.0;0.0;1.0;0.0;0.0;1.0;1.0;0.0;0.0;1.0;0.0;0.0;1.0;0.0;1.0;1.0;0.0;1.0;1.0;0.0 ], [ 0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;1.0;0.0 ]);
+     ([ 0.0;1.0;1.0;0.0;0.0;0.0;0.0;1.0;0.0;0.0;1.0;0.0;0.0;1.0;1.0;0.0;0.0;1.0;0.0;0.0;1.0;1.0;0.0;0.0;1.0;0.0;1.0;0.0;0.0;1.0;1.0;1.0;1.0;0.0;0.0 ], [ 0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;1.0 ]);
+     ([ 1.0;1.0;1.0;1.0;0.0;0.0;0.0;0.0;0.0;0.0;1.0;0.0;0.0;0.0;0.0;0.0;0.0;1.0;0.0;0.0;0.0;0.0;0.0;0.0;1.0;0.0;0.0;0.0;1.0;1.0;1.0;1.0;1.0;1.0;1.0 ], [ 0.0;0.0;0.0;0.0;1.0;0.0;0.0;0.0;0.0;0.0 ]); 
+     ([ 1.0;1.0;1.0;1.0;0.0;1.0;0.0;1.0;0.0;0.0;1.0;0.0;0.0;1.0;1.0;0.0;0.0;1.0;0.0;0.0;1.0;1.0;0.0;0.0;1.0;0.0;0.0;1.0;1.0;0.0;0.0;0.0;1.0;1.0;0.0 ], [ 0.0;0.0;0.0;0.0;0.0;1.0;0.0;0.0;0.0;0.0 ]);
+     ([ 1.0;0.0;0.0;0.0;0.0;0.0;0.0;1.0;0.0;0.0;0.0;0.0;0.0;0.0;1.0;0.0;0.0;1.0;0.0;0.0;0.0;1.0;1.0;1.0;1.0;1.0;1.0;1.0;0.0;0.0;0.0;1.0;0.0;0.0;0.0 ], [ 0.0;0.0;0.0;0.0;0.0;0.0;0.0;1.0;0.0;0.0 ]);
+     ([ 0.0;1.0;0.0;0.0;0.0;1.0;0.0;1.0;0.0;0.0;0.0;0.0;0.0;1.0;1.0;0.0;0.0;1.0;0.0;0.0;1.0;1.0;0.0;1.0;0.0;1.0;0.0;1.0;0.0;1.0;1.0;0.0;1.0;1.0;0.0 ], [ 0.0;0.0;0.0;1.0;0.0;0.0;0.0;0.0;0.0;0.0 ]);
+     ([ 1.0;0.0;0.0;0.0;0.0;1.0;1.0;1.0;0.0;0.0;0.0;1.0;0.0;1.0;1.0;0.0;0.0;1.0;0.0;0.0;1.0;1.0;0.0;1.0;0.0;0.0;0.0;1.0;1.0;1.0;0.0;0.0;0.0;0.0;1.0 ], [ 0.0;0.0;1.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0 ]);
+     ([ 1.0;1.0;1.0;1.0;0.0;0.0;0.0;0.0;0.0;0.0;1.0;0.0;0.0;0.0;0.0;0.0;0.0;1.0;0.0;0.0;0.0;1.0;1.0;1.0;1.0;1.0;1.0;1.0;0.0;0.0;0.0;1.0;0.0;0.0;0.0 ], [ 0.0;0.0;0.0;0.0;1.0;0.0;0.0;0.0;0.0;0.0 ]); 
+     ([ 0.0;0.0;0.0;1.0;1.0;1.0;0.0;0.0;1.0;1.0;0.0;0.0;0.0;1.0;1.0;0.0;0.0;0.0;0.0;0.0;1.0;1.0;0.0;0.0;0.0;0.0;1.0;0.0;1.0;1.0;1.0;1.0;1.0;0.0;0.0 ], [ 1.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0 ]);
+     ([ 0.0;1.0;1.0;0.0;0.0;0.0;1.0;1.0;0.0;0.0;1.0;0.0;0.0;1.0;1.0;0.0;0.0;1.0;0.0;0.0;1.0;1.0;0.0;0.0;1.0;0.0;0.0;1.0;0.0;1.0;1.0;1.0;1.0;1.0;1.0 ], [ 0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;1.0 ])
+    ]
+
+    (* AB || C logic training data *)
+    let LogicTrainingData = [
+    (*    A   B   C        F    T     *)
+      ([ 0.0;0.0;0.0 ], [ 1.0; 0.0 ]);
+      ([ 0.0;0.0;1.0 ], [ 0.0; 1.0 ]);
+      ([ 0.0;1.0;0.0 ], [ 1.0; 0.0 ]);
+      ([ 0.0;1.0;1.0 ], [ 0.0; 1.0 ]);
+      ([ 1.0;0.0;0.0 ], [ 1.0; 0.0 ]);
+      ([ 1.0;0.0;1.0 ], [ 0.0; 1.0 ]);
+      ([ 1.0;1.0;0.0 ], [ 0.0; 1.0 ]);
+      ([ 1.0;1.0;1.0 ], [ 0.0; 1.0 ])
+    ]
+
+    (* -------------------------------------------------------------------------------------------------------------------------------- *)
+    (* Main - simple MLP with single hidden layer																						*)
+    (* -------------------------------------------------------------------------------------------------------------------------------- *)
+
+    let _ = 
+      let input = 3 in
+      let hidden = 3 in
+      let output = 2 in
+      let activation_function = SigmoidActivationFunction in
+      let training_data = LogicTrainingData in
+      let max_iterations = 1000000 in
+      let mse_threshold = 0.000001 in
+      let learning_rate = 0.15 in
+  
+        let myANN = new ANN([input; hidden; output], activation_function) in   
+          myANN.Train training_data learning_rate max_iterations mse_threshold;
+    
+        List.iter
+        (
+          fun (i,o) -> 
+            let print_any = printf "%A"
+            printf "Input ";
+            print_any i;
+            printf "  Expected ";
+            print_any o;
+            printf "  ANN Output ";
+            print_any (myANN.GetOutput(i));
+            printfn "";
+        
+        ) training_data;
+        
+
+
+
+let a = {1..10000000} |> Seq.toArray
+let b = {1..10000000} |> Seq.toList
+
+
+
+let vv = vector [1.;3.];
+vv.InternalValues

File libML/libml/chart.fsx

+// Luca Bolognese
+// http://lucabolognese.wordpress.com/
+// http://blogs.msdn.com/lucabol/
+
+#r "System.Windows.Forms.DataVisualization.dll"
+#load "plot.fs"
+
+open libML.Plot
+
+// ---------------------------------------------------------------------------
+// Sample tests for the API above.
+// ---------------------------------------------------------------------------
+
+open System
+
+let x = [1.;2.5;3.1;4.;4.8;6.0;7.5;8.;9.1;15.]
+let y = [1.6;2.1;1.4;4.;2.3;1.9;2.4;1.4;5.;2.9]
+
+let x0 = [1.; 2.]
+let y0 = [4.; 6.]
+let c1 = lc.scatter(x=x, y=y,  seriesName = "Good", title = "t") 
+c1 + lc.scatter(x=x0, y=y0, color = Color.Red) |> display
+
+
+lc.scatter(x = x, y = y, markerSize = 10, markerStyle = MarkerStyle.Diamond,
+    xname = "Players", yname = "Ratings", title = "Players' Ratings")     
+lc.line(y = y, markerSize = 10, markerStyle = MarkerStyle.Diamond, xname = "Players", yname = "Ratings", title = "Players' Ratings", isValueShownAsLabel = true,
+    color = Color.Red)  |> display
+lc.spline(x = x, y = y, markerSize = 10, markerStyle = MarkerStyle.Diamond, xname = "Players", yname = "Ratings",
+    title = "Players' Ratings", isValueShownAsLabel = true, color = Color.Red)
+lc.stepline(x = x, y = y, markerSize = 10, markerStyle = MarkerStyle.Diamond, xname = "Players", yname = "Ratings",
+    title = "Players' Ratings", isValueShownAsLabel = true, color = Color.Red) 
+lc.bar(y = y, xname = "Players", yname = "Ratings", title = "Players' Ratings", isValueShownAsLabel = true,
+    drawingStyle = "Emboss")       
+lc.column(y = y, xname = "Players", yname = "Ratings", title = "Players' Ratings",
+    isValueShownAsLabel = true, drawingStyle = "Cylinder") |> display
+lc.boxplot(y = y, xname = "Players", yname = "Ratings", title = "Players' Ratings", color = Color.Blue, whiskerPercentile = 5, percentile = 30,
+    showAverage = false, showMedian = false, showUnusualValues = true)   |> display
+lc.bubble(y = y, xname = "Players", yname = "Ratings", title = "Players' Ratings", color = Color.Blue) |> display    
+
+lc.bubble(y = y, markerSize = 1) |> display   
+
+lc.scatter(y, markerSize = 10) + lc.column() + lc.boxplot() + lc.line()  + lc.column(x) + lc.boxplot() |> display
+lc.scatter(y, markerSize = 10) + lc.column() + (lc.line(x)  + lc.column()) + lc.scatter(markerSize = 20) |> display
+
+// This sample will plot a combination of graphs
+// We plot a line and column graph juxtaposed on top of each other,
+// combined with a line graph at the bottom.
+// Then we pipe it to the display
+
+let h = [1.;2.5;3.1;4.;4.8;6.0;7.5;8.;9.1;15.]
+let w = h |> List.map (fun h -> h * 1.2)
+
+lc.line(h) + lc.column() ++ lc.line(w) ++ lc.bubble(w) |> display
+
+
+let r = new System.Random()
+let getrand n = 
+    seq {
+        for i=1 to n do
+            yield r.NextDouble()
+    }
+    |> Seq.toList
+
+let x1 = getrand 10000
+let y1 = getrand 10000
+let x2 = getrand 10000
+let y2 = getrand 10000
+
+let lc1 = lc.scatter(x = x1, y = y1, color = Color.Red)
+let lc2 = lc.scatter(x = x2, y = y2)
+lc1 + lc2 |> display

File libML/libml/classification.fs

+namespace libML.Classification
+
+open libML.Utility
+open libML
+open libML.Distribution
+open libML.Math
+open libML.IO
+
+
+module PredictorHelper = 
+    let predictDataset (ds:dataset2) (predictor:gvector -> int) =
+        match ds.data with
+            | Dense d -> 
+                d |> Array.map (fun v -> predictor(DenseVec v)) 
+            | Sparse d ->
+                d |> Array.map (fun v -> predictor(SparseVec v))
+
+    let probPredictDataset (ds:dataset2) (probPredictor:gvector -> int * float array) = 
+        let out = 
+            match ds.data with
+                | Dense d ->
+                    d |> Array.map (fun v -> probPredictor(DenseVec v))
+                | Sparse d ->
+                    d |> Array.map (fun v -> probPredictor(SparseVec v))
+        Array.splitAssoc out
+
+    
+
+type IPredictor = 
+    abstract member Predict: gvector -> int
+    abstract member Predict: dataset2 -> int array
+
+
+type IProbPredictor =
+    abstract member ProbPredict: gvector -> int * (float array)
+    abstract member ProbPredict: dataset2 -> (int array) * (float array array)
+    
+    
+type ILearner = 
+    abstract member Learn: dataset2 * parameters -> IPredictor
+
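+// Typical use of a learner/predictor pair (illustrative; KNN and its
+// defaultPara are defined further down in this file):
+//     let predictor = KNN.learner.Learn(ds, KNN.defaultPara)
+//     let predictedLabels = predictor.Predict(ds)
+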
+(* TODO: find the code in old laptop, make LibSVM work *)
+(*
+module LibSVM = 
+    open libML.LibSVM
+    let defaultPara = Map(["parameter", "-c 10 -t 0"])
+    let parsePara (p:parameters) = 
+        Parameters.getString p "parameter"  "-c 10 -t 0"
+
+    let makeSvmData(ds:dataset2) = 
+        let V, I = 
+            match ds.data with
+                | Sparse d ->
+                    let I = Array.init ds.NSamples (fun i -> 
+                        let n = d.[i].Indices
+                        Array.init n.Length (fun j -> n.[j] + 1))
+                    let V = Array.init ds.NSamples (fun i ->
+                        let n = d.[i].Values
+                        Array.init n.Length (fun j -> n.[j]))
+                    V, I
+                | Dense d ->
+                    let I = Array.init ds.NSamples (fun i -> Array.init ds.NFeatures (fun j-> j+1))
+                    let V = Array.init ds.NSamples (fun i -> Array.init ds.NFeatures (fun j-> d.[i].[j]))
+                    V, I
+        let L = 
+            if ds.NLabels = 2 then
+                Array.init ds.NSamples (fun i -> if ds.Labels.[i]=0 then -1 else 1)
+            else
+                Array.init ds.NSamples (fun i -> ds.Labels.[i] + 1)
+
+        new SvmData(ds.NSamples, ds.NFeatures, V, I, L)
+    
+    type svmPredictor(svmModel:SvmModel) = 
+        interface IPredictor with 
+            member this.Predict(v:gvector) = 
+                NYI()
+                0
+            member this.Predict(ds:dataset2) = 
+                let data = makeSvmData(ds)
+                let svm = new SVM()
+                let res = svm.predict(data, svmModel)
+                printfn "acc returned by libsvm = %A" res.accuracy
+                res.predictLabels
+        
+    let learner = 
+        { new ILearner with
+            member this.Learn(ds:dataset2, para:parameters) = 
+                let p = parsePara para
+                let data = makeSvmData(ds)
+                let svm = new SVM()
+                let svmModel = svm.train(data, p)
+                new svmPredictor(svmModel) :> IPredictor
+        }
+        *)
+
+/// the k-nearest neighbour classifier
+module KNN = 
+    let defaultPara = 
+        Map(["k", "1"; ])
+
+    let parsePara (para:Map<string, string>) = 
+        match para.TryFind("k") with
+            | Some k -> int(k)
+            | None -> 1 // default k is 1
+
+    type knnPredictor (ds:dataset2, K:int) =
+        let predict(gv:gvector) = 
+            if ds.NSamples < K then failwith "K is bigger than the size of the dataset!"
+
+            let distArr = 
+                match gv with
+                    | DenseVec v -> 
+                        match ds.data with
+                            | Dense d ->
+                                d |> Array.map (fun u -> Vector.euclidian u v) 
+                            | Sparse d ->
+                                failwith "try to predict a dense vector on a sparse data set"
+                    | SparseVec v ->
+                        match ds.data with
+                            | Sparse d ->
+                                d |> Array.map (fun u -> SVector.euclidian u v)
+                            | Dense _ ->
+                                failwith "try to predict a sparse vector on a dense data set"
+
+            let idx = Array.minK K distArr
+            let cnt = Array.create ds.NLabels 0
+            let labels = ds.Labels
+            for i=0 to K-1 do
+                cnt.[labels.[idx.[i]]] <- cnt.[labels.[idx.[i]]] + 1
+            cnt |> Seq.mapi (fun i v -> (v, i)) |> Seq.max |> snd
+    
+        interface IPredictor with 
+            member this.Predict(v:gvector) = predict(v)
+            member this.Predict(ds:dataset2) = PredictorHelper.predictDataset ds predict
+
+    let learner = 
+        { new ILearner with
+            member this.Learn(ds, para) = 
+                new knnPredictor(ds, parsePara para) :> IPredictor
+        }
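+
+    /// A minimal, self-contained sketch of the same k-NN vote on plain arrays
+    /// (illustration only: knnVote is a hypothetical helper, not part of the API).
+    let knnVote (train:(float[] * int)[]) (k:int) (x:float[]) = 
+        train
+        |> Array.map (fun (u, y) -> Array.fold2 (fun s a b -> s + (a - b) * (a - b)) 0.0 u x, y)
+        |> Array.sortBy fst       // nearest first, by squared euclidean distance
+        |> Seq.truncate k
+        |> Seq.countBy snd        // tally the labels of the k nearest
+        |> Seq.maxBy snd
+        |> fst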
+
+
+// no bias term now
+module LogReg = 
+    let defaultPara = 
+        Map(["l2weight", "1.0"; "solver", "EASY"])
+
+    let parsePara (para:parameters) = 
+        let l2weight = 
+            match para.TryFind("l2weight") with
+                | Some k -> float(k)
+                | None -> 1.0 // default l2weight is 1.0
+        let solver = 
+            match para.TryFind("solver") with
+                | Some k when k = "EASY" -> EASY
+                | Some k when k = "CG" -> CG
+                | Some k when k = "LBFGS" -> LBFGS
+                | Some k -> failwith ("solver " + k + " not supported")
+                | None -> EASY // default solver is EASY
+        l2weight, solver
+
+    let inline sigmoid x = 
+        1. / (1. + exp(-x))
+
+    type logregPredictor(x:vector) = 
+        let probPredict(v:gvector) = 
+            let prob = 
+                match v with
+                | SparseVec s ->
+                    sigmoid(SVector.dotDense s x)
+                | DenseVec d ->
+                    sigmoid(Vector.dot  d  x)
+            let label = if prob < 0.5 then 0 else 1
+            label, [1.0-prob; prob] |> List.toArray
+        let predict (v:gvector) = 
+            let label, _ = probPredict(v)
+            label
+        interface IPredictor with 
+            member this.Predict(v:gvector) = predict(v)
+            member this.Predict(ds:dataset2) = PredictorHelper.predictDataset ds predict
+        interface IProbPredictor with
+            member this.ProbPredict(v:gvector) = probPredict(v)
+            member this.ProbPredict(ds:dataset2) = PredictorHelper.probPredictDataset ds probPredict
+ 
+    let learner = 
+        { new ILearner with
+            member this.Learn(ds:dataset2, para:parameters) = 
+                if ds.NLabels > 2 then
+                    failwith "not support multiclass classification"
+                let p = parsePara para
+                let weight = fst p
+                let solver = snd p
+                let dim = ds.NFeatures
+                let logreg = 
+                    { new IFunction with
+                        member this.Size = dim
+                        member this.Eval(v:vector) = 
+                            let mutable loss = 1.0 + Array.sumBy (fun x -> 0.5*x*x*weight) (v.InternalValues)
+                            let mutable gradient = weight * v
+                            
+                            match ds.data with
+                                | Dense dat ->
+                                    for i=0 to ds.NSamples-1 do
+                                        let score = 
+                                            if ds.Labels.[i] = 0 then -(Vector.dot dat.[i] v)
+                                            else Vector.dot dat.[i] v
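+                                        // clamp the score at +/-30 before calling exp to avoid
+                                        // floating-point overflow; past that point the loss and
+                                        // probability have saturated anyway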
+                                        let insLoss, insProb = 
+                                            if score < -30. then -score, 0.0
+                                            elif score > 30. then 0.0, 1.0
+                                            else let temp = 1.0 + exp(-score) in log(temp), 1.0/temp
+                                        loss <- loss + insLoss
+                                        if ds.Labels.[i] = 0 then
+                                            gradient <- gradient + ((1.0 - insProb) * (dat.[i] ))
+                                        else
+                                            gradient <- gradient + ((-1.0 * (1.0 - insProb)) * (dat.[i] ))
+                                | Sparse dat ->
+                                    for i=0 to ds.NSamples-1 do
+                                        let score = 
+                                            if ds.Labels.[i] = 0 then -(SVector.dotDense dat.[i] v)
+                                            else SVector.dotDense dat.[i] v
+                                        let insLoss, insProb = 
+                                            if score < -30. then -score, 0.0
+                                            elif score > 30. then 0.0, 1.0
+                                            else let temp = 1.0 + exp(-score) in log(temp), 1.0/temp
+                                        loss <- loss + insLoss
+                                        if ds.Labels.[i] = 0 then
+                                            SVector.addToDenseInPlace  (dat.[i]) (1.0 - insProb) gradient
+                                            //gradient <- gradient + ((1.0 - insProb) * (dat.[i].Transpose))
+                                        else
+                                            SVector.addToDenseInPlace (dat.[i]) (insProb - 1.0) gradient
+                                            //gradient <- gradient + ((-1.0 * (1.0 - insProb)) * (dat.[i].Transpose))
+                            loss, gradient
+                    }
+
+                let x = Optimization.optim logreg solver
+
+                new logregPredictor(x) :> IPredictor
+        }
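+
+    (*
+       A sketch of the objective minimized above, matching the Eval code:
+           f(w) = 1 + (l2weight/2) * ||w||^2 + sum_i log(1 + exp(-s_i))
+       where s_i = x_i . w for a label-1 sample and s_i = -(x_i . w) for a label-0 sample.
+       With p_i = 1 / (1 + exp(-s_i)), the per-sample gradient is -(1 - p_i) * x_i for
+       label 1 and +(1 - p_i) * x_i for label 0, which is exactly the (1.0 - insProb)
+       update applied in both the dense and sparse branches.
+    *)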
+
+
+
+        (*
+/// binary logistic regression
+module LogReg = 
+    let defaultPara = 
+        Map(["l2weight", "1.0"; "solver", "EASY"])
+
+    let parsePara (para:parameters) = 
+        let l2weight = 
+            match para.TryFind("l2weight") with
+                | Some k -> float(k)
+                | None -> 1.0 // default weight is 1
+        let solver = 
+            match para.TryFind("solver") with
+                | Some k when k = "EASY" -> EASY
+                | Some k when k = "CG" -> CG
+                | Some k when k = "LBFGS" -> LBFGS
+                | Some k -> failwith ("solver " + k + " not supported")
+                | None -> EASY // default solver is EASY
+        l2weight, solver
+
+    let private sigmoid x = 
+        1. / (1. + exp(-x))
+
+    type logregPredictor(x:vector) = 
+        member this.predict(v:rowvec) = 
+            let prob = sigmoid(v * x)
+            let label = if prob < 0.5 then 0 else 1
+            label, prob
+
+        member this.probOutput(data:matrix) = 
+            Vector.Generic.init (data.NumRows) (fun i -> (this.predict (data.Row(i))))
+
+        interface IPredictor with 
+            member this.Predict (v:rowvec) = 
+                this.predict(v) |> fst
+
+            member this.Predict (data:matrix) = 
+                let pred = Vector.Generic.init (data.NumRows) (fun i -> (this.predict (data.Row(i)) |> fst))
+                pred
+            member this.Predict (ds:dataset) = 
+                let pred = Vector.Generic.init (ds.label.Length) (fun i -> (this.predict (ds.data.Row(i)) |> fst))
+                let mutable cnt = 0
+                for i=0 to ds.label.Length-1 do
+                    if pred.[i] = ds.label.[i] then 
+                        cnt <- cnt + 1
+                printfn "pred = %A" pred
+                pred, (float cnt)/(float (ds.label.Length))
+
+
+    let learner = 
+        { new ILearner with
+            member this.Learn(ds:dataset, para:parameters) = 
+                if (Array.max (ds.label.InternalValues)) > 1 then
+                    failwith "multiclass classification is not supported"
+                let p = parsePara para
+                let weight = fst p
+                let solver = snd p
+                let dim = ds.data.NumCols
+                let logreg = 
+                    { new IFunction with
+                        member this.Size = dim
+                        member this.Eval(v:vector) = 
+                            let mutable loss = 1.0 + Array.sumBy (fun x -> 0.5*x*x*weight) (v.InternalValues)
+                            let mutable gradient = weight * v
+                            
+                            for i=0 to ds.data.NumRows-1 do
+                                let score = 
+                                    if ds.label.[i] = 0 then - (ds.data.Row(i) * v)
+                                    else ds.data.Row(i) * v
+                                let insLoss, insProb = 
+                                    if score < -30. then -score, 0.0
+                                    elif score > 30. then 0.0, 1.0
+                                    else let temp = 1.0 + exp(-score) in log(temp), 1.0/temp
+                                loss <- loss + insLoss
+                                if ds.label.[i] = 0 then
+                                    gradient <- gradient + ((1.0 - insProb) * (ds.data.Row(i).Transpose))
+                                else
+                                    gradient <- gradient + ((-1.0 * (1.0 - insProb)) * (ds.data.Row(i).Transpose))
+                            loss, gradient
+                    }
+
+                let minval, x = Optimization.optim logreg solver
+
+                new logregPredictor(x) :> IPredictor
+        }
+
+
+/// binary regularized least-squares classifier
+module RegSqr = 
+    let defaultPara = LogReg.defaultPara
+    let parsePara = LogReg.parsePara
+
+    type regsqrPredictor(x:vector) = 
+        member this.predict(v:rowvec) = 
+            let prob = v * x
+            let label = if prob < 0.5 then 0 else 1
+            label, prob
+
+        member this.probOutput(data:matrix) = 
+            Vector.Generic.init (data.NumRows) (fun i -> (this.predict (data.Row(i))))
+
+        interface IPredictor with 
+            member this.Predict (v:rowvec) = 
+                this.predict(v) |> fst
+
+            member this.Predict (data:matrix) = 
+                let pred = Vector.Generic.init (data.NumRows) (fun i -> (this.predict (data.Row(i)) |> fst))
+                pred
+            member this.Predict (ds:dataset) = 
+                let pred = Vector.Generic.init (ds.label.Length) (fun i -> (this.predict (ds.data.Row(i)) |> fst))
+                let mutable cnt = 0
+                for i=0 to ds.label.Length-1 do
+                    if pred.[i] = ds.label.[i] then 
+                        cnt <- cnt + 1
+                printfn "pred = %A" pred
+                pred, (float cnt)/(float (ds.label.Length))
+
+    let learner = 
+        { new ILearner with
+            member this.Learn(ds:dataset, para:parameters) = 
+                if (Array.max (ds.label.InternalValues)) > 1 then
+                    failwith "multiclass classification is not supported"
+                let p = parsePara para
+                let weight = fst p
+                let solver = snd p
+                let dim = ds.data.NumCols
+                let n = ds.data.NumRows
+                let regsqr = 
+                    { new IFunction with
+                        member this.Size = dim
+                        member this.Eval(v:vector) = 
+                            let temp = Array.init n (fun i -> - (float ds.label.[i]))
+                            let gradient = Array.create dim 0.0
+                            let mutable loss = 0.0
+                            for j=0 to dim-1 do
+                                loss <- loss + v.[j] * v.[j] * weight // accumulate the L2 penalty (the 0.5 factor is applied below)
+                                gradient.[j] <- weight * v.[j]
+                                for i=0 to n-1 do
+                                    temp.[i] <- temp.[i] + v.[j] * ds.data.[i,j]
+                            for i=0 to n-1 do
+                                if temp.[i] <> 0.0 then
+                                    loss <- loss + temp.[i] * temp.[i]
+                                    for j=0 to dim-1 do
+                                        gradient.[j] <- gradient.[j] + ds.data.[i,j] * temp.[i]
+
+                            0.5 * loss + 1.0, gradient |> Vector.ofArray
+                    }
+                let minval, x = Optimization.optim regsqr solver
+                new regsqrPredictor(x) :> IPredictor
+        }
+
+
+
+
+
+
+module NaiveBayes = 
+    
+
+    let defaultPara = Map(["numerical", "gaussian"])
+
+    let parsePara (p: parameters) = ()
+
+    type nbPredictor (estimators: IDistribution [][], labelPrior:vector) = 
+        member this.predictAll(v:rowvec) = 
+            let nclass = labelPrior.Length
+            let p = seq {
+                for i=0 to nclass-1 do 
+                    let likelihood = 
+                        v
+                        |> Seq.mapi (fun j v ->
+                            let e = estimators.[i].[j]
+                            log (e.Prob(v))
+                            )
+                        |> Seq.sum
+                    let p_i = likelihood + log labelPrior.[i] // likelihood is a log-likelihood, so add the log prior
+                    yield p_i, i
+                }
+            p
+
+        member this.predict(v:rowvec) = 
+            let p = this.predictAll(v)
+            p |> Seq.max |> snd
+
+        member this.predictProb(v:rowvec) = 
+            let p = this.predictAll(v) |> Seq.toArray
+            let sum = Array.sumBy (fun (t, _) -> t) p
+            fst p.[0] / sum, snd p.[0]
+
+        interface IPredictor with 
+            member this.Predict (v:rowvec) = this.predict(v)
+            member this.Predict (ds:matrix) = 
+                Vector.Generic.init (ds.NumRows) (fun i -> (this.predict (ds.Row(i)))) 
+            member this.Predict (ds:dataset) = 
+                let pred = Vector.Generic.init (ds.label.Length) (fun i -> (this.predict (ds.data.Row(i))))
+                let mutable cnt = 0
+                for i=0 to ds.label.Length-1 do
+                    if pred.[i] = ds.label.[i] then 
+                        cnt <- cnt + 1
+                pred, (float cnt)/(float (ds.label.Length))
+
+    let learner = 
+        { new ILearner with 
+            member this.Learn (ds:dataset, para:parameters) = 
+                let dss = Dataset.splitByLabel ds
+
+                let estimators = 
+                    dss |> Array.map (fun d ->
+                        let estimator = Array.init (d.data.NumCols) (fun j ->
+                            let col = d.data.Column(j)
+                            let gaussianE = new Gaussian (col)
+                            gaussianE :> IDistribution
+                        )
+                        estimator
+                    )
+                let labelPrior = Dataset.countLabel ds |> Array.map float |> Vector.ofArray
+                Vector.normalizeInPlace labelPrior
+
+                new nbPredictor(estimators, labelPrior) :> IPredictor
+        }
+
+        *)
+
+    
+    
+    (*
+module SVM = 
+    type svmSolver = LIBSVM | SVMLIGHT
+
+    let defaultPara = 
+        Map(["solver", "LIBSVM"; "para", "-t 0 -c 10"])
+
+
+    let parsePara (para:parameters) = 
+        let solver = 
+            match para.TryFind("solver") with
+                | Some t when t = "LIBSVM" -> LIBSVM
+                | Some t when t = "SVMLIGHT" -> SVMLIGHT
+                | _ -> LIBSVM
+        let para = Parameters.getString para "para" ""
+        solver, para
+
+    type svmPredictor(dataname:string, modelname:string) = 
+        interface 
+    let learner = 
+        { new ILearner with
+            member this.Learn(ds, p) =
+                let dataname = @"c:\temp\svm.temp.dat" 
+                let modelname = @"c:\temp\svm.temp.model"
+                Dataset.saveLibsvm ds dataname
+
+        }
+        
+
+    *)
+
+/// decision tree module
+module DecisionTree = 
+    
+
+    let defaultPara = Map(["featureSelector", "infoGain"])
+
+    type featureSlectorType = InfoGain | GainRatio | GiniIndex
+    type treeParameterType = 
+        {
+            prune: bool;
+            minNodes: int;
+            maxDepth: int;
+            pruneFolds: int;
+            binaryNominal: bool;
+            featureSelector: featureSlectorType;
+        }
+
+    let parsePara (p:parameters) = 
+        let selector = 
+            let str = Parameters.getString p "featureSelector" "infoGain"
+            if str = "infoGain" then InfoGain 
+            elif str = "gainRatio" then GainRatio
+            else GiniIndex
+        {
+            prune = Parameters.getBool p "prune" false;
+            minNodes = Parameters.getInt p "minNodes" 2;
+            maxDepth = Parameters.getInt p "maxDepth" 15;
+            pruneFolds = Parameters.getInt p "pruneFolds" 3;
+            binaryNominal = Parameters.getBool p "binaryNominal" false;
+            featureSelector = selector;
+        }
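+    // e.g. parsePara (Map(["prune", "true"; "maxDepth", "10"])) keeps every other field
+    // at the defaults listed above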
+
+    type TreeNodeType = LeafNode | InternalNode of TreeNode array
+    and TreeNode = {
+        dat: dataset2; // a reference to the original dataset
+        featIdx: datavecs; // a reference to the transpose of the original dataset, for fast feature indexing
+        maxFeat: int array; // a reference to the number of distinct values for each feature
+        activeSamples: int array; // active sample ids in this node
+        activeFeats: int array; // active feature ids
+        mutable children: TreeNodeType; // subtrees; only this field is mutable, as it changes during pruning and other operations
+        mutable splitFeat: int; // split feature id;
+        labelCnt: float array; // how many samples are in each label/class
+        depth: int; // the depth of this node, root has level 0
+        label: int; // a node, either internal or leaf, must have a class label
+    }
+    with 
+        member m.IsLeaf = 
+            match m.children with
+                | LeafNode -> true
+                | _ -> false
+        member m.NChildren = 
+            match m.children with
+                | LeafNode -> 0
+                | InternalNode c -> c.Length
+        member m.Label = m.label
+        member m.LabelWithProbability =
+            m.label, float m.labelCnt.[m.label] / (float m.activeSamples.Length)
+        member m.Child (i:int) = 
+            match m.children with
+                | LeafNode -> failwith "a leaf node has no children"
+                | InternalNode c -> c.[i]
+
+    
+    let infoGainSelector (node:TreeNode) = 
+        let weightEntropy (labelweight: (int*float) seq) (n:int) = 
+            let cnt = Array.create n 0.0
+            labelweight |> Seq.iter (fun (l,w) -> cnt.[l] <- cnt.[l] + w)
+            //printfn "cnt = %A" cnt
+            let totalCnt = cnt |> Seq.sum
+            totalCnt, cnt |> Seq.map (fun c -> if c = 0.0 then 0.0 else let p = (float c)/totalCnt in - p * log(p)/log(float n)) |> Seq.sum
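+        // note: the entropy is normalized by log n, so it lies in [0, 1] regardless of the number of classes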
+
+        let featIdx = node.featIdx
+        match node.dat.data with
+            | Dense _ ->
+                seq {
+                    for feat in node.activeFeats do
+                        let values = featIdx.DenseData.[feat].InternalValues  // values in the (feat) column in the data matrix
+                        let totalweight, totalent = weightEntropy (node.activeSamples |> Seq.map (fun id -> node.dat.Labels.[id], node.dat.GetSampleWeight(id))) node.dat.NLabels
+                        let subtrees = 
+                            seq {
+                            for featval=0 to node.maxFeat.[feat]-1 do
+                                let sampleUnderFeatVal = seq { for id in node.activeSamples do if int(values.[id])=featval then yield (node.dat.Labels.[id],node.dat.GetSampleWeight(id)) }
+                                let weight, ent = weightEntropy sampleUnderFeatVal node.dat.NLabels
+                                //printfn "feat
+                                yield weight, ent
+                            }
+                        let entropyDeduction = 
+                            subtrees |> Seq.map (fun (w,e) -> w/totalweight * e) |> Seq.sum
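+                        // information gain = entropy before the split minus the weighted entropy of the children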
+                        //printfn "total = %A feat = %A gain = %A" totalent feat (totalent - entropyDeduction)
+                        yield totalent - entropyDeduction, feat
+                }
+                |> Seq.max
+                |> snd
+            | Sparse _ ->
+                NYI()
+
+
+    let makeRoot(ds:dataset2) = 
+        let lc = Dataset2.countLabelWeightInActive ds (Array.init ds.NSamples (fun i -> i))
+        { dat = ds;
+          featIdx = Dataset2.transpose ds;
+          maxFeat = Dataset2.getNumOfDifferentValuesInFeature ds;
+          activeSamples = Array.init ds.NSamples (fun i -> i);
+          activeFeats = Array.init ds.NFeatures (fun i -> i);
+          children = LeafNode;
+          labelCnt = lc;
+          label = Array.maxIndex lc;
+          splitFeat = -1;
+          depth = 0;
+        }
+
+
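+    // split a node on the feature chosen by the selector: one child per feature value,
+    // each child keeping the matching active samples and the remaining active features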
+    let splitNode (featureSelector: TreeNode -> int) (node: TreeNode) = 
+        
+        let splitFeat = featureSelector node 
+
+        let newActiveFeats = node.activeFeats |> Array.filter (fun i -> i <> splitFeat)
+        
+        node.children <- InternalNode (
+            [|
+                let values = node.featIdx.DenseData.[splitFeat].InternalValues
+                //printfn "values = %A" values
+                //printfn "activesamples = %A" node.activeSamples
+
+                let maxf = node.maxFeat.[splitFeat]
+                for featval=0 to maxf-1 do
+                    let newActiveS = [| for id in node.activeSamples do if int(values.[id])=featval then yield id|]
+                    //printfn "newactives = %A" newActiveS
+                    let newLabelCnt= Dataset2.countLabelWeightInActive node.dat newActiveS
+
+                    let newLabel = Array.maxIndex newLabelCnt
+                    yield { 
+                        node with
+                            activeSamples = newActiveS;
+                            activeFeats = newActiveFeats;
+                            children = LeafNode;
+                            depth = node.depth + 1;
+                            labelCnt = newLabelCnt;
+                            label = newLabel;
+                    }
+            |]  
+            )
+        node.splitFeat <- splitFeat
+
+
+        //printfn "depth = %d splitor = %A label = %A " node.depth (node.dat.GetFeatureName(node.splitFeat)) node.Label
+
+    let isLeafNode (node: TreeNode) (p:treeParameterType) = 
+        // stop splitting when the depth limit is hit, the node is too small, or it is (almost) pure
+        let isPure (node: TreeNode) =
+            let prob = node.LabelWithProbability |> snd
+            prob > 0.999
+
+        node.depth = p.maxDepth
+        || node.activeSamples.Length <= p.minNodes
+        || isPure node
+
+    let rec buildTree (node: TreeNode) (p:treeParameterType) = 
+        if not (isLeafNode node p) then
+            splitNode infoGainSelector node 
+            match node.children with
+                | InternalNode d ->
+                    d |> Array.iter (fun subtree -> buildTree subtree p)
+                | LeafNode ->
+                    failwith "impossible: the node was just split"
+
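+    // A minimal sketch of how the pieces above fit together, assuming an already
+    // loaded dataset2 value ds:
+    //     let p = parsePara defaultPara
+    //     let root = makeRoot ds
+    //     buildTree root p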
+
+    type dtPredictor(tree:TreeNode) = 
+        let predict(gv:gvector) = 
+            let rec pred (node:TreeNode) (gv:gvector) = 
+                (*
+                match node.children with
+                    | LeafNode ->
+                        if node.label >= 0 then node.label
+                        else -1
+                    | InternalNode d ->
+                        let split = node.splitFeat
+                        let v = max 0 (int (gv.[split])) // clamp unseen or negative feature values to 0
+                        
+                        let p = pred (node.Child(v)) gv
+                        if p = -1 then node.Label else p
+                *)
+                        
+                if node.IsLeaf then
+