Commits

Sky Sanders  committed 62c734b

Initial check-in

  • Participants

Comments (0)

Files changed (52)

File trunk/soddi.sln

+
+Microsoft Visual Studio Solution File, Format Version 10.00
+# Visual Studio 2008
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "soddi", "soddi\soddi.csproj", "{94451A1F-9DBA-438C-956F-CD1C072310B2}"
+EndProject
+Global
+	GlobalSection(SolutionConfigurationPlatforms) = preSolution
+		Debug|Any CPU = Debug|Any CPU
+		Release|Any CPU = Release|Any CPU
+	EndGlobalSection
+	GlobalSection(ProjectConfigurationPlatforms) = postSolution
+		{94451A1F-9DBA-438C-956F-CD1C072310B2}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+		{94451A1F-9DBA-438C-956F-CD1C072310B2}.Debug|Any CPU.Build.0 = Debug|Any CPU
+		{94451A1F-9DBA-438C-956F-CD1C072310B2}.Release|Any CPU.ActiveCfg = Release|Any CPU
+		{94451A1F-9DBA-438C-956F-CD1C072310B2}.Release|Any CPU.Build.0 = Release|Any CPU
+	EndGlobalSection
+	GlobalSection(SolutionProperties) = preSolution
+		HideSolutionNode = FALSE
+	EndGlobalSection
+EndGlobal

File trunk/soddi.suo

Binary file added.

File trunk/soddi/BulkInsert/BulkInsertJob.cs

+/*!
+ * Project: SODDI v.09
+ * File   : BulkInsertJob.cs
+ * http://skysanders.net/tools/se/
+ *
+ * Copyright 2010, Sky Sanders
+ * Dual licensed under the MIT or GPL Version 2 licenses.
+ * http://skysanders.net/tools/se/LICENSE.TXT
+ *
+ * Date: Sat Mar 28 2010 
+ */
+using System;
+using System.Collections.Generic;
+
+namespace StackOverflowDataDumpImport.BulkInsert
+{
+    /// <summary>
+    /// Manages a group of related BulkInsertTasks that are to
+    /// be run sequentially. In this case, we are queuing all updates
+    /// to a particular table into one BulkInsertJob to prevent deadlocks.
+    /// </summary>
+    public class BulkInsertJob
+    {
+        private readonly List<IBulkInsertTask> _tasks;
+
+        public BulkInsertJob()
+        {
+            _tasks = new List<IBulkInsertTask>();
+        }
+
+        /// <summary>
+        /// Raised whenever a task added to this job raises its own RowsInserted
+        /// event. The sender is the originating task.
+        /// </summary>
+        public event EventHandler<RowsCopiedEventArgs> RowsInserted;
+
+        /// <summary>
+        /// Forwards a task's progress notification to the subscribers of this job.
+        /// </summary>
+        /// <param name="task">The task that reported progress; passed on as the event sender.</param>
+        /// <param name="ea">The progress details reported by the task.</param>
+        public virtual void OnRowsInserted(IBulkInsertTask task, RowsCopiedEventArgs ea)
+        {
+            // Copy the delegate first so an unsubscribe on another thread cannot
+            // null the event between the check and the invocation.
+            EventHandler<RowsCopiedEventArgs> handler = RowsInserted;
+            if (handler != null)
+            {
+                handler(task, ea);
+            }
+        }
+
+        /// <summary>
+        /// Appends a task to the job and relays its RowsInserted events through this job.
+        /// </summary>
+        /// <param name="task">The task to append; tasks run in the order they were added.</param>
+        /// <exception cref="ArgumentNullException"><paramref name="task"/> is null.</exception>
+        public void Add(IBulkInsertTask task)
+        {
+            if (task == null)
+            {
+                throw new ArgumentNullException("task");
+            }
+
+            task.RowsInserted += (s, e) => OnRowsInserted((IBulkInsertTask) s, e);
+            _tasks.Add(task);
+        }
+
+        /// <summary>
+        /// Runs every added task, one after another, in insertion order.
+        /// </summary>
+        public void Process()
+        {
+            foreach (IBulkInsertTask task in _tasks)
+            {
+                task.Process();
+            }
+        }
+    }
+}

File trunk/soddi/BulkInsert/BulkInsertTask.cs

+/*!
+ * Project: SODDI v.09
+ * File   : BulkInsertTask.cs
+ * http://skysanders.net/tools/se/
+ *
+ * Copyright 2010, Sky Sanders
+ * Dual licensed under the MIT or GPL Version 2 licenses.
+ * http://skysanders.net/tools/se/LICENSE.TXT
+ *
+ * Date: Sat Mar 28 2010 
+ */
+using System;
+using System.Data.SqlClient;
+using Salient.Data;
+
+namespace StackOverflowDataDumpImport.BulkInsert
+{
+    /// <summary>
+    /// A single unit of bulk-insert work that reports its progress through
+    /// the <see cref="RowsInserted"/> event.
+    /// </summary>
+    public interface IBulkInsertTask
+    {
+        /// <summary>Raised to report progress of the task.</summary>
+        event EventHandler<RowsCopiedEventArgs> RowsInserted;
+
+        /// <summary>Display name of the task.</summary>
+        string Name { get; set; }
+
+        /// <summary>Connection string of the target database.</summary>
+        string ConnectionString { get; set; }
+
+        /// <summary>Name of the table that receives the rows.</summary>
+        string DestinationTable { get; set; }
+
+        /// <summary>Executes the task.</summary>
+        void Process();
+    }
+
+    /// <summary>
+    /// Streams an EnumerableDataReader into the destination table via SqlBulkCopy.
+    /// Progress is reported through <see cref="RowsInserted"/>; an exception thrown
+    /// while copying is caught and reported as a CopyEventType.Error event.
+    /// </summary>
+    public class BulkInsertTask : IBulkInsertTask
+    {
+        // Number of rows between SqlRowsCopied progress notifications.
+        private const int NotifyInterval = 1000;
+
+        // Value assigned to SqlBulkCopy.BulkCopyTimeout (seconds).
+        private const int BulkCopyTimeoutSeconds = 35000;
+
+        private readonly int _batchSize;
+        private readonly EnumerableDataReader _readerIn;
+
+        /// <summary>
+        /// Creates a task that copies <paramref name="readerIn"/> into <paramref name="destinationTable"/>.
+        /// </summary>
+        /// <param name="connectionString">Connection string handed to SqlBulkCopy.</param>
+        /// <param name="destinationTable">Name of the table that receives the rows.</param>
+        /// <param name="readerIn">Source of the rows; it is disposed by <see cref="Process"/>.</param>
+        /// <param name="name">Display name of the task.</param>
+        /// <param name="batchSize">Value assigned to SqlBulkCopy.BatchSize.</param>
+        public BulkInsertTask(string connectionString, string destinationTable, EnumerableDataReader readerIn,
+                              string name, int batchSize)
+        {
+            _batchSize = batchSize;
+            _readerIn = readerIn;
+            Name = name;
+            ConnectionString = connectionString;
+            DestinationTable = destinationTable;
+        }
+
+        #region IBulkInsertTask Members
+
+        public string Name { get; set; }
+
+        public event EventHandler<RowsCopiedEventArgs> RowsInserted;
+
+        public string ConnectionString { get; set; }
+        public string DestinationTable { get; set; }
+
+        /// <summary>
+        /// Bulk copies the reader into the destination table, mapping every source
+        /// column to the destination column of the same name. Raises Begin, zero or
+        /// more Active, and then Complete events; raises Error instead if an exception
+        /// is thrown.
+        /// </summary>
+        public void Process()
+        {
+            try
+            {
+                using (var bc = new SqlBulkCopy(ConnectionString, SqlBulkCopyOptions.TableLock))
+                {
+                    // Row total as of the most recent notification. Taken from the event
+                    // args rather than re-derived from NotifyAfter, and kept as a long to
+                    // match SqlRowsCopiedEventArgs.RowsCopied.
+                    long rowsCopied = 0;
+                    OnRowsInserted(new RowsCopiedEventArgs(new SqlRowsCopiedEventArgs(0), "", CopyEventType.Begin));
+                    bc.NotifyAfter = NotifyInterval;
+
+                    bc.SqlRowsCopied += (s, e) =>
+                        {
+                            rowsCopied = e.RowsCopied;
+                            OnRowsInserted(new RowsCopiedEventArgs(e, "", CopyEventType.Active));
+                        };
+                    bc.BulkCopyTimeout = BulkCopyTimeoutSeconds;
+                    bc.BatchSize = _batchSize;
+                    bc.DestinationTableName = DestinationTable;
+                    using (EnumerableDataReader reader = _readerIn)
+                    {
+                        for (int i = 0; i < reader.FieldCount; i++)
+                        {
+                            string fieldName = reader.GetName(i);
+                            bc.ColumnMappings.Add(fieldName, fieldName);
+                        }
+
+                        bc.WriteToServer(reader);
+                        bc.Close();
+                        reader.Close();
+
+                        // NOTE: the reported total has NotifyAfter granularity; rows copied
+                        // after the last notification are not included.
+                        OnRowsInserted(new RowsCopiedEventArgs(new SqlRowsCopiedEventArgs(rowsCopied), "",
+                                                               CopyEventType.Complete));
+                    }
+                }
+            }
+            catch (Exception ex)
+            {
+                // Deliberate best effort: failures are surfaced to subscribers, not rethrown.
+                OnRowsInserted(new RowsCopiedEventArgs(new SqlRowsCopiedEventArgs(0), ex.Message, CopyEventType.Error));
+            }
+        }
+
+        #endregion
+
+        /// <summary>
+        /// Raises <see cref="RowsInserted"/> with this task as the sender.
+        /// </summary>
+        /// <param name="ea">The progress details to publish.</param>
+        public virtual void OnRowsInserted(RowsCopiedEventArgs ea)
+        {
+            // Copy the delegate first so an unsubscribe on another thread cannot
+            // null the event between the check and the invocation.
+            EventHandler<RowsCopiedEventArgs> handler = RowsInserted;
+            if (handler != null)
+            {
+                handler(this, ea);
+            }
+        }
+    }
+}

File trunk/soddi/BulkInsert/CopyEventType.cs

+namespace StackOverflowDataDumpImport.BulkInsert
+{
+    public enum CopyEventType // Stage of a bulk copy, carried by RowsCopiedEventArgs.Type.
+    {
+        None, // Default (zero) value.
+        Begin, // Raised by BulkInsertTask.Process before any rows are written.
+        Active, // Raised by BulkInsertTask.Process on each SqlBulkCopy.SqlRowsCopied notification.
+        Complete, // Raised by BulkInsertTask.Process after WriteToServer has returned.
+        Error // Raised by BulkInsertTask.Process when it catches an exception; Message holds ex.Message.
+    }
+}

File trunk/soddi/BulkInsert/Extensions.cs

+/*!
+ * Project: SODDI v.09
+ * File   : Extensions.cs
+ * http://skysanders.net/tools/se/
+ *
+ * Copyright 2010, Sky Sanders
+ * Dual licensed under the MIT or GPL Version 2 licenses.
+ * http://skysanders.net/tools/se/LICENSE.TXT
+ *
+ * Date: Sat Mar 28 2010 
+ */
+using System;
+using System.Collections.Generic;
+using System.Data.SqlClient;
+using System.Linq;
+using System.Text.RegularExpressions;
+using System.Xml;
+using System.Xml.Linq;
+using StackOverflowDataDumpImport.Domain;
+
+namespace StackOverflowDataDumpImport.BulkInsert
+{
+    public static class Extensions
+    {
+        // Matches one "<tag>" token and captures the tag name. Built once and
+        // reused instead of being recompiled on every PostsTagFromReader call.
+        private static readonly Regex TagPattern = new Regex(@"\<([^>]+)\>", RegexOptions.Compiled);
+
+        /// <summary>
+        /// Types and returns the value of an attribute or the default of the
+        /// type if the attribute is null.
+        /// </summary>
+        /// <param name="attr">The attribute to read; may be null.</param>
+        /// <param name="returnType">
+        /// The type to convert to. For Nullable&lt;T&gt; the value is converted to T.
+        /// </param>
+        /// <returns>
+        /// The converted value, or the default of <paramref name="returnType"/>
+        /// (null for reference and nullable types) when <paramref name="attr"/> is null.
+        /// </returns>
+        /// <exception cref="ArgumentNullException"><paramref name="returnType"/> is null.</exception>
+        public static object GetValueOrDefault(this XAttribute attr, Type returnType)
+        {
+            if (returnType == null)
+            {
+                throw new ArgumentNullException("returnType");
+            }
+
+            if (attr == null)
+            {
+                return returnType.IsValueType ? Activator.CreateInstance(returnType) : null;
+            }
+
+            Type baseType = returnType;
+
+            if (returnType.IsGenericType && returnType.GetGenericTypeDefinition() == typeof (Nullable<>))
+            {
+                baseType = returnType.GetGenericArguments()[0];
+            }
+
+            // The dump is machine-written data, so parse it with the invariant culture
+            // rather than whatever culture the current thread happens to have.
+            return Convert.ChangeType(attr.Value, baseType, System.Globalization.CultureInfo.InvariantCulture);
+        }
+
+
+        /// <summary>
+        /// Returns a streaming sequence of PostTag built from the newly inserted Post
+        /// records.
+        /// </summary>
+        /// <param name="connectionString">Connection string of the database to query.</param>
+        /// <param name="schema">Schema that owns the Posts table.</param>
+        /// <returns>One PostTag per distinct tag of every post whose Tags column is not null.</returns>
+        public static IEnumerable<PostTag> PostsTagFromReader(string connectionString, string schema)
+        {
+            using (var conn = new SqlConnection(connectionString))
+            {
+                using (SqlCommand cmd = conn.CreateCommand())
+                {
+                    cmd.CommandTimeout = 300;
+                    // NOTE(review): schema is concatenated into the statement. An identifier
+                    // cannot be passed as a parameter, so callers must supply a trusted value.
+                    cmd.CommandText = "SELECT Id,SiteId,Tags FROM " + schema + ".Posts WHERE NOT Tags IS NULL";
+                    conn.Open();
+                    using (SqlDataReader reader = cmd.ExecuteReader())
+                    {
+                        while (reader.Read())
+                        {
+                            int id = reader.GetInt32(0);
+                            string siteId = reader.GetString(1);
+                            string tags = reader.GetString(2);
+                            IEnumerable<string> distinctTags =
+                                TagPattern.Matches(tags).Cast<Match>().Select(m => m.Groups[1].Value).Distinct();
+                            foreach (string tag in distinctTags)
+                            {
+                                yield return new PostTag {Tag = tag, PostId = id, SiteId = siteId};
+                            }
+                        }
+                    }
+                }
+            }
+        }
+
+        /// <summary>
+        /// Returns a streaming sequence of T deserialized from a stackoverflow
+        /// data dump xml document.
+        /// </summary>
+        /// <typeparam name="T">The type of domain object to stream</typeparam>
+        /// <param name="fileName">A stackoverflow data dump xml file</param>
+        /// <param name="siteId">Value assigned to SiteId on every returned object.</param>
+        /// <returns>One T per "row" element found below the document element.</returns>
+        public static IEnumerable<T> SoBaseFromXml<T>(string fileName, string siteId)
+            where T : ISoBase, new()
+        {
+            using (XmlReader rdr = XmlReader.Create(fileName))
+            {
+                rdr.MoveToContent();
+
+                // Step off the document element onto its first child.
+                bool hasNode = rdr.Read();
+                while (hasNode)
+                {
+                    if ((rdr.NodeType == XmlNodeType.Element) && (rdr.Name == "row"))
+                    {
+                        // XNode.ReadFrom consumes the element and leaves the reader on the
+                        // node that follows it. Calling Read() again here would skip that
+                        // node, dropping every other row when rows are not separated by
+                        // whitespace.
+                        var row = (XElement) XNode.ReadFrom(rdr);
+                        yield return SoBase<T>.FromXElement(row, siteId);
+                        hasNode = !rdr.EOF;
+                    }
+                    else
+                    {
+                        hasNode = rdr.Read();
+                    }
+                }
+            }
+        }
+    }
+}

File trunk/soddi/BulkInsert/RowsCopiedEventArgs.cs

+/*!
+ * Project: SODDI v.09
+ * File   : RowsCopiedEventArgs.cs
+ * http://skysanders.net/tools/se/
+ *
+ * Copyright 2010, Sky Sanders
+ * Dual licensed under the MIT or GPL Version 2 licenses.
+ * http://skysanders.net/tools/se/LICENSE.TXT
+ *
+ * Date: Sat Mar 28 2010 
+ */
+
+using System;
+using System.Data.SqlClient;
+
+namespace StackOverflowDataDumpImport.BulkInsert
+{
+    /// <summary>
+    /// Event data for bulk-insert progress: wraps a SqlRowsCopiedEventArgs and
+    /// adds a message and an event type.
+    /// </summary>
+    public class RowsCopiedEventArgs : EventArgs
+    {
+        // The wrapped ADO.NET event data; RowsCopied and Abort delegate to it.
+        private readonly SqlRowsCopiedEventArgs _inner;
+
+        /// <summary>
+        /// Wraps <paramref name="inner"/> together with a message and an event type.
+        /// </summary>
+        /// <param name="inner">The event data that RowsCopied and Abort delegate to.</param>
+        /// <param name="message">Initial value of <see cref="Message"/>.</param>
+        /// <param name="type">Initial value of <see cref="Type"/>.</param>
+        public RowsCopiedEventArgs(SqlRowsCopiedEventArgs inner, string message, CopyEventType type)
+        {
+            _inner = inner;
+            Message = message;
+            Type = type;
+        }
+
+        /// <summary>The kind of notification this instance represents.</summary>
+        public CopyEventType Type { get; set; }
+
+        /// <summary>Text accompanying the notification.</summary>
+        public string Message { get; set; }
+
+        /// <summary>The row count reported by the wrapped event data.</summary>
+        public long RowsCopied
+        {
+            get
+            {
+                return _inner.RowsCopied;
+            }
+        }
+
+        /// <summary>Reads or writes the Abort flag of the wrapped event data.</summary>
+        public bool Abort
+        {
+            get
+            {
+                return _inner.Abort;
+            }
+            set
+            {
+                _inner.Abort = value;
+            }
+        }
+    }
+}

File trunk/soddi/Domain.cs

+/*!
+ * Project: SODDI v.09
+ * File   : Domain.cs
+ * http://skysanders.net/tools/se/
+ *
+ * Copyright 2010, Sky Sanders
+ * Dual licensed under the MIT or GPL Version 2 licenses.
+ * http://skysanders.net/tools/se/LICENSE.TXT
+ *
+ * Date: Sat Mar 28 2010 
+ */
+
+using System;
+using System.Collections.Generic;
+using System.Xml.Linq;
+using Salient.Reflection;
+using StackOverflowDataDumpImport.BulkInsert;
+
+namespace StackOverflowDataDumpImport.Domain
+{
+    public interface ISoBase // Contract required of T by SoBase<T> and Extensions.SoBaseFromXml<T>.
+    {
+        string SiteId { get; set; } // Set by SoBase<T>.FromXElement from its siteId argument.
+    }
+
+    /// <summary>
+    /// Base class that builds a T from an XElement using a cached list of
+    /// dynamic property setters.
+    ///
+    /// The properties of the derived classes correspond directly
+    /// to both the source xml schema and the destination schema.
+    ///
+    /// Any changes to these objects will not result in rainbows and
+    /// unicorns. You are warned.
+    ///
+    /// </summary>
+    /// <typeparam name="T">The concrete domain type to create and populate.</typeparam>
+    public class SoBase<T> where T : ISoBase, new()
+    {
+        // Property accessors for T, created once per closed generic type.
+        private static readonly IList<DynamicProperties.Property> Props;
+
+        static SoBase()
+        {
+            Props = DynamicProperties.CreatePropertyMethods(typeof (T));
+        }
+
+        public string SiteId { get; set; }
+
+
+        /// <summary>
+        /// Creates a T, assigns its SiteId, and then sets every cached property for which
+        /// <paramref name="r"/> carries an attribute of the same name.
+        /// </summary>
+        /// <param name="r">The element whose attributes supply the property values.</param>
+        /// <param name="siteId">Value assigned to SiteId before the attributes are applied.</param>
+        /// <returns>The populated instance; properties without a matching attribute keep their defaults.</returns>
+        public static T FromXElement(XElement r, string siteId)
+        {
+            var instance = new T();
+            instance.SiteId = siteId;
+
+            foreach (DynamicProperties.Property property in Props)
+            {
+                XAttribute source = r.Attribute(property.Info.Name);
+                if (source == null)
+                {
+                    // No attribute with this name: leave the property untouched.
+                    continue;
+                }
+
+                object converted = source.GetValueOrDefault(property.Info.PropertyType);
+                property.Setter(instance, converted);
+            }
+
+            return instance;
+        }
+    }
+
+    public class Badge : SoBase<Badge>, ISoBase // Badge record; SiteId is inherited from SoBase<Badge>.
+    {
+        public int Id { get; set; }
+        public string Name { get; set; }
+        public int UserId { get; set; }
+        public DateTime Date { get; set; }
+    } // NOTE(review): presumably filled by SoBase<Badge>.FromXElement from same-named XML attributes - confirm.
+
+    public class PostTag // One (post, tag) pair; created by Extensions.PostsTagFromReader, not derived from SoBase.
+    {
+        public int PostId { get; set; } // Id column of the Posts row the tag was read from.
+        public string SiteId { get; set; } // SiteId column of that Posts row.
+        public string Tag { get; set; } // Text captured between '<' and '>' in the Tags column.
+    }
+
+    public class Comment : SoBase<Comment>, ISoBase // Comment record; SiteId is inherited from SoBase<Comment>.
+    {
+        public int Id { get; set; }
+        public int PostId { get; set; }
+        public int? UserId { get; set; } // Stays null when FromXElement finds no UserId attribute.
+        public int? Score { get; set; } // Stays null when FromXElement finds no Score attribute.
+        public string Text { get; set; }
+        public DateTime CreationDate { get; set; }
+    }
+
+    public class Post : SoBase<Post>, ISoBase // Post record; SiteId is inherited from SoBase<Post>.
+    {
+        public int Id { get; set; }
+        public int ParentId { get; set; } // Non-nullable: stays 0 when FromXElement finds no such attribute.
+        public int OwnerUserId { get; set; }
+        public int PostTypeId { get; set; }
+        public string Title { get; set; }
+        public string Tags { get; set; } // Parsed as "<tag>" tokens by Extensions.PostsTagFromReader once stored.
+        public string Body { get; set; }
+        public int ViewCount { get; set; }
+        public int Score { get; set; }
+        public int AnswerCount { get; set; }
+        public int AcceptedAnswerId { get; set; }
+        public int CommentCount { get; set; }
+        public int FavoriteCount { get; set; }
+        public int LastEditorUserId { get; set; }
+        public string LastEditorDisplayName { get; set; }
+        public DateTime CreationDate { get; set; }
+        public DateTime LastActivityDate { get; set; }
+        public DateTime? LastEditDate { get; set; } // Nullable dates stay null when the attribute is absent.
+        public DateTime? CommunityOwnedDate { get; set; }
+        public DateTime? ClosedDate { get; set; }
+    }
+
+    public class User : SoBase<User>, ISoBase // User record; SiteId is inherited from SoBase<User>.
+    {
+        public int Id { get; set; }
+        public string DisplayName { get; set; }
+        public string EmailHash { get; set; }
+        public string Location { get; set; }
+        public string WebsiteUrl { get; set; }
+        public int Views { get; set; }
+        public int? Age { get; set; } // Stays null when FromXElement finds no Age attribute.
+        public string AboutMe { get; set; }
+        public int Reputation { get; set; }
+        public int UpVotes { get; set; }
+        public int DownVotes { get; set; }
+        public DateTime CreationDate { get; set; }
+        public DateTime LastAccessDate { get; set; }
+    }
+
+    public class Vote : SoBase<Vote>, ISoBase // Vote record; SiteId is inherited from SoBase<Vote>.
+    {
+        public int Id { get; set; }
+        public int VoteTypeId { get; set; }
+        public int PostId { get; set; }
+        public int? UserId { get; set; } // Stays null when FromXElement finds no UserId attribute.
+        public int? BountyAmount { get; set; } // Stays null when FromXElement finds no BountyAmount attribute.
+        public DateTime CreationDate { get; set; }
+    }
+}

File trunk/soddi/GPL.TXT

+        GNU GENERAL PUBLIC LICENSE
+           Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.
+ 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+          Preamble
+
+  The licenses for most software are designed to take away your
+freedom to share and change it.  By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users.  This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it.  (Some other Free Software Foundation software is covered by
+the GNU Lesser General Public License instead.)  You can apply it to
+your programs, too.
+
+  When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+  To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+  For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have.  You must make sure that they, too, receive or can get the
+source code.  And you must show them these terms so they know their
+rights.
+
+  We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+  Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software.  If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+  Finally, any free program is threatened constantly by software
+patents.  We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary.  To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.
+
+        GNU GENERAL PUBLIC LICENSE
+   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+  0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License.  The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language.  (Hereinafter, translation is included without limitation in
+the term "modification".)  Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope.  The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+  1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+  2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+    a) You must cause the modified files to carry prominent notices
+    stating that you changed the files and the date of any change.
+
+    b) You must cause any work that you distribute or publish, that in
+    whole or in part contains or is derived from the Program or any
+    part thereof, to be licensed as a whole at no charge to all third
+    parties under the terms of this License.
+
+    c) If the modified program normally reads commands interactively
+    when run, you must cause it, when started running for such
+    interactive use in the most ordinary way, to print or display an
+    announcement including an appropriate copyright notice and a
+    notice that there is no warranty (or else, saying that you provide
+    a warranty) and that users may redistribute the program under
+    these conditions, and telling the user how to view a copy of this
+    License.  (Exception: if the Program itself is interactive but
+    does not normally print such an announcement, your work based on
+    the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole.  If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works.  But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+  3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+    a) Accompany it with the complete corresponding machine-readable
+    source code, which must be distributed under the terms of Sections
+    1 and 2 above on a medium customarily used for software interchange; or,
+
+    b) Accompany it with a written offer, valid for at least three
+    years, to give any third party, for a charge no more than your
+    cost of physically performing source distribution, a complete
+    machine-readable copy of the corresponding source code, to be
+    distributed under the terms of Sections 1 and 2 above on a medium
+    customarily used for software interchange; or,
+
+    c) Accompany it with the information you received as to the offer
+    to distribute corresponding source code.  (This alternative is
+    allowed only for noncommercial distribution and only if you
+    received the program in object code or executable form with such
+    an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it.  For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable.  However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+  4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License.  Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+  5. You are not required to accept this License, since you have not
+signed it.  However, nothing else grants you permission to modify or
+distribute the Program or its derivative works.  These actions are
+prohibited by law if you do not accept this License.  Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+  6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions.  You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+  7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all.  For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices.  Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+  8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded.  In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+  9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time.  Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number.  If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation.  If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+  10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission.  For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this.  Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+          NO WARRANTY
+
+  11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+  12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.

File trunk/soddi/ImportOptions.cs

+using System;
+
+namespace StackOverflowDataDumpImport
+{
+    /// <summary>
+    /// Combinable bit flags; <see cref="All"/> is the union of every individual flag.
+    /// </summary>
+    [Flags]
+    public enum ImportOptions
+    {
+        None = 0,
+        Drop = 1 << 0,
+        Indexes = 1 << 1,
+        Fulltext = 1 << 2,
+        Rowid = 1 << 3,
+        Pk = 1 << 4,
+        Unique = 1 << 5,
+        Split = 1 << 6,
+        All = Drop | Indexes | Fulltext | Rowid | Pk | Unique | Split
+    }
+}

File trunk/soddi/LICENSE.TXT

+You may use this code under the terms of either the MIT License or 
+the GNU General Public License (GPL) Version 2.
+
+The MIT License is recommended for most projects. It is simple and 
+easy to understand and it places almost no restrictions on what you 
+can do with this code.
+
+If the GPL suits your project better you are also free to use this 
+code project under that license.
+
+You don’t have to do anything special to choose one license or the 
+other and you don’t have to notify anyone which license you are using. 
+
+You are free to use this code in commercial projects as long as the 
+copyright header is left intact.

File trunk/soddi/MIT.TXT

+Copyright (c) 2010 Sky Sanders, http://skysanders.net/
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

File trunk/soddi/Program.cs

+/*!
+ * Project: SODDI v.09
+ * File   : Program.cs
+ * http://skysanders.net/tools/se/
+ *
+ * Copyright 2010, Sky Sanders
+ * Dual licensed under the MIT or GPL Version 2 licenses.
+ * http://skysanders.net/tools/se/LICENSE.TXT
+ *
+ * Date: Sat Mar 28 2010 
+ */
+
+using System;
+using System.Collections.Generic;
+using System.Data.SqlClient;
+using System.Diagnostics;
+using System.IO;
+using System.Linq;
+using System.Text.RegularExpressions;
+using System.Threading;
+using Salient.Data;
+using StackOverflowDataDumpImport.BulkInsert;
+using StackOverflowDataDumpImport.Domain;
+
+namespace StackOverflowDataDumpImport
+{
+    internal class Program
+    {
+        private static readonly object LockObj = new object();
+        private static int _logTop;
+        /**
+         * <summary>
+         * Console entry point. Validates the command line, discovers the site directories under
+         * SOURCE, queues one bulk insert task per included site for each of the five tables,
+         * prepares the database, runs the jobs and, when the Split option is set, splits the tags.
+         * </summary>
+         * <param name="args">SOURCE directory, CONNECTION string, then any optional arguments.</param>
+         */
+        private static void Main(string[] args)
+        {
+            var sw = new Stopwatch();
+            sw.Start();
+
+            Console.Title = "StackOverflow Data Dump Import v.09";
+
+            Console.SetWindowSize(80, 50);
+            Console.Clear();
+            Write(Console.Title, 1, 0, ConsoleColor.White);
+            Write("http://skysanders.net/tools/se/", 2, 0, ConsoleColor.DarkGray);
+
+            Console.WriteLine();
+
+            // running total of the rows reported by the import jobs and the tag split
+            long rowCount = 0;
+            // the first two arguments (SOURCE and CONNECTION) are required
+            if (args.Length < 2)
+            {
+                Usage();
+                return;
+            }
+
+            ImportOptions options;
+            int batchSize;
+            string schema;
+            List<string> include = ParseOptionalArguments(args, out options, out schema, out batchSize);
+
+
+            string sourceDir = args[0];
+            string connectionString = args[1];
+
+            if (!Directory.Exists(sourceDir))
+            {
+                Write(string.Format("\r\nSource directory '{0}' does not exist.\r\n", sourceDir), Console.CursorTop, 0,
+                      ConsoleColor.Red);
+                Usage();
+                return;
+            }
+
+            // site key -> directory path; null when the directory layout is not recognised
+            Dictionary<string, string> sourceDirectories = GetSourceDirectories(sourceDir);
+
+
+            if (sourceDirectories == null)
+            {
+                Write("\r\nInvalid source directory.\r\n", Console.CursorTop, 0, ConsoleColor.Red);
+                Usage();
+                return;
+            }
+            // one job per destination table; each job collects that table's tasks for every included site
+            var jobs = new Dictionary<string, BulkInsertJob>
+                {
+                    {"Users", new BulkInsertJob()},
+                    {"Badges", new BulkInsertJob()},
+                    {"Comments", new BulkInsertJob()},
+                    {"Votes", new BulkInsertJob()},
+                    {"Posts", new BulkInsertJob()}
+                };
+
+            // console row assigned to each "<site> <schema>.<table>" progress line
+            var positions = new Dictionary<string, int>();
+
+            string proccessingMessage = "Processing ";
+
+            int cursorTop = Console.CursorTop = Console.CursorTop + 2;
+
+            foreach (var kvp in sourceDirectories)
+            {
+                string path = kvp.Value;
+                string siteId = kvp.Key;
+
+                // if no include list specified or this site is explicitly included
+                if (include.Count == 0 || include.Contains(siteId.ToLowerInvariant()))
+                {
+                    proccessingMessage += siteId + " ";
+
+                    jobs["Users"].Add(new BulkInsertTask(connectionString, schema + ".Users",
+                                                         new EnumerableDataReader(
+                                                             Extensions.SoBaseFromXml<User>(
+                                                                 Path.Combine(path, "Users.xml"), siteId)), siteId,
+                                                         batchSize));
+                    positions.Add(siteId + " " + schema + ".Users", cursorTop++);
+
+
+                    jobs["Badges"].Add(new BulkInsertTask(connectionString, schema + ".Badges",
+                                                          new EnumerableDataReader(
+                                                              Extensions.SoBaseFromXml<Badge>(
+                                                                  Path.Combine(path, "Badges.xml"), siteId)), siteId,
+                                                          batchSize));
+                    positions.Add(siteId + " " + schema + ".Badges", cursorTop++);
+
+
+                    jobs["Comments"].Add(new BulkInsertTask(connectionString, schema + ".Comments",
+                                                            new EnumerableDataReader(
+                                                                Extensions.SoBaseFromXml<Comment>(
+                                                                    Path.Combine(path, "Comments.xml"), siteId)), siteId,
+                                                            batchSize));
+                    positions.Add(siteId + " " + schema + ".Comments", cursorTop++);
+
+
+                    jobs["Votes"].Add(new BulkInsertTask(connectionString, schema + ".Votes",
+                                                         new EnumerableDataReader(
+                                                             Extensions.SoBaseFromXml<Vote>(
+                                                                 Path.Combine(path, "Votes.xml"), siteId)), siteId,
+                                                         batchSize));
+                    positions.Add(siteId + " " + schema + ".Votes", cursorTop++);
+
+
+                    jobs["Posts"].Add(new BulkInsertTask(connectionString, schema + ".Posts",
+                                                         new EnumerableDataReader(
+                                                             Extensions.SoBaseFromXml<Post>(
+                                                                 Path.Combine(path, "Posts.xml"), siteId)), siteId,
+                                                         batchSize));
+                    positions.Add(siteId + " " + schema + ".Posts", cursorTop++);
+                    // leave one empty console row between sites
+                    cursorTop++;
+                }
+            }
+
+            proccessingMessage += "\r\n with options " + options;
+
+            InitializeLog((options & ImportOptions.Split) == ImportOptions.Split, cursorTop - 5);
+
+            _logTop = Write("Found " + string.Join(" ", sourceDirectories.Keys.ToArray()) + "\r\n", _logTop, 1,
+                            ConsoleColor.Gray);
+            _logTop = Write(proccessingMessage + "\r\n", _logTop, 1, ConsoleColor.Gray);
+
+
+            try
+            {
+                PrepareDatabase(connectionString, schema, options);
+            }
+            catch (Exception ex)
+            {
+                _logTop = Write("\r\nError initializing database: " + ex.Message, _logTop, 1, ConsoleColor.Red);
+                return;
+            }
+
+
+            try
+            {
+                rowCount += ProcessJobs(jobs, positions);
+            }
+            catch (Exception ex)
+            {
+                _logTop = Write("\r\nError processing jobs: " + ex.Message, _logTop, 1, ConsoleColor.Red);
+                return;
+            }
+
+            // optional post-processing pass, requested with the Split flag
+            if ((options & ImportOptions.Split) == ImportOptions.Split)
+            {
+                rowCount += ProcessTagSplits(connectionString, schema, cursorTop, batchSize);
+            }
+            // move the cursor below the log and make it visible again
+            lock (LockObj)
+            {
+                Console.CursorTop = ++_logTop;
+                Console.CursorVisible = true;
+            }
+
+            sw.Stop();
+
+            _logTop =
+                Write(
+                    string.Format("\r\n\r\nImport complete. {0} rows in {1:N} minutes.\r\n", rowCount.ToString("#,##0"),
+                                  sw.ElapsedMilliseconds/1000f/60f), _logTop, ConsoleColor.Yellow);
+
+#if DEBUG
+            Console.WriteLine("Press a key to quit");
+            Console.ReadKey();
+#endif
+        }
+
+
+        /// <summary>
+        /// Runs a single bulk insert task that streams the rows produced by
+        /// Extensions.PostsTagFromReader into the PostsTags table of the given schema,
+        /// reporting progress on console row <paramref name="splitPos"/>.
+        /// </summary>
+        /// <param name="connectionString">Connection string handed to the reader and the insert task.</param>
+        /// <param name="schema">Schema prefix of the destination table.</param>
+        /// <param name="splitPos">Console row used for the progress line.</param>
+        /// <param name="batchSize">Batch size handed to the insert task.</param>
+        /// <returns>The row count carried by the task's Complete event (0 if none was raised).</returns>
+        private static long ProcessTagSplits(string connectionString, string schema, int splitPos, int batchSize)
+        {
+            long insertedRows = 0;
+            _logTop = Write("Starting tag split processing....\r\n", _logTop, ConsoleColor.Gray);
+            Stopwatch timer = Stopwatch.StartNew();
+
+            // The source is a non-generic enumerable here, so the element type is given explicitly.
+            var tagRows = new EnumerableDataReader(Extensions.PostsTagFromReader(connectionString, schema),
+                                                   typeof (PostTag));
+            var splitTask = new BulkInsertTask(connectionString, schema + ".PostsTags", tagRows, "PostTags",
+                                               batchSize);
+
+            splitTask.RowsInserted += (sender, e) =>
+                {
+                    string copied = e.RowsCopied > 0 ? e.RowsCopied.ToString("#,##0") : "";
+                    string status = e.Type.ToString() + (e.Type == CopyEventType.Error ? " " + e.Message : "");
+                    string line = string.Format("Tags {0} {1}", copied, status).PadRight(40);
+
+                    // only the final event carries the total we report back
+                    if (e.Type == CopyEventType.Complete)
+                    {
+                        insertedRows = e.RowsCopied;
+                    }
+                    WriteCopyMessage(line, e, splitPos);
+
+                    // brief pause between progress updates
+                    Thread.Sleep(10);
+                };
+
+            splitTask.Process();
+            timer.Stop();
+
+            string summary = string.Format("\r\nSODDI split tags complete {0:N} minutes.\r\n",
+                                           timer.ElapsedMilliseconds/1000f/60f);
+            _logTop = Write(summary, _logTop, ConsoleColor.DarkGreen);
+
+            return insertedRows;
+        }
+
+        /// <summary>
+        /// Starts every job on its own thread, renders each progress event on the console
+        /// row registered for its task, and blocks until all threads have finished.
+        /// </summary>
+        /// <param name="jobs">Jobs to run; only the values are used.</param>
+        /// <param name="positions">Console row per "&lt;task name&gt; &lt;destination table&gt;" key.</param>
+        /// <returns>Sum of the row counts carried by the Complete events.</returns>
+        private static long ProcessJobs(Dictionary<string, BulkInsertJob> jobs, IDictionary<string, int> positions)
+        {
+            _logTop = Write("Importing....\r\n", _logTop, ConsoleColor.White);
+            Stopwatch timer = Stopwatch.StartNew();
+            Console.CursorVisible = false;
+
+            long totalRows = 0;
+
+            // One handler shared by all jobs; the lock serialises console output and the running total.
+            EventHandler<RowsCopiedEventArgs> onRowsInserted = (sender, e) =>
+                {
+                    lock (LockObj)
+                    {
+                        var source = (IBulkInsertTask) sender;
+                        string copied = e.RowsCopied > 0 ? e.RowsCopied.ToString("#,##0") : "";
+                        string status = e.Type.ToString() + (e.Type == CopyEventType.Error ? " " + e.Message : "");
+                        string line =
+                            string.Format("{0} {1} {2} {3}", source.Name, source.DestinationTable, copied, status).
+                                PadRight(40);
+
+                        int row = positions[source.Name + " " + source.DestinationTable];
+                        if (e.Type == CopyEventType.Complete)
+                        {
+                            totalRows += e.RowsCopied;
+                        }
+                        WriteCopyMessage(line, e, row);
+                        Thread.Sleep(10);
+                    }
+                };
+
+            // subscribe and launch each job in turn
+            var workers = new List<Thread>();
+            foreach (BulkInsertJob job in jobs.Values)
+            {
+                job.RowsInserted += onRowsInserted;
+
+                var worker = new Thread(job.Process);
+                workers.Add(worker);
+                worker.Start();
+            }
+
+            // wait for every job to finish
+            workers.ForEach(worker => worker.Join());
+
+            timer.Stop();
+            string summary = string.Format("\r\nSODDI import complete in {0:N} minutes.\r\n",
+                                           timer.ElapsedMilliseconds/1000f/60f);
+            _logTop = Write(summary, _logTop, ConsoleColor.DarkGreen);
+
+            return totalRows;
+        }
+
+        #region Plumbing
+
+        /// <summary>
+        /// Writes <paramref name="message"/> at row <paramref name="top"/>, column 1.
+        /// </summary>
+        /// <returns>The console cursor row after writing.</returns>
+        private static int Write(string message, int top, ConsoleColor color)
+        {
+            const int defaultLeft = 1;
+            return Write(message, top, defaultLeft, color);
+        }
+
+        /// <summary>
+        /// Writes <paramref name="message"/> at the given console position in the given colour,
+        /// then restores the previous foreground colour. Guarded by LockObj.
+        /// </summary>
+        /// <param name="message">Text to write.</param>
+        /// <param name="top">Console row to write at.</param>
+        /// <param name="left">Console column to write at.</param>
+        /// <param name="color">Foreground colour used for this message.</param>
+        /// <returns>The console cursor row after writing.</returns>
+        private static int Write(string message, int top, int left, ConsoleColor color)
+        {
+            lock (LockObj)
+            {
+                ConsoleColor previousColor = Console.ForegroundColor;
+
+                Console.CursorLeft = left;
+                Console.CursorTop = top;
+
+                Console.ForegroundColor = color;
+                Console.Write(message);
+                Console.ForegroundColor = previousColor;
+
+                int rowAfterWrite = Console.CursorTop;
+                return rowAfterWrite;
+            }
+        }
+
+        /// <summary>
+        /// Maps each sub-directory of <paramref name="sourceDir"/> to its site key. A sub-directory
+        /// name must be digits, one space, then upper-case letters (A-Z); the letters are the key.
+        /// </summary>
+        /// <param name="sourceDir">Directory whose immediate sub-directories are inspected.</param>
+        /// <returns>
+        /// Site key to directory path, or null when there are no sub-directories or any
+        /// sub-directory name does not match the expected pattern.
+        /// </returns>
+        private static Dictionary<string, string> GetSourceDirectories(string sourceDir)
+        {
+            string[] children = Directory.GetDirectories(sourceDir);
+            if (children.Length == 0)
+            {
+                return null;
+            }
+
+            var sitesByKey = new Dictionary<string, string>();
+            var namePattern = new Regex(@"^(\d+) ([A-Z]+)$");
+
+            foreach (string child in children)
+            {
+                Match parsed = namePattern.Match(Path.GetFileName(child));
+                if (parsed.Success)
+                {
+                    // group 2 holds the letters that follow the numeric prefix
+                    sitesByKey.Add(parsed.Groups[2].Value, child);
+                    continue;
+                }
+
+                // a single unexpected directory invalidates the whole source
+                return null;
+            }
+            return sitesByKey;
+        }
+
+        /// <summary>
+        /// Resizes the console window to 100x50 and prints the embedded README text.
+        /// In DEBUG builds it then waits for a key press.
+        /// </summary>
+        private static void Usage()
+        {
+            Console.SetWindowSize(100, 50);
+            Console.WriteLine(GetTextResource("StackOverflowDataDumpImport.README.txt"));
+
+#if DEBUG
+            Console.WriteLine("Press a key to quit");
+            Console.ReadKey();
+#endif
+        }
+
+        /// <summary>
+        /// Executes the embedded SoDump.sql script against the target database after replacing
+        /// the "[dbo]" placeholder with <paramref name="schema"/> and removing the marker text
+        /// of every option that is set in <paramref name="options"/>.
+        /// </summary>
+        /// <param name="connectionString">Connection string of the target database.</param>
+        /// <param name="schema">Text substituted for every "[dbo]" in the script.</param>
+        /// <param name="options">Flags deciding which marker texts are removed from the script.</param>
+        private static void PrepareDatabase(string connectionString, string schema, ImportOptions options)
+        {
+            // Flag -> marker text that is stripped from the script when the flag is set.
+            // The entries are applied in this order.
+            var markers = new[]
+                {
+                    new KeyValuePair<ImportOptions, string>(ImportOptions.Drop, "IF 0 = 1--DROP"),
+                    new KeyValuePair<ImportOptions, string>(ImportOptions.Fulltext, "IF 0 = 1--FULLTEXT"),
+                    new KeyValuePair<ImportOptions, string>(ImportOptions.Indexes, "IF 0 = 1--INDEXES"),
+                    new KeyValuePair<ImportOptions, string>(ImportOptions.Pk, "--PK"),
+                    new KeyValuePair<ImportOptions, string>(ImportOptions.Rowid, "--ROWID"),
+                    new KeyValuePair<ImportOptions, string>(ImportOptions.Unique, "--UNIQUE")
+                };
+
+            _logTop = Write("Preparing Database....\r\n", _logTop, ConsoleColor.Gray);
+
+            using (var connection = new SqlConnection(connectionString))
+            {
+                connection.Open();
+
+                using (SqlCommand command = connection.CreateCommand())
+                {
+                    command.CommandTimeout = 300;
+
+                    string sql = GetTextResource("StackOverflowDataDumpImport.Scripts.SoDump.sql");
+                    sql = sql.Replace("[dbo]", schema);
+
+                    foreach (var marker in markers)
+                    {
+                        bool requested = (options & marker.Key) == marker.Key;
+                        if (requested)
+                        {
+                            sql = sql.Replace(marker.Value, "");
+                        }
+                    }
+
+                    command.CommandText = sql;
+                    command.ExecuteNonQuery();
+                }
+            }
+        }
+
+        /// <summary>
+        /// Reads an embedded text resource from the assembly that declares the User type.
+        /// </summary>
+        /// <param name="resourcePath">Manifest resource name.</param>
+        /// <returns>The complete resource content.</returns>
+        /// <exception cref="FileNotFoundException">No resource with that name exists.</exception>
+        private static string GetTextResource(string resourcePath)
+        {
+            using (Stream resource = typeof (User).Assembly.GetManifestResourceStream(resourcePath))
+            {
+                // GetManifestResourceStream signals a missing resource with null
+                if (resource == null)
+                {
+                    throw new FileNotFoundException(resourcePath);
+                }
+
+                using (var text = new StreamReader(resource))
+                {
+                    return text.ReadToEnd();
+                }
+            }
+        }
+
+        /// <summary>
+        /// Parses the optional command line arguments, i.e. everything after the first two.
+        /// Arguments are trimmed and lower-cased before they are interpreted.
+        /// </summary>
+        /// <param name="args">The full command line; parsing starts at index 2.</param>
+        /// <param name="options">Receives the combined import flags; None when no option is given.</param>
+        /// <param name="schema">Receives the bracketed schema name; "[dbo]" when none is given.</param>
+        /// <param name="batchSize">
+        /// Receives the value of a "batch:" argument; 400000 when none is given or the value
+        /// is not a valid integer.
+        /// </param>
+        /// <returns>The lower-cased arguments that matched no known option.</returns>
+        private static List<string> ParseOptionalArguments(string[] args, out ImportOptions options, out string schema,
+                                                           out int batchSize)
+        {
+            batchSize = 400000;
+            options = ImportOptions.None;
+            schema = "[dbo]";
+            var include = new List<string>();
+
+            for (int i = 2; i < args.Length; i++)
+            {
+                string arg = args[i].Trim().ToLowerInvariant();
+                if (arg.StartsWith("schema:"))
+                {
+                    // normalise to exactly one pair of brackets, whether or not the caller supplied them
+                    schema = string.Format("[{0}]", arg.Substring(arg.IndexOf(":") + 1).Trim('[', ']'));
+                }
+                else if (arg.StartsWith("batch:"))
+                {
+                    // int.TryParse zeroes its out argument on failure, so parse into a temporary
+                    // and keep the current value when the text is not a valid integer.
+                    int parsedBatchSize;
+                    if (int.TryParse(arg.Substring(arg.IndexOf(":") + 1), out parsedBatchSize))
+                    {
+                        batchSize = parsedBatchSize;
+                    }
+                }
+                else
+                {
+                    switch (arg)
+                    {
+                        case "drop":
+                            options |= ImportOptions.Drop;
+                            break;
+                        case "indexes":
+                            options |= ImportOptions.Indexes;
+                            break;
+                        case "fulltext":
+                            options |= ImportOptions.Fulltext;
+                            break;
+                        case "rowid":
+                            options |= ImportOptions.Rowid;
+                            break;
+                        case "pk":
+                            // PK also sets the Rowid flag
+                            options |= ImportOptions.Pk | ImportOptions.Rowid;
+                            break;
+                        case "unique":
+                            options |= ImportOptions.Unique;
+                            break;
+                        case "split":
+                            options |= ImportOptions.Split;
+                            break;
+                        case "all":
+                            options = ImportOptions.All;
+                            break;
+                        default:
+                            // anything unrecognised is returned to the caller
+                            include.Add(arg);
+                            break;
+                    }
+                }
+            }
+            return include;
+        }
+
+        /// <summary>
+        /// Positions the log area two rows below <paramref name="cursorTop"/> (a further six
+        /// rows when <paramref name="split"/> is true) and writes the log header there.
+        /// </summary>
+        private static void InitializeLog(bool split, int cursorTop)
+        {
+            int headerRow = cursorTop + 2;
+            if (split)
+            {
+                headerRow += 6;
+            }
+
+            _logTop = headerRow;
+            _logTop = Write("LOG\r\n=================================================\r\n", _logTop,
+                            ConsoleColor.DarkCyan);
+        }
+
+        /// <summary>
+        /// Writes a progress message on console row <paramref name="position"/>, coloured
+        /// according to the event type, then pauses for 10 ms.
+        /// </summary>
+        /// <param name="message">Text to display.</param>
+        /// <param name="args">Event whose Type selects the colour.</param>
+        /// <param name="position">Console row to write on.</param>
+        private static void WriteCopyMessage(string message, RowsCopiedEventArgs args, int position)
+        {
+            // Gray covers CopyEventType.None and any value not listed below.
+            ConsoleColor shade = ConsoleColor.Gray;
+
+            switch (args.Type)
+            {
+                case CopyEventType.Begin:
+                    shade = ConsoleColor.DarkGray;
+                    break;
+                case CopyEventType.Active:
+                    shade = ConsoleColor.Green;
+                    break;
+                case CopyEventType.Complete:
+                    shade = ConsoleColor.DarkGreen;
+                    break;
+                case CopyEventType.Error:
+                    shade = ConsoleColor.Red;
+                    break;
+            }
+
+            Write(message, position, 1, shade);
+            Thread.Sleep(10);
+        }
+
+        #endregion
+    }
+}

File trunk/soddi/Properties/AssemblyInfo.cs

+using System.Reflection;
+using System.Runtime.InteropServices;
+
+// General Information about an assembly is controlled through the following 
+// set of attributes. Change these attribute values to modify the information
+// associated with an assembly.
+
+[assembly: AssemblyTitle("SODDI")]
+[assembly: AssemblyDescription("StackOverflow Data Dump Import - March 2010 Schema")]
+[assembly: AssemblyConfiguration("")]
+[assembly: AssemblyCompany("Salient Solutions")]
+[assembly: AssemblyProduct("SODDI")]
+[assembly: AssemblyCopyright("Copyright © Sky Sanders  2010")]
+[assembly: AssemblyTrademark("")]
+[assembly: AssemblyCulture("")]
+
+// Setting ComVisible to false makes the types in this assembly not visible 
+// to COM components.  If you need to access a type in this assembly from 
+// COM, set the ComVisible attribute to true on that type.
+
+[assembly: ComVisible(false)]
+
+// The following GUID is for the ID of the typelib if this project is exposed to COM
+
+[assembly: Guid("b17cf004-de1d-41c6-8f6e-fa2949e2529b")]
+
+// Version information for an assembly consists of the following four values:
+//
+//      Major Version
+//      Minor Version 
+//      Build Number
+//      Revision
+//
+// You can specify all the values or you can default the Build and Revision Numbers 
+// by using the '*' as shown below:
+// [assembly: AssemblyVersion("1.0.*")]
+
+[assembly: AssemblyVersion("1.0.0.0")]
+[assembly: AssemblyFileVersion("1.0.0.0")]

File trunk/soddi/README.txt

+
+
+StackOverflow Data Dump Import v.09
+  (c) 2010 Sky Sanders - http://skysanders.net/tools/stackexchange
+  licensed under MIT/GPL - see license.txt
+
+Imports StackOverflow Data Dump XML files into MS Sql Server 2000/05/08
+
+The latest data dump can be found at
+http://blog.stackoverflow.com/category/cc-wiki-dump/
+
+All SO sites share a common schema and this utility imports all sites into
+common tables, using a SiteId field along with the row Id as a composite
+key.
+
+This results in large datasets, but enables interesting aggregation and mining
+scenarios. You may use the SiteId field to migrate data out of the import location
+after import, if your usage requires.
+
+
+USAGE:
+
+SODDI SOURCE CONNECTION [SCHEMA:schema name] [BATCH:batch size] [SPLIT] [DROP] 
+   [UNIQUE] [ROWID] [PK] [INDEXES] [FULLTEXT] [ALL] [[META] [SO] [SU] [SF]]
+
+-- REQUIRED ARGUMENTS
+
+SOURCE          The directory containing the individual site directories.
+                NOTE: do not include trailing slash in quoted path as the arg
+                parser will interpret it as an escaped quote and puke.
+
+CONNECTION      A Sql Server connection string pointing to the existing database
+                in which to import the data dump.
+                NOTE: the database must exist, this utility does not create
+                databases or schemas.
+
+-- OPTIONAL ARGUMENTS
+
+SCHEMA          Specify a schema other than 'dbo' for the database objects.
+                NOTE: the schema must exist, this utility does not create
+                databases or schemas.
+                
+BATCH           Specifies the bulk copy batch size. Default 400,000.
+
+SPLIT           Normalize post tags by splitting the concatenated Posts.Tags
+                field into individual rows in a separate PostsTags table.
+
+DROP            Drop all objects before import. If you previously imported
+                into this database, use DROP to recreate the objects.
+
+UNIQUE          Enables a Unique index on the composite key composed of the table
+                natural Id and the SiteId segregator.
+
+ROWID           Adds an identity field to each table. Can be used as a surrogate
+                key using PK option.
+
+PK              Marks the RowId identity field as PK. ROWID is set implicitly
+
+INDEXES         Enables typical indexes on each table.
+
+FULLTEXT        Enables a full text index on Posts.Body and Posts.Title
+
+ALL             All import options are enabled. If you plan to use the data as
+                imported, i.e. you are not simply staging it, you should specify
+                ALL. The database is quite large and without indexing querying
+                will be painfully slow. A full import with ALL options on a mid
+                level desktop machine completes in approximately 17 minutes.
+
+META|SO|SU|SF Specifies which sites to import. If none are specified, all
+                site directories found in SOURCE will be imported
+
+Options are not case sensitive.
+
+Example command lines.
+
+   soddi "F:\Export-030110" "{connection string}"
+
+This is the minimum required information. This simply creates tables with no
+keys or indexes and bulk copies the data. This is the fastest option, typically
+taking less than 5 minutes to import the entire March 2010 dump. Use this option
+if you simply need to get the data in a table for further massaging.
+
+   soddi "F:\Export-030110" "{connection string}" split schema:sx drop pk indexes so
+
+This example will import only the StackOverflow data, dropping all involved objects
+and recreating them with the schema 'sx'. The surrogate identity PK, 'RowId', will
+be included and typical indexes will be created. NOTE: this is a usage example only.
+If you are going to enable any import options it would be safe to assume that you
+plan to query the data from the import location. In this case, it is probably a good
+idea to just specify ALL. If keys and indexes are created before import, the typical
+import time is just over 15 minutes. If indexes and keys are applied to a populated
+database it could take hours.
+
+
+03/28/2010 - Sky Sanders <sky.sanders@gmail.com>

File trunk/soddi/SODDI.csproj.user

+<Project xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
+    <StartArguments>"F:\Export-030110"  "Data Source=(local);Initial Catalog=SODataDump;Integrated Security=True" ALL schema:sx Sf</StartArguments>
+  </PropertyGroup>
+  <PropertyGroup>
+    <PublishUrlHistory>
+    </PublishUrlHistory>
+    <InstallUrlHistory>
+    </InstallUrlHistory>
+    <SupportUrlHistory>
+    </SupportUrlHistory>
+    <UpdateUrlHistory>
+    </UpdateUrlHistory>
+    <BootstrapperUrlHistory>
+    </BootstrapperUrlHistory>
+    <ErrorReportUrlHistory>
+    </ErrorReportUrlHistory>
+    <FallbackCulture>en-US</FallbackCulture>
+    <VerifyUploadedFiles>false</VerifyUploadedFiles>
+    <ProjectView>ProjectFiles</ProjectView>
+  </PropertyGroup>
+  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
+    <StartArguments>"F:\Export-030110" "Data Source=(local);Initial Catalog=SODataDump;Integrated Security=True" split</StartArguments>
+  </PropertyGroup>
+</Project>

File trunk/soddi/Salient.Data/EnumerableDataReader.cs

+/*!
+ * Project: Salient.Data
+ * File   : EnumerableDataReader.cs
+ * http://spikes.codeplex.com
+ *
+ * Copyright 2010, Sky Sanders
+ * Dual licensed under the MIT or GPL Version 2 licenses.
+ * See LICENSE.TXT
+ * Date: Sat Mar 28 2010 
+ */
+
+
+using System;
+using System.Collections;
+using System.Collections.Generic;
+
+namespace Salient.Data
+{
+    /// <summary>
+    /// Creates an IDataReader over an instance of IEnumerable&lt;> or IEnumerable.
+    /// Anonymous type arguments are acceptable.
+    /// </summary>
+    public class EnumerableDataReader : ObjectDataReader
+    {
+        private readonly IEnumerator _enumerator;
+        private readonly Type _type;
+        private object _current;
+
+        /// <summary>
+        /// Create an IDataReader over an instance of IEnumerable&lt;>.
+        /// 
+        /// Note: anonymous type arguments are acceptable.
+        /// 
+        /// Use other constructor for IEnumerable.
+        /// </summary>
+        /// <param name="collection">IEnumerable&lt;>. For IEnumerable use other constructor and specify type.</param>
+        public EnumerableDataReader(IEnumerable collection)
+        {
+            if (collection.GetType().IsGenericType)
+            {
+                _type = collection.GetType().GetGenericArguments()[0];
+                SetFields(_type);
+            }
+            else
+            {
+                throw new ArgumentException(
+                    "collection must be IEnumerable<>. Use other constructor for IEnumerable and specify type");
+            }
+
+            _enumerator = collection.GetEnumerator();
+        }
+
+        /// <summary>
+        /// Create an IDataReader over an instance of IEnumerable.
+        /// Use other constructor for IEnumerable&lt;>
+        /// </summary>
+        /// <param name="collection"></param>
+        /// <param name="elementType"></param>
+        public EnumerableDataReader(IEnumerable collection, Type elementType)
+            : base(elementType)
+        {
+            _type = elementType;
+            _enumerator = collection.GetEnumerator();
+        }
+
+        /// <summary>
+        /// Helper method to create generic lists from anonymous type
+        /// </summary>
+        /// <param name="type"></param>
+        /// <returns></returns>
+        public static IList ToGenericList(Type type)
+        {
+            return (IList) Activator.CreateInstance(typeof (List<>).MakeGenericType(new[] {type}));
+        }
+
+        /// <summary>
+        /// Reads field <paramref name="i"/> of the current element by invoking that field's getter.
+        /// </summary>
+        /// <param name="i">The zero-based index of the field to read.</param>
+        /// <returns>The value produced by the field's getter for the current element.</returns>
+        /// <exception cref="T:System.IndexOutOfRangeException"><paramref name="i"/> is negative or not less than the number of fields.</exception>
+        public override object GetValue(int i)
+        {
+            if (i >= 0 && i < Fields.Count)
+            {
+                return Fields[i].Getter(_current);
+            }
+
+            throw new IndexOutOfRangeException();
+        }
+
+        /// <summary>
+        /// Moves the underlying enumerator forward and makes its element the current row.
+        /// </summary>
+        /// <returns>
+        /// true if the enumerator advanced to an element; false once it is exhausted.
+        /// </returns>
+        public override bool Read()
+        {
+            if (_enumerator.MoveNext())
+            {
+                _current = _enumerator.Current;
+                return true;
+            }
+
+            // Nothing left: reset the current row to the default of the element type
+            // (a fresh instance for value types, null for reference types).
+            if (_type.IsValueType)
+            {
+                _current = Activator.CreateInstance(_type);
+            }
+            else
+            {
+                _current = null;
+            }
+
+            return false;
+        }
+    }
+}

File trunk/soddi/Salient.Data/ObjectDataReader.cs

+/*!
+ * Project: Salient.Data
+ * File   : ObjectDataReader.cs
+ * http://spikes.codeplex.com
+ *
+ * Copyright 2010, Sky Sanders
+ * Dual licensed under the MIT or GPL Version 2 licenses.
+ * See LICENSE.TXT
+ * Date: Sat Mar 28 2010 
+ */
+
+
+using System;
+using System.Collections.Generic;
+using System.Data;
+using Salient.Reflection;
+
+namespace Salient.Data
+{
+    public abstract class ObjectDataReader : IDataReader
+    {
+        /// <summary>
+        /// Closed-state flag of the reader. The constructor taking an element type sets it to false.
+        /// </summary>
+        protected bool Closed;
+        /// <summary>
+        /// The reader's columns, one entry per field: FieldCount reports its Count, GetOrdinal
+        /// matches on Info.Name and GetFieldType returns Info.PropertyType.
+        /// Presumably filled by SetFields (not shown in this part of the file - confirm).
+        /// </summary>
+        protected IList<DynamicProperties.Property> Fields;
+
+        /// <summary>
+        /// Creates a reader without setting up any fields. A derived constructor that
+        /// chains to this overload has to call SetFields itself, as the single-argument
+        /// EnumerableDataReader constructor does.
+        /// </summary>
+        protected ObjectDataReader()
+        {
+        }
+
+        /// <summary>
+        /// Creates a reader and sets up its fields for <paramref name="elementType"/>.
+        /// </summary>
+        /// <param name="elementType">
+        /// The type passed to SetFields; presumably its properties become the columns
+        /// (SetFields is not shown in this part of the file - confirm).
+        /// </param>
+        protected ObjectDataReader(Type elementType)
+        {
+            SetFields(elementType);
+            // Explicitly mark the reader as not closed.
+            Closed = false;
+        }
+
+        #region IDataReader Members
+
+        /// <summary>
+        /// Return the value of the specified field.
+        /// </summary>
+        /// <remarks>
+        /// Abstract: each concrete reader supplies the lookup. The typed accessors of this
+        /// class (GetBoolean, GetInt32, GetString, ...), the integer indexer and IsDBNull
+        /// all obtain their value through this method.
+        /// </remarks>
+        /// <returns>
+        /// The <see cref="T:System.Object"/> which will contain the field value upon return.
+        /// </returns>
+        /// <param name="i">The index of the field to find. 
+        /// </param><exception cref="T:System.IndexOutOfRangeException">The index passed was outside the range of 0 through <see cref="P:System.Data.IDataRecord.FieldCount"/>. 
+        /// </exception><filterpriority>2</filterpriority>
+        public abstract object GetValue(int i);
+
+        /// <summary>
+        /// Advances the <see cref="T:System.Data.IDataReader"/> to the next record.
+        /// </summary>
+        /// <remarks>
+        /// Abstract: each concrete reader defines how it moves to its next record.
+        /// </remarks>
+        /// <returns>
+        /// true if there are more rows; otherwise, false.
+        /// </returns>
+        /// <filterpriority>2</filterpriority>
+        public abstract bool Read();
+
+        #endregion
+
+        #region Implementation of IDataRecord
+
+        /// <summary>
+        /// Gets the number of columns exposed by this reader.
+        /// </summary>
+        /// <returns>
+        /// The number of entries in the field list, or 0 when the field list has not been set up yet.
+        /// </returns>
+        /// <filterpriority>2</filterpriority>
+        public int FieldCount
+        {
+            // The parameterless constructor leaves Fields unset; report "no columns"
+            // in that state instead of failing with a NullReferenceException.
+            get { return Fields == null ? 0 : Fields.Count; }
+        }
+
+        /// <summary>
+        /// Return the index of the named field.
+        /// </summary>
+        /// <remarks>
+        /// An exact (case-sensitive) match is tried first; if none is found, a second,
+        /// case-insensitive pass is made.
+        /// </remarks>
+        /// <returns>
+        /// The zero-based index of the first field whose name matches.
+        /// </returns>
+        /// <param name="name">The name of the field to find.</param>
+        /// <exception cref="T:System.IndexOutOfRangeException">No field carries the given name.</exception>
+        /// <filterpriority>2</filterpriority>
+        public virtual int GetOrdinal(string name)
+        {
+            // Pass 1: exact match, identical to the original lookup.
+            for (int i = 0; i < Fields.Count; i++)
+            {
+                if (Fields[i].Info.Name == name)
+                {
+                    return i;
+                }
+            }
+
+            // Pass 2: only reached when pass 1 found nothing, so every lookup that
+            // succeeded before still yields the same index.
+            for (int i = 0; i < Fields.Count; i++)
+            {
+                if (string.Equals(Fields[i].Info.Name, name, StringComparison.OrdinalIgnoreCase))
+                {
+                    return i;
+                }
+            }
+
+            throw new IndexOutOfRangeException("No field named '" + name + "' was found.");
+        }
+
+
+        /// <summary>
+        /// Indexer form of GetValue: returns the value of the column at the given position.
+        /// </summary>
+        /// <param name="i">The zero-based index of the column to get.</param>
+        /// <returns>Whatever GetValue returns for <paramref name="i"/>.</returns>
+        object IDataRecord.this[int i]
+        {
+            get
+            {
+                object value = GetValue(i);
+                return value;
+            }
+        }
+
+
+        /// <summary>
+        /// Reads the specified column through GetValue and unboxes it as a Boolean.
+        /// </summary>
+        /// <param name="i">The zero-based column ordinal.</param>
+        /// <returns>The Boolean stored in the column.</returns>
+        /// <exception cref="T:System.InvalidCastException">The column value is not a boxed Boolean.</exception>
+        public virtual bool GetBoolean(int i)
+        {
+            object value = GetValue(i);
+            return (bool) value;
+        }
+
+        /// <summary>
+        /// Reads the specified column through GetValue and unboxes it as an 8-bit unsigned integer.
+        /// </summary>
+        /// <param name="i">The zero-based column ordinal.</param>
+        /// <returns>The Byte stored in the column.</returns>
+        /// <exception cref="T:System.InvalidCastException">The column value is not a boxed Byte.</exception>
+        public virtual byte GetByte(int i)
+        {
+            object value = GetValue(i);
+            return (byte) value;
+        }
+
+        /// <summary>
+        /// Reads the specified column through GetValue and unboxes it as a character.
+        /// </summary>
+        /// <param name="i">The zero-based column ordinal.</param>
+        /// <returns>The Char stored in the column.</returns>
+        /// <exception cref="T:System.InvalidCastException">The column value is not a boxed Char.</exception>
+        public virtual char GetChar(int i)
+        {
+            object value = GetValue(i);
+            return (char) value;
+        }
+
+        /// <summary>
+        /// Reads the specified field through GetValue and unboxes it as a date and time value.
+        /// </summary>
+        /// <param name="i">The zero-based index of the field.</param>
+        /// <returns>The DateTime stored in the field.</returns>
+        /// <exception cref="T:System.InvalidCastException">The field value is not a boxed DateTime.</exception>
+        public virtual DateTime GetDateTime(int i)
+        {
+            object value = GetValue(i);
+            return (DateTime) value;
+        }
+
+        /// <summary>
+        /// Reads the specified field through GetValue and unboxes it as a fixed-position numeric value.
+        /// </summary>
+        /// <param name="i">The zero-based index of the field.</param>
+        /// <returns>The Decimal stored in the field.</returns>
+        /// <exception cref="T:System.InvalidCastException">The field value is not a boxed Decimal.</exception>
+        public virtual decimal GetDecimal(int i)
+        {
+            object value = GetValue(i);
+            return (decimal) value;
+        }
+
+        /// <summary>
+        /// Reads the specified field through GetValue and unboxes it as a double-precision floating point number.
+        /// </summary>
+        /// <param name="i">The zero-based index of the field.</param>
+        /// <returns>The Double stored in the field.</returns>
+        /// <exception cref="T:System.InvalidCastException">The field value is not a boxed Double.</exception>
+        public virtual double GetDouble(int i)
+        {
+            object value = GetValue(i);
+            return (double) value;
+        }
+
+        /// <summary>
+        /// Gets the declared type of the specified field.
+        /// </summary>
+        /// <remarks>
+        /// The type is reported exactly as declared on the field's Info (its PropertyType);
+        /// no unwrapping or conversion is applied.
+        /// </remarks>
+        /// <param name="i">The zero-based index of the field.</param>
+        /// <returns>The PropertyType recorded for field <paramref name="i"/>.</returns>
+        /// <exception cref="T:System.IndexOutOfRangeException"><paramref name="i"/> is negative or not less than the number of fields.</exception>
+        /// <filterpriority>2</filterpriority>
+        public virtual Type GetFieldType(int i)
+        {
+            // Check the range explicitly so the documented exception type is raised
+            // regardless of which IList implementation backs Fields.
+            if (i < 0 || i >= Fields.Count)
+            {
+                throw new IndexOutOfRangeException();
+            }
+
+            return Fields[i].Info.PropertyType;
+        }
+
+        /// <summary>
+        /// Reads the specified field through GetValue and unboxes it as a single-precision floating point number.
+        /// </summary>
+        /// <param name="i">The zero-based index of the field.</param>
+        /// <returns>The Single stored in the field.</returns>
+        /// <exception cref="T:System.InvalidCastException">The field value is not a boxed Single.</exception>
+        public virtual float GetFloat(int i)
+        {
+            object value = GetValue(i);
+            return (Single) value;
+        }
+
+        /// <summary>
+        /// Reads the specified field through GetValue and unboxes it as a GUID.
+        /// </summary>
+        /// <param name="i">The zero-based index of the field.</param>
+        /// <returns>The Guid stored in the field.</returns>
+        /// <exception cref="T:System.InvalidCastException">The field value is not a boxed Guid.</exception>
+        public virtual Guid GetGuid(int i)
+        {
+            object value = GetValue(i);
+            return (Guid) value;
+        }
+
+        /// <summary>
+        /// Reads the specified field through GetValue and unboxes it as a 16-bit signed integer.
+        /// </summary>
+        /// <param name="i">The zero-based index of the field.</param>
+        /// <returns>The Int16 stored in the field.</returns>
+        /// <exception cref="T:System.InvalidCastException">The field value is not a boxed Int16.</exception>
+        public virtual short GetInt16(int i)
+        {
+            object value = GetValue(i);
+            return (short) value;
+        }
+
+        /// <summary>
+        /// Reads the specified field through GetValue and unboxes it as a 32-bit signed integer.
+        /// </summary>
+        /// <param name="i">The zero-based index of the field.</param>
+        /// <returns>The Int32 stored in the field.</returns>
+        /// <exception cref="T:System.InvalidCastException">The field value is not a boxed Int32.</exception>
+        public virtual int GetInt32(int i)
+        {
+            object value = GetValue(i);
+            return (int) value;
+        }
+
+        /// <summary>
+        /// Reads the specified field through GetValue and unboxes it as a 64-bit signed integer.
+        /// </summary>
+        /// <param name="i">The zero-based index of the field.</param>
+        /// <returns>The Int64 stored in the field.</returns>
+        /// <exception cref="T:System.InvalidCastException">The field value is not a boxed Int64.</exception>
+        public virtual long GetInt64(int i)
+        {
+            object value = GetValue(i);
+            return (long) value;
+        }
+
+        /// <summary>
+        /// Reads the specified field through GetValue and casts it to a string.
+        /// </summary>
+        /// <param name="i">The zero-based index of the field.</param>
+        /// <returns>The string stored in the field; null when the field value is null.</returns>
+        /// <exception cref="T:System.InvalidCastException">The field value is not a string.</exception>
+        public virtual string GetString(int i)
+        {
+            object value = GetValue(i);
+            return (String) value;
+        }
+
+        /// <summary>
+        /// Return whether the specified field holds no value.
+        /// </summary>
+        /// <param name="i">The zero-based index of the field.</param>
+        /// <returns>
+        /// true if the field value is a null reference or <see cref="T:System.DBNull"/>; otherwise, false.
+        /// </returns>
+        /// <filterpriority>2</filterpriority>
+        public virtual bool IsDBNull(int i)
+        {
+            object value = GetValue(i);
+
+            // A field may carry the database null marker itself (e.g. an object-typed
+            // property holding DBNull.Value); treat it the same as a null reference.
+            return value == null || value is DBNull;
+        }
+
+        /// <summary>
+        /// Gets the column with the specified name.
+        /// </summary>
+        /// <returns>
+        /// The column with the specified name as an <see cref="T:System.Object"/>.
+        /// </returns>