Commits

Hernan Rojas  committed dcbca02

adding more lessons

  • Participants
  • Parent commits 7cf4b0f

Comments (0)

Files changed (4)

File lessons/07 - Lesson.ipynb

+{
+ "metadata": {
+  "name": "07 - Lesson"
+ },
+ "nbformat": 3,
+ "nbformat_minor": 0,
+ "worksheets": [
+  {
+   "cells": [
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "# Lesson 7  \n",
+      "\n",
+      "### Outliers"
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "from pandas import DataFrame, date_range, concat\n",
+      "import pandas as pd"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [],
+     "prompt_number": 1
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "print 'Pandas version: ' + pd.__version__"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "output_type": "stream",
+       "stream": "stdout",
+       "text": [
+        "Pandas version: 0.10.1\n"
+       ]
+      }
+     ],
+     "prompt_number": 2
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "# Create a dataframe with dates as your index\n",
+      "States = ['NY', 'NY', 'NY', 'NY', 'FL', 'FL', 'GA', 'GA', 'FL', 'FL'] \n",
+      "data = [1.0, 2, 3, 4, 5, 6, 7, 8, 9, 10]\n",
+      "idx = date_range('1/1/2012', periods=10, freq='MS')\n",
+      "df1 = DataFrame(data, index=idx, columns=['Revenue'])\n",
+      "df1['State'] = States\n",
+      "\n",
+      "# Create a second dataframe\n",
+      "data2 = [10.0, 10.0, 9, 9, 8, 8, 7, 7, 6, 6]\n",
+      "idx2 = date_range('1/1/2013', periods=10, freq='MS')\n",
+      "df2 = DataFrame(data2, index=idx2, columns=['Revenue'])\n",
+      "df2['State'] = States"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [],
+     "prompt_number": 3
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "# Combine dataframes\n",
+      "df = concat([df1,df2])\n",
+      "df"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "html": [
+        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+        "<table border=\"1\" class=\"dataframe\">\n",
+        "  <thead>\n",
+        "    <tr style=\"text-align: right;\">\n",
+        "      <th></th>\n",
+        "      <th>Revenue</th>\n",
+        "      <th>State</th>\n",
+        "    </tr>\n",
+        "  </thead>\n",
+        "  <tbody>\n",
+        "    <tr>\n",
+        "      <th>2012-01-01</th>\n",
+        "      <td>  1</td>\n",
+        "      <td> NY</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2012-02-01</th>\n",
+        "      <td>  2</td>\n",
+        "      <td> NY</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2012-03-01</th>\n",
+        "      <td>  3</td>\n",
+        "      <td> NY</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2012-04-01</th>\n",
+        "      <td>  4</td>\n",
+        "      <td> NY</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2012-05-01</th>\n",
+        "      <td>  5</td>\n",
+        "      <td> FL</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2012-06-01</th>\n",
+        "      <td>  6</td>\n",
+        "      <td> FL</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2012-07-01</th>\n",
+        "      <td>  7</td>\n",
+        "      <td> GA</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2012-08-01</th>\n",
+        "      <td>  8</td>\n",
+        "      <td> GA</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2012-09-01</th>\n",
+        "      <td>  9</td>\n",
+        "      <td> FL</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2012-10-01</th>\n",
+        "      <td> 10</td>\n",
+        "      <td> FL</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2013-01-01</th>\n",
+        "      <td> 10</td>\n",
+        "      <td> NY</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2013-02-01</th>\n",
+        "      <td> 10</td>\n",
+        "      <td> NY</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2013-03-01</th>\n",
+        "      <td>  9</td>\n",
+        "      <td> NY</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2013-04-01</th>\n",
+        "      <td>  9</td>\n",
+        "      <td> NY</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2013-05-01</th>\n",
+        "      <td>  8</td>\n",
+        "      <td> FL</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2013-06-01</th>\n",
+        "      <td>  8</td>\n",
+        "      <td> FL</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2013-07-01</th>\n",
+        "      <td>  7</td>\n",
+        "      <td> GA</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2013-08-01</th>\n",
+        "      <td>  7</td>\n",
+        "      <td> GA</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2013-09-01</th>\n",
+        "      <td>  6</td>\n",
+        "      <td> FL</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2013-10-01</th>\n",
+        "      <td>  6</td>\n",
+        "      <td> FL</td>\n",
+        "    </tr>\n",
+        "  </tbody>\n",
+        "</table>\n",
+        "</div>"
+       ],
+       "output_type": "pyout",
+       "prompt_number": 4,
+       "text": [
+        "            Revenue State\n",
+        "2012-01-01        1    NY\n",
+        "2012-02-01        2    NY\n",
+        "2012-03-01        3    NY\n",
+        "2012-04-01        4    NY\n",
+        "2012-05-01        5    FL\n",
+        "2012-06-01        6    FL\n",
+        "2012-07-01        7    GA\n",
+        "2012-08-01        8    GA\n",
+        "2012-09-01        9    FL\n",
+        "2012-10-01       10    FL\n",
+        "2013-01-01       10    NY\n",
+        "2013-02-01       10    NY\n",
+        "2013-03-01        9    NY\n",
+        "2013-04-01        9    NY\n",
+        "2013-05-01        8    FL\n",
+        "2013-06-01        8    FL\n",
+        "2013-07-01        7    GA\n",
+        "2013-08-01        7    GA\n",
+        "2013-09-01        6    FL\n",
+        "2013-10-01        6    FL"
+       ]
+      }
+     ],
+     "prompt_number": 4
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "# Ways to Calculate Outliers    \n",
+      "\n",
+      "Note: Average and Standard Deviation are only valid for gaussian distributions."
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "# Method 1\n",
+      "\n",
+      "df['x-Mean'] = abs(df['Revenue'] - df['Revenue'].mean())\n",
+      "df['1.96*std'] = 1.96*df['Revenue'].std()  \n",
+      "df['Outlier'] = abs(df['Revenue'] - df['Revenue'].mean()) > 1.96*df['Revenue'].std()\n",
+      "df"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "html": [
+        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+        "<table border=\"1\" class=\"dataframe\">\n",
+        "  <thead>\n",
+        "    <tr style=\"text-align: right;\">\n",
+        "      <th></th>\n",
+        "      <th>Revenue</th>\n",
+        "      <th>State</th>\n",
+        "      <th>x-Mean</th>\n",
+        "      <th>1.96*std</th>\n",
+        "      <th>Outlier</th>\n",
+        "    </tr>\n",
+        "  </thead>\n",
+        "  <tbody>\n",
+        "    <tr>\n",
+        "      <th>2012-01-01</th>\n",
+        "      <td>  1</td>\n",
+        "      <td> NY</td>\n",
+        "      <td> 5.75</td>\n",
+        "      <td> 5.200273</td>\n",
+        "      <td>  True</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2012-02-01</th>\n",
+        "      <td>  2</td>\n",
+        "      <td> NY</td>\n",
+        "      <td> 4.75</td>\n",
+        "      <td> 5.200273</td>\n",
+        "      <td> False</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2012-03-01</th>\n",
+        "      <td>  3</td>\n",
+        "      <td> NY</td>\n",
+        "      <td> 3.75</td>\n",
+        "      <td> 5.200273</td>\n",
+        "      <td> False</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2012-04-01</th>\n",
+        "      <td>  4</td>\n",
+        "      <td> NY</td>\n",
+        "      <td> 2.75</td>\n",
+        "      <td> 5.200273</td>\n",
+        "      <td> False</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2012-05-01</th>\n",
+        "      <td>  5</td>\n",
+        "      <td> FL</td>\n",
+        "      <td> 1.75</td>\n",
+        "      <td> 5.200273</td>\n",
+        "      <td> False</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2012-06-01</th>\n",
+        "      <td>  6</td>\n",
+        "      <td> FL</td>\n",
+        "      <td> 0.75</td>\n",
+        "      <td> 5.200273</td>\n",
+        "      <td> False</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2012-07-01</th>\n",
+        "      <td>  7</td>\n",
+        "      <td> GA</td>\n",
+        "      <td> 0.25</td>\n",
+        "      <td> 5.200273</td>\n",
+        "      <td> False</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2012-08-01</th>\n",
+        "      <td>  8</td>\n",
+        "      <td> GA</td>\n",
+        "      <td> 1.25</td>\n",
+        "      <td> 5.200273</td>\n",
+        "      <td> False</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2012-09-01</th>\n",
+        "      <td>  9</td>\n",
+        "      <td> FL</td>\n",
+        "      <td> 2.25</td>\n",
+        "      <td> 5.200273</td>\n",
+        "      <td> False</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2012-10-01</th>\n",
+        "      <td> 10</td>\n",
+        "      <td> FL</td>\n",
+        "      <td> 3.25</td>\n",
+        "      <td> 5.200273</td>\n",
+        "      <td> False</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2013-01-01</th>\n",
+        "      <td> 10</td>\n",
+        "      <td> NY</td>\n",
+        "      <td> 3.25</td>\n",
+        "      <td> 5.200273</td>\n",
+        "      <td> False</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2013-02-01</th>\n",
+        "      <td> 10</td>\n",
+        "      <td> NY</td>\n",
+        "      <td> 3.25</td>\n",
+        "      <td> 5.200273</td>\n",
+        "      <td> False</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2013-03-01</th>\n",
+        "      <td>  9</td>\n",
+        "      <td> NY</td>\n",
+        "      <td> 2.25</td>\n",
+        "      <td> 5.200273</td>\n",
+        "      <td> False</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2013-04-01</th>\n",
+        "      <td>  9</td>\n",
+        "      <td> NY</td>\n",
+        "      <td> 2.25</td>\n",
+        "      <td> 5.200273</td>\n",
+        "      <td> False</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2013-05-01</th>\n",
+        "      <td>  8</td>\n",
+        "      <td> FL</td>\n",
+        "      <td> 1.25</td>\n",
+        "      <td> 5.200273</td>\n",
+        "      <td> False</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2013-06-01</th>\n",
+        "      <td>  8</td>\n",
+        "      <td> FL</td>\n",
+        "      <td> 1.25</td>\n",
+        "      <td> 5.200273</td>\n",
+        "      <td> False</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2013-07-01</th>\n",
+        "      <td>  7</td>\n",
+        "      <td> GA</td>\n",
+        "      <td> 0.25</td>\n",
+        "      <td> 5.200273</td>\n",
+        "      <td> False</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2013-08-01</th>\n",
+        "      <td>  7</td>\n",
+        "      <td> GA</td>\n",
+        "      <td> 0.25</td>\n",
+        "      <td> 5.200273</td>\n",
+        "      <td> False</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2013-09-01</th>\n",
+        "      <td>  6</td>\n",
+        "      <td> FL</td>\n",
+        "      <td> 0.75</td>\n",
+        "      <td> 5.200273</td>\n",
+        "      <td> False</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2013-10-01</th>\n",
+        "      <td>  6</td>\n",
+        "      <td> FL</td>\n",
+        "      <td> 0.75</td>\n",
+        "      <td> 5.200273</td>\n",
+        "      <td> False</td>\n",
+        "    </tr>\n",
+        "  </tbody>\n",
+        "</table>\n",
+        "</div>"
+       ],
+       "output_type": "pyout",
+       "prompt_number": 5,
+       "text": [
+        "            Revenue State  x-Mean  1.96*std Outlier\n",
+        "2012-01-01        1    NY    5.75  5.200273    True\n",
+        "2012-02-01        2    NY    4.75  5.200273   False\n",
+        "2012-03-01        3    NY    3.75  5.200273   False\n",
+        "2012-04-01        4    NY    2.75  5.200273   False\n",
+        "2012-05-01        5    FL    1.75  5.200273   False\n",
+        "2012-06-01        6    FL    0.75  5.200273   False\n",
+        "2012-07-01        7    GA    0.25  5.200273   False\n",
+        "2012-08-01        8    GA    1.25  5.200273   False\n",
+        "2012-09-01        9    FL    2.25  5.200273   False\n",
+        "2012-10-01       10    FL    3.25  5.200273   False\n",
+        "2013-01-01       10    NY    3.25  5.200273   False\n",
+        "2013-02-01       10    NY    3.25  5.200273   False\n",
+        "2013-03-01        9    NY    2.25  5.200273   False\n",
+        "2013-04-01        9    NY    2.25  5.200273   False\n",
+        "2013-05-01        8    FL    1.25  5.200273   False\n",
+        "2013-06-01        8    FL    1.25  5.200273   False\n",
+        "2013-07-01        7    GA    0.25  5.200273   False\n",
+        "2013-08-01        7    GA    0.25  5.200273   False\n",
+        "2013-09-01        6    FL    0.75  5.200273   False\n",
+        "2013-10-01        6    FL    0.75  5.200273   False"
+       ]
+      }
+     ],
+     "prompt_number": 5
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "df = concat([df1,df2]) #reset dataframe to prevent error\n",
+      "\n",
+      "# Method 2\n",
+      "# Group by item\n",
+      "\n",
+      "State = df.groupby('State')\n",
+      "\n",
+      "df['Outlier'] = State.transform( lambda x: abs(x-x.mean()) > 1.96*x.std() )\n",
+      "df['x-Mean'] = State.transform( lambda x: abs(x-x.mean()) )\n",
+      "df['1.96*std'] = State.transform( lambda x: 1.96*x.std() )\n",
+      "df"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "output_type": "stream",
+       "stream": "stderr",
+       "text": [
+        "C:\\Python27\\lib\\site-packages\\pandas\\core\\frame.py:3576: FutureWarning: rename with inplace=True  will return None from pandas 0.11 onward\n",
+        "  \" from pandas 0.11 onward\", FutureWarning)\n"
+       ]
+      },
+      {
+       "html": [
+        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+        "<table border=\"1\" class=\"dataframe\">\n",
+        "  <thead>\n",
+        "    <tr style=\"text-align: right;\">\n",
+        "      <th></th>\n",
+        "      <th>Revenue</th>\n",
+        "      <th>State</th>\n",
+        "      <th>Outlier</th>\n",
+        "      <th>x-Mean</th>\n",
+        "      <th>1.96*std</th>\n",
+        "    </tr>\n",
+        "  </thead>\n",
+        "  <tbody>\n",
+        "    <tr>\n",
+        "      <th>2012-01-01</th>\n",
+        "      <td>  1</td>\n",
+        "      <td> NY</td>\n",
+        "      <td> False</td>\n",
+        "      <td> 5.00</td>\n",
+        "      <td> 7.554813</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2012-02-01</th>\n",
+        "      <td>  2</td>\n",
+        "      <td> NY</td>\n",
+        "      <td> False</td>\n",
+        "      <td> 4.00</td>\n",
+        "      <td> 7.554813</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2012-03-01</th>\n",
+        "      <td>  3</td>\n",
+        "      <td> NY</td>\n",
+        "      <td> False</td>\n",
+        "      <td> 3.00</td>\n",
+        "      <td> 7.554813</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2012-04-01</th>\n",
+        "      <td>  4</td>\n",
+        "      <td> NY</td>\n",
+        "      <td> False</td>\n",
+        "      <td> 2.00</td>\n",
+        "      <td> 7.554813</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2012-05-01</th>\n",
+        "      <td>  5</td>\n",
+        "      <td> FL</td>\n",
+        "      <td> False</td>\n",
+        "      <td> 2.25</td>\n",
+        "      <td> 3.434996</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2012-06-01</th>\n",
+        "      <td>  6</td>\n",
+        "      <td> FL</td>\n",
+        "      <td> False</td>\n",
+        "      <td> 1.25</td>\n",
+        "      <td> 3.434996</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2012-07-01</th>\n",
+        "      <td>  7</td>\n",
+        "      <td> GA</td>\n",
+        "      <td> False</td>\n",
+        "      <td> 0.25</td>\n",
+        "      <td> 0.980000</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2012-08-01</th>\n",
+        "      <td>  8</td>\n",
+        "      <td> GA</td>\n",
+        "      <td> False</td>\n",
+        "      <td> 0.75</td>\n",
+        "      <td> 0.980000</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2012-09-01</th>\n",
+        "      <td>  9</td>\n",
+        "      <td> FL</td>\n",
+        "      <td> False</td>\n",
+        "      <td> 1.75</td>\n",
+        "      <td> 3.434996</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2012-10-01</th>\n",
+        "      <td> 10</td>\n",
+        "      <td> FL</td>\n",
+        "      <td> False</td>\n",
+        "      <td> 2.75</td>\n",
+        "      <td> 3.434996</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2013-01-01</th>\n",
+        "      <td> 10</td>\n",
+        "      <td> NY</td>\n",
+        "      <td> False</td>\n",
+        "      <td> 4.00</td>\n",
+        "      <td> 7.554813</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2013-02-01</th>\n",
+        "      <td> 10</td>\n",
+        "      <td> NY</td>\n",
+        "      <td> False</td>\n",
+        "      <td> 4.00</td>\n",
+        "      <td> 7.554813</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2013-03-01</th>\n",
+        "      <td>  9</td>\n",
+        "      <td> NY</td>\n",
+        "      <td> False</td>\n",
+        "      <td> 3.00</td>\n",
+        "      <td> 7.554813</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2013-04-01</th>\n",
+        "      <td>  9</td>\n",
+        "      <td> NY</td>\n",
+        "      <td> False</td>\n",
+        "      <td> 3.00</td>\n",
+        "      <td> 7.554813</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2013-05-01</th>\n",
+        "      <td>  8</td>\n",
+        "      <td> FL</td>\n",
+        "      <td> False</td>\n",
+        "      <td> 0.75</td>\n",
+        "      <td> 3.434996</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2013-06-01</th>\n",
+        "      <td>  8</td>\n",
+        "      <td> FL</td>\n",
+        "      <td> False</td>\n",
+        "      <td> 0.75</td>\n",
+        "      <td> 3.434996</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2013-07-01</th>\n",
+        "      <td>  7</td>\n",
+        "      <td> GA</td>\n",
+        "      <td> False</td>\n",
+        "      <td> 0.25</td>\n",
+        "      <td> 0.980000</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2013-08-01</th>\n",
+        "      <td>  7</td>\n",
+        "      <td> GA</td>\n",
+        "      <td> False</td>\n",
+        "      <td> 0.25</td>\n",
+        "      <td> 0.980000</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2013-09-01</th>\n",
+        "      <td>  6</td>\n",
+        "      <td> FL</td>\n",
+        "      <td> False</td>\n",
+        "      <td> 1.25</td>\n",
+        "      <td> 3.434996</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2013-10-01</th>\n",
+        "      <td>  6</td>\n",
+        "      <td> FL</td>\n",
+        "      <td> False</td>\n",
+        "      <td> 1.25</td>\n",
+        "      <td> 3.434996</td>\n",
+        "    </tr>\n",
+        "  </tbody>\n",
+        "</table>\n",
+        "</div>"
+       ],
+       "output_type": "pyout",
+       "prompt_number": 6,
+       "text": [
+        "            Revenue State Outlier  x-Mean  1.96*std\n",
+        "2012-01-01        1    NY   False    5.00  7.554813\n",
+        "2012-02-01        2    NY   False    4.00  7.554813\n",
+        "2012-03-01        3    NY   False    3.00  7.554813\n",
+        "2012-04-01        4    NY   False    2.00  7.554813\n",
+        "2012-05-01        5    FL   False    2.25  3.434996\n",
+        "2012-06-01        6    FL   False    1.25  3.434996\n",
+        "2012-07-01        7    GA   False    0.25  0.980000\n",
+        "2012-08-01        8    GA   False    0.75  0.980000\n",
+        "2012-09-01        9    FL   False    1.75  3.434996\n",
+        "2012-10-01       10    FL   False    2.75  3.434996\n",
+        "2013-01-01       10    NY   False    4.00  7.554813\n",
+        "2013-02-01       10    NY   False    4.00  7.554813\n",
+        "2013-03-01        9    NY   False    3.00  7.554813\n",
+        "2013-04-01        9    NY   False    3.00  7.554813\n",
+        "2013-05-01        8    FL   False    0.75  3.434996\n",
+        "2013-06-01        8    FL   False    0.75  3.434996\n",
+        "2013-07-01        7    GA   False    0.25  0.980000\n",
+        "2013-08-01        7    GA   False    0.25  0.980000\n",
+        "2013-09-01        6    FL   False    1.25  3.434996\n",
+        "2013-10-01        6    FL   False    1.25  3.434996"
+       ]
+      }
+     ],
+     "prompt_number": 6
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "df = concat([df1,df2]) #reset dataframe to prevent error\n",
+      "\n",
+      "# Method 2\n",
+      "# Group by multiple items\n",
+      "\n",
+      "StateMonth = df.groupby(['State', lambda x: x.month])\n",
+      "\n",
+      "df['Outlier'] = StateMonth.transform( lambda x: abs(x-x.mean()) > 1.96*x.std() )\n",
+      "df['x-Mean'] = StateMonth.transform( lambda x: abs(x-x.mean()) )\n",
+      "df['1.96*std'] = StateMonth.transform( lambda x: 1.96*x.std() )\n",
+      "df"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "html": [
+        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+        "<table border=\"1\" class=\"dataframe\">\n",
+        "  <thead>\n",
+        "    <tr style=\"text-align: right;\">\n",
+        "      <th></th>\n",
+        "      <th>Revenue</th>\n",
+        "      <th>State</th>\n",
+        "      <th>Outlier</th>\n",
+        "      <th>x-Mean</th>\n",
+        "      <th>1.96*std</th>\n",
+        "    </tr>\n",
+        "  </thead>\n",
+        "  <tbody>\n",
+        "    <tr>\n",
+        "      <th>2012-01-01</th>\n",
+        "      <td>  1</td>\n",
+        "      <td> NY</td>\n",
+        "      <td> False</td>\n",
+        "      <td> 4.5</td>\n",
+        "      <td> 12.473364</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2012-02-01</th>\n",
+        "      <td>  2</td>\n",
+        "      <td> NY</td>\n",
+        "      <td> False</td>\n",
+        "      <td> 4.0</td>\n",
+        "      <td> 11.087434</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2012-03-01</th>\n",
+        "      <td>  3</td>\n",
+        "      <td> NY</td>\n",
+        "      <td> False</td>\n",
+        "      <td> 3.0</td>\n",
+        "      <td>  8.315576</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2012-04-01</th>\n",
+        "      <td>  4</td>\n",
+        "      <td> NY</td>\n",
+        "      <td> False</td>\n",
+        "      <td> 2.5</td>\n",
+        "      <td>  6.929646</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2012-05-01</th>\n",
+        "      <td>  5</td>\n",
+        "      <td> FL</td>\n",
+        "      <td> False</td>\n",
+        "      <td> 1.5</td>\n",
+        "      <td>  4.157788</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2012-06-01</th>\n",
+        "      <td>  6</td>\n",
+        "      <td> FL</td>\n",
+        "      <td> False</td>\n",
+        "      <td> 1.0</td>\n",
+        "      <td>  2.771859</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2012-07-01</th>\n",
+        "      <td>  7</td>\n",
+        "      <td> GA</td>\n",
+        "      <td> False</td>\n",
+        "      <td> 0.0</td>\n",
+        "      <td>  0.000000</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2012-08-01</th>\n",
+        "      <td>  8</td>\n",
+        "      <td> GA</td>\n",
+        "      <td> False</td>\n",
+        "      <td> 0.5</td>\n",
+        "      <td>  1.385929</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2012-09-01</th>\n",
+        "      <td>  9</td>\n",
+        "      <td> FL</td>\n",
+        "      <td> False</td>\n",
+        "      <td> 1.5</td>\n",
+        "      <td>  4.157788</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2012-10-01</th>\n",
+        "      <td> 10</td>\n",
+        "      <td> FL</td>\n",
+        "      <td> False</td>\n",
+        "      <td> 2.0</td>\n",
+        "      <td>  5.543717</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2013-01-01</th>\n",
+        "      <td> 10</td>\n",
+        "      <td> NY</td>\n",
+        "      <td> False</td>\n",
+        "      <td> 4.5</td>\n",
+        "      <td> 12.473364</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2013-02-01</th>\n",
+        "      <td> 10</td>\n",
+        "      <td> NY</td>\n",
+        "      <td> False</td>\n",
+        "      <td> 4.0</td>\n",
+        "      <td> 11.087434</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2013-03-01</th>\n",
+        "      <td>  9</td>\n",
+        "      <td> NY</td>\n",
+        "      <td> False</td>\n",
+        "      <td> 3.0</td>\n",
+        "      <td>  8.315576</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2013-04-01</th>\n",
+        "      <td>  9</td>\n",
+        "      <td> NY</td>\n",
+        "      <td> False</td>\n",
+        "      <td> 2.5</td>\n",
+        "      <td>  6.929646</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2013-05-01</th>\n",
+        "      <td>  8</td>\n",
+        "      <td> FL</td>\n",
+        "      <td> False</td>\n",
+        "      <td> 1.5</td>\n",
+        "      <td>  4.157788</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2013-06-01</th>\n",
+        "      <td>  8</td>\n",
+        "      <td> FL</td>\n",
+        "      <td> False</td>\n",
+        "      <td> 1.0</td>\n",
+        "      <td>  2.771859</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2013-07-01</th>\n",
+        "      <td>  7</td>\n",
+        "      <td> GA</td>\n",
+        "      <td> False</td>\n",
+        "      <td> 0.0</td>\n",
+        "      <td>  0.000000</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2013-08-01</th>\n",
+        "      <td>  7</td>\n",
+        "      <td> GA</td>\n",
+        "      <td> False</td>\n",
+        "      <td> 0.5</td>\n",
+        "      <td>  1.385929</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2013-09-01</th>\n",
+        "      <td>  6</td>\n",
+        "      <td> FL</td>\n",
+        "      <td> False</td>\n",
+        "      <td> 1.5</td>\n",
+        "      <td>  4.157788</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2013-10-01</th>\n",
+        "      <td>  6</td>\n",
+        "      <td> FL</td>\n",
+        "      <td> False</td>\n",
+        "      <td> 2.0</td>\n",
+        "      <td>  5.543717</td>\n",
+        "    </tr>\n",
+        "  </tbody>\n",
+        "</table>\n",
+        "</div>"
+       ],
+       "output_type": "pyout",
+       "prompt_number": 7,
+       "text": [
+        "            Revenue State Outlier  x-Mean   1.96*std\n",
+        "2012-01-01        1    NY   False     4.5  12.473364\n",
+        "2012-02-01        2    NY   False     4.0  11.087434\n",
+        "2012-03-01        3    NY   False     3.0   8.315576\n",
+        "2012-04-01        4    NY   False     2.5   6.929646\n",
+        "2012-05-01        5    FL   False     1.5   4.157788\n",
+        "2012-06-01        6    FL   False     1.0   2.771859\n",
+        "2012-07-01        7    GA   False     0.0   0.000000\n",
+        "2012-08-01        8    GA   False     0.5   1.385929\n",
+        "2012-09-01        9    FL   False     1.5   4.157788\n",
+        "2012-10-01       10    FL   False     2.0   5.543717\n",
+        "2013-01-01       10    NY   False     4.5  12.473364\n",
+        "2013-02-01       10    NY   False     4.0  11.087434\n",
+        "2013-03-01        9    NY   False     3.0   8.315576\n",
+        "2013-04-01        9    NY   False     2.5   6.929646\n",
+        "2013-05-01        8    FL   False     1.5   4.157788\n",
+        "2013-06-01        8    FL   False     1.0   2.771859\n",
+        "2013-07-01        7    GA   False     0.0   0.000000\n",
+        "2013-08-01        7    GA   False     0.5   1.385929\n",
+        "2013-09-01        6    FL   False     1.5   4.157788\n",
+        "2013-10-01        6    FL   False     2.0   5.543717"
+       ]
+      }
+     ],
+     "prompt_number": 7
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "df = concat([df1,df2]) #reset dataframe to prevent error\n",
+      "\n",
+      "# Method 3\n",
+      "# Group by item\n",
+      "\n",
+      "State = df.groupby('State')\n",
+      "\n",
+      "def s(group):\n",
+      "    group['x-Mean'] = abs(group['Revenue'] - group['Revenue'].mean())\n",
+      "    group['1.96*std'] = 1.96*group['Revenue'].std()  \n",
+      "    group['Outlier'] = abs(group['Revenue'] - group['Revenue'].mean()) > 1.96*group['Revenue'].std()\n",
+      "    return group\n",
+      "\n",
+      "Newdf = State.apply(s)\n",
+      "Newdf"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "html": [
+        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+        "<table border=\"1\" class=\"dataframe\">\n",
+        "  <thead>\n",
+        "    <tr style=\"text-align: right;\">\n",
+        "      <th></th>\n",
+        "      <th>Revenue</th>\n",
+        "      <th>State</th>\n",
+        "      <th>x-Mean</th>\n",
+        "      <th>1.96*std</th>\n",
+        "      <th>Outlier</th>\n",
+        "    </tr>\n",
+        "  </thead>\n",
+        "  <tbody>\n",
+        "    <tr>\n",
+        "      <th>2012-01-01</th>\n",
+        "      <td>  1</td>\n",
+        "      <td> NY</td>\n",
+        "      <td> 5.00</td>\n",
+        "      <td> 7.554813</td>\n",
+        "      <td> False</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2012-02-01</th>\n",
+        "      <td>  2</td>\n",
+        "      <td> NY</td>\n",
+        "      <td> 4.00</td>\n",
+        "      <td> 7.554813</td>\n",
+        "      <td> False</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2012-03-01</th>\n",
+        "      <td>  3</td>\n",
+        "      <td> NY</td>\n",
+        "      <td> 3.00</td>\n",
+        "      <td> 7.554813</td>\n",
+        "      <td> False</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2012-04-01</th>\n",
+        "      <td>  4</td>\n",
+        "      <td> NY</td>\n",
+        "      <td> 2.00</td>\n",
+        "      <td> 7.554813</td>\n",
+        "      <td> False</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2012-05-01</th>\n",
+        "      <td>  5</td>\n",
+        "      <td> FL</td>\n",
+        "      <td> 2.25</td>\n",
+        "      <td> 3.434996</td>\n",
+        "      <td> False</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2012-06-01</th>\n",
+        "      <td>  6</td>\n",
+        "      <td> FL</td>\n",
+        "      <td> 1.25</td>\n",
+        "      <td> 3.434996</td>\n",
+        "      <td> False</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2012-07-01</th>\n",
+        "      <td>  7</td>\n",
+        "      <td> GA</td>\n",
+        "      <td> 0.25</td>\n",
+        "      <td> 0.980000</td>\n",
+        "      <td> False</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2012-08-01</th>\n",
+        "      <td>  8</td>\n",
+        "      <td> GA</td>\n",
+        "      <td> 0.75</td>\n",
+        "      <td> 0.980000</td>\n",
+        "      <td> False</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2012-09-01</th>\n",
+        "      <td>  9</td>\n",
+        "      <td> FL</td>\n",
+        "      <td> 1.75</td>\n",
+        "      <td> 3.434996</td>\n",
+        "      <td> False</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2012-10-01</th>\n",
+        "      <td> 10</td>\n",
+        "      <td> FL</td>\n",
+        "      <td> 2.75</td>\n",
+        "      <td> 3.434996</td>\n",
+        "      <td> False</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2013-01-01</th>\n",
+        "      <td> 10</td>\n",
+        "      <td> NY</td>\n",
+        "      <td> 4.00</td>\n",
+        "      <td> 7.554813</td>\n",
+        "      <td> False</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2013-02-01</th>\n",
+        "      <td> 10</td>\n",
+        "      <td> NY</td>\n",
+        "      <td> 4.00</td>\n",
+        "      <td> 7.554813</td>\n",
+        "      <td> False</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2013-03-01</th>\n",
+        "      <td>  9</td>\n",
+        "      <td> NY</td>\n",
+        "      <td> 3.00</td>\n",
+        "      <td> 7.554813</td>\n",
+        "      <td> False</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2013-04-01</th>\n",
+        "      <td>  9</td>\n",
+        "      <td> NY</td>\n",
+        "      <td> 3.00</td>\n",
+        "      <td> 7.554813</td>\n",
+        "      <td> False</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2013-05-01</th>\n",
+        "      <td>  8</td>\n",
+        "      <td> FL</td>\n",
+        "      <td> 0.75</td>\n",
+        "      <td> 3.434996</td>\n",
+        "      <td> False</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2013-06-01</th>\n",
+        "      <td>  8</td>\n",
+        "      <td> FL</td>\n",
+        "      <td> 0.75</td>\n",
+        "      <td> 3.434996</td>\n",
+        "      <td> False</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2013-07-01</th>\n",
+        "      <td>  7</td>\n",
+        "      <td> GA</td>\n",
+        "      <td> 0.25</td>\n",
+        "      <td> 0.980000</td>\n",
+        "      <td> False</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2013-08-01</th>\n",
+        "      <td>  7</td>\n",
+        "      <td> GA</td>\n",
+        "      <td> 0.25</td>\n",
+        "      <td> 0.980000</td>\n",
+        "      <td> False</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2013-09-01</th>\n",
+        "      <td>  6</td>\n",
+        "      <td> FL</td>\n",
+        "      <td> 1.25</td>\n",
+        "      <td> 3.434996</td>\n",
+        "      <td> False</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2013-10-01</th>\n",
+        "      <td>  6</td>\n",
+        "      <td> FL</td>\n",
+        "      <td> 1.25</td>\n",
+        "      <td> 3.434996</td>\n",
+        "      <td> False</td>\n",
+        "    </tr>\n",
+        "  </tbody>\n",
+        "</table>\n",
+        "</div>"
+       ],
+       "output_type": "pyout",
+       "prompt_number": 8,
+       "text": [
+        "            Revenue State  x-Mean  1.96*std Outlier\n",
+        "2012-01-01        1    NY    5.00  7.554813   False\n",
+        "2012-02-01        2    NY    4.00  7.554813   False\n",
+        "2012-03-01        3    NY    3.00  7.554813   False\n",
+        "2012-04-01        4    NY    2.00  7.554813   False\n",
+        "2012-05-01        5    FL    2.25  3.434996   False\n",
+        "2012-06-01        6    FL    1.25  3.434996   False\n",
+        "2012-07-01        7    GA    0.25  0.980000   False\n",
+        "2012-08-01        8    GA    0.75  0.980000   False\n",
+        "2012-09-01        9    FL    1.75  3.434996   False\n",
+        "2012-10-01       10    FL    2.75  3.434996   False\n",
+        "2013-01-01       10    NY    4.00  7.554813   False\n",
+        "2013-02-01       10    NY    4.00  7.554813   False\n",
+        "2013-03-01        9    NY    3.00  7.554813   False\n",
+        "2013-04-01        9    NY    3.00  7.554813   False\n",
+        "2013-05-01        8    FL    0.75  3.434996   False\n",
+        "2013-06-01        8    FL    0.75  3.434996   False\n",
+        "2013-07-01        7    GA    0.25  0.980000   False\n",
+        "2013-08-01        7    GA    0.25  0.980000   False\n",
+        "2013-09-01        6    FL    1.25  3.434996   False\n",
+        "2013-10-01        6    FL    1.25  3.434996   False"
+       ]
+      }
+     ],
+     "prompt_number": 8
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "df = concat([df1,df2]) #reset dataframe to prevent error\n",
+      "\n",
+      "# Method 3\n",
+      "# Group by multiple items\n",
+      "\n",
+      "StateMonth = df.groupby(['State', lambda x: x.month])\n",
+      "\n",
+      "def s(group):\n",
+      "    group['x-Mean'] = abs(group['Revenue'] - group['Revenue'].mean())\n",
+      "    group['1.96*std'] = 1.96*group['Revenue'].std()  \n",
+      "    group['Outlier'] = abs(group['Revenue'] - group['Revenue'].mean()) > 1.96*group['Revenue'].std()\n",
+      "    return group\n",
+      "\n",
+      "Newdf = StateMonth.apply(s)\n",
+      "Newdf"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "html": [
+        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+        "<table border=\"1\" class=\"dataframe\">\n",
+        "  <thead>\n",
+        "    <tr style=\"text-align: right;\">\n",
+        "      <th></th>\n",
+        "      <th>Revenue</th>\n",
+        "      <th>State</th>\n",
+        "      <th>x-Mean</th>\n",
+        "      <th>1.96*std</th>\n",
+        "      <th>Outlier</th>\n",
+        "    </tr>\n",
+        "  </thead>\n",
+        "  <tbody>\n",
+        "    <tr>\n",
+        "      <th>2012-01-01</th>\n",
+        "      <td>  1</td>\n",
+        "      <td> NY</td>\n",
+        "      <td> 4.5</td>\n",
+        "      <td> 12.473364</td>\n",
+        "      <td> False</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2012-02-01</th>\n",
+        "      <td>  2</td>\n",
+        "      <td> NY</td>\n",
+        "      <td> 4.0</td>\n",
+        "      <td> 11.087434</td>\n",
+        "      <td> False</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2012-03-01</th>\n",
+        "      <td>  3</td>\n",
+        "      <td> NY</td>\n",
+        "      <td> 3.0</td>\n",
+        "      <td>  8.315576</td>\n",
+        "      <td> False</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2012-04-01</th>\n",
+        "      <td>  4</td>\n",
+        "      <td> NY</td>\n",
+        "      <td> 2.5</td>\n",
+        "      <td>  6.929646</td>\n",
+        "      <td> False</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2012-05-01</th>\n",
+        "      <td>  5</td>\n",
+        "      <td> FL</td>\n",
+        "      <td> 1.5</td>\n",
+        "      <td>  4.157788</td>\n",
+        "      <td> False</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2012-06-01</th>\n",
+        "      <td>  6</td>\n",
+        "      <td> FL</td>\n",
+        "      <td> 1.0</td>\n",
+        "      <td>  2.771859</td>\n",
+        "      <td> False</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2012-07-01</th>\n",
+        "      <td>  7</td>\n",
+        "      <td> GA</td>\n",
+        "      <td> 0.0</td>\n",
+        "      <td>  0.000000</td>\n",
+        "      <td> False</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2012-08-01</th>\n",
+        "      <td>  8</td>\n",
+        "      <td> GA</td>\n",
+        "      <td> 0.5</td>\n",
+        "      <td>  1.385929</td>\n",
+        "      <td> False</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2012-09-01</th>\n",
+        "      <td>  9</td>\n",
+        "      <td> FL</td>\n",
+        "      <td> 1.5</td>\n",
+        "      <td>  4.157788</td>\n",
+        "      <td> False</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2012-10-01</th>\n",
+        "      <td> 10</td>\n",
+        "      <td> FL</td>\n",
+        "      <td> 2.0</td>\n",
+        "      <td>  5.543717</td>\n",
+        "      <td> False</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2013-01-01</th>\n",
+        "      <td> 10</td>\n",
+        "      <td> NY</td>\n",
+        "      <td> 4.5</td>\n",
+        "      <td> 12.473364</td>\n",
+        "      <td> False</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2013-02-01</th>\n",
+        "      <td> 10</td>\n",
+        "      <td> NY</td>\n",
+        "      <td> 4.0</td>\n",
+        "      <td> 11.087434</td>\n",
+        "      <td> False</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2013-03-01</th>\n",
+        "      <td>  9</td>\n",
+        "      <td> NY</td>\n",
+        "      <td> 3.0</td>\n",
+        "      <td>  8.315576</td>\n",
+        "      <td> False</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2013-04-01</th>\n",
+        "      <td>  9</td>\n",
+        "      <td> NY</td>\n",
+        "      <td> 2.5</td>\n",
+        "      <td>  6.929646</td>\n",
+        "      <td> False</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2013-05-01</th>\n",
+        "      <td>  8</td>\n",
+        "      <td> FL</td>\n",
+        "      <td> 1.5</td>\n",
+        "      <td>  4.157788</td>\n",
+        "      <td> False</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2013-06-01</th>\n",
+        "      <td>  8</td>\n",
+        "      <td> FL</td>\n",
+        "      <td> 1.0</td>\n",
+        "      <td>  2.771859</td>\n",
+        "      <td> False</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2013-07-01</th>\n",
+        "      <td>  7</td>\n",
+        "      <td> GA</td>\n",
+        "      <td> 0.0</td>\n",
+        "      <td>  0.000000</td>\n",
+        "      <td> False</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2013-08-01</th>\n",
+        "      <td>  7</td>\n",
+        "      <td> GA</td>\n",
+        "      <td> 0.5</td>\n",
+        "      <td>  1.385929</td>\n",
+        "      <td> False</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2013-09-01</th>\n",
+        "      <td>  6</td>\n",
+        "      <td> FL</td>\n",
+        "      <td> 1.5</td>\n",
+        "      <td>  4.157788</td>\n",
+        "      <td> False</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2013-10-01</th>\n",
+        "      <td>  6</td>\n",
+        "      <td> FL</td>\n",
+        "      <td> 2.0</td>\n",
+        "      <td>  5.543717</td>\n",
+        "      <td> False</td>\n",
+        "    </tr>\n",
+        "  </tbody>\n",
+        "</table>\n",
+        "</div>"
+       ],
+       "output_type": "pyout",
+       "prompt_number": 9,
+       "text": [
+        "            Revenue State  x-Mean   1.96*std Outlier\n",
+        "2012-01-01        1    NY     4.5  12.473364   False\n",
+        "2012-02-01        2    NY     4.0  11.087434   False\n",
+        "2012-03-01        3    NY     3.0   8.315576   False\n",
+        "2012-04-01        4    NY     2.5   6.929646   False\n",
+        "2012-05-01        5    FL     1.5   4.157788   False\n",
+        "2012-06-01        6    FL     1.0   2.771859   False\n",
+        "2012-07-01        7    GA     0.0   0.000000   False\n",
+        "2012-08-01        8    GA     0.5   1.385929   False\n",
+        "2012-09-01        9    FL     1.5   4.157788   False\n",
+        "2012-10-01       10    FL     2.0   5.543717   False\n",
+        "2013-01-01       10    NY     4.5  12.473364   False\n",
+        "2013-02-01       10    NY     4.0  11.087434   False\n",
+        "2013-03-01        9    NY     3.0   8.315576   False\n",
+        "2013-04-01        9    NY     2.5   6.929646   False\n",
+        "2013-05-01        8    FL     1.5   4.157788   False\n",
+        "2013-06-01        8    FL     1.0   2.771859   False\n",
+        "2013-07-01        7    GA     0.0   0.000000   False\n",
+        "2013-08-01        7    GA     0.5   1.385929   False\n",
+        "2013-09-01        6    FL     1.5   4.157788   False\n",
+        "2013-10-01        6    FL     2.0   5.543717   False"
+       ]
+      }
+     ],
+     "prompt_number": 9
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "Assumign a non gaussian distribution (if you plot it, it will not look like a normal distribution)"
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "df = concat([df1,df2]) #reset dataframe to prevent error\n",
+      "\n",
+      "State = df.groupby('State')\n",
+      "\n",
+      "df['Lower'] = State['Revenue'].transform( lambda x: x.quantile(q=.25) - (1.5*x.quantile(q=.75)-x.quantile(q=.25)) )\n",
+      "df['Upper'] = State['Revenue'].transform( lambda x: x.quantile(q=.75) + (1.5*x.quantile(q=.75)-x.quantile(q=.25)) )\n",
+      "df['Outlier'] = (df['Revenue'] < df['Lower']) | (df['Revenue'] > df['Upper']) \n",
+      "df"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "html": [
+        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+        "<table border=\"1\" class=\"dataframe\">\n",
+        "  <thead>\n",
+        "    <tr style=\"text-align: right;\">\n",
+        "      <th></th>\n",
+        "      <th>Revenue</th>\n",
+        "      <th>State</th>\n",
+        "      <th>Lower</th>\n",
+        "      <th>Upper</th>\n",
+        "      <th>Outlier</th>\n",
+        "    </tr>\n",
+        "  </thead>\n",
+        "  <tbody>\n",
+        "    <tr>\n",
+        "      <th>2012-01-01</th>\n",
+        "      <td>  1</td>\n",
+        "      <td> NY</td>\n",
+        "      <td>-8.375</td>\n",
+        "      <td> 20.375</td>\n",
+        "      <td> False</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2012-02-01</th>\n",
+        "      <td>  2</td>\n",
+        "      <td> NY</td>\n",
+        "      <td>-8.375</td>\n",
+        "      <td> 20.375</td>\n",
+        "      <td> False</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2012-03-01</th>\n",
+        "      <td>  3</td>\n",
+        "      <td> NY</td>\n",
+        "      <td>-8.375</td>\n",
+        "      <td> 20.375</td>\n",
+        "      <td> False</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2012-04-01</th>\n",
+        "      <td>  4</td>\n",
+        "      <td> NY</td>\n",
+        "      <td>-8.375</td>\n",
+        "      <td> 20.375</td>\n",
+        "      <td> False</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2012-05-01</th>\n",
+        "      <td>  5</td>\n",
+        "      <td> FL</td>\n",
+        "      <td>-0.375</td>\n",
+        "      <td> 14.625</td>\n",
+        "      <td> False</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2012-06-01</th>\n",
+        "      <td>  6</td>\n",
+        "      <td> FL</td>\n",
+        "      <td>-0.375</td>\n",
+        "      <td> 14.625</td>\n",
+        "      <td> False</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2012-07-01</th>\n",
+        "      <td>  7</td>\n",
+        "      <td> GA</td>\n",
+        "      <td> 3.125</td>\n",
+        "      <td> 11.125</td>\n",
+        "      <td> False</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2012-08-01</th>\n",
+        "      <td>  8</td>\n",
+        "      <td> GA</td>\n",
+        "      <td> 3.125</td>\n",
+        "      <td> 11.125</td>\n",
+        "      <td> False</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2012-09-01</th>\n",
+        "      <td>  9</td>\n",
+        "      <td> FL</td>\n",
+        "      <td>-0.375</td>\n",
+        "      <td> 14.625</td>\n",
+        "      <td> False</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2012-10-01</th>\n",
+        "      <td> 10</td>\n",
+        "      <td> FL</td>\n",
+        "      <td>-0.375</td>\n",
+        "      <td> 14.625</td>\n",
+        "      <td> False</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2013-01-01</th>\n",
+        "      <td> 10</td>\n",
+        "      <td> NY</td>\n",
+        "      <td>-8.375</td>\n",
+        "      <td> 20.375</td>\n",
+        "      <td> False</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2013-02-01</th>\n",
+        "      <td> 10</td>\n",
+        "      <td> NY</td>\n",
+        "      <td>-8.375</td>\n",
+        "      <td> 20.375</td>\n",
+        "      <td> False</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2013-03-01</th>\n",
+        "      <td>  9</td>\n",
+        "      <td> NY</td>\n",
+        "      <td>-8.375</td>\n",
+        "      <td> 20.375</td>\n",
+        "      <td> False</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2013-04-01</th>\n",
+        "      <td>  9</td>\n",
+        "      <td> NY</td>\n",
+        "      <td>-8.375</td>\n",
+        "      <td> 20.375</td>\n",
+        "      <td> False</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2013-05-01</th>\n",
+        "      <td>  8</td>\n",
+        "      <td> FL</td>\n",
+        "      <td>-0.375</td>\n",
+        "      <td> 14.625</td>\n",
+        "      <td> False</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2013-06-01</th>\n",
+        "      <td>  8</td>\n",
+        "      <td> FL</td>\n",
+        "      <td>-0.375</td>\n",
+        "      <td> 14.625</td>\n",
+        "      <td> False</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2013-07-01</th>\n",
+        "      <td>  7</td>\n",
+        "      <td> GA</td>\n",
+        "      <td> 3.125</td>\n",
+        "      <td> 11.125</td>\n",
+        "      <td> False</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2013-08-01</th>\n",
+        "      <td>  7</td>\n",
+        "      <td> GA</td>\n",
+        "      <td> 3.125</td>\n",
+        "      <td> 11.125</td>\n",
+        "      <td> False</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2013-09-01</th>\n",
+        "      <td>  6</td>\n",
+        "      <td> FL</td>\n",
+        "      <td>-0.375</td>\n",
+        "      <td> 14.625</td>\n",
+        "      <td> False</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2013-10-01</th>\n",
+        "      <td>  6</td>\n",
+        "      <td> FL</td>\n",
+        "      <td>-0.375</td>\n",
+        "      <td> 14.625</td>\n",
+        "      <td> False</td>\n",
+        "    </tr>\n",
+        "  </tbody>\n",
+        "</table>\n",
+        "</div>"
+       ],
+       "output_type": "pyout",
+       "prompt_number": 10,
+       "text": [
+        "            Revenue State  Lower   Upper Outlier\n",
+        "2012-01-01        1    NY -8.375  20.375   False\n",
+        "2012-02-01        2    NY -8.375  20.375   False\n",
+        "2012-03-01        3    NY -8.375  20.375   False\n",
+        "2012-04-01        4    NY -8.375  20.375   False\n",
+        "2012-05-01        5    FL -0.375  14.625   False\n",
+        "2012-06-01        6    FL -0.375  14.625   False\n",
+        "2012-07-01        7    GA  3.125  11.125   False\n",
+        "2012-08-01        8    GA  3.125  11.125   False\n",
+        "2012-09-01        9    FL -0.375  14.625   False\n",
+        "2012-10-01       10    FL -0.375  14.625   False\n",
+        "2013-01-01       10    NY -8.375  20.375   False\n",
+        "2013-02-01       10    NY -8.375  20.375   False\n",
+        "2013-03-01        9    NY -8.375  20.375   False\n",
+        "2013-04-01        9    NY -8.375  20.375   False\n",
+        "2013-05-01        8    FL -0.375  14.625   False\n",
+        "2013-06-01        8    FL -0.375  14.625   False\n",
+        "2013-07-01        7    GA  3.125  11.125   False\n",
+        "2013-08-01        7    GA  3.125  11.125   False\n",
+        "2013-09-01        6    FL -0.375  14.625   False\n",
+        "2013-10-01        6    FL -0.375  14.625   False"
+       ]
+      }
+     ],
+     "prompt_number": 10
+    }
+   ],
+   "metadata": {}
+  }
+ ]
+}

File lessons/08 - Lesson.ipynb

+{
+ "metadata": {
+  "name": "08 - Lesson"
+ },
+ "nbformat": 3,
+ "nbformat_minor": 0,
+ "worksheets": [
+  {
+   "cells": [
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "# Lesson 8  \n",
+      "\n",
+      "How to pull data from a microsoft sql database"
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "# Import libraries\n",
+      "from pandas import DataFrame\n",
+      "import pandas as pd\n",
+      "from sqlalchemy import create_engine, MetaData, Table, select"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [],
+     "prompt_number": 1
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "print 'Pandas version: ' + pd.__version__"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "output_type": "stream",
+       "stream": "stdout",
+       "text": [
+        "Pandas version: 0.10.1\n"
+       ]
+      }
+     ],
+     "prompt_number": 2
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "# Grab Data from SQL  \n",
+      "\n",
+      "In this section we use the ***sqlalchemy*** library to grab data from a sql database. Make sure to use your own ***ServerName***, ***Database***, ***TableName***."
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "# Parameters\n",
+      "ServerName = \"RepSer2\"\n",
+      "Database = \"BizIntel\"\n",
+      "TableName = \"DimDate\"\n",
+      "\n",
+      "# Create the connection\n",
+      "engine = create_engine('mssql+pyodbc://' + ServerName + '/' + Database)\n",
+      "conn = engine.connect()\n",
+      "\n",
+      "# Required for querying tables\n",
+      "metadata = MetaData(conn)\n",
+      "\n",
+      "# Table to query\n",
+      "tbl = Table(TableName, metadata, autoload=True, schema=\"dbo\")\n",
+      "#tbl.create(checkfirst=True)\n",
+      "\n",
+      "# Select all\n",
+      "sql = tbl.select()\n",
+      "\n",
+      "# run sql code\n",
+      "result = conn.execute(sql)\n",
+      "\n",
+      "# Insert to a dataframe\n",
+      "df = DataFrame(data=list(result), columns=result.keys())\n",
+      "\n",
+      "# Close connection\n",
+      "conn.close()\n",
+      "\n",
+      "print 'Done'"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "output_type": "stream",
+       "stream": "stdout",
+       "text": [
+        "Done\n"
+       ]
+      }
+     ],
+     "prompt_number": 35
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "Select the contents in the dataframe."
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "df"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "output_type": "pyout",
+       "prompt_number": 36,
+       "text": [
+        "<class 'pandas.core.frame.DataFrame'>\n",
+        "Int64Index: 5479 entries, 0 to 5478\n",
+        "Data columns:\n",
+        "DateSK          5479  non-null values\n",
+        "Date            5479  non-null values\n",
+        "Day             5479  non-null values\n",
+        "DaySuffix       5479  non-null values\n",
+        "DayOfWeek       5479  non-null values\n",
+        "DOWInMonth      5479  non-null values\n",
+        "DayOfYear       5479  non-null values\n",
+        "WeekOfYear      5479  non-null values\n",
+        "WeekOfMonth     5479  non-null values\n",
+        "Month           5479  non-null values\n",
+        "MonthName       5479  non-null values\n",
+        "Quarter         5479  non-null values\n",
+        "QuarterName     5479  non-null values\n",
+        "Year            5479  non-null values\n",
+        "StandardDate    5479  non-null values\n",
+        "HolidayText     210  non-null values\n",
+        "dtypes: object(16)"
+       ]
+      }
+     ],
+     "prompt_number": 36
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "df.head()"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "html": [
+        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+        "<table border=\"1\" class=\"dataframe\">\n",
+        "  <thead>\n",
+        "    <tr style=\"text-align: right;\">\n",
+        "      <th></th>\n",
+        "      <th>DateSK</th>\n",
+        "      <th>Date</th>\n",
+        "      <th>Day</th>\n",
+        "      <th>DaySuffix</th>\n",
+        "      <th>DayOfWeek</th>\n",
+        "      <th>DOWInMonth</th>\n",
+        "      <th>DayOfYear</th>\n",
+        "      <th>WeekOfYear</th>\n",
+        "      <th>WeekOfMonth</th>\n",
+        "      <th>Month</th>\n",
+        "      <th>MonthName</th>\n",
+        "      <th>Quarter</th>\n",
+        "      <th>QuarterName</th>\n",
+        "      <th>Year</th>\n",
+        "      <th>StandardDate</th>\n",
+        "      <th>HolidayText</th>\n",
+        "    </tr>\n",
+        "  </thead>\n",
+        "  <tbody>\n",
+        "    <tr>\n",
+        "      <th>0</th>\n",
+        "      <td> 20000101</td>\n",
+        "      <td> 2000-01-01 00:00:00</td>\n",
+        "      <td> 1</td>\n",
+        "      <td> 1st</td>\n",
+        "      <td>  Saturday</td>\n",
+        "      <td> 1</td>\n",
+        "      <td> 1</td>\n",
+        "      <td> 1</td>\n",
+        "      <td> 1</td>\n",
+        "      <td> 1</td>\n",
+        "      <td> January</td>\n",
+        "      <td> 1</td>\n",
+        "      <td> First</td>\n",
+        "      <td> 2000</td>\n",
+        "      <td> 01/01/2000</td>\n",
+        "      <td> New Year's Day</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>1</th>\n",
+        "      <td> 20000102</td>\n",
+        "      <td> 2000-01-02 00:00:00</td>\n",
+        "      <td> 2</td>\n",
+        "      <td> 2nd</td>\n",
+        "      <td>    Sunday</td>\n",
+        "      <td> 1</td>\n",
+        "      <td> 2</td>\n",
+        "      <td> 2</td>\n",
+        "      <td> 2</td>\n",
+        "      <td> 1</td>\n",
+        "      <td> January</td>\n",
+        "      <td> 1</td>\n",
+        "      <td> First</td>\n",
+        "      <td> 2000</td>\n",
+        "      <td> 01/02/2000</td>\n",
+        "      <td>           None</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2</th>\n",
+        "      <td> 20000103</td>\n",
+        "      <td> 2000-01-03 00:00:00</td>\n",
+        "      <td> 3</td>\n",
+        "      <td> 3rd</td>\n",
+        "      <td>    Monday</td>\n",
+        "      <td> 1</td>\n",
+        "      <td> 3</td>\n",
+        "      <td> 2</td>\n",
+        "      <td> 2</td>\n",
+        "      <td> 1</td>\n",
+        "      <td> January</td>\n",
+        "      <td> 1</td>\n",
+        "      <td> First</td>\n",
+        "      <td> 2000</td>\n",
+        "      <td> 01/03/2000</td>\n",
+        "      <td>           None</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>3</th>\n",
+        "      <td> 20000104</td>\n",
+        "      <td> 2000-01-04 00:00:00</td>\n",
+        "      <td> 4</td>\n",
+        "      <td> 4th</td>\n",
+        "      <td>   Tuesday</td>\n",
+        "      <td> 1</td>\n",
+        "      <td> 4</td>\n",
+        "      <td> 2</td>\n",
+        "      <td> 2</td>\n",
+        "      <td> 1</td>\n",
+        "      <td> January</td>\n",
+        "      <td> 1</td>\n",
+        "      <td> First</td>\n",
+        "      <td> 2000</td>\n",
+        "      <td> 01/04/2000</td>\n",
+        "      <td>           None</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>4</th>\n",
+        "      <td> 20000105</td>\n",
+        "      <td> 2000-01-05 00:00:00</td>\n",
+        "      <td> 5</td>\n",
+        "      <td> 5th</td>\n",
+        "      <td> Wednesday</td>\n",
+        "      <td> 1</td>\n",
+        "      <td> 5</td>\n",
+        "      <td> 2</td>\n",
+        "      <td> 2</td>\n",
+        "      <td> 1</td>\n",
+        "      <td> January</td>\n",
+        "      <td> 1</td>\n",
+        "      <td> First</td>\n",
+        "      <td> 2000</td>\n",
+        "      <td> 01/05/2000</td>\n",
+        "      <td>           None</td>\n",
+        "    </tr>\n",
+        "  </tbody>\n",
+        "</table>\n",
+        "</div>"
+       ],
+       "output_type": "pyout",
+       "prompt_number": 37,
+       "text": [
+        "     DateSK                 Date Day DaySuffix  DayOfWeek DOWInMonth DayOfYear  \\\n",
+        "0  20000101  2000-01-01 00:00:00   1       1st   Saturday          1         1   \n",
+        "1  20000102  2000-01-02 00:00:00   2       2nd     Sunday          1         2   \n",
+        "2  20000103  2000-01-03 00:00:00   3       3rd     Monday          1         3   \n",
+        "3  20000104  2000-01-04 00:00:00   4       4th    Tuesday          1         4   \n",
+        "4  20000105  2000-01-05 00:00:00   5       5th  Wednesday          1         5   \n",
+        "\n",
+        "  WeekOfYear WeekOfMonth Month MonthName Quarter QuarterName  Year StandardDate  \\\n",
+        "0          1           1     1   January       1       First  2000   01/01/2000   \n",
+        "1          2           2     1   January       1       First  2000   01/02/2000   \n",
+        "2          2           2     1   January       1       First  2000   01/03/2000   \n",
+        "3          2           2     1   January       1       First  2000   01/04/2000   \n",
+        "4          2           2     1   January       1       First  2000   01/05/2000   \n",
+        "\n",
+        "      HolidayText  \n",
+        "0  New Year's Day  \n",
+        "1            None  \n",
+        "2            None  \n",
+        "3            None  \n",
+        "4            None  "
+       ]
+      }
+     ],
+     "prompt_number": 37
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "Convert to specific data types. The code below will have to be modified to match your table."
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "# Convert data types \n",
+      "df.Date = df.Date.astype('datetime64')\n",
+      "df.StandardDate = df.StandardDate.astype('datetime64')\n",
+      "df.DateSK = df.DateSK.astype('int')\n",
+      "df.Day = df.Day.astype('int')\n",
+      "df.DOWInMonth = df.DOWInMonth.astype('int')\n",
+      "df.DayOfYear = df.DayOfYear.astype('int')\n",
+      "df.WeekOfYear = df.WeekOfYear.astype('int')\n",
+      "df.WeekOfMonth = df.WeekOfMonth.astype('int')\n",
+      "df.Month = df.Month.astype('int')\n",
+      "df.Quarter = df.Quarter.astype('int')\n",
+      "df.Year = df.Year.astype('int')\n",
+      "\n",
+      "print 'Data Types'\n",
+      "print df.dtypes"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "output_type": "stream",
+       "stream": "stdout",
+       "text": [
+        "Data Types\n",
+        "DateSK                   int64\n",
+        "Date            datetime64[ns]\n",
+        "Day                      int64\n",
+        "DaySuffix               object\n",
+        "DayOfWeek               object\n",
+        "DOWInMonth               int64\n",
+        "DayOfYear                int64\n",
+        "WeekOfYear               int64\n",
+        "WeekOfMonth              int64\n",
+        "Month                    int64\n",
+        "MonthName               object\n",
+        "Quarter                  int64\n",
+        "QuarterName             object\n",
+        "Year                     int64\n",
+        "StandardDate    datetime64[ns]\n",
+        "HolidayText             object\n"
+       ]
+      }
+     ],
+     "prompt_number": 38
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "df.head()"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "html": [
+        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+        "<table border=\"1\" class=\"dataframe\">\n",
+        "  <thead>\n",
+        "    <tr style=\"text-align: right;\">\n",
+        "      <th></th>\n",
+        "      <th>DateSK</th>\n",
+        "      <th>Date</th>\n",
+        "      <th>Day</th>\n",
+        "      <th>DaySuffix</th>\n",
+        "      <th>DayOfWeek</th>\n",
+        "      <th>DOWInMonth</th>\n",
+        "      <th>DayOfYear</th>\n",
+