Commits

Anonymous committed 5abd3a7

cookbooks

  • Participants
  • Parent commits fc121a0

Comments (0)

Files changed (5)

 * [03 - Exercise](http://nbviewer.ipython.org/urls/bitbucket.org/hrojas/learn-pandas/raw/master/lessons/03%20-%20Exercise.ipynb)
 * [04 - Exercise](http://nbviewer.ipython.org/urls/bitbucket.org/hrojas/learn-pandas/raw/master/lessons/04%20-%20Exercise.ipynb)
 
+Cookbooks
+---------
+
+* [Compute](http://nbviewer.ipython.org/urls/bitbucket.org/hrojas/learn-pandas/raw/master/lessons/Cookbook%20-%20Compute.ipynb)
+* [Merge](http://nbviewer.ipython.org/urls/bitbucket.org/hrojas/learn-pandas/raw/master/lessons/Cookbook%20-%20Merge.ipynb)
+* [Select](http://nbviewer.ipython.org/urls/bitbucket.org/hrojas/learn-pandas/raw/master/lessons/Cookbook%20-%20Select.ipynb)
+* [Sort](http://nbviewer.ipython.org/urls/bitbucket.org/hrojas/learn-pandas/raw/master/lessons/Cookbook%20-%20Sort.ipynb)
+
+
 Cheat Sheets
 ---------
 
+* [Python 101](http://nbviewer.ipython.org/urls/bitbucket.org/hrojas/learn-pandas/raw/master/lessons/Python_101.ipynb)
 * [Pandas for Excel Developers](http://nbviewer.ipython.org/urls/bitbucket.org/hrojas/learn-pandas/raw/master/lessons/Pandas%20for%20Excel%20Developers.ipynb)
 * [Pandas for SQL Developers](http://nbviewer.ipython.org/urls/bitbucket.org/hrojas/learn-pandas/raw/master/lessons/Pandas%20for%20SQL%20Developers.ipynb)
 * [Dates](https://squareup.com/market/david-rojas-llc/data-analysis-dates)

lessons/Cookbook - Compute.ipynb

+{
+ "metadata": {
+  "name": ""
+ },
+ "nbformat": 3,
+ "nbformat_minor": 0,
+ "worksheets": [
+  {
+   "cells": [
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "from pandas import DataFrame\n",
+      "import pandas as pd\n",
+      "import numpy as np\n",
+      "%matplotlib inline"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [],
+     "prompt_number": 1
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "print 'Pandas Version: ' + pd.__version__"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "output_type": "stream",
+       "stream": "stdout",
+       "text": [
+        "Pandas Version: 0.13.0rc1\n"
+       ]
+      }
+     ],
+     "prompt_number": 2
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "# Compute"
+     ]
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "### How to get the sum and length of a group?"
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "df = DataFrame({'group1':[\"a\",\"a\",\"b\",\"b\"],\n",
+      "                'value':[10,20,30,40]\n",
+      "                })\n",
+      "df"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "html": [
+        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+        "<table border=\"1\" class=\"dataframe\">\n",
+        "  <thead>\n",
+        "    <tr style=\"text-align: right;\">\n",
+        "      <th></th>\n",
+        "      <th>group1</th>\n",
+        "      <th>value</th>\n",
+        "    </tr>\n",
+        "  </thead>\n",
+        "  <tbody>\n",
+        "    <tr>\n",
+        "      <th>0</th>\n",
+        "      <td> a</td>\n",
+        "      <td> 10</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>1</th>\n",
+        "      <td> a</td>\n",
+        "      <td> 20</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2</th>\n",
+        "      <td> b</td>\n",
+        "      <td> 30</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>3</th>\n",
+        "      <td> b</td>\n",
+        "      <td> 40</td>\n",
+        "    </tr>\n",
+        "  </tbody>\n",
+        "</table>\n",
+        "<p>4 rows \u00d7 2 columns</p>\n",
+        "</div>"
+       ],
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 3,
+       "text": [
+        "  group1  value\n",
+        "0      a     10\n",
+        "1      a     20\n",
+        "2      b     30\n",
+        "3      b     40\n",
+        "\n",
+        "[4 rows x 2 columns]"
+       ]
+      }
+     ],
+     "prompt_number": 3
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "group = df.groupby('group1')\n",
+      "group.agg([len,sum])"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "html": [
+        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+        "<table border=\"1\" class=\"dataframe\">\n",
+        "  <thead>\n",
+        "    <tr>\n",
+        "      <th></th>\n",
+        "      <th colspan=\"2\" halign=\"left\">value</th>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th></th>\n",
+        "      <th>len</th>\n",
+        "      <th>sum</th>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>group1</th>\n",
+        "      <th></th>\n",
+        "      <th></th>\n",
+        "    </tr>\n",
+        "  </thead>\n",
+        "  <tbody>\n",
+        "    <tr>\n",
+        "      <th>a</th>\n",
+        "      <td> 2</td>\n",
+        "      <td> 30</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>b</th>\n",
+        "      <td> 2</td>\n",
+        "      <td> 70</td>\n",
+        "    </tr>\n",
+        "  </tbody>\n",
+        "</table>\n",
+        "<p>2 rows \u00d7 2 columns</p>\n",
+        "</div>"
+       ],
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 4,
+       "text": [
+        "        value     \n",
+        "          len  sum\n",
+        "group1            \n",
+        "a           2   30\n",
+        "b           2   70\n",
+        "\n",
+        "[2 rows x 2 columns]"
+       ]
+      }
+     ],
+     "prompt_number": 4
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "### How can I add a column that is equal to the sum of a group?"
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "df = DataFrame({'labels':[\"a\",\"a\",\"b\",\"b\"],\n",
+      "                'value':[10,20,30,40]\n",
+      "                })\n",
+      "df"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "html": [
+        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+        "<table border=\"1\" class=\"dataframe\">\n",
+        "  <thead>\n",
+        "    <tr style=\"text-align: right;\">\n",
+        "      <th></th>\n",
+        "      <th>labels</th>\n",
+        "      <th>value</th>\n",
+        "    </tr>\n",
+        "  </thead>\n",
+        "  <tbody>\n",
+        "    <tr>\n",
+        "      <th>0</th>\n",
+        "      <td> a</td>\n",
+        "      <td> 10</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>1</th>\n",
+        "      <td> a</td>\n",
+        "      <td> 20</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2</th>\n",
+        "      <td> b</td>\n",
+        "      <td> 30</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>3</th>\n",
+        "      <td> b</td>\n",
+        "      <td> 40</td>\n",
+        "    </tr>\n",
+        "  </tbody>\n",
+        "</table>\n",
+        "<p>4 rows \u00d7 2 columns</p>\n",
+        "</div>"
+       ],
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 5,
+       "text": [
+        "  labels  value\n",
+        "0      a     10\n",
+        "1      a     20\n",
+        "2      b     30\n",
+        "3      b     40\n",
+        "\n",
+        "[4 rows x 2 columns]"
+       ]
+      }
+     ],
+     "prompt_number": 5
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "group = df.groupby('labels')['value']\n",
+      "df['value.sum'] = group.transform('sum')\n",
+      "df"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "html": [
+        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+        "<table border=\"1\" class=\"dataframe\">\n",
+        "  <thead>\n",
+        "    <tr style=\"text-align: right;\">\n",
+        "      <th></th>\n",
+        "      <th>labels</th>\n",
+        "      <th>value</th>\n",
+        "      <th>value.sum</th>\n",
+        "    </tr>\n",
+        "  </thead>\n",
+        "  <tbody>\n",
+        "    <tr>\n",
+        "      <th>0</th>\n",
+        "      <td> a</td>\n",
+        "      <td> 10</td>\n",
+        "      <td> 30</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>1</th>\n",
+        "      <td> a</td>\n",
+        "      <td> 20</td>\n",
+        "      <td> 30</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2</th>\n",
+        "      <td> b</td>\n",
+        "      <td> 30</td>\n",
+        "      <td> 70</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>3</th>\n",
+        "      <td> b</td>\n",
+        "      <td> 40</td>\n",
+        "      <td> 70</td>\n",
+        "    </tr>\n",
+        "  </tbody>\n",
+        "</table>\n",
+        "<p>4 rows \u00d7 3 columns</p>\n",
+        "</div>"
+       ],
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 6,
+       "text": [
+        "  labels  value  value.sum\n",
+        "0      a     10         30\n",
+        "1      a     20         30\n",
+        "2      b     30         70\n",
+        "3      b     40         70\n",
+        "\n",
+        "[4 rows x 3 columns]"
+       ]
+      }
+     ],
+     "prompt_number": 6
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "### How to get the month name out of a date column?"
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "df = DataFrame({'col1':[pd.Timestamp('20130102000030'),\n",
+      "                         pd.Timestamp('2013-02-03 00:00:30'),\n",
+      "                         pd.Timestamp('3/4/2013 000030')]\n",
+      "                 })\n",
+      "df"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "html": [
+        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+        "<table border=\"1\" class=\"dataframe\">\n",
+        "  <thead>\n",
+        "    <tr style=\"text-align: right;\">\n",
+        "      <th></th>\n",
+        "      <th>col1</th>\n",
+        "    </tr>\n",
+        "  </thead>\n",
+        "  <tbody>\n",
+        "    <tr>\n",
+        "      <th>0</th>\n",
+        "      <td>2013-01-02 00:00:30</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>1</th>\n",
+        "      <td>2013-02-03 00:00:30</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2</th>\n",
+        "      <td>2013-03-04 00:00:30</td>\n",
+        "    </tr>\n",
+        "  </tbody>\n",
+        "</table>\n",
+        "<p>3 rows \u00d7 1 columns</p>\n",
+        "</div>"
+       ],
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 7,
+       "text": [
+        "                 col1\n",
+        "0 2013-01-02 00:00:30\n",
+        "1 2013-02-03 00:00:30\n",
+        "2 2013-03-04 00:00:30\n",
+        "\n",
+        "[3 rows x 1 columns]"
+       ]
+      }
+     ],
+     "prompt_number": 7
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "df['MonthNumber'] = df['col1'].apply(lambda x: x.month)\n",
+      "df['Day'] = df['col1'].apply(lambda x: x.day)\n",
+      "df['Year'] = df['col1'].apply(lambda x: x.year)\n",
+      "df['MonthName'] = df['col1'].apply(lambda x: x.strftime('%B'))\n",
+      "df['WeekDay'] = df['col1'].apply(lambda x: x.strftime('%A'))\n",
+      "df"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "html": [
+        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+        "<table border=\"1\" class=\"dataframe\">\n",
+        "  <thead>\n",
+        "    <tr style=\"text-align: right;\">\n",
+        "      <th></th>\n",
+        "      <th>col1</th>\n",
+        "      <th>MonthNumber</th>\n",
+        "      <th>Day</th>\n",
+        "      <th>Year</th>\n",
+        "      <th>MonthName</th>\n",
+        "      <th>WeekDay</th>\n",
+        "    </tr>\n",
+        "  </thead>\n",
+        "  <tbody>\n",
+        "    <tr>\n",
+        "      <th>0</th>\n",
+        "      <td>2013-01-02 00:00:30</td>\n",
+        "      <td> 1</td>\n",
+        "      <td> 2</td>\n",
+        "      <td> 2013</td>\n",
+        "      <td>  January</td>\n",
+        "      <td> Wednesday</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>1</th>\n",
+        "      <td>2013-02-03 00:00:30</td>\n",
+        "      <td> 2</td>\n",
+        "      <td> 3</td>\n",
+        "      <td> 2013</td>\n",
+        "      <td> February</td>\n",
+        "      <td>    Sunday</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2</th>\n",
+        "      <td>2013-03-04 00:00:30</td>\n",
+        "      <td> 3</td>\n",
+        "      <td> 4</td>\n",
+        "      <td> 2013</td>\n",
+        "      <td>    March</td>\n",
+        "      <td>    Monday</td>\n",
+        "    </tr>\n",
+        "  </tbody>\n",
+        "</table>\n",
+        "<p>3 rows \u00d7 6 columns</p>\n",
+        "</div>"
+       ],
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 8,
+       "text": [
+        "                 col1  MonthNumber  Day  Year MonthName    WeekDay\n",
+        "0 2013-01-02 00:00:30            1    2  2013   January  Wednesday\n",
+        "1 2013-02-03 00:00:30            2    3  2013  February     Sunday\n",
+        "2 2013-03-04 00:00:30            3    4  2013     March     Monday\n",
+        "\n",
+        "[3 rows x 6 columns]"
+       ]
+      }
+     ],
+     "prompt_number": 8
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "### How can I create a column based on two other columns?"
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "df = DataFrame({'col1':['minus','minus','positive','nan'],\n",
+      "                'col2':[10,20,30,40]\n",
+      "                })\n",
+      "df"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "html": [
+        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+        "<table border=\"1\" class=\"dataframe\">\n",
+        "  <thead>\n",
+        "    <tr style=\"text-align: right;\">\n",
+        "      <th></th>\n",
+        "      <th>col1</th>\n",
+        "      <th>col2</th>\n",
+        "    </tr>\n",
+        "  </thead>\n",
+        "  <tbody>\n",
+        "    <tr>\n",
+        "      <th>0</th>\n",
+        "      <td>    minus</td>\n",
+        "      <td> 10</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>1</th>\n",
+        "      <td>    minus</td>\n",
+        "      <td> 20</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2</th>\n",
+        "      <td> positive</td>\n",
+        "      <td> 30</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>3</th>\n",
+        "      <td>      nan</td>\n",
+        "      <td> 40</td>\n",
+        "    </tr>\n",
+        "  </tbody>\n",
+        "</table>\n",
+        "<p>4 rows \u00d7 2 columns</p>\n",
+        "</div>"
+       ],
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 9,
+       "text": [
+        "       col1  col2\n",
+        "0     minus    10\n",
+        "1     minus    20\n",
+        "2  positive    30\n",
+        "3       nan    40\n",
+        "\n",
+        "[4 rows x 2 columns]"
+       ]
+      }
+     ],
+     "prompt_number": 9
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "df['col3'] = df['col2']*df['col1'].apply(lambda x: -1 if x=='minus' else (1 if x=='positive' else np.nan))\n",
+      "df"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "html": [
+        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+        "<table border=\"1\" class=\"dataframe\">\n",
+        "  <thead>\n",
+        "    <tr style=\"text-align: right;\">\n",
+        "      <th></th>\n",
+        "      <th>col1</th>\n",
+        "      <th>col2</th>\n",
+        "      <th>col3</th>\n",
+        "    </tr>\n",
+        "  </thead>\n",
+        "  <tbody>\n",
+        "    <tr>\n",
+        "      <th>0</th>\n",
+        "      <td>    minus</td>\n",
+        "      <td> 10</td>\n",
+        "      <td>-10</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>1</th>\n",
+        "      <td>    minus</td>\n",
+        "      <td> 20</td>\n",
+        "      <td>-20</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2</th>\n",
+        "      <td> positive</td>\n",
+        "      <td> 30</td>\n",
+        "      <td> 30</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>3</th>\n",
+        "      <td>      nan</td>\n",
+        "      <td> 40</td>\n",
+        "      <td>NaN</td>\n",
+        "    </tr>\n",
+        "  </tbody>\n",
+        "</table>\n",
+        "<p>4 rows \u00d7 3 columns</p>\n",
+        "</div>"
+       ],
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 10,
+       "text": [
+        "       col1  col2  col3\n",
+        "0     minus    10   -10\n",
+        "1     minus    20   -20\n",
+        "2  positive    30    30\n",
+        "3       nan    40   NaN\n",
+        "\n",
+        "[4 rows x 3 columns]"
+       ]
+      }
+     ],
+     "prompt_number": 10
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "### How can I apply a function to a group and add the results to my original data frame?"
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "df = DataFrame({'group1':['a','a','a','b','b','b'],\n",
+      "                       'group2':['c','c','d','d','d','e'],\n",
+      "                       'value1':[1.1,2,3,4,5,6],\n",
+      "                       'value2':[7.1,8,9,10,11,12]\n",
+      "})\n",
+      "\n",
+      "df"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "html": [
+        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+        "<table border=\"1\" class=\"dataframe\">\n",
+        "  <thead>\n",
+        "    <tr style=\"text-align: right;\">\n",
+        "      <th></th>\n",
+        "      <th>group1</th>\n",
+        "      <th>group2</th>\n",
+        "      <th>value1</th>\n",
+        "      <th>value2</th>\n",
+        "    </tr>\n",
+        "  </thead>\n",
+        "  <tbody>\n",
+        "    <tr>\n",
+        "      <th>0</th>\n",
+        "      <td> a</td>\n",
+        "      <td> c</td>\n",
+        "      <td> 1.1</td>\n",
+        "      <td>  7.1</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>1</th>\n",
+        "      <td> a</td>\n",
+        "      <td> c</td>\n",
+        "      <td> 2.0</td>\n",
+        "      <td>  8.0</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2</th>\n",
+        "      <td> a</td>\n",
+        "      <td> d</td>\n",
+        "      <td> 3.0</td>\n",
+        "      <td>  9.0</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>3</th>\n",
+        "      <td> b</td>\n",
+        "      <td> d</td>\n",
+        "      <td> 4.0</td>\n",
+        "      <td> 10.0</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>4</th>\n",
+        "      <td> b</td>\n",
+        "      <td> d</td>\n",
+        "      <td> 5.0</td>\n",
+        "      <td> 11.0</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>5</th>\n",
+        "      <td> b</td>\n",
+        "      <td> e</td>\n",
+        "      <td> 6.0</td>\n",
+        "      <td> 12.0</td>\n",
+        "    </tr>\n",
+        "  </tbody>\n",
+        "</table>\n",
+        "<p>6 rows \u00d7 4 columns</p>\n",
+        "</div>"
+       ],
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 11,
+       "text": [
+        "  group1 group2  value1  value2\n",
+        "0      a      c     1.1     7.1\n",
+        "1      a      c     2.0     8.0\n",
+        "2      a      d     3.0     9.0\n",
+        "3      b      d     4.0    10.0\n",
+        "4      b      d     5.0    11.0\n",
+        "5      b      e     6.0    12.0\n",
+        "\n",
+        "[6 rows x 4 columns]"
+       ]
+      }
+     ],
+     "prompt_number": 11
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "group = df.groupby(['group1','group2'])\n",
+      "\n",
+      "def Half(x):\n",
+      "    return x.sum()\n",
+      "\n",
+      "df['new'] = group['value1'].transform(Half)\n",
+      "df"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "html": [
+        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+        "<table border=\"1\" class=\"dataframe\">\n",
+        "  <thead>\n",
+        "    <tr style=\"text-align: right;\">\n",
+        "      <th></th>\n",
+        "      <th>group1</th>\n",
+        "      <th>group2</th>\n",
+        "      <th>value1</th>\n",
+        "      <th>value2</th>\n",
+        "      <th>new</th>\n",
+        "    </tr>\n",
+        "  </thead>\n",
+        "  <tbody>\n",
+        "    <tr>\n",
+        "      <th>0</th>\n",
+        "      <td> a</td>\n",
+        "      <td> c</td>\n",
+        "      <td> 1.1</td>\n",
+        "      <td>  7.1</td>\n",
+        "      <td> 3.1</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>1</th>\n",
+        "      <td> a</td>\n",
+        "      <td> c</td>\n",
+        "      <td> 2.0</td>\n",
+        "      <td>  8.0</td>\n",
+        "      <td> 3.1</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2</th>\n",
+        "      <td> a</td>\n",
+        "      <td> d</td>\n",
+        "      <td> 3.0</td>\n",
+        "      <td>  9.0</td>\n",
+        "      <td> 3.0</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>3</th>\n",
+        "      <td> b</td>\n",
+        "      <td> d</td>\n",
+        "      <td> 4.0</td>\n",
+        "      <td> 10.0</td>\n",
+        "      <td> 9.0</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>4</th>\n",
+        "      <td> b</td>\n",
+        "      <td> d</td>\n",
+        "      <td> 5.0</td>\n",
+        "      <td> 11.0</td>\n",
+        "      <td> 9.0</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>5</th>\n",
+        "      <td> b</td>\n",
+        "      <td> e</td>\n",
+        "      <td> 6.0</td>\n",
+        "      <td> 12.0</td>\n",
+        "      <td> 6.0</td>\n",
+        "    </tr>\n",
+        "  </tbody>\n",
+        "</table>\n",
+        "<p>6 rows \u00d7 5 columns</p>\n",
+        "</div>"
+       ],
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 12,
+       "text": [
+        "  group1 group2  value1  value2  new\n",
+        "0      a      c     1.1     7.1  3.1\n",
+        "1      a      c     2.0     8.0  3.1\n",
+        "2      a      d     3.0     9.0  3.0\n",
+        "3      b      d     4.0    10.0  9.0\n",
+        "4      b      d     5.0    11.0  9.0\n",
+        "5      b      e     6.0    12.0  6.0\n",
+        "\n",
+        "[6 rows x 5 columns]"
+       ]
+      }
+     ],
+     "prompt_number": 12
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "# For multiple functions\n",
+      "def HalfPlus(x):\n",
+      "    return x.sum() + 1\n",
+      "\n",
+      "newcol = group['value1'].agg([Half,HalfPlus])\n",
+      "newcol"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "html": [
+        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+        "<table border=\"1\" class=\"dataframe\">\n",
+        "  <thead>\n",
+        "    <tr style=\"text-align: right;\">\n",
+        "      <th></th>\n",
+        "      <th></th>\n",
+        "      <th>Half</th>\n",
+        "      <th>HalfPlus</th>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>group1</th>\n",
+        "      <th>group2</th>\n",
+        "      <th></th>\n",
+        "      <th></th>\n",
+        "    </tr>\n",
+        "  </thead>\n",
+        "  <tbody>\n",
+        "    <tr>\n",
+        "      <th rowspan=\"2\" valign=\"top\">a</th>\n",
+        "      <th>c</th>\n",
+        "      <td> 3.1</td>\n",
+        "      <td>  4.1</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>d</th>\n",
+        "      <td> 3.0</td>\n",
+        "      <td>  4.0</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th rowspan=\"2\" valign=\"top\">b</th>\n",
+        "      <th>d</th>\n",
+        "      <td> 9.0</td>\n",
+        "      <td> 10.0</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>e</th>\n",
+        "      <td> 6.0</td>\n",
+        "      <td>  7.0</td>\n",
+        "    </tr>\n",
+        "  </tbody>\n",
+        "</table>\n",
+        "<p>4 rows \u00d7 2 columns</p>\n",
+        "</div>"
+       ],
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 13,
+       "text": [
+        "               Half  HalfPlus\n",
+        "group1 group2                \n",
+        "a      c        3.1       4.1\n",
+        "       d        3.0       4.0\n",
+        "b      d        9.0      10.0\n",
+        "       e        6.0       7.0\n",
+        "\n",
+        "[4 rows x 2 columns]"
+       ]
+      }
+     ],
+     "prompt_number": 13
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "df.merge(newcol, left_on=['group1','group2'], right_index=True)"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "html": [
+        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+        "<table border=\"1\" class=\"dataframe\">\n",
+        "  <thead>\n",
+        "    <tr style=\"text-align: right;\">\n",
+        "      <th></th>\n",
+        "      <th>group1</th>\n",
+        "      <th>group2</th>\n",
+        "      <th>value1</th>\n",
+        "      <th>value2</th>\n",
+        "      <th>new</th>\n",
+        "      <th>Half</th>\n",
+        "      <th>HalfPlus</th>\n",
+        "    </tr>\n",
+        "  </thead>\n",
+        "  <tbody>\n",
+        "    <tr>\n",
+        "      <th>0</th>\n",
+        "      <td> a</td>\n",
+        "      <td> c</td>\n",
+        "      <td> 1.1</td>\n",
+        "      <td>  7.1</td>\n",
+        "      <td> 3.1</td>\n",
+        "      <td> 3.1</td>\n",
+        "      <td>  4.1</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>1</th>\n",
+        "      <td> a</td>\n",
+        "      <td> c</td>\n",
+        "      <td> 2.0</td>\n",
+        "      <td>  8.0</td>\n",
+        "      <td> 3.1</td>\n",
+        "      <td> 3.1</td>\n",
+        "      <td>  4.1</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2</th>\n",
+        "      <td> a</td>\n",
+        "      <td> d</td>\n",
+        "      <td> 3.0</td>\n",
+        "      <td>  9.0</td>\n",
+        "      <td> 3.0</td>\n",
+        "      <td> 3.0</td>\n",
+        "      <td>  4.0</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>3</th>\n",
+        "      <td> b</td>\n",
+        "      <td> d</td>\n",
+        "      <td> 4.0</td>\n",
+        "      <td> 10.0</td>\n",
+        "      <td> 9.0</td>\n",
+        "      <td> 9.0</td>\n",
+        "      <td> 10.0</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>4</th>\n",
+        "      <td> b</td>\n",
+        "      <td> d</td>\n",
+        "      <td> 5.0</td>\n",
+        "      <td> 11.0</td>\n",
+        "      <td> 9.0</td>\n",
+        "      <td> 9.0</td>\n",
+        "      <td> 10.0</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>5</th>\n",
+        "      <td> b</td>\n",
+        "      <td> e</td>\n",
+        "      <td> 6.0</td>\n",
+        "      <td> 12.0</td>\n",
+        "      <td> 6.0</td>\n",
+        "      <td> 6.0</td>\n",
+        "      <td>  7.0</td>\n",
+        "    </tr>\n",
+        "  </tbody>\n",
+        "</table>\n",
+        "<p>6 rows \u00d7 7 columns</p>\n",
+        "</div>"
+       ],
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 14,
+       "text": [
+        "  group1 group2  value1  value2  new  Half  HalfPlus\n",
+        "0      a      c     1.1     7.1  3.1   3.1       4.1\n",
+        "1      a      c     2.0     8.0  3.1   3.1       4.1\n",
+        "2      a      d     3.0     9.0  3.0   3.0       4.0\n",
+        "3      b      d     4.0    10.0  9.0   9.0      10.0\n",
+        "4      b      d     5.0    11.0  9.0   9.0      10.0\n",
+        "5      b      e     6.0    12.0  6.0   6.0       7.0\n",
+        "\n",
+        "[6 rows x 7 columns]"
+       ]
+      }
+     ],
+     "prompt_number": 14
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "### How to add two data frames and not get null values?"
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "df1 = DataFrame(data=[26371, 1755, 2], index=[-9999, 240, 138.99], columns=['value'])\n",
+      "df1"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "html": [
+        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+        "<table border=\"1\" class=\"dataframe\">\n",
+        "  <thead>\n",
+        "    <tr style=\"text-align: right;\">\n",
+        "      <th></th>\n",
+        "      <th>value</th>\n",
+        "    </tr>\n",
+        "  </thead>\n",
+        "  <tbody>\n",
+        "    <tr>\n",
+        "      <th>-9999.00</th>\n",
+        "      <td> 26371</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th> 240.00 </th>\n",
+        "      <td>  1755</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th> 138.99 </th>\n",
+        "      <td>     2</td>\n",
+        "    </tr>\n",
+        "  </tbody>\n",
+        "</table>\n",
+        "<p>3 rows \u00d7 1 columns</p>\n",
+        "</div>"
+       ],
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 15,
+       "text": [
+        "          value\n",
+        "-9999.00  26371\n",
+        " 240.00    1755\n",
+        " 138.99       2\n",
+        "\n",
+        "[3 rows x 1 columns]"
+       ]
+      }
+     ],
+     "prompt_number": 15
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "df2 = DataFrame(data=[26371, 1755, 6, 4], index=[-9999, 240, 113.03, 110], columns=['value'])\n",
+      "df2"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "html": [
+        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+        "<table border=\"1\" class=\"dataframe\">\n",
+        "  <thead>\n",
+        "    <tr style=\"text-align: right;\">\n",
+        "      <th></th>\n",
+        "      <th>value</th>\n",
+        "    </tr>\n",
+        "  </thead>\n",
+        "  <tbody>\n",
+        "    <tr>\n",
+        "      <th>-9999.00</th>\n",
+        "      <td> 26371</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th> 240.00 </th>\n",
+        "      <td>  1755</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th> 113.03 </th>\n",
+        "      <td>     6</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th> 110.00 </th>\n",
+        "      <td>     4</td>\n",
+        "    </tr>\n",
+        "  </tbody>\n",
+        "</table>\n",
+        "<p>4 rows \u00d7 1 columns</p>\n",
+        "</div>"
+       ],
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 16,
+       "text": [
+        "          value\n",
+        "-9999.00  26371\n",
+        " 240.00    1755\n",
+        " 113.03       6\n",
+        " 110.00       4\n",
+        "\n",
+        "[4 rows x 1 columns]"
+       ]
+      }
+     ],
+     "prompt_number": 16
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "# If you simply add them, you will get null values\n",
+      "# were the index does not match\n",
+      "df1 + df2"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "html": [
+        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+        "<table border=\"1\" class=\"dataframe\">\n",
+        "  <thead>\n",
+        "    <tr style=\"text-align: right;\">\n",
+        "      <th></th>\n",
+        "      <th>value</th>\n",
+        "    </tr>\n",
+        "  </thead>\n",
+        "  <tbody>\n",
+        "    <tr>\n",
+        "      <th>-9999.00</th>\n",
+        "      <td> 52742</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th> 110.00 </th>\n",
+        "      <td>   NaN</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th> 113.03 </th>\n",
+        "      <td>   NaN</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th> 138.99 </th>\n",
+        "      <td>   NaN</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th> 240.00 </th>\n",
+        "      <td>  3510</td>\n",
+        "    </tr>\n",
+        "  </tbody>\n",
+        "</table>\n",
+        "<p>5 rows \u00d7 1 columns</p>\n",
+        "</div>"
+       ],
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 17,
+       "text": [
+        "          value\n",
+        "-9999.00  52742\n",
+        " 110.00     NaN\n",
+        " 113.03     NaN\n",
+        " 138.99     NaN\n",
+        " 240.00    3510\n",
+        "\n",
+        "[5 rows x 1 columns]"
+       ]
+      }
+     ],
+     "prompt_number": 17
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "# Here we fix this issue\n",
+      "df1.add(df2, fill_value=0)"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "html": [
+        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+        "<table border=\"1\" class=\"dataframe\">\n",
+        "  <thead>\n",
+        "    <tr style=\"text-align: right;\">\n",
+        "      <th></th>\n",
+        "      <th>value</th>\n",
+        "    </tr>\n",
+        "  </thead>\n",
+        "  <tbody>\n",
+        "    <tr>\n",
+        "      <th>-9999.00</th>\n",
+        "      <td> 52742</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th> 110.00 </th>\n",
+        "      <td>     4</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th> 113.03 </th>\n",
+        "      <td>     6</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th> 138.99 </th>\n",
+        "      <td>     2</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th> 240.00 </th>\n",
+        "      <td>  3510</td>\n",
+        "    </tr>\n",
+        "  </tbody>\n",
+        "</table>\n",
+        "<p>5 rows \u00d7 1 columns</p>\n",
+        "</div>"
+       ],
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 18,
+       "text": [
+        "          value\n",
+        "-9999.00  52742\n",
+        " 110.00       4\n",
+        " 113.03       6\n",
+        " 138.99       2\n",
+        " 240.00    3510\n",
+        "\n",
+        "[5 rows x 1 columns]"
+       ]
+      }
+     ],
+     "prompt_number": 18
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "**Author:** [David Rojas LLC](http://hdrojas.pythonanywhere.com/)  "
+     ]
+    }
+   ],
+   "metadata": {}
+  }
+ ]
+}

lessons/Cookbook - Merge.ipynb

+{
+ "metadata": {
+  "name": ""
+ },
+ "nbformat": 3,
+ "nbformat_minor": 0,
+ "worksheets": [
+  {
+   "cells": [
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "from pandas import DataFrame\n",
+      "import pandas as pd\n",
+      "%matplotlib inline"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [],
+     "prompt_number": 1
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "print 'Pandas Version: ' + pd.__version__"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "output_type": "stream",
+       "stream": "stdout",
+       "text": [
+        "Pandas Version: 0.13.0rc1\n"
+       ]
+      }
+     ],
+     "prompt_number": 2
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "# Merge"
+     ]
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "### I have two dataframes that have dates as their index. The problem is that one of the dataframes has a timestamp and this is preventing me from adding the dataframes together. How can I match up the time stamps?"
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "df1 = DataFrame({'col1':[pd.Timestamp('20130102000030'),\n",
+      "                         pd.Timestamp('2013-01-03 00:00:30'),\n",
+      "                         pd.Timestamp('1/4/2013 000030')],\n",
+      "                 'col2':[1,10,18]\n",
+      "                 })\n",
+      "df1"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "html": [
+        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+        "<table border=\"1\" class=\"dataframe\">\n",
+        "  <thead>\n",
+        "    <tr style=\"text-align: right;\">\n",
+        "      <th></th>\n",
+        "      <th>col1</th>\n",
+        "      <th>col2</th>\n",
+        "    </tr>\n",
+        "  </thead>\n",
+        "  <tbody>\n",
+        "    <tr>\n",
+        "      <th>0</th>\n",
+        "      <td>2013-01-02 00:00:30</td>\n",
+        "      <td>  1</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>1</th>\n",
+        "      <td>2013-01-03 00:00:30</td>\n",
+        "      <td> 10</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2</th>\n",
+        "      <td>2013-01-04 00:00:30</td>\n",
+        "      <td> 18</td>\n",
+        "    </tr>\n",
+        "  </tbody>\n",
+        "</table>\n",
+        "<p>3 rows \u00d7 2 columns</p>\n",
+        "</div>"
+       ],
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 3,
+       "text": [
+        "                 col1  col2\n",
+        "0 2013-01-02 00:00:30     1\n",
+        "1 2013-01-03 00:00:30    10\n",
+        "2 2013-01-04 00:00:30    18\n",
+        "\n",
+        "[3 rows x 2 columns]"
+       ]
+      }
+     ],
+     "prompt_number": 3
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "df1 = df1.set_index('col1')\n",
+      "df1"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "html": [
+        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+        "<table border=\"1\" class=\"dataframe\">\n",
+        "  <thead>\n",
+        "    <tr style=\"text-align: right;\">\n",
+        "      <th></th>\n",
+        "      <th>col2</th>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>col1</th>\n",
+        "      <th></th>\n",
+        "    </tr>\n",
+        "  </thead>\n",
+        "  <tbody>\n",
+        "    <tr>\n",
+        "      <th>2013-01-02 00:00:30</th>\n",
+        "      <td>  1</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2013-01-03 00:00:30</th>\n",
+        "      <td> 10</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2013-01-04 00:00:30</th>\n",
+        "      <td> 18</td>\n",
+        "    </tr>\n",
+        "  </tbody>\n",
+        "</table>\n",
+        "<p>3 rows \u00d7 1 columns</p>\n",
+        "</div>"
+       ],
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 4,
+       "text": [
+        "                     col2\n",
+        "col1                     \n",
+        "2013-01-02 00:00:30     1\n",
+        "2013-01-03 00:00:30    10\n",
+        "2013-01-04 00:00:30    18\n",
+        "\n",
+        "[3 rows x 1 columns]"
+       ]
+      }
+     ],
+     "prompt_number": 4
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "d = {'col2':[22,10,113]}\n",
+      "\n",
+      "i = [pd.Timestamp('20130102'),\n",
+      "     pd.Timestamp('2013-01-03'),\n",
+      "     pd.Timestamp('1/4/2013')]\n",
+      "                 \n",
+      "\n",
+      "df2 = DataFrame(data=d, index = i)\n",
+      "df2.index.name = 'col1'\n",
+      "df2"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "html": [
+        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+        "<table border=\"1\" class=\"dataframe\">\n",
+        "  <thead>\n",
+        "    <tr style=\"text-align: right;\">\n",
+        "      <th></th>\n",
+        "      <th>col2</th>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>col1</th>\n",
+        "      <th></th>\n",
+        "    </tr>\n",
+        "  </thead>\n",
+        "  <tbody>\n",
+        "    <tr>\n",
+        "      <th>2013-01-02</th>\n",
+        "      <td>  22</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2013-01-03</th>\n",
+        "      <td>  10</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2013-01-04</th>\n",
+        "      <td> 113</td>\n",
+        "    </tr>\n",
+        "  </tbody>\n",
+        "</table>\n",
+        "<p>3 rows \u00d7 1 columns</p>\n",
+        "</div>"
+       ],
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 5,
+       "text": [
+        "            col2\n",
+        "col1            \n",
+        "2013-01-02    22\n",
+        "2013-01-03    10\n",
+        "2013-01-04   113\n",
+        "\n",
+        "[3 rows x 1 columns]"
+       ]
+      }
+     ],
+     "prompt_number": 5
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "# If we try to add the data frames together, we do not get the results we want.\n",
+      "df2+df1"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "html": [
+        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+        "<table border=\"1\" class=\"dataframe\">\n",
+        "  <thead>\n",
+        "    <tr style=\"text-align: right;\">\n",
+        "      <th></th>\n",
+        "      <th>col2</th>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>col1</th>\n",
+        "      <th></th>\n",
+        "    </tr>\n",
+        "  </thead>\n",
+        "  <tbody>\n",
+        "    <tr>\n",
+        "      <th>2013-01-02 00:00:00</th>\n",
+        "      <td>NaN</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2013-01-02 00:00:30</th>\n",
+        "      <td>NaN</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2013-01-03 00:00:00</th>\n",
+        "      <td>NaN</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2013-01-03 00:00:30</th>\n",
+        "      <td>NaN</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2013-01-04 00:00:00</th>\n",
+        "      <td>NaN</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2013-01-04 00:00:30</th>\n",
+        "      <td>NaN</td>\n",
+        "    </tr>\n",
+        "  </tbody>\n",
+        "</table>\n",
+        "<p>6 rows \u00d7 1 columns</p>\n",
+        "</div>"
+       ],
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 6,
+       "text": [
+        "                     col2\n",
+        "col1                     \n",
+        "2013-01-02 00:00:00   NaN\n",
+        "2013-01-02 00:00:30   NaN\n",
+        "2013-01-03 00:00:00   NaN\n",
+        "2013-01-03 00:00:30   NaN\n",
+        "2013-01-04 00:00:00   NaN\n",
+        "2013-01-04 00:00:30   NaN\n",
+        "\n",
+        "[6 rows x 1 columns]"
+       ]
+      }
+     ],
+     "prompt_number": 6
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "# Make the index of df2 the same as the index of df1\n",
+      "# Fill the missing values with previous known value\n",
+      "#\n",
+      "#2013-01-02 00:00:00 => 22\n",
+      "#2013-01-02 00:00:30 => 22 \n",
+      "#2013-01-03 00:00:00 => 10\n",
+      "#2013-01-03 00:00:00 => 10\n",
+      "#2013-01-04 00:00:00 => 113\n",
+      "#2013-01-04 00:00:00 => 113\n",
+      "df2.reindex(df1.index, method='pad')"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "html": [
+        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+        "<table border=\"1\" class=\"dataframe\">\n",
+        "  <thead>\n",
+        "    <tr style=\"text-align: right;\">\n",
+        "      <th></th>\n",
+        "      <th>col2</th>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>col1</th>\n",
+        "      <th></th>\n",
+        "    </tr>\n",
+        "  </thead>\n",
+        "  <tbody>\n",
+        "    <tr>\n",
+        "      <th>2013-01-02 00:00:30</th>\n",
+        "      <td>  22</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2013-01-03 00:00:30</th>\n",
+        "      <td>  10</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2013-01-04 00:00:30</th>\n",
+        "      <td> 113</td>\n",
+        "    </tr>\n",
+        "  </tbody>\n",
+        "</table>\n",
+        "<p>3 rows \u00d7 1 columns</p>\n",
+        "</div>"
+       ],
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 7,
+       "text": [
+        "                     col2\n",
+        "col1                     \n",
+        "2013-01-02 00:00:30    22\n",
+        "2013-01-03 00:00:30    10\n",
+        "2013-01-04 00:00:30   113\n",
+        "\n",
+        "[3 rows x 1 columns]"
+       ]
+      }
+     ],
+     "prompt_number": 7
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "# Now we can add them\n",
+      "df2 = df2.reindex(df1.index, method='pad')\n",
+      "df1+df2"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "html": [
+        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+        "<table border=\"1\" class=\"dataframe\">\n",
+        "  <thead>\n",
+        "    <tr style=\"text-align: right;\">\n",
+        "      <th></th>\n",
+        "      <th>col2</th>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>col1</th>\n",
+        "      <th></th>\n",
+        "    </tr>\n",
+        "  </thead>\n",
+        "  <tbody>\n",
+        "    <tr>\n",
+        "      <th>2013-01-02 00:00:30</th>\n",
+        "      <td>  23</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2013-01-03 00:00:30</th>\n",
+        "      <td>  20</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2013-01-04 00:00:30</th>\n",
+        "      <td> 131</td>\n",
+        "    </tr>\n",
+        "  </tbody>\n",
+        "</table>\n",
+        "<p>3 rows \u00d7 1 columns</p>\n",
+        "</div>"
+       ],
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 8,
+       "text": [
+        "                     col2\n",
+        "col1                     \n",
+        "2013-01-02 00:00:30    23\n",
+        "2013-01-03 00:00:30    20\n",
+        "2013-01-04 00:00:30   131\n",
+        "\n",
+        "[3 rows x 1 columns]"
+       ]
+      }
+     ],
+     "prompt_number": 8
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "### How do I add two dataframes together by row?"
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "df1 = DataFrame([1,2,3])\n",
+      "df1"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "html": [
+        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+        "<table border=\"1\" class=\"dataframe\">\n",
+        "  <thead>\n",
+        "    <tr style=\"text-align: right;\">\n",
+        "      <th></th>\n",
+        "      <th>0</th>\n",
+        "    </tr>\n",
+        "  </thead>\n",
+        "  <tbody>\n",
+        "    <tr>\n",
+        "      <th>0</th>\n",
+        "      <td> 1</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>1</th>\n",
+        "      <td> 2</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2</th>\n",
+        "      <td> 3</td>\n",
+        "    </tr>\n",
+        "  </tbody>\n",
+        "</table>\n",
+        "<p>3 rows \u00d7 1 columns</p>\n",
+        "</div>"
+       ],
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 9,
+       "text": [
+        "   0\n",
+        "0  1\n",
+        "1  2\n",
+        "2  3\n",
+        "\n",
+        "[3 rows x 1 columns]"
+       ]
+      }
+     ],
+     "prompt_number": 9
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "df2 = DataFrame([4,5,6])\n",
+      "df2"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "html": [
+        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+        "<table border=\"1\" class=\"dataframe\">\n",
+        "  <thead>\n",
+        "    <tr style=\"text-align: right;\">\n",
+        "      <th></th>\n",
+        "      <th>0</th>\n",
+        "    </tr>\n",
+        "  </thead>\n",
+        "  <tbody>\n",
+        "    <tr>\n",
+        "      <th>0</th>\n",
+        "      <td> 4</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>1</th>\n",
+        "      <td> 5</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2</th>\n",
+        "      <td> 6</td>\n",
+        "    </tr>\n",
+        "  </tbody>\n",
+        "</table>\n",
+        "<p>3 rows \u00d7 1 columns</p>\n",
+        "</div>"
+       ],
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 10,
+       "text": [
+        "   0\n",
+        "0  4\n",
+        "1  5\n",
+        "2  6\n",
+        "\n",
+        "[3 rows x 1 columns]"
+       ]
+      }
+     ],
+     "prompt_number": 10
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "pd.concat([df1,df2])"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "html": [
+        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+        "<table border=\"1\" class=\"dataframe\">\n",
+        "  <thead>\n",
+        "    <tr style=\"text-align: right;\">\n",
+        "      <th></th>\n",
+        "      <th>0</th>\n",
+        "    </tr>\n",
+        "  </thead>\n",
+        "  <tbody>\n",
+        "    <tr>\n",
+        "      <th>0</th>\n",
+        "      <td> 1</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>1</th>\n",
+        "      <td> 2</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2</th>\n",
+        "      <td> 3</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>0</th>\n",
+        "      <td> 4</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>1</th>\n",
+        "      <td> 5</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2</th>\n",
+        "      <td> 6</td>\n",
+        "    </tr>\n",
+        "  </tbody>\n",
+        "</table>\n",
+        "<p>6 rows \u00d7 1 columns</p>\n",
+        "</div>"
+       ],
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 11,
+       "text": [
+        "   0\n",
+        "0  1\n",
+        "1  2\n",
+        "2  3\n",
+        "0  4\n",
+        "1  5\n",
+        "2  6\n",
+        "\n",
+        "[6 rows x 1 columns]"
+       ]
+      }
+     ],
+     "prompt_number": 11
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "### How do I join two data frames by index?"
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "d = {'col1':[22,10,113]}\n",
+      "\n",
+      "i = [pd.Timestamp('1/1/2013'),\n",
+      "     pd.Timestamp('1/2/2013'),\n",
+      "     pd.Timestamp('1/3/2013')]\n",
+      "                 \n",
+      "\n",
+      "df1 = DataFrame(data=d, index = i)\n",
+      "df1"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "html": [
+        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+        "<table border=\"1\" class=\"dataframe\">\n",
+        "  <thead>\n",
+        "    <tr style=\"text-align: right;\">\n",
+        "      <th></th>\n",
+        "      <th>col1</th>\n",
+        "    </tr>\n",
+        "  </thead>\n",
+        "  <tbody>\n",
+        "    <tr>\n",
+        "      <th>2013-01-01</th>\n",
+        "      <td>  22</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2013-01-02</th>\n",
+        "      <td>  10</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2013-01-03</th>\n",
+        "      <td> 113</td>\n",
+        "    </tr>\n",
+        "  </tbody>\n",
+        "</table>\n",
+        "<p>3 rows \u00d7 1 columns</p>\n",
+        "</div>"
+       ],
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 12,
+       "text": [
+        "            col1\n",
+        "2013-01-01    22\n",
+        "2013-01-02    10\n",
+        "2013-01-03   113\n",
+        "\n",
+        "[3 rows x 1 columns]"
+       ]
+      }
+     ],
+     "prompt_number": 12
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "d = {'col2':[5,5]}\n",
+      "\n",
+      "i = [pd.Timestamp('1/1/2013'),\n",
+      "     pd.Timestamp('1/3/2013')]\n",
+      "                 \n",
+      "\n",
+      "df2 = DataFrame(data=d, index = i)\n",
+      "df2"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "html": [
+        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+        "<table border=\"1\" class=\"dataframe\">\n",
+        "  <thead>\n",
+        "    <tr style=\"text-align: right;\">\n",
+        "      <th></th>\n",
+        "      <th>col2</th>\n",
+        "    </tr>\n",
+        "  </thead>\n",
+        "  <tbody>\n",
+        "    <tr>\n",
+        "      <th>2013-01-01</th>\n",
+        "      <td> 5</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2013-01-03</th>\n",
+        "      <td> 5</td>\n",
+        "    </tr>\n",
+        "  </tbody>\n",
+        "</table>\n",
+        "<p>2 rows \u00d7 1 columns</p>\n",
+        "</div>"
+       ],
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 13,
+       "text": [
+        "            col2\n",