# Commits

committed 6ee2a97

• Participants
• Parent commits 629fccf
• Branches master

# File lessons/03 - Lesson.ipynb

• Ignore whitespace
{
-  "name": "03 - Lesson"
+  "name": ""
},
"nbformat": 3,
"nbformat_minor": 0,
"collapsed": false,
"input": [
"# Import libraries\n",
-      "from pandas import ExcelFile, DataFrame, concat, date_range\n",
+      "from pandas import DataFrame, date_range, read_excel, concat\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
-      "import numpy as np"
+      "import numpy.random as np\n",
+      "\n",
+      "# Turn on inline plotting\n",
+      "%matplotlib inline"
],
"language": "python",
"output_type": "stream",
"stream": "stdout",
"text": [
-        "Pandas version: 0.11.0\n"
+        "Pandas version: 0.13.0\n"
]
}
],
"cell_type": "code",
"collapsed": false,
"input": [
+      "# set seed\n",
+      "np.seed(111)\n",
+      "\n",
"# Function to generate test data\n",
"def CreateDataSet(Number=1):\n",
"    \n",
"    for i in range(Number):\n",
"        \n",
"        # Create a weekly (mondays) date range\n",
-      "        rng = date_range(start='1/1/2009', end='12/31/2012', freq='W@MON')\n",
+      "        rng = date_range(start='1/1/2009', end='12/31/2012', freq='W-MON')\n",
"        \n",
"        # Create random data\n",
-      "        data = np.random.randint(low=25,high=1000,size=len(rng))\n",
+      "        data = np.randint(low=25,high=1000,size=len(rng))\n",
"        \n",
"        # Status pool\n",
"        status = [1,2,3]\n",
"        \n",
"        # Make a random list of statuses\n",
-      "        seed(i)\n",
-      "        random_status = [status[randint(low=0,high=len(status))] for i in range(len(rng))]\n",
+      "        random_status = [status[np.randint(low=0,high=len(status))] for i in range(len(rng))]\n",
"        \n",
"        # State pool\n",
"        states = ['GA','FL','fl','NY','NJ','TX']\n",
"        \n",
"        # Make a random list of states \n",
-      "        random_states = [states[randint(low=0,high=len(states))] for i in range(len(rng))]\n",
+      "        random_states = [states[np.randint(low=0,high=len(states))] for i in range(len(rng))]\n",
"    \n",
"        Output.extend(zip(random_states, random_status, data, rng))\n",
"        \n",
"input": [
"dataset = CreateDataSet(4)\n",
"df = DataFrame(data=dataset, columns=['State','Status','CustomerCount','StatusDate'])\n",
-      "df"
+      "df.info()"
],
"language": "python",
"outputs": [
{
-       "html": [
-        "<pre>\n",
-        "&ltclass 'pandas.core.frame.DataFrame'&gt\n",
-        "Int64Index: 836 entries, 0 to 835\n",
-        "Data columns (total 4 columns):\n",
-        "State            836  non-null values\n",
-        "Status           836  non-null values\n",
-        "CustomerCount    836  non-null values\n",
-        "StatusDate       836  non-null values\n",
-        "dtypes: datetime64[ns](1), int64(2), object(1)\n",
-        "</pre>"
-       ],
-       "output_type": "pyout",
-       "prompt_number": 4,
+       "output_type": "stream",
+       "stream": "stdout",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"Int64Index: 836 entries, 0 to 835\n",
"prompt_number": 4
},
{
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+     ],
+     "language": "python",
+     "outputs": [
+      {
+       "html": [
+        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+        "<table border=\"1\" class=\"dataframe\">\n",
+        "    <tr style=\"text-align: right;\">\n",
+        "      <th></th>\n",
+        "      <th>State</th>\n",
+        "      <th>Status</th>\n",
+        "      <th>CustomerCount</th>\n",
+        "      <th>StatusDate</th>\n",
+        "    </tr>\n",
+        "  <tbody>\n",
+        "    <tr>\n",
+        "      <th>0</th>\n",
+        "      <td> GA</td>\n",
+        "      <td> 1</td>\n",
+        "      <td> 877</td>\n",
+        "      <td>2009-01-05 00:00:00</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>1</th>\n",
+        "      <td> FL</td>\n",
+        "      <td> 1</td>\n",
+        "      <td> 901</td>\n",
+        "      <td>2009-01-12 00:00:00</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2</th>\n",
+        "      <td> fl</td>\n",
+        "      <td> 3</td>\n",
+        "      <td> 749</td>\n",
+        "      <td>2009-01-19 00:00:00</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>3</th>\n",
+        "      <td> FL</td>\n",
+        "      <td> 3</td>\n",
+        "      <td> 111</td>\n",
+        "      <td>2009-01-26 00:00:00</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>4</th>\n",
+        "      <td> GA</td>\n",
+        "      <td> 1</td>\n",
+        "      <td> 300</td>\n",
+        "      <td>2009-02-02 00:00:00</td>\n",
+        "    </tr>\n",
+        "  </tbody>\n",
+        "</table>\n",
+        "<p>5 rows \u00d7 4 columns</p>\n",
+        "</div>"
+       ],
+       "output_type": "pyout",
+       "prompt_number": 5,
+       "text": [
+        "  State  Status  CustomerCount          StatusDate\n",
+        "0    GA       1            877 2009-01-05 00:00:00\n",
+        "1    FL       1            901 2009-01-12 00:00:00\n",
+        "2    fl       3            749 2009-01-19 00:00:00\n",
+        "3    FL       3            111 2009-01-26 00:00:00\n",
+        "4    GA       1            300 2009-02-02 00:00:00\n",
+        "\n",
+        "[5 rows x 4 columns]"
+       ]
+      }
+     ],
+     "prompt_number": 5
+    },
+    {
"cell_type": "markdown",
"source": [
]
}
],
-     "prompt_number": 5
+     "prompt_number": 6
},
{
"cell_type": "markdown",
"source": [
"# Grab Data from Excel  \n",
"\n",
-      "We will be using the ***ExcelFile*** function and the ***parse*** function to read in data from an excel file. The *ExcelFile* function creates an object and *parse* will help with the actually parsing of the file. Lets take a loook at both of these.  \n",
+      "We will be using the ***read_excel*** function to read in data from an Excel file. The function allows you to read in specific tabs by name or location.  \n",
"\n",
-      "**ExcelFile:**  \n",
-      "Parameters  \n",
-      "path : string or file-like objec, Path to xls file  \n",
-      "kind : {'xls', 'xlsx', None}, default None  \n",
-      "Definition:ExcelFile(self, path_or_buf)  \n",
-      "\n",
-      "**parse:**  \n",
-      "Definition: ExcelFile.parse(self, sheetname, header=0, skiprows=None, skip_footer=0, index_col=None, parse_cols=None, parse_dates=False, date_parser=None,   na_values=None, thousands=None, chunksize=None, **kwds)  \n",
-      "Docstring: Read Excel table into DataFrame  "
+      "Parameters    \n",
+      "<u>io</u> : string, file-like object or xlrd workbook  \n",
+      "&nbsp;If a string, expected to be a path to xls or xlsx file  \n",
+      "<u>sheetname</u> : string  \n",
+      "&nbsp;Name of Excel sheet  \n",
+      "<u>index_col</u> : int, default None  \n",
+      "&nbsp;Column to use as the row labels of the DataFrame. Pass None if\n",
+      "there is no such column  "
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
-      "ExcelFile?"
-     ],
-     "language": "python",
-     "outputs": [],
-     "prompt_number": 6
-    },
-    {
-     "cell_type": "code",
-     "collapsed": false,
-     "input": [
-      "ExcelFile.parse?"
],
"language": "python",
"collapsed": false,
"input": [
"# Location of file\n",
-      "Location = r'C:\\Users\\hdrojas\\.xy\\startups\\Lesson3.xlsx'\n",
-      "\n",
-      "# Create ExcelFile object\n",
-      "xlsx = ExcelFile(Location)\n",
+      "Location = r'C:\\Users\\David\\Lesson3.xlsx'\n",
"\n",
"# Parse a specific sheet\n",
-      "df = xlsx.parse('sheet1',index_col='StatusDate')\n",
+      "df = read_excel(Location, 0, index_col='StatusDate')\n",
"df.dtypes"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 8,
"text": [
-        "State             object\n",
-        "Status           float64\n",
-        "CustomerCount    float64\n",
+        "State            object\n",
+        "Status            int64\n",
+        "CustomerCount     int64\n",
"dtype: object"
]
}
"outputs": [
{
"output_type": "pyout",
"prompt_number": 9,
"text": [
"prompt_number": 9
},
{
-     "cell_type": "markdown",
-     "source": [
-      "As you can see the first problem we have here is that the column ***Status*** and the column ***CustomerCount*** are of data type *float64*. We would rather have these columns be an ***int*** data type. Below we will simply solve this issue by casting the columns to their appropriate type."
-     ]
-    },
-    {
-     "cell_type": "code",
-     "collapsed": false,
-     "input": [
-      "# Convert data types \n",
-      "df.Status = df.Status.astype('int')\n",
-      "df.CustomerCount = df.CustomerCount.astype('int')\n",
-      "print 'Data Types'\n",
-      "print df.dtypes"
-     ],
-     "language": "python",
-     "outputs": [
-      {
-       "output_type": "stream",
-       "stream": "stdout",
-       "text": [
-        "Data Types\n",
-        "State            object\n",
-        "Status            int32\n",
-        "CustomerCount     int32\n",
-        "dtype: object\n"
-       ]
-      }
-     ],
-     "prompt_number": 10
-    },
-    {
"cell_type": "code",
"collapsed": false,
"input": [
"  <tbody>\n",
"    <tr>\n",
"      <th>2009-01-05</th>\n",
-        "      <td> NY</td>\n",
+        "      <td> GA</td>\n",
"      <td> 1</td>\n",
-        "      <td> 721</td>\n",
+        "      <td> 877</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>2009-01-12</th>\n",
-        "      <td> GA</td>\n",
-        "      <td> 2</td>\n",
-        "      <td>  86</td>\n",
+        "      <td> FL</td>\n",
+        "      <td> 1</td>\n",
+        "      <td> 901</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>2009-01-19</th>\n",
-        "      <td> NY</td>\n",
-        "      <td> 1</td>\n",
-        "      <td> 441</td>\n",
+        "      <td> fl</td>\n",
+        "      <td> 3</td>\n",
+        "      <td> 749</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>2009-01-26</th>\n",
-        "      <td> GA</td>\n",
-        "      <td> 2</td>\n",
-        "      <td> 992</td>\n",
+        "      <td> FL</td>\n",
+        "      <td> 3</td>\n",
+        "      <td> 111</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>2009-02-02</th>\n",
-        "      <td> NJ</td>\n",
-        "      <td> 2</td>\n",
-        "      <td> 614</td>\n",
+        "      <td> GA</td>\n",
+        "      <td> 1</td>\n",
+        "      <td> 300</td>\n",
"    </tr>\n",
"  </tbody>\n",
"</table>\n",
+        "<p>5 rows \u00d7 3 columns</p>\n",
"</div>"
],
"output_type": "pyout",
-       "prompt_number": 11,
+       "prompt_number": 10,
"text": [
"           State  Status  CustomerCount\n",
"StatusDate                             \n",
-        "2009-01-05    NY       1            721\n",
-        "2009-01-12    GA       2             86\n",
-        "2009-01-19    NY       1            441\n",
-        "2009-01-26    GA       2            992\n",
-        "2009-02-02    NJ       2            614"
+        "2009-01-05    GA       1            877\n",
+        "2009-01-12    FL       1            901\n",
+        "2009-01-19    fl       3            749\n",
+        "2009-01-26    FL       3            111\n",
+        "2009-02-02    GA       1            300\n",
+        "\n",
+        "[5 rows x 3 columns]"
]
}
],
-     "prompt_number": 11
+     "prompt_number": 10
},
{
"cell_type": "markdown",
"outputs": [
{
"output_type": "pyout",
-       "prompt_number": 12,
+       "prompt_number": 11,
"text": [
-        "array([NY, GA, NJ, fl, TX, FL], dtype=object)"
+        "array([u'GA', u'FL', u'fl', u'TX', u'NY', u'NJ'], dtype=object)"
]
}
],
-     "prompt_number": 12
+     "prompt_number": 11
},
{
"cell_type": "markdown",
"collapsed": false,
"input": [
"# Clean State Column, convert to upper case\n",
-      "df.State = df.State.apply(lambda x: x.upper())"
+      "df['State'] = df.State.apply(lambda x: x.upper())"
],
"language": "python",
"outputs": [],
-     "prompt_number": 13
+     "prompt_number": 12
},
{
"cell_type": "code",
"outputs": [
{
"output_type": "pyout",
-       "prompt_number": 14,
+       "prompt_number": 13,
"text": [
-        "array([NY, GA, NJ, FL, TX], dtype=object)"
+        "array([u'GA', u'FL', u'TX', u'NY', u'NJ'], dtype=object)"
]
}
],
-     "prompt_number": 14
+     "prompt_number": 13
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Only grab where Status == 1\n",
-      "df = df[df['Status'] == 1]"
+      "mask = df['Status'] == 1\n",
],
"language": "python",
"outputs": [],
-     "prompt_number": 15
+     "prompt_number": 14
},
{
"cell_type": "markdown",
"source": [
-      "To turn the ***NJ*** states to ***NY*** we simply.  \n",
+      "To turn the ***NJ*** states to ***NY*** we simply...  \n",
"\n",
"***[df.State == 'NJ']*** - Find all records in the *State* column where they are equal to *NJ*.  \n",
"***df.State[df.State == 'NJ'] = 'NY'*** - For all records in the *State* column where they are equal to *NJ*, replace them with *NY*."
"collapsed": false,
"input": [
"# Convert NJ to NY\n",
-      "df.State[df.State == 'NJ'] = 'NY'"
+      "mask = df.State == 'NJ'\n",
],
"language": "python",
-     "outputs": [],
-     "prompt_number": 16
+     "outputs": [
+      {
+       "output_type": "stream",
+       "stream": "stderr",
+       "text": [
+        "C:\\Users\\david\\Anaconda\\lib\\site-packages\\pandas\\core\\series.py:628: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame\n",
+        "  self.where(~key, value, inplace=True)\n"
+       ]
+      }
+     ],
+     "prompt_number": 15
},
{
"cell_type": "markdown",
"outputs": [
{
"output_type": "pyout",
-       "prompt_number": 17,
+       "prompt_number": 16,
"text": [
-        "array([NY, TX, GA, FL], dtype=object)"
+        "array([u'GA', u'FL', u'NY', u'TX'], dtype=object)"
]
}
],
-     "prompt_number": 17
+     "prompt_number": 16
},
{
"cell_type": "markdown",
"cell_type": "code",
"collapsed": false,
"input": [
-      "df['CustomerCount'].plot()"
+      "df['CustomerCount'].plot();"
],
"language": "python",
"outputs": [
{
-       "output_type": "pyout",
-       "prompt_number": 18,
+       "output_type": "display_data",
"text": [
-        "<matplotlib.axes.AxesSubplot at 0x6cf0b90>"
+        "<matplotlib.figure.Figure at 0x4fbe7f0>"
]
-      },
-      {
-       "output_type": "display_data",
}
],
-     "prompt_number": 18
+     "prompt_number": 17
},
{
"cell_type": "markdown",
"source": [
-      "If we take a look at the data, we begin to realize that there are multiple values per State, StatusDate, and Status. It is possible that this means the data you are working with is dirty/bad/inaccurate, but we will assume otherwise. We can assume this data set is a subset of a bigger data set and if we simply add the values in the ***CustomerCount*** column per State, StatusDate, and Status we will get the ***Total Customer Count*** per day.  "
+      "If we take a look at the data, we begin to realize that there are multiple values for the same State, StatusDate, and Status combination. It is possible that this means the data you are working with is dirty/bad/inaccurate, but we will assume otherwise. We can assume this data set is a subset of a bigger data set and if we simply add the values in the ***CustomerCount*** column per State, StatusDate, and Status we will get the ***Total Customer Count*** per day.  "
]
},
{
"collapsed": false,
"input": [
"sortdf = df[df['State']=='NY'].sort(axis=0)\n",
],
"language": "python",
"  <tbody>\n",
"    <tr>\n",
-        "      <th>2009-01-05</th>\n",
+        "      <th>2009-01-19</th>\n",
"      <td> NY</td>\n",
"      <td> 1</td>\n",
-        "      <td> 721</td>\n",
+        "      <td> 522</td>\n",
"    </tr>\n",
"    <tr>\n",
-        "      <th>2009-01-12</th>\n",
+        "      <th>2009-02-23</th>\n",
"      <td> NY</td>\n",
"      <td> 1</td>\n",
-        "      <td> 368</td>\n",
+        "      <td> 710</td>\n",
"    </tr>\n",
"    <tr>\n",
-        "      <th>2009-01-12</th>\n",
+        "      <th>2009-03-09</th>\n",
"      <td> NY</td>\n",
"      <td> 1</td>\n",
-        "      <td> 103</td>\n",
+        "      <td> 992</td>\n",
"    </tr>\n",
"    <tr>\n",
-        "      <th>2009-01-19</th>\n",
+        "      <th>2009-03-16</th>\n",
"      <td> NY</td>\n",
"      <td> 1</td>\n",
-        "      <td> 441</td>\n",
+        "      <td> 355</td>\n",
"    </tr>\n",
"    <tr>\n",
-        "      <th>2009-01-26</th>\n",
+        "      <th>2009-03-23</th>\n",
+        "      <td> NY</td>\n",
+        "      <td> 1</td>\n",
+        "      <td> 728</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2009-03-30</th>\n",
+        "      <td> NY</td>\n",
+        "      <td> 1</td>\n",
+        "      <td> 863</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2009-04-13</th>\n",
+        "      <td> NY</td>\n",
+        "      <td> 1</td>\n",
+        "      <td> 520</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2009-04-20</th>\n",
+        "      <td> NY</td>\n",
+        "      <td> 1</td>\n",
+        "      <td> 820</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2009-04-20</th>\n",
+        "      <td> NY</td>\n",
+        "      <td> 1</td>\n",
+        "      <td> 937</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2009-04-27</th>\n",
"      <td> NY</td>\n",
"      <td> 1</td>\n",
-        "      <td> 408</td>\n",
+        "      <td> 753</td>\n",
"    </tr>\n",
"  </tbody>\n",
"</table>\n",
+        "<p>10 rows \u00d7 3 columns</p>\n",
"</div>"
],
"output_type": "pyout",
-       "prompt_number": 19,
+       "prompt_number": 18,
"text": [
"           State  Status  CustomerCount\n",
"StatusDate                             \n",
-        "2009-01-05    NY       1            721\n",
-        "2009-01-12    NY       1            368\n",
-        "2009-01-12    NY       1            103\n",
-        "2009-01-19    NY       1            441\n",
-        "2009-01-26    NY       1            408"
+        "2009-01-19    NY       1            522\n",
+        "2009-02-23    NY       1            710\n",
+        "2009-03-09    NY       1            992\n",
+        "2009-03-16    NY       1            355\n",
+        "2009-03-23    NY       1            728\n",
+        "2009-03-30    NY       1            863\n",
+        "2009-04-13    NY       1            520\n",
+        "2009-04-20    NY       1            820\n",
+        "2009-04-20    NY       1            937\n",
+        "2009-04-27    NY       1            753\n",
+        "\n",
+        "[10 rows x 3 columns]"
]
}
],
-     "prompt_number": 19
+     "prompt_number": 18
},
{
"cell_type": "markdown",
"  <tbody>\n",
"    <tr>\n",
"      <th rowspan=\"5\" valign=\"top\">FL</th>\n",
-        "      <th>2009-02-02</th>\n",
+        "      <th>2009-01-12</th>\n",
"      <td> 1</td>\n",
-        "      <td> 385</td>\n",
+        "      <td>  901</td>\n",
"    </tr>\n",
"    <tr>\n",
-        "      <th>2009-02-09</th>\n",
+        "      <th>2009-02-02</th>\n",
"      <td> 1</td>\n",
-        "      <td> 125</td>\n",
+        "      <td>  653</td>\n",
"    </tr>\n",
"    <tr>\n",
-        "      <th>2009-02-16</th>\n",
+        "      <th>2009-03-23</th>\n",
"      <td> 1</td>\n",
-        "      <td> 378</td>\n",
+        "      <td>  752</td>\n",
"    </tr>\n",
"    <tr>\n",
-        "      <th>2009-03-02</th>\n",
-        "      <td> 1</td>\n",
-        "      <td> 722</td>\n",
+        "      <th>2009-04-06</th>\n",
+        "      <td> 2</td>\n",
+        "      <td> 1086</td>\n",
"    </tr>\n",
"    <tr>\n",
-        "      <th>2009-05-18</th>\n",
+        "      <th>2009-06-08</th>\n",
"      <td> 1</td>\n",
-        "      <td> 962</td>\n",
+        "      <td>  649</td>\n",
"    </tr>\n",
"  </tbody>\n",
"</table>\n",
+        "<p>5 rows \u00d7 2 columns</p>\n",
"</div>"
],
"output_type": "pyout",
-       "prompt_number": 20,
+       "prompt_number": 19,
"text": [
"                  Status  CustomerCount\n",
"State StatusDate                       \n",
-        "FL    2009-02-02       1            385\n",
-        "      2009-02-09       1            125\n",
-        "      2009-02-16       1            378\n",
-        "      2009-03-02       1            722\n",
-        "      2009-05-18       1            962"
+        "FL    2009-01-12       1            901\n",
+        "      2009-02-02       1            653\n",
+        "      2009-03-23       1            752\n",
+        "      2009-04-06       2           1086\n",
+        "      2009-06-08       1            649\n",
+        "\n",
+        "[5 rows x 2 columns]"
]
}
],
-     "prompt_number": 20
+     "prompt_number": 19
},
{
"cell_type": "markdown",
"  <tbody>\n",
"    <tr>\n",
"      <th rowspan=\"5\" valign=\"top\">FL</th>\n",
-        "      <th>2009-02-02</th>\n",
-        "      <td> 385</td>\n",
+        "      <th>2009-01-12</th>\n",
+        "      <td>  901</td>\n",
"    </tr>\n",
"    <tr>\n",
-        "      <th>2009-02-09</th>\n",
-        "      <td> 125</td>\n",
+        "      <th>2009-02-02</th>\n",
+        "      <td>  653</td>\n",
"    </tr>\n",
"    <tr>\n",
-        "      <th>2009-02-16</th>\n",
-        "      <td> 378</td>\n",
+        "      <th>2009-03-23</th>\n",
+        "      <td>  752</td>\n",
"    </tr>\n",
"    <tr>\n",
-        "      <th>2009-03-02</th>\n",
-        "      <td> 722</td>\n",
+        "      <th>2009-04-06</th>\n",
+        "      <td> 1086</td>\n",
"    </tr>\n",
"    <tr>\n",
-        "      <th>2009-05-18</th>\n",
-        "      <td> 962</td>\n",
+        "      <th>2009-06-08</th>\n",
+        "      <td>  649</td>\n",
"    </tr>\n",
"  </tbody>\n",
"</table>\n",
+        "<p>5 rows \u00d7 1 columns</p>\n",
"</div>"
],
"output_type": "pyout",
-       "prompt_number": 21,
+       "prompt_number": 20,
"text": [
"                  CustomerCount\n",
"State StatusDate               \n",
-        "FL    2009-02-02            385\n",
-        "      2009-02-09            125\n",
-        "      2009-02-16            378\n",
-        "      2009-03-02            722\n",
-        "      2009-05-18            962"
+        "FL    2009-01-12            901\n",
+        "      2009-02-02            653\n",
+        "      2009-03-23            752\n",
+        "      2009-04-06           1086\n",
+        "      2009-06-08            649\n",
+        "\n",
+        "[5 rows x 1 columns]"
]
}
],
-     "prompt_number": 21
+     "prompt_number": 20
},
{
"cell_type": "code",
"outputs": [
{
"output_type": "pyout",
-       "prompt_number": 22,
+       "prompt_number": 21,
"text": [
-        "MultiIndex\n",
-        "[FL  2009-02-02,     2009-02-09,     2009-02-16,     2009-03-02,     2009-05-18,     2009-06-08,     2009-06-15,     2009-06-22,     2009-07-06,     2009-07-27,     2009-08-03,     2009-09-14,     2009-09-28,     2009-10-19,     2009-11-23,     2009-11-30,     2010-01-04,     2010-01-11,     2010-02-01,     2010-02-15,     2010-03-15,     2010-03-22,     2010-04-12,     2010-04-19,     2010-04-26,     2010-05-03,     2010-05-10,     2010-05-17,     2010-06-07,     2010-06-21,     2010-06-28,     2010-07-05,     2010-08-30,     2010-10-11,     2010-10-18,     2010-10-25,     2010-11-01,     2010-11-15,     2010-11-29,     2010-12-27,     2011-01-03,     2011-01-10,     2011-01-24,     2011-02-07,     2011-03-07,     2011-03-14,     2011-03-28,     2011-04-04,     2011-04-18,     2011-04-25, ..., NY  2012-10-22,     2012-11-12,     2012-11-26,     2012-12-03, TX  2009-01-05,     2009-01-19,     2009-03-02,     2009-03-16,     2009-04-13,     2009-04-20,     2009-06-01,     2009-08-03,     2009-08-31,     2009-09-21,     2009-12-14,     2010-01-04,     2010-02-15,     2010-04-19,     2010-05-31,     2010-06-07,     2010-06-14,     2010-06-28,     2010-07-05,     2010-08-09,     2010-08-23,     2010-09-06,     2010-10-04,     2010-11-01,     2010-11-08,     2010-12-13,     2011-01-17,     2011-02-14,     2011-02-28,     2011-03-14,     2011-05-16,     2011-06-13,     2011-09-12,     2011-09-26,     2011-10-17,     2011-11-07,     2011-11-21,     2011-12-05,     2012-01-30,     2012-03-05,     2012-03-26,     2012-06-04,     2012-07-02,     2012-07-30,     2012-10-08,     2012-11-12]"
+        "MultiIndex(levels=[[u'FL', u'GA', u'NY', u'TX'], [2009-01-05 00:00:00, 2009-01-12 00:00:00, 2009-01-19 00:00:00, 2009-02-02 00:00:00, 2009-02-23 00:00:00, 2009-03-09 00:00:00, 2009-03-16 00:00:00, 2009-03-23 00:00:00, 2009-03-30 00:00:00, 2009-04-06 00:00:00, 2009-04-13 00:00:00, 2009-04-20 00:00:00, 2009-04-27 00:00:00, 2009-05-04 00:00:00, 2009-05-11 00:00:00, 2009-05-18 00:00:00, 2009-05-25 00:00:00, 2009-06-08 00:00:00, 2009-06-22 00:00:00, 2009-07-06 00:00:00, 2009-07-13 00:00:00, 2009-07-20 00:00:00, 2009-07-27 00:00:00, 2009-08-10 00:00:00, 2009-08-17 00:00:00, 2009-08-24 00:00:00, 2009-08-31 00:00:00, 2009-09-07 00:00:00, 2009-09-14 00:00:00, 2009-09-21 00:00:00, 2009-09-28 00:00:00, 2009-10-05 00:00:00, 2009-10-12 00:00:00, 2009-10-19 00:00:00, 2009-10-26 00:00:00, 2009-11-02 00:00:00, 2009-11-23 00:00:00, 2009-11-30 00:00:00, 2009-12-07 00:00:00, 2009-12-14 00:00:00, 2010-01-04 00:00:00, 2010-01-11 00:00:00, 2010-01-18 00:00:00, 2010-01-25 00:00:00, 2010-02-08 00:00:00, 2010-02-15 00:00:00, 2010-02-22 00:00:00, 2010-03-01 00:00:00, 2010-03-08 00:00:00, 2010-03-15 00:00:00, 2010-04-05 00:00:00, 2010-04-12 00:00:00, 2010-04-26 00:00:00, 2010-05-03 00:00:00, 2010-05-10 00:00:00, 2010-05-17 00:00:00, 2010-05-24 00:00:00, 2010-05-31 00:00:00, 2010-06-14 00:00:00, 2010-06-28 00:00:00, 2010-07-05 00:00:00, 2010-07-19 00:00:00, 2010-07-26 00:00:00, 2010-08-02 00:00:00, 2010-08-09 00:00:00, 2010-08-16 00:00:00, 2010-08-30 00:00:00, 2010-09-06 00:00:00, 2010-09-13 00:00:00, 2010-09-20 00:00:00, 2010-09-27 00:00:00, 2010-10-04 00:00:00, 2010-10-11 00:00:00, 2010-10-18 00:00:00, 2010-10-25 00:00:00, 2010-11-01 00:00:00, 2010-11-08 00:00:00, 2010-11-15 00:00:00, 2010-11-29 00:00:00, 2010-12-20 00:00:00, 2011-01-03 00:00:00, 2011-01-10 00:00:00, 2011-01-17 00:00:00, 2011-02-07 00:00:00, 2011-02-14 00:00:00, 2011-02-21 00:00:00, 2011-02-28 00:00:00, 2011-03-07 00:00:00, 2011-03-14 00:00:00, 2011-03-21 00:00:00, 2011-03-28 00:00:00, 2011-04-04 00:00:00, 
2011-04-18 00:00:00, 2011-04-25 00:00:00, 2011-05-02 00:00:00, 2011-05-09 00:00:00, 2011-05-16 00:00:00, 2011-05-23 00:00:00, 2011-05-30 00:00:00, 2011-06-06 00:00:00, ...]],\n",
+        "           labels=[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...], [1, 3, 7, 9, 17, 19, 20, 21, 23, 25, 27, 28, 29, 30, 31, 35, 38, 40, 41, 44, 45, 46, 47, 48, 49, 52, 54, 56, 57, 59, 60, 62, 66, 68, 69, 70, 71, 72, 75, 76, 77, 78, 79, 85, 88, 89, 92, 96, 97, 99, 100, 101, 103, 104, 105, 108, 109, 110, 112, 114, 115, 117, 118, 119, 125, 126, 127, 128, 129, 131, 133, 134, 135, 136, 137, 140, 146, 150, 151, 152, 153, 157, 0, 3, 7, 22, 23, 24, 27, 28, 34, 37, 42, 47, 50, 55, 58, 66, 67, 69, ...]],\n",
+        "           names=[u'State', u'StatusDate'])"
]
}
],
-     "prompt_number": 22
+     "prompt_number": 21
},
{
"cell_type": "code",
"outputs": [
{
"output_type": "pyout",
-       "prompt_number": 23,
+       "prompt_number": 22,
"text": [
-        "Index([FL, GA, NY, TX], dtype=object)"
+        "Index([u'FL', u'GA', u'NY', u'TX'], dtype='object')"
]
}
],
-     "prompt_number": 23
+     "prompt_number": 22
},
{
"cell_type": "code",
"outputs": [
{
"output_type": "pyout",
-       "prompt_number": 24,
+       "prompt_number": 23,
"text": [
"<class 'pandas.tseries.index.DatetimeIndex'>\n",
-        "[2009-01-05 00:00:00, ..., 2012-12-31 00:00:00]\n",
-        "Length: 181, Freq: None, Timezone: None"
+        "[2009-01-05 00:00:00, ..., 2012-12-10 00:00:00]\n",
+        "Length: 161, Freq: None, Timezone: None"
]
}
],
-     "prompt_number": 24
+     "prompt_number": 23
},
{
"cell_type": "markdown",
"Daily.loc['FL'].plot()\n",
"Daily.loc['GA'].plot()\n",
"Daily.loc['NY'].plot()\n",
-      "Daily.loc['TX'].plot()"
+      "Daily.loc['TX'].plot();"
],
"language": "python",
"outputs": [
{
-       "output_type": "pyout",
-       "prompt_number": 25,
+       "output_type": "display_data",
"text": [
-        "<matplotlib.axes.AxesSubplot at 0x5850bd0>"
+        "<matplotlib.figure.Figure at 0xb9f0400>"
]
},
{
"output_type": "display_data",
-      },
-      {
-       "output_type": "display_data",
+       "text": [
+        "<matplotlib.figure.Figure at 0xb67cd68>"
+       ]
},
{
"output_type": "display_data",
+       "text": [
+        "<matplotlib.figure.Figure at 0xb9f1470>"
+       ]
},
{
"output_type": "display_data",
+       "text": [
+        "<matplotlib.figure.Figure at 0xb9fc748>"
+       ]
}
],
-     "prompt_number": 25
+     "prompt_number": 24
},
{
"cell_type": "markdown",
"Daily.loc['FL']['2012':].plot()\n",
"Daily.loc['GA']['2012':].plot()\n",
"Daily.loc['NY']['2012':].plot()\n",
-      "Daily.loc['TX']['2012':].plot()"
+      "Daily.loc['TX']['2012':].plot();"
],
"language": "python",
"outputs": [
{
-       "output_type": "pyout",
-       "prompt_number": 26,
+       "output_type": "display_data",
"text": [
-        "<matplotlib.axes.AxesSubplot at 0x6d49530>"
+        "<matplotlib.figure.Figure at 0xb9f9d68>"
]
},
{