Commits

Hernan Rojas  committed 33019e6

added some files

  • Participants
  • Parent commits 4e60896

Comments (0)

Files changed (2)

File notebooks/Bosch.ipynb

+{
+ "metadata": {
+  "name": "Bosch"
+ },
+ "nbformat": 3,
+ "nbformat_minor": 0,
+ "worksheets": [
+  {
+   "cells": [
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "The goal of this lesson is to show you how to pull data from three separate flat files into memory.  \n",
+      "\n",
+      "**Flat Files:**  \n",
+      "***1.*** Patient.xls - Excel file  \n",
+      "***2.*** visit.csv - CSV file  \n",
+      "***3.*** Session.json - JSON file  \n"
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "# Import python libraries\n",
+      "from pandas import DataFrame, ExcelFile, read_csv\n",
+      "import json"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [],
+     "prompt_number": 1
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "## Import Excel File"
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "# Path to excel file\n",
+      "location = r'C:\\Users\\David\\Desktop\\Patient.xls'\n",
+      "\n",
+      "# Create ExcelFile object\n",
+      "xls = ExcelFile(location)\n",
+      "\n",
+      "# Parse the excel file, tab named \"Patient\"\n",
+      "patient = xls.parse('Patient')\n",
+      "patient.head()"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "html": [
+        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+        "<table border=\"1\" class=\"dataframe\">\n",
+        "  <thead>\n",
+        "    <tr style=\"text-align: right;\">\n",
+        "      <th></th>\n",
+        "      <th>Id</th>\n",
+        "      <th>FirstName</th>\n",
+        "      <th>LastName</th>\n",
+        "      <th>DateRegistered</th>\n",
+        "      <th>Sex</th>\n",
+        "      <th>Age</th>\n",
+        "    </tr>\n",
+        "  </thead>\n",
+        "  <tbody>\n",
+        "    <tr>\n",
+        "      <th>0</th>\n",
+        "      <td> 1</td>\n",
+        "      <td>    Bob</td>\n",
+        "      <td>    Smith</td>\n",
+        "      <td> 2013-01-01 00:00:00</td>\n",
+        "      <td> M</td>\n",
+        "      <td> 67</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>1</th>\n",
+        "      <td> 2</td>\n",
+        "      <td>    Sue</td>\n",
+        "      <td>   Donald</td>\n",
+        "      <td> 2013-01-01 00:00:00</td>\n",
+        "      <td> F</td>\n",
+        "      <td> 59</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2</th>\n",
+        "      <td> 3</td>\n",
+        "      <td>  Billy</td>\n",
+        "      <td> Ferguson</td>\n",
+        "      <td> 2013-01-01 00:00:00</td>\n",
+        "      <td> M</td>\n",
+        "      <td> 78</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>3</th>\n",
+        "      <td> 4</td>\n",
+        "      <td>   Jhon</td>\n",
+        "      <td>  Jhonson</td>\n",
+        "      <td> 2013-01-01 00:00:00</td>\n",
+        "      <td> M</td>\n",
+        "      <td> 72</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>4</th>\n",
+        "      <td> 5</td>\n",
+        "      <td> Andrea</td>\n",
+        "      <td>     Leaf</td>\n",
+        "      <td> 2013-01-01 00:00:00</td>\n",
+        "      <td> F</td>\n",
+        "      <td> 71</td>\n",
+        "    </tr>\n",
+        "  </tbody>\n",
+        "</table>\n",
+        "</div>"
+       ],
+       "output_type": "pyout",
+       "prompt_number": 2,
+       "text": [
+        "   Id FirstName  LastName       DateRegistered Sex  Age\n",
+        "0   1       Bob     Smith  2013-01-01 00:00:00   M   67\n",
+        "1   2       Sue    Donald  2013-01-01 00:00:00   F   59\n",
+        "2   3     Billy  Ferguson  2013-01-01 00:00:00   M   78\n",
+        "3   4      Jhon   Jhonson  2013-01-01 00:00:00   M   72\n",
+        "4   5    Andrea      Leaf  2013-01-01 00:00:00   F   71"
+       ]
+      }
+     ],
+     "prompt_number": 2
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "## Import CSV File"
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "# Path to csv file\n",
+      "location = r'C:\\Users\\David\\Desktop\\visit.csv'\n",
+      "\n",
+      "# Grab data\n",
+      "visits = read_csv(location)\n",
+      "visits.head()"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "html": [
+        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+        "<table border=\"1\" class=\"dataframe\">\n",
+        "  <thead>\n",
+        "    <tr style=\"text-align: right;\">\n",
+        "      <th></th>\n",
+        "      <th>Id</th>\n",
+        "      <th>PatientId</th>\n",
+        "      <th>VisitDate</th>\n",
+        "      <th>Cost</th>\n",
+        "    </tr>\n",
+        "  </thead>\n",
+        "  <tbody>\n",
+        "    <tr>\n",
+        "      <th>0</th>\n",
+        "      <td> 1</td>\n",
+        "      <td>  6</td>\n",
+        "      <td> 01/01/13</td>\n",
+        "      <td>  695.861816</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>1</th>\n",
+        "      <td> 2</td>\n",
+        "      <td> 10</td>\n",
+        "      <td> 01/02/13</td>\n",
+        "      <td> 1980.224609</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2</th>\n",
+        "      <td> 3</td>\n",
+        "      <td>  4</td>\n",
+        "      <td> 01/03/13</td>\n",
+        "      <td> 1791.931152</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>3</th>\n",
+        "      <td> 4</td>\n",
+        "      <td>  4</td>\n",
+        "      <td> 01/04/13</td>\n",
+        "      <td> 1216.552734</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>4</th>\n",
+        "      <td> 5</td>\n",
+        "      <td>  2</td>\n",
+        "      <td> 01/05/13</td>\n",
+        "      <td> 1800.048828</td>\n",
+        "    </tr>\n",
+        "  </tbody>\n",
+        "</table>\n",
+        "</div>"
+       ],
+       "output_type": "pyout",
+       "prompt_number": 3,
+       "text": [
+        "   Id  PatientId VisitDate         Cost\n",
+        "0   1          6  01/01/13   695.861816\n",
+        "1   2         10  01/02/13  1980.224609\n",
+        "2   3          4  01/03/13  1791.931152\n",
+        "3   4          4  01/04/13  1216.552734\n",
+        "4   5          2  01/05/13  1800.048828"
+       ]
+      }
+     ],
+     "prompt_number": 3
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "## Import JSON File"
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "# Path to json file\n",
+      "location = r'C:\\Users\\David\\Desktop\\Session.json'\n",
+      "\n",
+      "# Read json file\n",
+      "json = [json.loads(line) for line in open(location)]"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [],
+     "prompt_number": 4
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "# Grab json data\n",
+      "session = DataFrame([rec for rec in json[0]])\n",
+      "session.head()"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "html": [
+        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+        "<table border=\"1\" class=\"dataframe\">\n",
+        "  <thead>\n",
+        "    <tr style=\"text-align: right;\">\n",
+        "      <th></th>\n",
+        "      <th>BuddyId</th>\n",
+        "      <th>DateAdded</th>\n",
+        "      <th>Id</th>\n",
+        "      <th>PatientId</th>\n",
+        "      <th>QuestionId</th>\n",
+        "      <th>ResponseId</th>\n",
+        "      <th>Status</th>\n",
+        "    </tr>\n",
+        "  </thead>\n",
+        "  <tbody>\n",
+        "    <tr>\n",
+        "      <th>0</th>\n",
+        "      <td> 116</td>\n",
+        "      <td> 2013-01-01 00:00:00</td>\n",
+        "      <td> 1</td>\n",
+        "      <td> 6</td>\n",
+        "      <td> 3</td>\n",
+        "      <td> 1</td>\n",
+        "      <td> 1</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>1</th>\n",
+        "      <td> 116</td>\n",
+        "      <td> 2013-01-02 00:00:00</td>\n",
+        "      <td> 1</td>\n",
+        "      <td> 3</td>\n",
+        "      <td> 1</td>\n",
+        "      <td> 0</td>\n",
+        "      <td> 0</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2</th>\n",
+        "      <td> 120</td>\n",
+        "      <td> 2013-01-03 00:00:00</td>\n",
+        "      <td> 1</td>\n",
+        "      <td> 1</td>\n",
+        "      <td> 3</td>\n",
+        "      <td> 3</td>\n",
+        "      <td> 0</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>3</th>\n",
+        "      <td> 111</td>\n",
+        "      <td> 2013-01-04 00:00:00</td>\n",
+        "      <td> 1</td>\n",
+        "      <td> 6</td>\n",
+        "      <td> 1</td>\n",
+        "      <td> 2</td>\n",
+        "      <td> 1</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>4</th>\n",
+        "      <td> 119</td>\n",
+        "      <td> 2013-01-05 00:00:00</td>\n",
+        "      <td> 1</td>\n",
+        "      <td> 9</td>\n",
+        "      <td> 3</td>\n",
+        "      <td> 1</td>\n",
+        "      <td> 0</td>\n",
+        "    </tr>\n",
+        "  </tbody>\n",
+        "</table>\n",
+        "</div>"
+       ],
+       "output_type": "pyout",
+       "prompt_number": 5,
+       "text": [
+        "  BuddyId            DateAdded Id PatientId QuestionId ResponseId Status\n",
+        "0     116  2013-01-01 00:00:00  1         6          3          1      1\n",
+        "1     116  2013-01-02 00:00:00  1         3          1          0      0\n",
+        "2     120  2013-01-03 00:00:00  1         1          3          3      0\n",
+        "3     111  2013-01-04 00:00:00  1         6          1          2      1\n",
+        "4     119  2013-01-05 00:00:00  1         9          3          1      0"
+       ]
+      }
+     ],
+     "prompt_number": 5
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "Now that we have all of our data in memory, we can start to actually perform some data analysis. We can combine the different data sets, graph them, and add additional columns to them. "
+     ]
+    }
+   ],
+   "metadata": {}
+  }
+ ]
+}

File notebooks/excel to json to df.ipynb

+{
+ "metadata": {
+  "name": "excel to json to df"
+ },
+ "nbformat": 3,
+ "nbformat_minor": 0,
+ "worksheets": [
+  {
+   "cells": [
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "from pandas import DataFrame, ExcelFile\n",
+      "import json"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [],
+     "prompt_number": 1
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "# Path to excel file\n",
+      "location = r'C:\\Users\\David\\Desktop\\Bosch2.xls'\n",
+      "\n",
+      "# Create ExcelFile object\n",
+      "xls = ExcelFile(location)\n",
+      "\n",
+      "# Parse the excel file\n",
+      "df = xls.parse('Session')\n",
+      "df.head()"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "html": [
+        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+        "<table border=\"1\" class=\"dataframe\">\n",
+        "  <thead>\n",
+        "    <tr style=\"text-align: right;\">\n",
+        "      <th></th>\n",
+        "      <th>Id</th>\n",
+        "      <th>PatientId</th>\n",
+        "      <th>BuddyId</th>\n",
+        "      <th>DateAdded</th>\n",
+        "      <th>QuestionId</th>\n",
+        "      <th>ResponseId</th>\n",
+        "      <th>Status</th>\n",
+        "    </tr>\n",
+        "  </thead>\n",
+        "  <tbody>\n",
+        "    <tr>\n",
+        "      <th>0</th>\n",
+        "      <td> 1</td>\n",
+        "      <td> 6</td>\n",
+        "      <td> 116</td>\n",
+        "      <td>2013-01-01 00:00:00</td>\n",
+        "      <td> 3</td>\n",
+        "      <td> 1</td>\n",
+        "      <td> 1</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>1</th>\n",
+        "      <td> 1</td>\n",
+        "      <td> 3</td>\n",
+        "      <td> 116</td>\n",
+        "      <td>2013-01-02 00:00:00</td>\n",
+        "      <td> 1</td>\n",
+        "      <td> 0</td>\n",
+        "      <td> 0</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2</th>\n",
+        "      <td> 1</td>\n",
+        "      <td> 1</td>\n",
+        "      <td> 120</td>\n",
+        "      <td>2013-01-03 00:00:00</td>\n",
+        "      <td> 3</td>\n",
+        "      <td> 3</td>\n",
+        "      <td> 0</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>3</th>\n",
+        "      <td> 1</td>\n",
+        "      <td> 6</td>\n",
+        "      <td> 111</td>\n",
+        "      <td>2013-01-04 00:00:00</td>\n",
+        "      <td> 1</td>\n",
+        "      <td> 2</td>\n",
+        "      <td> 1</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>4</th>\n",
+        "      <td> 1</td>\n",
+        "      <td> 9</td>\n",
+        "      <td> 119</td>\n",
+        "      <td>2013-01-05 00:00:00</td>\n",
+        "      <td> 3</td>\n",
+        "      <td> 1</td>\n",
+        "      <td> 0</td>\n",
+        "    </tr>\n",
+        "  </tbody>\n",
+        "</table>\n",
+        "</div>"
+       ],
+       "output_type": "pyout",
+       "prompt_number": 2,
+       "text": [
+        "   Id  PatientId  BuddyId           DateAdded  QuestionId  ResponseId  Status\n",
+        "0   1          6      116 2013-01-01 00:00:00           3           1       1\n",
+        "1   1          3      116 2013-01-02 00:00:00           1           0       0\n",
+        "2   1          1      120 2013-01-03 00:00:00           3           3       0\n",
+        "3   1          6      111 2013-01-04 00:00:00           1           2       1\n",
+        "4   1          9      119 2013-01-05 00:00:00           3           1       0"
+       ]
+      }
+     ],
+     "prompt_number": 2
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "df.dtypes"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "output_type": "pyout",
+       "prompt_number": 3,
+       "text": [
+        "Id                   float64\n",
+        "PatientId            float64\n",
+        "BuddyId              float64\n",
+        "DateAdded     datetime64[ns]\n",
+        "QuestionId           float64\n",
+        "ResponseId           float64\n",
+        "Status               float64"
+       ]
+      }
+     ],
+     "prompt_number": 3
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "# Correct data types: the id/status columns were parsed as float64,\n",
+      "# so cast each of them to int (DateAdded is left untouched)\n",
+      "for column in ['Id', 'PatientId', 'BuddyId', 'QuestionId', 'ResponseId', 'Status']:\n",
+      "    df[column] = df[column].astype('int')\n",
+      "df.dtypes"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "output_type": "pyout",
+       "prompt_number": 4,
+       "text": [
+        "Id                     int64\n",
+        "PatientId              int64\n",
+        "BuddyId                int64\n",
+        "DateAdded     datetime64[ns]\n",
+        "QuestionId             int64\n",
+        "ResponseId             int64\n",
+        "Status                 int64"
+       ]
+      }
+     ],
+     "prompt_number": 4
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "df.tail()"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "html": [
+        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+        "<table border=\"1\" class=\"dataframe\">\n",
+        "  <thead>\n",
+        "    <tr style=\"text-align: right;\">\n",
+        "      <th></th>\n",
+        "      <th>Id</th>\n",
+        "      <th>PatientId</th>\n",
+        "      <th>BuddyId</th>\n",
+        "      <th>DateAdded</th>\n",
+        "      <th>QuestionId</th>\n",
+        "      <th>ResponseId</th>\n",
+        "      <th>Status</th>\n",
+        "    </tr>\n",
+        "  </thead>\n",
+        "  <tbody>\n",
+        "    <tr>\n",
+        "      <th>76</th>\n",
+        "      <td> 2</td>\n",
+        "      <td> 6</td>\n",
+        "      <td> 114</td>\n",
+        "      <td>2013-01-16 00:00:00</td>\n",
+        "      <td> 3</td>\n",
+        "      <td> 2</td>\n",
+        "      <td> 0</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>77</th>\n",
+        "      <td> 2</td>\n",
+        "      <td> 4</td>\n",
+        "      <td> 120</td>\n",
+        "      <td>2013-01-17 00:00:00</td>\n",
+        "      <td> 4</td>\n",
+        "      <td> 1</td>\n",
+        "      <td> 1</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>78</th>\n",
+        "      <td> 2</td>\n",
+        "      <td> 9</td>\n",
+        "      <td> 114</td>\n",
+        "      <td>2013-01-18 00:00:00</td>\n",
+        "      <td> 2</td>\n",
+        "      <td> 3</td>\n",
+        "      <td> 1</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>79</th>\n",
+        "      <td> 2</td>\n",
+        "      <td> 1</td>\n",
+        "      <td> 111</td>\n",
+        "      <td>2013-01-19 00:00:00</td>\n",
+        "      <td> 1</td>\n",
+        "      <td> 2</td>\n",
+        "      <td> 1</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>80</th>\n",
+        "      <td> 2</td>\n",
+        "      <td> 9</td>\n",
+        "      <td> 117</td>\n",
+        "      <td>2013-01-20 00:00:00</td>\n",
+        "      <td> 4</td>\n",
+        "      <td> 3</td>\n",
+        "      <td> 1</td>\n",
+        "    </tr>\n",
+        "  </tbody>\n",
+        "</table>\n",
+        "</div>"
+       ],
+       "output_type": "pyout",
+       "prompt_number": 5,
+       "text": [
+        "    Id  PatientId  BuddyId           DateAdded  QuestionId  ResponseId  Status\n",
+        "76   2          6      114 2013-01-16 00:00:00           3           2       0\n",
+        "77   2          4      120 2013-01-17 00:00:00           4           1       1\n",
+        "78   2          9      114 2013-01-18 00:00:00           2           3       1\n",
+        "79   2          1      111 2013-01-19 00:00:00           1           2       1\n",
+        "80   2          9      117 2013-01-20 00:00:00           4           3       1"
+       ]
+      }
+     ],
+     "prompt_number": 5
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "# Create df to json function\n",
+      "def to_json(data):\n",
+      "    \"\"\"convertes a data frame to json using the column names\n",
+      "       could be modified to work with an index\"\"\"\n",
+      "\n",
+      "    json_result=[ dict( [(key,str(value)) for key,value in zip(data.columns,row)]) for row in data.values]\n",
+      "        \n",
+      "    return json.dumps(json_result)"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [],
+     "prompt_number": 6
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "# Write json file\n",
+      "# Serialise first so a failure in to_json cannot leave a truncated, empty file behind;\n",
+      "# the with-block then closes the handle even if the write itself raises.\n",
+      "payload = to_json(df)\n",
+      "with open('Session.json','w') as f:\n",
+      "    f.write(payload)"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [],
+     "prompt_number": 7
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "jsonloc = r'C:\\Users\\David\\.xy\\startups\\Session.json'\n",
+      "\n",
+      "# read json file\n",
+      "json = [json.loads(line) for line in open(jsonloc)]"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [],
+     "prompt_number": 8
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "df2 = DataFrame([rec for rec in json[0]])\n",
+      "df2.head()"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "html": [
+        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+        "<table border=\"1\" class=\"dataframe\">\n",
+        "  <thead>\n",
+        "    <tr style=\"text-align: right;\">\n",
+        "      <th></th>\n",
+        "      <th>BuddyId</th>\n",
+        "      <th>DateAdded</th>\n",
+        "      <th>Id</th>\n",
+        "      <th>PatientId</th>\n",
+        "      <th>QuestionId</th>\n",
+        "      <th>ResponseId</th>\n",
+        "      <th>Status</th>\n",
+        "    </tr>\n",
+        "  </thead>\n",
+        "  <tbody>\n",
+        "    <tr>\n",
+        "      <th>0</th>\n",
+        "      <td> 116</td>\n",
+        "      <td> 2013-01-01 00:00:00</td>\n",
+        "      <td> 1</td>\n",
+        "      <td> 6</td>\n",
+        "      <td> 3</td>\n",
+        "      <td> 1</td>\n",
+        "      <td> 1</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>1</th>\n",
+        "      <td> 116</td>\n",
+        "      <td> 2013-01-02 00:00:00</td>\n",
+        "      <td> 1</td>\n",
+        "      <td> 3</td>\n",
+        "      <td> 1</td>\n",
+        "      <td> 0</td>\n",
+        "      <td> 0</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2</th>\n",
+        "      <td> 120</td>\n",
+        "      <td> 2013-01-03 00:00:00</td>\n",
+        "      <td> 1</td>\n",
+        "      <td> 1</td>\n",
+        "      <td> 3</td>\n",
+        "      <td> 3</td>\n",
+        "      <td> 0</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>3</th>\n",
+        "      <td> 111</td>\n",
+        "      <td> 2013-01-04 00:00:00</td>\n",
+        "      <td> 1</td>\n",
+        "      <td> 6</td>\n",
+        "      <td> 1</td>\n",
+        "      <td> 2</td>\n",
+        "      <td> 1</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>4</th>\n",
+        "      <td> 119</td>\n",
+        "      <td> 2013-01-05 00:00:00</td>\n",
+        "      <td> 1</td>\n",
+        "      <td> 9</td>\n",
+        "      <td> 3</td>\n",
+        "      <td> 1</td>\n",
+        "      <td> 0</td>\n",
+        "    </tr>\n",
+        "  </tbody>\n",
+        "</table>\n",
+        "</div>"
+       ],
+       "output_type": "pyout",
+       "prompt_number": 11,
+       "text": [
+        "  BuddyId            DateAdded Id PatientId QuestionId ResponseId Status\n",
+        "0     116  2013-01-01 00:00:00  1         6          3          1      1\n",
+        "1     116  2013-01-02 00:00:00  1         3          1          0      0\n",
+        "2     120  2013-01-03 00:00:00  1         1          3          3      0\n",
+        "3     111  2013-01-04 00:00:00  1         6          1          2      1\n",
+        "4     119  2013-01-05 00:00:00  1         9          3          1      0"
+       ]
+      }
+     ],
+     "prompt_number": 11
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "df2.dtypes"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "output_type": "pyout",
+       "prompt_number": 13,
+       "text": [
+        "BuddyId       object\n",
+        "DateAdded     object\n",
+        "Id            object\n",
+        "PatientId     object\n",
+        "QuestionId    object\n",
+        "ResponseId    object\n",
+        "Status        object"
+       ]
+      }
+     ],
+     "prompt_number": 13
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "# Correct data types: every column came back from JSON as object,\n",
+      "# so cast the id/status columns to int (DateAdded is left untouched)\n",
+      "for column in ['Id', 'PatientId', 'BuddyId', 'QuestionId', 'ResponseId', 'Status']:\n",
+      "    df2[column] = df2[column].astype('int')\n",
+      "df2.dtypes"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "output_type": "pyout",
+       "prompt_number": 14,
+       "text": [
+        "BuddyId        int64\n",
+        "DateAdded     object\n",
+        "Id             int64\n",
+        "PatientId      int64\n",
+        "QuestionId     int64\n",
+        "ResponseId     int64\n",
+        "Status         int64"
+       ]
+      }
+     ],
+     "prompt_number": 14
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "df2.tail()"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "html": [
+        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+        "<table border=\"1\" class=\"dataframe\">\n",
+        "  <thead>\n",
+        "    <tr style=\"text-align: right;\">\n",
+        "      <th></th>\n",
+        "      <th>BuddyId</th>\n",
+        "      <th>DateAdded</th>\n",
+        "      <th>Id</th>\n",
+        "      <th>PatientId</th>\n",
+        "      <th>QuestionId</th>\n",
+        "      <th>ResponseId</th>\n",
+        "      <th>Status</th>\n",
+        "    </tr>\n",
+        "  </thead>\n",
+        "  <tbody>\n",
+        "    <tr>\n",
+        "      <th>76</th>\n",
+        "      <td> 114</td>\n",
+        "      <td> 2013-01-16 00:00:00</td>\n",
+        "      <td> 2</td>\n",
+        "      <td> 6</td>\n",
+        "      <td> 3</td>\n",
+        "      <td> 2</td>\n",
+        "      <td> 0</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>77</th>\n",
+        "      <td> 120</td>\n",
+        "      <td> 2013-01-17 00:00:00</td>\n",
+        "      <td> 2</td>\n",
+        "      <td> 4</td>\n",
+        "      <td> 4</td>\n",
+        "      <td> 1</td>\n",
+        "      <td> 1</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>78</th>\n",
+        "      <td> 114</td>\n",
+        "      <td> 2013-01-18 00:00:00</td>\n",
+        "      <td> 2</td>\n",
+        "      <td> 9</td>\n",
+        "      <td> 2</td>\n",
+        "      <td> 3</td>\n",
+        "      <td> 1</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>79</th>\n",
+        "      <td> 111</td>\n",
+        "      <td> 2013-01-19 00:00:00</td>\n",
+        "      <td> 2</td>\n",
+        "      <td> 1</td>\n",
+        "      <td> 1</td>\n",
+        "      <td> 2</td>\n",
+        "      <td> 1</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>80</th>\n",
+        "      <td> 117</td>\n",
+        "      <td> 2013-01-20 00:00:00</td>\n",
+        "      <td> 2</td>\n",
+        "      <td> 9</td>\n",
+        "      <td> 4</td>\n",
+        "      <td> 3</td>\n",
+        "      <td> 1</td>\n",
+        "    </tr>\n",
+        "  </tbody>\n",
+        "</table>\n",
+        "</div>"
+       ],
+       "output_type": "pyout",
+       "prompt_number": 15,
+       "text": [
+        "    BuddyId            DateAdded  Id  PatientId  QuestionId  ResponseId  Status\n",
+        "76      114  2013-01-16 00:00:00   2          6           3           2       0\n",
+        "77      120  2013-01-17 00:00:00   2          4           4           1       1\n",
+        "78      114  2013-01-18 00:00:00   2          9           2           3       1\n",
+        "79      111  2013-01-19 00:00:00   2          1           1           2       1\n",
+        "80      117  2013-01-20 00:00:00   2          9           4           3       1"
+       ]
+      }
+     ],
+     "prompt_number": 15
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [],
+     "language": "python",
+     "metadata": {},
+     "outputs": []
+    }
+   ],
+   "metadata": {}
+  }
+ ]
+}