Anonymous avatar Anonymous committed 738347f

sql

Comments (0)

Files changed (1)

lessons/Pandas for SQL Developers.ipynb

+{
+ "metadata": {
+  "name": ""
+ },
+ "nbformat": 3,
+ "nbformat_minor": 0,
+ "worksheets": [
+  {
+   "cells": [
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "# Pandas set up"
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "# Import libraries\n",
+      "from pandas import DataFrame\n",
+      "import pandas as pd\n",
+      "import datetime\n",
+      "%matplotlib inline\n",
+      "\n",
+      "print 'Pandas version: ' + pd.__version__"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "output_type": "stream",
+       "stream": "stdout",
+       "text": [
+        "Pandas version: 0.12.0\n"
+       ]
+      }
+     ],
+     "prompt_number": 1
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "# Create dummy data for examples\n",
+      "df = DataFrame(data = {'Column1':[1,2,3],\n",
+      "                       'Column2':['a','b','c'],\n",
+      "                       'Column3':[pd.datetime(2013,1,1), pd.datetime(2013,1,1), pd.datetime(2013,1,1)],\n",
+      "                       'Column4':[1,2,3]})\n",
+      "\n",
+      "df"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "html": [
+        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+        "<table border=\"1\" class=\"dataframe\">\n",
+        "  <thead>\n",
+        "    <tr style=\"text-align: right;\">\n",
+        "      <th></th>\n",
+        "      <th>Column1</th>\n",
+        "      <th>Column2</th>\n",
+        "      <th>Column3</th>\n",
+        "      <th>Column4</th>\n",
+        "    </tr>\n",
+        "  </thead>\n",
+        "  <tbody>\n",
+        "    <tr>\n",
+        "      <th>0</th>\n",
+        "      <td> 1</td>\n",
+        "      <td> a</td>\n",
+        "      <td>2013-01-01 00:00:00</td>\n",
+        "      <td> 1</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>1</th>\n",
+        "      <td> 2</td>\n",
+        "      <td> b</td>\n",
+        "      <td>2013-01-01 00:00:00</td>\n",
+        "      <td> 2</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2</th>\n",
+        "      <td> 3</td>\n",
+        "      <td> c</td>\n",
+        "      <td>2013-01-01 00:00:00</td>\n",
+        "      <td> 3</td>\n",
+        "    </tr>\n",
+        "  </tbody>\n",
+        "</table>\n",
+        "</div>"
+       ],
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 2,
+       "text": [
+        "   Column1 Column2             Column3  Column4\n",
+        "0        1       a 2013-01-01 00:00:00        1\n",
+        "1        2       b 2013-01-01 00:00:00        2\n",
+        "2        3       c 2013-01-01 00:00:00        3"
+       ]
+      }
+     ],
+     "prompt_number": 2
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "# How to declare variables\n",
+      "# How to update variables\n",
+      "# How to update a table\n",
+      "# How to get current date, yesterday, last year\n",
+      "# How to get first of month or last day of month\n",
+      "# How to insert into a table from another table\n",
+      "# How to join two tables\n",
+      "# How to select n number of rows\n",
+      "# How to select rows in ascending/descending order\n",
+      "# How to select unique vales (no dups)\n",
+      "# How to write a case statement within an update\n",
+      "# How to check for NULL values\n",
+      "# How to use the Keyword \"IN\"\n",
+      "# How to count all of the rows in a table\n",
+      "# How to delete contents of a table\n",
+      "# How to select the smallest/largest value in a column\n",
+      "# How to string match"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [],
+     "prompt_number": 3
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "# How to declare variables"
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "## SQL\n",
+      "\n",
+      "\"\"\"\n",
+      "Declare @Var1 datetime\n",
+      "Declare @Var2 char(20)\n",
+      "Declare @Var3 nvarchar(20)\n",
+      "Declare @Var4 int\n",
+      "Declare @Var5 decimal (18,2)\n",
+      "Declare @Var6 bit\n",
+      "\"\"\"\n",
+      "\n",
+      "## Pandas\n",
+      "\n",
+      "Var1 = pd.to_datetime(datetime.date.today())\n",
+      "Var2 = 'hello'\n",
+      "Var3 = 1\n",
+      "Var4 = 1.5"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [],
+     "prompt_number": 4
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "# How to update variables"
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "## SQL\n",
+      "\n",
+      "\"\"\"\n",
+      "Declare @Var2 char(20)\n",
+      "\tSet @Var2 = 'hello'\n",
+      "\"\"\"\n",
+      "\n",
+      "## Pandas\n",
+      "\n",
+      "Var2 = 'hi' #declare\n",
+      "Var2 = 'hello' # update"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [],
+     "prompt_number": 5
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "# How to update a table"
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "## SQL\n",
+      "\n",
+      "\"\"\"\n",
+      "--ver1:\n",
+      "UPDATE tblName\n",
+      "\tSET Column1 = 1,\n",
+      "\tColumn2 = 'NERevenueCode',\n",
+      "\tColumn3 = getdate()\n",
+      "WHERE Column4 = 'NEAccSummaryLoadMain'\n",
+      "\n",
+      "--ver2:\n",
+      "UPDATE cc\n",
+      "\tSET cc.Column1 = 1,\n",
+      "\tcc.Column2 = 'NERevenueCode',\n",
+      "\tcc.Column3 = getdate()\n",
+      "FROM tblName cc\n",
+      "WHERE cc.Column4 = 'NEAccSummaryLoadMain'\n",
+      "\"\"\"\n",
+      "\n",
+      "## Pandas\n",
+      "\n",
+      "# Create where statement\n",
+      "# mask = where Column4 is greater than 1\n",
+      "mask = df['Column4'] > 1\n",
+      "\n",
+      "# Update Column where Column4 is greater than 1\n",
+      "df['Column1'][mask] = 1\n",
+      "df['Column2'][mask] = 'abc'\n",
+      "df['Column3'][mask] = pd.to_datetime(datetime.date.today())\n",
+      "df"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "html": [
+        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+        "<table border=\"1\" class=\"dataframe\">\n",
+        "  <thead>\n",
+        "    <tr style=\"text-align: right;\">\n",
+        "      <th></th>\n",
+        "      <th>Column1</th>\n",
+        "      <th>Column2</th>\n",
+        "      <th>Column3</th>\n",
+        "      <th>Column4</th>\n",
+        "    </tr>\n",
+        "  </thead>\n",
+        "  <tbody>\n",
+        "    <tr>\n",
+        "      <th>0</th>\n",
+        "      <td> 1</td>\n",
+        "      <td>   a</td>\n",
+        "      <td>2013-01-01 00:00:00</td>\n",
+        "      <td> 1</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>1</th>\n",
+        "      <td> 1</td>\n",
+        "      <td> abc</td>\n",
+        "      <td>2013-10-02 00:00:00</td>\n",
+        "      <td> 2</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2</th>\n",
+        "      <td> 1</td>\n",
+        "      <td> abc</td>\n",
+        "      <td>2013-10-02 00:00:00</td>\n",
+        "      <td> 3</td>\n",
+        "    </tr>\n",
+        "  </tbody>\n",
+        "</table>\n",
+        "</div>"
+       ],
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 6,
+       "text": [
+        "   Column1 Column2             Column3  Column4\n",
+        "0        1       a 2013-01-01 00:00:00        1\n",
+        "1        1     abc 2013-10-02 00:00:00        2\n",
+        "2        1     abc 2013-10-02 00:00:00        3"
+       ]
+      }
+     ],
+     "prompt_number": 6
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "# How to get current date, yesterday, last year"
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "## SQL\n",
+      "\n",
+      "\"\"\"\n",
+      "-- Date functions\n",
+      "SELECT getdate() -- Todays date with time stamp\n",
+      "SELECT dateadd (dd, datediff (dd, 0, GetDate()), 0) --Todays date with no time stamp \n",
+      "SELECT dateadd (dd, datediff (dd, 0, GetDate()), -1) --Yesterdays date with no time stamp \n",
+      "SELECT DateAdd (yy, -1, DateDiff(dd, 0, GetDate())) --One year ago no timestamp\n",
+      "\"\"\"\n",
+      "\n",
+      "## Pandas\n",
+      "\n",
+      "TodayTimeStamp = pd.to_datetime(datetime.datetime.now().strftime(\"%Y-%m-%d %H:%M:%S\"))\n",
+      "Today = pd.to_datetime(datetime.datetime.now().strftime(\"%Y-%m-%d\"))\n",
+      "YesterdayTimeStamp = pd.to_datetime(datetime.datetime.today()-datetime.timedelta(days=1))\n",
+      "Yesterday = pd.to_datetime(datetime.date.today()-datetime.timedelta(days=1))\n",
+      "OneYearTimeStamp = pd.to_datetime(datetime.datetime.today().replace(year=datetime.datetime.today().year - 1))\n",
+      "OneYear = pd.to_datetime(datetime.date.today().replace(year=datetime.date.today().year - 1))\n",
+      "\n",
+      "print 'TodayTimeStamp ' + str(TodayTimeStamp)\n",
+      "print 'Today ' + str(Today)\n",
+      "print 'YesterdayTimeStamp ' + str(YesterdayTimeStamp)\n",
+      "print 'Yesterday ' + str(Yesterday)\n",
+      "print 'OneYearTimeStamp ' + str(OneYearTimeStamp)\n",
+      "print 'OneYear ' + str(OneYear)"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "output_type": "stream",
+       "stream": "stdout",
+       "text": [
+        "TodayTimeStamp 2013-10-02 10:34:34\n",
+        "Today 2013-10-02 00:00:00\n",
+        "YesterdayTimeStamp 2013-10-01 10:34:34.188000\n",
+        "Yesterday 2013-10-01 00:00:00\n",
+        "OneYearTimeStamp 2012-10-02 10:34:34.188000\n",
+        "OneYear 2012-10-02 00:00:00\n"
+       ]
+      }
+     ],
+     "prompt_number": 7
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "# How to get first of month or last day of month"
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "## SQL\n",
+      "\n",
+      "\"\"\"\n",
+      "--Declare variables\n",
+      "Declare @BeginDate datetime\n",
+      "Declare @EndDate datetime\n",
+      "\n",
+      "-- set @BeginDate = first day of last month\n",
+      "Set @BeginDate = DateAdd(mm, DateDiff(mm, 0, GetDate()) - 1, 0)\n",
+      "\n",
+      "-- set @EndDate = last day of last month\n",
+      "Set @EndDate = DateAdd(dd, -1, (DateAdd(mm, 1, @BeginDate)))\n",
+      "\"\"\"\n",
+      "\n",
+      "## Pandas\n",
+      "\n",
+      "FirstDay = pd.to_datetime(datetime.date.today().replace(month=datetime.date.today().month, day=1))\n",
+      "LastDay = FirstDay.replace(month=FirstDay.month+1)-datetime.timedelta(days=1)\n",
+      "\n",
+      "print 'FirstDay ' + str(FirstDay)\n",
+      "print 'LastDay ' + str(LastDay)"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "output_type": "stream",
+       "stream": "stdout",
+       "text": [
+        "FirstDay 2013-10-01 00:00:00\n",
+        "LastDay 2013-10-31 00:00:00\n"
+       ]
+      }
+     ],
+     "prompt_number": 8
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "# How to insert into a table from another table"
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "## SQL\n",
+      "\n",
+      "\"\"\"\n",
+      "IF OBJECT_ID('tempdb..#tblName','u') IS NOT NULL\n",
+      "BEGIN\n",
+      "DROP TABLE #tblName\n",
+      "END\n",
+      "CREATE TABLE #tblName \n",
+      "(\n",
+      "\tVar1 varchar (25) PRIMARY KEY,\n",
+      "\tVar2 datetime\n",
+      ")\n",
+      "\n",
+      "IF OBJECT_ID('tempdb..#tblName2','u') IS NOT NULL\n",
+      "BEGIN\n",
+      "DROP TABLE #tblName2\n",
+      "END\n",
+      "CREATE TABLE #tblName2\n",
+      "(\n",
+      "\tVar1 varchar (25) PRIMARY KEY,\n",
+      "\tVar2 datetime\n",
+      ")\n",
+      "\n",
+      "INSERT #tblName2 (Var1, Var2)\n",
+      "SELECT 'tom',\n",
+      "\t\tgetdate() \n",
+      "\n",
+      "INSERT #tblName (Var1, Var2)\t\t\n",
+      "SELECT Var1,\n",
+      "\t\tVar2\n",
+      "FROM #tblName2\t\n",
+      "\n",
+      "SELECT * FROM #tblName\n",
+      "\"\"\"\n",
+      "\n",
+      "## Pandas\n",
+      "\n",
+      "# Insert into new data frame using the data from df\n",
+      "df2 = df\n",
+      "df"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "html": [
+        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+        "<table border=\"1\" class=\"dataframe\">\n",
+        "  <thead>\n",
+        "    <tr style=\"text-align: right;\">\n",
+        "      <th></th>\n",
+        "      <th>Column1</th>\n",
+        "      <th>Column2</th>\n",
+        "      <th>Column3</th>\n",
+        "      <th>Column4</th>\n",
+        "    </tr>\n",
+        "  </thead>\n",
+        "  <tbody>\n",
+        "    <tr>\n",
+        "      <th>0</th>\n",
+        "      <td> 1</td>\n",
+        "      <td>   a</td>\n",
+        "      <td>2013-01-01 00:00:00</td>\n",
+        "      <td> 1</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>1</th>\n",
+        "      <td> 1</td>\n",
+        "      <td> abc</td>\n",
+        "      <td>2013-10-02 00:00:00</td>\n",
+        "      <td> 2</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2</th>\n",
+        "      <td> 1</td>\n",
+        "      <td> abc</td>\n",
+        "      <td>2013-10-02 00:00:00</td>\n",
+        "      <td> 3</td>\n",
+        "    </tr>\n",
+        "  </tbody>\n",
+        "</table>\n",
+        "</div>"
+       ],
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 9,
+       "text": [
+        "   Column1 Column2             Column3  Column4\n",
+        "0        1       a 2013-01-01 00:00:00        1\n",
+        "1        1     abc 2013-10-02 00:00:00        2\n",
+        "2        1     abc 2013-10-02 00:00:00        3"
+       ]
+      }
+     ],
+     "prompt_number": 9
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "# How to join two tables"
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "## SQL\n",
+      "\n",
+      "\"\"\"\n",
+      "-- Only returns if col1 matches col2\n",
+      "SELECT a.*\n",
+      "FROM tbl1 a\n",
+      "JOIN tbl2 b on (a.Col1 = b.Col2)\n",
+      "\n",
+      "-- Returns all rows from tbl1 even if col1 does not match col2\n",
+      "SELECT a.*\n",
+      "FROM tbl1 a\n",
+      "LEFT JOIN tbl2 b on (a.Col1 = b.Col2)\n",
+      "\"\"\"\n",
+      "\n",
+      "## Pandas\n",
+      "\n",
+      "# inner join\n",
+      "df.merge(df2,left_on='Column1',right_on='Column1')\n",
+      "\n",
+      "# left join\n",
+      "df.merge(df2,left_on='Column1',right_on='Column1',how='left')"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "html": [
+        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+        "<table border=\"1\" class=\"dataframe\">\n",
+        "  <thead>\n",
+        "    <tr style=\"text-align: right;\">\n",
+        "      <th></th>\n",
+        "      <th>Column1</th>\n",
+        "      <th>Column2_x</th>\n",
+        "      <th>Column3_x</th>\n",
+        "      <th>Column4_x</th>\n",
+        "      <th>Column2_y</th>\n",
+        "      <th>Column3_y</th>\n",
+        "      <th>Column4_y</th>\n",
+        "    </tr>\n",
+        "  </thead>\n",
+        "  <tbody>\n",
+        "    <tr>\n",
+        "      <th>0</th>\n",
+        "      <td> 1</td>\n",
+        "      <td>   a</td>\n",
+        "      <td>2013-01-01 00:00:00</td>\n",
+        "      <td> 1</td>\n",
+        "      <td>   a</td>\n",
+        "      <td>2013-01-01 00:00:00</td>\n",
+        "      <td> 1</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>1</th>\n",
+        "      <td> 1</td>\n",
+        "      <td>   a</td>\n",
+        "      <td>2013-01-01 00:00:00</td>\n",
+        "      <td> 1</td>\n",
+        "      <td> abc</td>\n",
+        "      <td>2013-10-02 00:00:00</td>\n",
+        "      <td> 2</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2</th>\n",
+        "      <td> 1</td>\n",
+        "      <td>   a</td>\n",
+        "      <td>2013-01-01 00:00:00</td>\n",
+        "      <td> 1</td>\n",
+        "      <td> abc</td>\n",
+        "      <td>2013-10-02 00:00:00</td>\n",
+        "      <td> 3</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>3</th>\n",
+        "      <td> 1</td>\n",
+        "      <td> abc</td>\n",
+        "      <td>2013-10-02 00:00:00</td>\n",
+        "      <td> 2</td>\n",
+        "      <td>   a</td>\n",
+        "      <td>2013-01-01 00:00:00</td>\n",
+        "      <td> 1</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>4</th>\n",
+        "      <td> 1</td>\n",
+        "      <td> abc</td>\n",
+        "      <td>2013-10-02 00:00:00</td>\n",
+        "      <td> 2</td>\n",
+        "      <td> abc</td>\n",
+        "      <td>2013-10-02 00:00:00</td>\n",
+        "      <td> 2</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>5</th>\n",
+        "      <td> 1</td>\n",
+        "      <td> abc</td>\n",
+        "      <td>2013-10-02 00:00:00</td>\n",
+        "      <td> 2</td>\n",
+        "      <td> abc</td>\n",
+        "      <td>2013-10-02 00:00:00</td>\n",
+        "      <td> 3</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>6</th>\n",
+        "      <td> 1</td>\n",
+        "      <td> abc</td>\n",
+        "      <td>2013-10-02 00:00:00</td>\n",
+        "      <td> 3</td>\n",
+        "      <td>   a</td>\n",
+        "      <td>2013-01-01 00:00:00</td>\n",
+        "      <td> 1</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>7</th>\n",
+        "      <td> 1</td>\n",
+        "      <td> abc</td>\n",
+        "      <td>2013-10-02 00:00:00</td>\n",
+        "      <td> 3</td>\n",
+        "      <td> abc</td>\n",
+        "      <td>2013-10-02 00:00:00</td>\n",
+        "      <td> 2</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>8</th>\n",
+        "      <td> 1</td>\n",
+        "      <td> abc</td>\n",
+        "      <td>2013-10-02 00:00:00</td>\n",
+        "      <td> 3</td>\n",
+        "      <td> abc</td>\n",
+        "      <td>2013-10-02 00:00:00</td>\n",
+        "      <td> 3</td>\n",
+        "    </tr>\n",
+        "  </tbody>\n",
+        "</table>\n",
+        "</div>"
+       ],
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 10,
+       "text": [
+        "   Column1 Column2_x           Column3_x  Column4_x Column2_y  \\\n",
+        "0        1         a 2013-01-01 00:00:00          1         a   \n",
+        "1        1         a 2013-01-01 00:00:00          1       abc   \n",
+        "2        1         a 2013-01-01 00:00:00          1       abc   \n",
+        "3        1       abc 2013-10-02 00:00:00          2         a   \n",
+        "4        1       abc 2013-10-02 00:00:00          2       abc   \n",
+        "5        1       abc 2013-10-02 00:00:00          2       abc   \n",
+        "6        1       abc 2013-10-02 00:00:00          3         a   \n",
+        "7        1       abc 2013-10-02 00:00:00          3       abc   \n",
+        "8        1       abc 2013-10-02 00:00:00          3       abc   \n",
+        "\n",
+        "            Column3_y  Column4_y  \n",
+        "0 2013-01-01 00:00:00          1  \n",
+        "1 2013-10-02 00:00:00          2  \n",
+        "2 2013-10-02 00:00:00          3  \n",
+        "3 2013-01-01 00:00:00          1  \n",
+        "4 2013-10-02 00:00:00          2  \n",
+        "5 2013-10-02 00:00:00          3  \n",
+        "6 2013-01-01 00:00:00          1  \n",
+        "7 2013-10-02 00:00:00          2  \n",
+        "8 2013-10-02 00:00:00          3  "
+       ]
+      }
+     ],
+     "prompt_number": 10
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "# How to select n number of rows"
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "## SQL\n",
+      "\n",
+      "\"\"\"\n",
+      "-- selects only n number of rows\n",
+      "-- does not mean they will be in asc or desc order\n",
+      "SELECT top 500 Col1\n",
+      "FROM tbl1\n",
+      "\"\"\"\n",
+      "\n",
+      "## Pandas\n",
+      "\n",
+      "df.head(500)\n",
+      "df.tail(500)"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "html": [
+        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+        "<table border=\"1\" class=\"dataframe\">\n",
+        "  <thead>\n",
+        "    <tr style=\"text-align: right;\">\n",
+        "      <th></th>\n",
+        "      <th>Column1</th>\n",
+        "      <th>Column2</th>\n",
+        "      <th>Column3</th>\n",
+        "      <th>Column4</th>\n",
+        "    </tr>\n",
+        "  </thead>\n",
+        "  <tbody>\n",
+        "    <tr>\n",
+        "      <th>0</th>\n",
+        "      <td> 1</td>\n",
+        "      <td>   a</td>\n",
+        "      <td>2013-01-01 00:00:00</td>\n",
+        "      <td> 1</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>1</th>\n",
+        "      <td> 1</td>\n",
+        "      <td> abc</td>\n",
+        "      <td>2013-10-02 00:00:00</td>\n",
+        "      <td> 2</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2</th>\n",
+        "      <td> 1</td>\n",
+        "      <td> abc</td>\n",
+        "      <td>2013-10-02 00:00:00</td>\n",
+        "      <td> 3</td>\n",
+        "    </tr>\n",
+        "  </tbody>\n",
+        "</table>\n",
+        "</div>"
+       ],
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 11,
+       "text": [
+        "   Column1 Column2             Column3  Column4\n",
+        "0        1       a 2013-01-01 00:00:00        1\n",
+        "1        1     abc 2013-10-02 00:00:00        2\n",
+        "2        1     abc 2013-10-02 00:00:00        3"
+       ]
+      }
+     ],
+     "prompt_number": 11
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "# How to select rows in ascending/descending order"
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "## SQL\n",
+      "\n",
+      "\"\"\"\n",
+      "-- Select rows in ascending order\n",
+      "SELECT *\n",
+      "FROM tbl1\n",
+      "ORDER BY Col1 ASC\n",
+      "\n",
+      "-- Select rows in descending order\n",
+      "SELECT *\n",
+      "FROM tbl1\n",
+      "ORDER BY Col1 DESC\n",
+      "\"\"\"\n",
+      "\n",
+      "## Pandas\n",
+      "\n",
+      "# order by Column4 ascending\n",
+      "df.sort(ascending=True,columns=['Column4'])\n",
+      "\n",
+      "# order by Column4 descending\n",
+      "df.sort(ascending=False,columns=['Column4'])"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "html": [
+        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+        "<table border=\"1\" class=\"dataframe\">\n",
+        "  <thead>\n",
+        "    <tr style=\"text-align: right;\">\n",
+        "      <th></th>\n",
+        "      <th>Column1</th>\n",
+        "      <th>Column2</th>\n",
+        "      <th>Column3</th>\n",
+        "      <th>Column4</th>\n",
+        "    </tr>\n",
+        "  </thead>\n",
+        "  <tbody>\n",
+        "    <tr>\n",
+        "      <th>2</th>\n",
+        "      <td> 1</td>\n",
+        "      <td> abc</td>\n",
+        "      <td>2013-10-02 00:00:00</td>\n",
+        "      <td> 3</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>1</th>\n",
+        "      <td> 1</td>\n",
+        "      <td> abc</td>\n",
+        "      <td>2013-10-02 00:00:00</td>\n",
+        "      <td> 2</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>0</th>\n",
+        "      <td> 1</td>\n",
+        "      <td>   a</td>\n",
+        "      <td>2013-01-01 00:00:00</td>\n",
+        "      <td> 1</td>\n",
+        "    </tr>\n",
+        "  </tbody>\n",
+        "</table>\n",
+        "</div>"
+       ],
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 12,
+       "text": [
+        "   Column1 Column2             Column3  Column4\n",
+        "2        1     abc 2013-10-02 00:00:00        3\n",
+        "1        1     abc 2013-10-02 00:00:00        2\n",
+        "0        1       a 2013-01-01 00:00:00        1"
+       ]
+      }
+     ],
+     "prompt_number": 12
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "# How to select unique vales (no dups)"
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "# SQL\n",
+      "\n",
+      "\"\"\"\n",
+      "SELECT DISTINCT Col1\n",
+      "FROM tbl1\n",
+      "\"\"\"\n",
+      "\n",
+      "# Pandas\n",
+      "\n",
+      "df['Column1'].unique()"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 13,
+       "text": [
+        "array([1], dtype=int64)"
+       ]
+      }
+     ],
+     "prompt_number": 13
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "# How to write a case statement within an update"
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "## SQL\n",
+      "\n",
+      "\"\"\"\n",
+      "UPDATE dr\n",
+      "      SET dr.Col1 =\n",
+      "\t\t\tCASE\n",
+      "\t\t\t\tWHEN (SELECT @Var1) > 0 THEN 'Residential'\n",
+      "\t\t\t\tELSE 'Commercial'\n",
+      "\t\t\tEND\n",
+      "      FROM tblName dr\n",
+      "\"\"\"\n",
+      "\n",
+      "## Pandas\n",
+      "\n",
+      "# Set Column1 = 10 if value is equal to 2\n",
+      "# Set Column1 = 20 if value is equal to 3\n",
+      "# else Set Column1 = its current value\n",
+      "df['Column1'].apply(lambda x: 10 if x==2 else (20 if x==3 else x))"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 14,
+       "text": [
+        "0    1\n",
+        "1    1\n",
+        "2    1\n",
+        "Name: Column1, dtype: int64"
+       ]
+      }
+     ],
+     "prompt_number": 14
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "# How to check for NULL values"
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "## SQL\n",
+      "\n",
+      "\"\"\"\n",
+      "--function ISNULL (Var1, 0)\t\n",
+      "-- if Var1 is null then assign it a zero, or whatever you want\n",
+      "SELECt ISNULL (Col1, 0) FROM tblName\n",
+      "\n",
+      "-- you can also just check the column without modifying it\n",
+      "SELECT * \n",
+      "FROM tblName\n",
+      "WHERE Col1 IS NOT NULL -- select everything where Col1 is not null\n",
+      "\n",
+      "SELECT * \n",
+      "FROM tblName\n",
+      "WHERE Col1 IS NULL -- select everything where Col1 is null\n",
+      "\"\"\"\n",
+      "\n",
+      "## Pandas\n",
+      "\n",
+      "# Only return non null values\n",
+      "df.dropna()\n",
+      "\n",
+      "# Check if column has any null values\n",
+      "df['Column1'].isnull()\n",
+      "\n",
+      "# Only return rows that are not null\n",
+      "df[~df['Column1'].isnull()]\n",
+      "\n",
+      "# Fill null values with a zero\n",
+      "df.fillna(0)"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "html": [
+        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+        "<table border=\"1\" class=\"dataframe\">\n",
+        "  <thead>\n",
+        "    <tr style=\"text-align: right;\">\n",
+        "      <th></th>\n",
+        "      <th>Column1</th>\n",
+        "      <th>Column2</th>\n",
+        "      <th>Column3</th>\n",
+        "      <th>Column4</th>\n",
+        "    </tr>\n",
+        "  </thead>\n",
+        "  <tbody>\n",
+        "    <tr>\n",
+        "      <th>0</th>\n",
+        "      <td> 1</td>\n",
+        "      <td>   a</td>\n",
+        "      <td>2013-01-01 00:00:00</td>\n",
+        "      <td> 1</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>1</th>\n",
+        "      <td> 1</td>\n",
+        "      <td> abc</td>\n",
+        "      <td>2013-10-02 00:00:00</td>\n",
+        "      <td> 2</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2</th>\n",
+        "      <td> 1</td>\n",
+        "      <td> abc</td>\n",
+        "      <td>2013-10-02 00:00:00</td>\n",
+        "      <td> 3</td>\n",
+        "    </tr>\n",
+        "  </tbody>\n",
+        "</table>\n",
+        "</div>"
+       ],
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 15,
+       "text": [
+        "   Column1 Column2             Column3  Column4\n",
+        "0        1       a 2013-01-01 00:00:00        1\n",
+        "1        1     abc 2013-10-02 00:00:00        2\n",
+        "2        1     abc 2013-10-02 00:00:00        3"
+       ]
+      }
+     ],
+     "prompt_number": 15
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "# How to use the Keyword \"IN\""
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "## SQL\n",
+      "\n",
+      "\"\"\"\n",
+      "SELECT * FROM tbl WHERE Col1 IN (1,2,3)\n",
+      "\"\"\"\n",
+      "\n",
+      "## Pandas\n",
+      "\n",
+      "mask = df['Column1'].isin([1,2,3])\n",
+      "df[mask]"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "html": [
+        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+        "<table border=\"1\" class=\"dataframe\">\n",
+        "  <thead>\n",
+        "    <tr style=\"text-align: right;\">\n",
+        "      <th></th>\n",
+        "      <th>Column1</th>\n",
+        "      <th>Column2</th>\n",
+        "      <th>Column3</th>\n",
+        "      <th>Column4</th>\n",
+        "    </tr>\n",
+        "  </thead>\n",
+        "  <tbody>\n",
+        "    <tr>\n",
+        "      <th>0</th>\n",
+        "      <td> 1</td>\n",
+        "      <td>   a</td>\n",
+        "      <td>2013-01-01 00:00:00</td>\n",
+        "      <td> 1</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>1</th>\n",
+        "      <td> 1</td>\n",
+        "      <td> abc</td>\n",
+        "      <td>2013-10-02 00:00:00</td>\n",
+        "      <td> 2</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2</th>\n",
+        "      <td> 1</td>\n",
+        "      <td> abc</td>\n",
+        "      <td>2013-10-02 00:00:00</td>\n",
+        "      <td> 3</td>\n",
+        "    </tr>\n",
+        "  </tbody>\n",
+        "</table>\n",
+        "</div>"
+       ],
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 16,
+       "text": [
+        "   Column1 Column2             Column3  Column4\n",
+        "0        1       a 2013-01-01 00:00:00        1\n",
+        "1        1     abc 2013-10-02 00:00:00        2\n",
+        "2        1     abc 2013-10-02 00:00:00        3"
+       ]
+      }
+     ],
+     "prompt_number": 16
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "# How to count all of the rows in a table"
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "## SQL\n",
+      "\n",
+      "\"\"\"\n",
+      "SELECT COUNT(*) FROM tblName\n",
+      "\"\"\"\n",
+      "\n",
+      "## Pandas\n",
+      "\n",
+      "df.count()"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 17,
+       "text": [
+        "Column1    3\n",
+        "Column2    3\n",
+        "Column3    3\n",
+        "Column4    3\n",
+        "dtype: int64"
+       ]
+      }
+     ],
+     "prompt_number": 17
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "# How to delete contents of a table"
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "## SQL\n",
+      "\n",
+      "\"\"\"\n",
+      "DELETE tblName -- deletes all contents of table\n",
+      "\n",
+      "DELETE tblName -- deletes only rows where col1 = 0\n",
+      "WHERE col1 = 0\n",
+      "\"\"\"\n",
+      "\n",
+      "## Pandas\n",
+      "\n",
+      "# delete any rows where column Var2 is less than 1/2/2013\n",
+      "mask = df['Column3'] > pd.to_datetime('2013-1-1')\n",
+      "df[mask]"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "html": [
+        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+        "<table border=\"1\" class=\"dataframe\">\n",
+        "  <thead>\n",
+        "    <tr style=\"text-align: right;\">\n",
+        "      <th></th>\n",
+        "      <th>Column1</th>\n",
+        "      <th>Column2</th>\n",
+        "      <th>Column3</th>\n",
+        "      <th>Column4</th>\n",
+        "    </tr>\n",
+        "  </thead>\n",
+        "  <tbody>\n",
+        "    <tr>\n",
+        "      <th>1</th>\n",
+        "      <td> 1</td>\n",
+        "      <td> abc</td>\n",
+        "      <td>2013-10-02 00:00:00</td>\n",
+        "      <td> 2</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2</th>\n",
+        "      <td> 1</td>\n",
+        "      <td> abc</td>\n",
+        "      <td>2013-10-02 00:00:00</td>\n",
+        "      <td> 3</td>\n",
+        "    </tr>\n",
+        "  </tbody>\n",
+        "</table>\n",
+        "</div>"
+       ],
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 18,
+       "text": [
+        "   Column1 Column2             Column3  Column4\n",
+        "1        1     abc 2013-10-02 00:00:00        2\n",
+        "2        1     abc 2013-10-02 00:00:00        3"
+       ]
+      }
+     ],
+     "prompt_number": 18
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "# How to select the smallest/largest value in a column"
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "## SQL\n",
+      "\n",
+      "\"\"\"\n",
+      "SELECT MIN (col1) FROM tblName\n",
+      "SELECT MAX (col1) FROM tblName\n",
+      "\"\"\"\n",
+      "\n",
+      "## Pandas\n",
+      "\n",
+      "df.max()\n",
+      "df.min()"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 19,
+       "text": [
+        "Column1                      1\n",
+        "Column2                      a\n",
+        "Column3    2013-01-01 00:00:00\n",
+        "Column4                      1\n",
+        "dtype: object"
+       ]
+      }
+     ],
+     "prompt_number": 19
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "# How to string match"
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "## SQL\n",
+      "\n",
+      "\"\"\"\n",
+      "SELECT * \n",
+      "FROM tblName\n",
+      "WHERE Col1 LIKE '%StringYouAreSearchingFor%' \n",
+      "\n",
+      "SELECT * \n",
+      "FROM tblName\n",
+      "WHERE Col1 NOT LIKE '%StringYouAreSearchingFor%'\n",
+      "\"\"\"\n",
+      "\n",
+      "## Pandas\n",
+      "\n",
+      "mask1 = df['Column2'].str.startswith('b')\n",
+      "mask2 = df['Column2'].str.endswith('a')\n",
+      "mask3 = df['Column2'].str.contains('c')\n",
+      "\n",
+      "df[mask3]"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "html": [
+        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+        "<table border=\"1\" class=\"dataframe\">\n",
+        "  <thead>\n",
+        "    <tr style=\"text-align: right;\">\n",
+        "      <th></th>\n",
+        "      <th>Column1</th>\n",
+        "      <th>Column2</th>\n",
+        "      <th>Column3</th>\n",
+        "      <th>Column4</th>\n",
+        "    </tr>\n",
+        "  </thead>\n",
+        "  <tbody>\n",
+        "    <tr>\n",
+        "      <th>1</th>\n",
+        "      <td> 1</td>\n",
+        "      <td> abc</td>\n",
+        "      <td>2013-10-02 00:00:00</td>\n",
+        "      <td> 2</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2</th>\n",
+        "      <td> 1</td>\n",
+        "      <td> abc</td>\n",
+        "      <td>2013-10-02 00:00:00</td>\n",
+        "      <td> 3</td>\n",
+        "    </tr>\n",
+        "  </tbody>\n",
+        "</table>\n",
+        "</div>"
+       ],
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 20,
+       "text": [
+        "   Column1 Column2             Column3  Column4\n",
+        "1        1     abc 2013-10-02 00:00:00        2\n",
+        "2        1     abc 2013-10-02 00:00:00        3"
+       ]
+      }
+     ],
+     "prompt_number": 20
+    }
+   ],
+   "metadata": {}
+  }
+ ]
+}
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.