{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "# !pip install influxdb" ] }, { "cell_type": "code", "execution_count": 49, "metadata": {}, "outputs": [], "source": [ "import pandas as pd, numpy as np\n", "from influxdb import DataFrameClient\n", "import json" ] }, { "cell_type": "code", "execution_count": 50, "metadata": {}, "outputs": [], "source": [ "user = 'admin'\n", "password = open('auth/influxa.txt','r').read()\n", "host='influxdb'\n", "port=8086\n", "dbname='extra'\n", "protocol = 'line' #'json'\n", "client = DataFrameClient(host, port, user, password, dbname)" ] }, { "cell_type": "code", "execution_count": 51, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Timedelta('3188 days 16:49:03.943357')" ] }, "execution_count": 51, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df=pd.read_csv('data/all_stocks_5yr.csv')\n", "df['date']=df['date'].astype(np.datetime64)\n", "pd.to_datetime('now')-df['date'].min()" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "purge=True\n", "write_out=True\n", "full_overwrite=False" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "if purge:\n", " client.drop_database(dbname)\n", " client.drop_retention_policy(dbname)\n", " client.create_database(dbname)\n", " client.create_retention_policy(dbname, '3300d', 1, default=True)" ] }, { "cell_type": "code", "execution_count": 53, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "ResultSet({'('databases', None)': [{'name': '_internal'}, {'name': 'base'}, {'name': 'long'}]})" ] }, "execution_count": 53, "metadata": {}, "output_type": "execute_result" } ], "source": [ "client.query('show databases')" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "import time\n", "import datetime\n", "import pytz\n", "utc=pytz.UTC" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "def push2influx(df,measurement,field_columns,tag_columns,shift=False,dbclient=client,wo=write_out,fo=full_overwrite,daily=True):\n", " if wo:\n", " df=df.sort_index()\n", " df.index=df.index.tz_localize('GMT')\n", " if shift:\n", " df.index+=pd.to_timedelta('12h')\n", " if fo: \n", " print('Purging',measurement,'...')\n", " dbclient.query('DROP MEASUREMENT '+measurement)\n", " else:\n", " latest=dbclient.query('SELECT * FROM '+measurement+' GROUP BY \"1d\" ORDER BY DESC LIMIT 1')\n", " if latest:\n", " lat=latest[list(latest.keys())[0]].index[0]\n", " if daily: lat+=pd.to_timedelta('1d')\n", " df=df[lat:]\n", " print('Slicing',measurement,'from',lat,'...')\n", " else:\n", " print('No data in db for',measurement,'...')\n", " time.sleep(3)\n", " print('Writing to',measurement,'...')\n", " bsize=5000\n", " bwait=2\n", " print(len(df),'data points will be written in',len(df)/bsize,'batches.')\n", " print('Expected query running time is:',int((len(df)/bsize)*bwait*1.1)+3,'seconds.')\n", " for i in range(int(len(df)/bsize)+1):\n", " r=range(i*bsize,min(len(df),(i+1)*bsize))\n", " dc=df.iloc[r]\n", " print('Writing batch',i+1,'...')\n", " dbclient.write_points(dc, measurement, protocol=protocol,\n", " field_columns=field_columns,\n", " tag_columns=[])\n", " time.sleep(bwait)\n", " time.sleep(3)\n", " print('Done!')\n", " else:\n", " print('Write-out not enabled. Skipping...')" ] }, { "cell_type": "code", "execution_count": 43, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>open</th>\n", " <th>high</th>\n", " <th>low</th>\n", " <th>close</th>\n", " <th>volume</th>\n", " <th>Name</th>\n", " </tr>\n", " <tr>\n", " <th>date</th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>2013-02-08</th>\n", " <td>15.07</td>\n", " <td>15.12</td>\n", " <td>14.63</td>\n", " <td>14.75</td>\n", " <td>8407500</td>\n", " <td>AAL</td>\n", " </tr>\n", " <tr>\n", " <th>2013-02-11</th>\n", " <td>14.89</td>\n", " <td>15.01</td>\n", " <td>14.26</td>\n", " <td>14.46</td>\n", " <td>8882000</td>\n", " <td>AAL</td>\n", " </tr>\n", " <tr>\n", " <th>2013-02-12</th>\n", " <td>14.45</td>\n", " <td>14.51</td>\n", " <td>14.10</td>\n", " <td>14.27</td>\n", " <td>8126000</td>\n", " <td>AAL</td>\n", " </tr>\n", " <tr>\n", " <th>2013-02-13</th>\n", " <td>14.30</td>\n", " <td>14.94</td>\n", " <td>14.25</td>\n", " <td>14.66</td>\n", " <td>10259500</td>\n", " <td>AAL</td>\n", " </tr>\n", " <tr>\n", " <th>2013-02-14</th>\n", " <td>14.94</td>\n", " <td>14.96</td>\n", " <td>13.16</td>\n", " <td>13.99</td>\n", " <td>31879900</td>\n", " <td>AAL</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " open high low close volume Name\n", "date \n", "2013-02-08 15.07 15.12 14.63 14.75 8407500 AAL\n", "2013-02-11 14.89 15.01 14.26 14.46 8882000 AAL\n", "2013-02-12 14.45 14.51 14.10 14.27 8126000 AAL\n", "2013-02-13 14.30 14.94 14.25 14.66 10259500 AAL\n", "2013-02-14 14.94 14.96 13.16 13.99 31879900 AAL" ] }, "execution_count": 43, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df=df.set_index('date')\n", "df.head()" ] }, { "cell_type": "code", "execution_count": 44, "metadata": {}, "outputs": [], "source": [ "df['close0']=df['close']-df['open']\n", "c1=[]\n", "c2=[]\n", "for i in df['close0'].values:\n", " if i<0:\n", " c1.append(-i)\n", " c2.append(np.nan)\n", " else:\n", " c2.append(i)\n", " c1.append(np.nan)\n", "df['close0plus']=c2\n", "df['close0minus']=c1" ] }, { "cell_type": "code", "execution_count": 46, "metadata": {}, "outputs": [], "source": [ "df1=df[df['Name'].isin(['AAPL','AAL'])]" ] }, { "cell_type": "code", "execution_count": 48, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>open</th>\n", " <th>high</th>\n", " <th>low</th>\n", " <th>close</th>\n", " <th>volume</th>\n", " <th>Name</th>\n", " <th>close0</th>\n", " <th>close0plus</th>\n", " <th>close0minus</th>\n", " </tr>\n", " <tr>\n", " <th>date</th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>2013-02-08</th>\n", " <td>15.070</td>\n", " <td>15.12</td>\n", " <td>14.6300</td>\n", " <td>14.75</td>\n", " <td>8407500</td>\n", " <td>AAL</td>\n", " <td>-0.320</td>\n", " <td>NaN</td>\n", " <td>0.320</td>\n", " </tr>\n", " <tr>\n", " <th>2013-02-11</th>\n", " <td>14.890</td>\n", " <td>15.01</td>\n", " <td>14.2600</td>\n", " <td>14.46</td>\n", " <td>8882000</td>\n", " <td>AAL</td>\n", " <td>-0.430</td>\n", " <td>NaN</td>\n", " <td>0.430</td>\n", " </tr>\n", " <tr>\n", " <th>2013-02-12</th>\n", " <td>14.450</td>\n", " <td>14.51</td>\n", " <td>14.1000</td>\n", " <td>14.27</td>\n", " <td>8126000</td>\n", " <td>AAL</td>\n", " <td>-0.180</td>\n", " <td>NaN</td>\n", " <td>0.180</td>\n", " </tr>\n", " <tr>\n", " <th>2013-02-13</th>\n", " <td>14.300</td>\n", " <td>14.94</td>\n", " <td>14.2500</td>\n", " <td>14.66</td>\n", " <td>10259500</td>\n", " <td>AAL</td>\n", " <td>0.360</td>\n", " <td>0.360</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>2013-02-14</th>\n", " <td>14.940</td>\n", " <td>14.96</td>\n", " <td>13.1600</td>\n", " <td>13.99</td>\n", " <td>31879900</td>\n", " <td>AAL</td>\n", " <td>-0.950</td>\n", " <td>NaN</td>\n", " <td>0.950</td>\n", " </tr>\n", " <tr>\n", " <th>...</th>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " </tr>\n", " <tr>\n", " <th>2018-02-01</th>\n", " <td>167.165</td>\n", " <td>168.62</td>\n", " <td>166.7600</td>\n", " <td>167.78</td>\n", " <td>47230787</td>\n", " <td>AAPL</td>\n", " <td>0.615</td>\n", " <td>0.615</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>2018-02-02</th>\n", " <td>166.000</td>\n", " <td>166.80</td>\n", " <td>160.1000</td>\n", " <td>160.50</td>\n", " <td>86593825</td>\n", " <td>AAPL</td>\n", " <td>-5.500</td>\n", " <td>NaN</td>\n", " <td>5.500</td>\n", " </tr>\n", " <tr>\n", " <th>2018-02-05</th>\n", " <td>159.100</td>\n", " <td>163.88</td>\n", " <td>156.0000</td>\n", " <td>156.49</td>\n", " <td>72738522</td>\n", " <td>AAPL</td>\n", " <td>-2.610</td>\n", " <td>NaN</td>\n", " <td>2.610</td>\n", " </tr>\n", " <tr>\n", " <th>2018-02-06</th>\n", " <td>154.830</td>\n", " <td>163.72</td>\n", " <td>154.0000</td>\n", " <td>163.03</td>\n", " <td>68243838</td>\n", " <td>AAPL</td>\n", " <td>8.200</td>\n", " <td>8.200</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>2018-02-07</th>\n", " <td>163.085</td>\n", " <td>163.40</td>\n", " <td>159.0685</td>\n", " <td>159.54</td>\n", " <td>51608580</td>\n", " <td>AAPL</td>\n", " <td>-3.545</td>\n", " <td>NaN</td>\n", " <td>3.545</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "<p>2518 rows × 9 columns</p>\n", "</div>" ], "text/plain": [ " open high low close volume Name close0 \\\n", "date \n", "2013-02-08 15.070 15.12 14.6300 14.75 8407500 AAL -0.320 \n", "2013-02-11 14.890 15.01 14.2600 14.46 8882000 AAL -0.430 \n", "2013-02-12 14.450 14.51 14.1000 14.27 8126000 AAL -0.180 \n", "2013-02-13 14.300 14.94 14.2500 14.66 10259500 AAL 0.360 \n", "2013-02-14 14.940 14.96 13.1600 13.99 31879900 AAL -0.950 \n", "... ... ... ... ... ... ... ... \n", "2018-02-01 167.165 168.62 166.7600 167.78 47230787 AAPL 0.615 \n", "2018-02-02 166.000 166.80 160.1000 160.50 86593825 AAPL -5.500 \n", "2018-02-05 159.100 163.88 156.0000 156.49 72738522 AAPL -2.610 \n", "2018-02-06 154.830 163.72 154.0000 163.03 68243838 AAPL 8.200 \n", "2018-02-07 163.085 163.40 159.0685 159.54 51608580 AAPL -3.545 \n", "\n", " close0plus close0minus \n", "date \n", "2013-02-08 NaN 0.320 \n", "2013-02-11 NaN 0.430 \n", "2013-02-12 NaN 0.180 \n", "2013-02-13 0.360 NaN \n", "2013-02-14 NaN 0.950 \n", "... ... ... \n", "2018-02-01 0.615 NaN \n", "2018-02-02 NaN 5.500 \n", "2018-02-05 NaN 2.610 \n", "2018-02-06 8.200 NaN \n", "2018-02-07 NaN 3.545 \n", "\n", "[2518 rows x 9 columns]" ] }, "execution_count": 48, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df1" ] }, { "cell_type": "code", "execution_count": 47, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Purging stocks ...\n", "Writing to stocks ...\n", "2518 data points will be written in 0.5036 batches.\n", "Expected query running time is: 4 seconds.\n", "Writing batch 1 ...\n", "Done!\n" ] } ], "source": [ "tag_columns=['Name']\n", "field_columns=[i for i in df1.columns if i not in tag_columns]\n", "measurement='stocks'\n", "push2influx(df1,measurement,field_columns,tag_columns,fo=True)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.6" } }, "nbformat": 4, "nbformat_minor": 4 }