{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "# !pip install influxdb" ] }, { "cell_type": "code", "execution_count": 49, "metadata": {}, "outputs": [], "source": [ "import pandas as pd, numpy as np\n", "from influxdb import DataFrameClient\n", "import json" ] }, { "cell_type": "code", "execution_count": 50, "metadata": {}, "outputs": [], "source": [ "user = 'admin'\n", "password = open('auth/influxa.txt','r').read()\n", "host='influxdb'\n", "port=8086\n", "dbname='extra'\n", "protocol = 'line' #'json'\n", "client = DataFrameClient(host, port, user, password, dbname)" ] }, { "cell_type": "code", "execution_count": 51, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Timedelta('3188 days 16:49:03.943357')" ] }, "execution_count": 51, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df=pd.read_csv('data/all_stocks_5yr.csv')\n", "df['date']=df['date'].astype(np.datetime64)\n", "pd.to_datetime('now')-df['date'].min()" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "purge=True\n", "write_out=True\n", "full_overwrite=False" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "if purge:\n", " client.drop_database(dbname)\n", " client.drop_retention_policy(dbname)\n", " client.create_database(dbname)\n", " client.create_retention_policy(dbname, '3300d', 1, default=True)" ] }, { "cell_type": "code", "execution_count": 53, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "ResultSet({'('databases', None)': [{'name': '_internal'}, {'name': 'base'}, {'name': 'long'}]})" ] }, "execution_count": 53, "metadata": {}, "output_type": "execute_result" } ], "source": [ "client.query('show databases')" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "import time\n", "import datetime\n", "import pytz\n", "utc=pytz.UTC" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "def push2influx(df,measurement,field_columns,tag_columns,shift=False,dbclient=client,wo=write_out,fo=full_overwrite,daily=True):\n", " if wo:\n", " df=df.sort_index()\n", " df.index=df.index.tz_localize('GMT')\n", " if shift:\n", " df.index+=pd.to_timedelta('12h')\n", " if fo: \n", " print('Purging',measurement,'...')\n", " dbclient.query('DROP MEASUREMENT '+measurement)\n", " else:\n", " latest=dbclient.query('SELECT * FROM '+measurement+' GROUP BY \"1d\" ORDER BY DESC LIMIT 1')\n", " if latest:\n", " lat=latest[list(latest.keys())[0]].index[0]\n", " if daily: lat+=pd.to_timedelta('1d')\n", " df=df[lat:]\n", " print('Slicing',measurement,'from',lat,'...')\n", " else:\n", " print('No data in db for',measurement,'...')\n", " time.sleep(3)\n", " print('Writing to',measurement,'...')\n", " bsize=5000\n", " bwait=2\n", " print(len(df),'data points will be written in',len(df)/bsize,'batches.')\n", " print('Expected query running time is:',int((len(df)/bsize)*bwait*1.1)+3,'seconds.')\n", " for i in range(int(len(df)/bsize)+1):\n", " r=range(i*bsize,min(len(df),(i+1)*bsize))\n", " dc=df.iloc[r]\n", " print('Writing batch',i+1,'...')\n", " dbclient.write_points(dc, measurement, protocol=protocol,\n", " field_columns=field_columns,\n", " tag_columns=[])\n", " time.sleep(bwait)\n", " time.sleep(3)\n", " print('Done!')\n", " else:\n", " print('Write-out not enabled. Skipping...')" ] }, { "cell_type": "code", "execution_count": 43, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
openhighlowclosevolumeName
date
2013-02-0815.0715.1214.6314.758407500AAL
2013-02-1114.8915.0114.2614.468882000AAL
2013-02-1214.4514.5114.1014.278126000AAL
2013-02-1314.3014.9414.2514.6610259500AAL
2013-02-1414.9414.9613.1613.9931879900AAL
\n", "
" ], "text/plain": [ " open high low close volume Name\n", "date \n", "2013-02-08 15.07 15.12 14.63 14.75 8407500 AAL\n", "2013-02-11 14.89 15.01 14.26 14.46 8882000 AAL\n", "2013-02-12 14.45 14.51 14.10 14.27 8126000 AAL\n", "2013-02-13 14.30 14.94 14.25 14.66 10259500 AAL\n", "2013-02-14 14.94 14.96 13.16 13.99 31879900 AAL" ] }, "execution_count": 43, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df=df.set_index('date')\n", "df.head()" ] }, { "cell_type": "code", "execution_count": 44, "metadata": {}, "outputs": [], "source": [ "df['close0']=df['close']-df['open']\n", "c1=[]\n", "c2=[]\n", "for i in df['close0'].values:\n", " if i<0:\n", " c1.append(-i)\n", " c2.append(np.nan)\n", " else:\n", " c2.append(i)\n", " c1.append(np.nan)\n", "df['close0plus']=c2\n", "df['close0minus']=c1" ] }, { "cell_type": "code", "execution_count": 46, "metadata": {}, "outputs": [], "source": [ "df1=df[df['Name'].isin(['AAPL','AAL'])]" ] }, { "cell_type": "code", "execution_count": 48, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
openhighlowclosevolumeNameclose0close0plusclose0minus
date
2013-02-0815.07015.1214.630014.758407500AAL-0.320NaN0.320
2013-02-1114.89015.0114.260014.468882000AAL-0.430NaN0.430
2013-02-1214.45014.5114.100014.278126000AAL-0.180NaN0.180
2013-02-1314.30014.9414.250014.6610259500AAL0.3600.360NaN
2013-02-1414.94014.9613.160013.9931879900AAL-0.950NaN0.950
..............................
2018-02-01167.165168.62166.7600167.7847230787AAPL0.6150.615NaN
2018-02-02166.000166.80160.1000160.5086593825AAPL-5.500NaN5.500
2018-02-05159.100163.88156.0000156.4972738522AAPL-2.610NaN2.610
2018-02-06154.830163.72154.0000163.0368243838AAPL8.2008.200NaN
2018-02-07163.085163.40159.0685159.5451608580AAPL-3.545NaN3.545
\n", "

2518 rows × 9 columns

\n", "
" ], "text/plain": [ " open high low close volume Name close0 \\\n", "date \n", "2013-02-08 15.070 15.12 14.6300 14.75 8407500 AAL -0.320 \n", "2013-02-11 14.890 15.01 14.2600 14.46 8882000 AAL -0.430 \n", "2013-02-12 14.450 14.51 14.1000 14.27 8126000 AAL -0.180 \n", "2013-02-13 14.300 14.94 14.2500 14.66 10259500 AAL 0.360 \n", "2013-02-14 14.940 14.96 13.1600 13.99 31879900 AAL -0.950 \n", "... ... ... ... ... ... ... ... \n", "2018-02-01 167.165 168.62 166.7600 167.78 47230787 AAPL 0.615 \n", "2018-02-02 166.000 166.80 160.1000 160.50 86593825 AAPL -5.500 \n", "2018-02-05 159.100 163.88 156.0000 156.49 72738522 AAPL -2.610 \n", "2018-02-06 154.830 163.72 154.0000 163.03 68243838 AAPL 8.200 \n", "2018-02-07 163.085 163.40 159.0685 159.54 51608580 AAPL -3.545 \n", "\n", " close0plus close0minus \n", "date \n", "2013-02-08 NaN 0.320 \n", "2013-02-11 NaN 0.430 \n", "2013-02-12 NaN 0.180 \n", "2013-02-13 0.360 NaN \n", "2013-02-14 NaN 0.950 \n", "... ... ... \n", "2018-02-01 0.615 NaN \n", "2018-02-02 NaN 5.500 \n", "2018-02-05 NaN 2.610 \n", "2018-02-06 8.200 NaN \n", "2018-02-07 NaN 3.545 \n", "\n", "[2518 rows x 9 columns]" ] }, "execution_count": 48, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df1" ] }, { "cell_type": "code", "execution_count": 47, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Purging stocks ...\n", "Writing to stocks ...\n", "2518 data points will be written in 0.5036 batches.\n", "Expected query running time is: 4 seconds.\n", "Writing batch 1 ...\n", "Done!\n" ] } ], "source": [ "tag_columns=['Name']\n", "field_columns=[i for i in df1.columns if i not in tag_columns]\n", "measurement='stocks'\n", "push2influx(df1,measurement,field_columns,tag_columns,fo=True)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.6" } }, "nbformat": 4, "nbformat_minor": 4 }