{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "# !pip install influxdb" ] }, { "cell_type": "code", "execution_count": 49, "metadata": {}, "outputs": [], "source": [ "import pandas as pd, numpy as np\n", "from influxdb import DataFrameClient\n", "import json" ] }, { "cell_type": "code", "execution_count": 50, "metadata": {}, "outputs": [], "source": [ "user = 'admin'\n", "password = open('auth/influxa.txt','r').read()\n", "host='influxdb'\n", "port=8086\n", "dbname='extra'\n", "protocol = 'line' #'json'\n", "client = DataFrameClient(host, port, user, password, dbname)" ] }, { "cell_type": "code", "execution_count": 51, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Timedelta('3188 days 16:49:03.943357')" ] }, "execution_count": 51, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df=pd.read_csv('data/all_stocks_5yr.csv')\n", "df['date']=df['date'].astype(np.datetime64)\n", "pd.to_datetime('now')-df['date'].min()" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "purge=True\n", "write_out=True\n", "full_overwrite=False" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "if purge:\n", " client.drop_database(dbname)\n", " client.drop_retention_policy(dbname)\n", " client.create_database(dbname)\n", " client.create_retention_policy(dbname, '3300d', 1, default=True)" ] }, { "cell_type": "code", "execution_count": 53, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "ResultSet({'('databases', None)': [{'name': '_internal'}, {'name': 'base'}, {'name': 'long'}]})" ] }, "execution_count": 53, "metadata": {}, "output_type": "execute_result" } ], "source": [ "client.query('show databases')" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "import time\n", "import datetime\n", "import pytz\n", "utc=pytz.UTC" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "def push2influx(df,measurement,field_columns,tag_columns,shift=False,dbclient=client,wo=write_out,fo=full_overwrite,daily=True):\n", " if wo:\n", " df=df.sort_index()\n", " df.index=df.index.tz_localize('GMT')\n", " if shift:\n", " df.index+=pd.to_timedelta('12h')\n", " if fo: \n", " print('Purging',measurement,'...')\n", " dbclient.query('DROP MEASUREMENT '+measurement)\n", " else:\n", " latest=dbclient.query('SELECT * FROM '+measurement+' GROUP BY \"1d\" ORDER BY DESC LIMIT 1')\n", " if latest:\n", " lat=latest[list(latest.keys())[0]].index[0]\n", " if daily: lat+=pd.to_timedelta('1d')\n", " df=df[lat:]\n", " print('Slicing',measurement,'from',lat,'...')\n", " else:\n", " print('No data in db for',measurement,'...')\n", " time.sleep(3)\n", " print('Writing to',measurement,'...')\n", " bsize=5000\n", " bwait=2\n", " print(len(df),'data points will be written in',len(df)/bsize,'batches.')\n", " print('Expected query running time is:',int((len(df)/bsize)*bwait*1.1)+3,'seconds.')\n", " for i in range(int(len(df)/bsize)+1):\n", " r=range(i*bsize,min(len(df),(i+1)*bsize))\n", " dc=df.iloc[r]\n", " print('Writing batch',i+1,'...')\n", " dbclient.write_points(dc, measurement, protocol=protocol,\n", " field_columns=field_columns,\n", " tag_columns=[])\n", " time.sleep(bwait)\n", " time.sleep(3)\n", " print('Done!')\n", " else:\n", " print('Write-out not enabled. Skipping...')" ] }, { "cell_type": "code", "execution_count": 43, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | open | \n", "high | \n", "low | \n", "close | \n", "volume | \n", "Name | \n", "
---|---|---|---|---|---|---|
date | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
2013-02-08 | \n", "15.07 | \n", "15.12 | \n", "14.63 | \n", "14.75 | \n", "8407500 | \n", "AAL | \n", "
2013-02-11 | \n", "14.89 | \n", "15.01 | \n", "14.26 | \n", "14.46 | \n", "8882000 | \n", "AAL | \n", "
2013-02-12 | \n", "14.45 | \n", "14.51 | \n", "14.10 | \n", "14.27 | \n", "8126000 | \n", "AAL | \n", "
2013-02-13 | \n", "14.30 | \n", "14.94 | \n", "14.25 | \n", "14.66 | \n", "10259500 | \n", "AAL | \n", "
2013-02-14 | \n", "14.94 | \n", "14.96 | \n", "13.16 | \n", "13.99 | \n", "31879900 | \n", "AAL | \n", "
\n", " | open | \n", "high | \n", "low | \n", "close | \n", "volume | \n", "Name | \n", "close0 | \n", "close0plus | \n", "close0minus | \n", "
---|---|---|---|---|---|---|---|---|---|
date | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
2013-02-08 | \n", "15.070 | \n", "15.12 | \n", "14.6300 | \n", "14.75 | \n", "8407500 | \n", "AAL | \n", "-0.320 | \n", "NaN | \n", "0.320 | \n", "
2013-02-11 | \n", "14.890 | \n", "15.01 | \n", "14.2600 | \n", "14.46 | \n", "8882000 | \n", "AAL | \n", "-0.430 | \n", "NaN | \n", "0.430 | \n", "
2013-02-12 | \n", "14.450 | \n", "14.51 | \n", "14.1000 | \n", "14.27 | \n", "8126000 | \n", "AAL | \n", "-0.180 | \n", "NaN | \n", "0.180 | \n", "
2013-02-13 | \n", "14.300 | \n", "14.94 | \n", "14.2500 | \n", "14.66 | \n", "10259500 | \n", "AAL | \n", "0.360 | \n", "0.360 | \n", "NaN | \n", "
2013-02-14 | \n", "14.940 | \n", "14.96 | \n", "13.1600 | \n", "13.99 | \n", "31879900 | \n", "AAL | \n", "-0.950 | \n", "NaN | \n", "0.950 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
2018-02-01 | \n", "167.165 | \n", "168.62 | \n", "166.7600 | \n", "167.78 | \n", "47230787 | \n", "AAPL | \n", "0.615 | \n", "0.615 | \n", "NaN | \n", "
2018-02-02 | \n", "166.000 | \n", "166.80 | \n", "160.1000 | \n", "160.50 | \n", "86593825 | \n", "AAPL | \n", "-5.500 | \n", "NaN | \n", "5.500 | \n", "
2018-02-05 | \n", "159.100 | \n", "163.88 | \n", "156.0000 | \n", "156.49 | \n", "72738522 | \n", "AAPL | \n", "-2.610 | \n", "NaN | \n", "2.610 | \n", "
2018-02-06 | \n", "154.830 | \n", "163.72 | \n", "154.0000 | \n", "163.03 | \n", "68243838 | \n", "AAPL | \n", "8.200 | \n", "8.200 | \n", "NaN | \n", "
2018-02-07 | \n", "163.085 | \n", "163.40 | \n", "159.0685 | \n", "159.54 | \n", "51608580 | \n", "AAPL | \n", "-3.545 | \n", "NaN | \n", "3.545 | \n", "
2518 rows × 9 columns
\n", "