Python3: Grabbing data from Websocket and putting it into a DataFrame
Wondering if anyone can help. I'm trying to grab data from a websocket and put it into a DataFrame without it having multiple dictionaries in each line. The code I'm using to pull from binance is:
from binance.client import Client
from binance.websockets import BinanceSocketManager
from binance.enums import *
import datetime as dt
import matplotlib.pyplot as plt
from matplotlib import style
import pandas as pd
import pandas_datareader.data as web
from pandas.io.json import json_normalize
# Binance API client; the key and secret shown here are placeholders.
client = Client('api-key', 'api-secret')
# Ticker list fetched once at start-up; not referenced again in this snippet.
tickers = client.get_all_tickers()
# Accumulates every websocket message handled by process_message().
df = pd.DataFrame()
# Running total of messages handled by process_message().
count = 0
# Socket manager handle; assigned inside initiate().
bm = None
### Multiplex socket
# Save incoming data
def process_message(msg):
    """Handle one message from the multiplexed websocket.

    Prints the message's 'stream' and 'data' entries, appends the whole
    message as a new row of the module-level ``df``, increments ``count``,
    and then appends the *entire* DataFrame to 'klinesmultiplex_socket.csv'.

    NOTE: because the full DataFrame is written in append mode on every
    call, rows from earlier messages are written to the file again each time.
    """
    global count, df, bm
    stream_name = msg['stream']
    payload = msg['data']
    print("stream: {} data: {}".format(stream_name, payload))
    # Grow the in-memory frame by one row holding the raw message dict.
    df = df.append(msg, ignore_index=True)
    count += 1
    with open('klinesmultiplex_socket.csv', 'a') as csv_out:
        df.to_csv(csv_out, header=False)
    # Earlier attempts, left disabled:
    #df = pd.DataFrame(df)
    #df.to_csv('test.csv')
def initiate():
    """Create the socket manager and start streaming klines.

    Builds a Binance client, stores a new BinanceSocketManager in the
    module-level ``bm``, registers ``process_message`` as the callback for
    the 'bnbbtc@kline_1m' and 'neobtc@kline_1m' streams, and starts the
    manager.
    """
    global bm
    streams = ['bnbbtc@kline_1m', 'neobtc@kline_1m']
    api_client = Client('api-key', 'api-secret')
    bm = BinanceSocketManager(api_client)
    # The connection key is captured but not used again in this snippet.
    conn_key = bm.start_multiplex_socket(streams, process_message)
    bm.start()
initiate()
This is what saves in the csv:
0,"{'e': 'kline', 'E': 1521847596412, 's': 'NEOBTC', 'k': {'t': 1521847560000, 'T': 1521847619999, 's': 'NEOBTC', 'i': '1m', 'f': 11202559, 'L': 11202604, 'o': '0.00765700', 'c': '0.00765900', 'h': '0.00766000', 'l': '0.00765400', 'v': '182.38000000', 'n': 46, 'x': False, 'q': '1.39674356', 'V': '131.09000000', 'Q': '1.00404646', 'B': '0'}}",neobtc@kline_1m
0,"{'e': 'kline', 'E': 1521847596412, 's': 'NEOBTC', 'k': {'t': 1521847560000, 'T': 1521847619999, 's': 'NEOBTC', 'i': '1m', 'f': 11202559, 'L': 11202604, 'o': '0.00765700', 'c': '0.00765900', 'h': '0.00766000', 'l': '0.00765400', 'v': '182.38000000', 'n': 46, 'x': False, 'q': '1.39674356', 'V': '131.09000000', 'Q': '1.00404646', 'B': '0'}}",neobtc@kline_1m
1,"{'e': 'kline', 'E': 1521847597055, 's': 'BNBBTC', 'k': {'t': 1521847560000, 'T': 1521847619999, 's': 'BNBBTC', 'i': '1m', 'f': 12744199, 'L': 12744341, 'o': '0.00132050', 'c': '0.00131530', 'h': '0.00132200', 'l': '0.00131500', 'v': '5571.10000000', 'n': 143, 'x': False, 'q': '7.33546205', 'V': '2637.29000000', 'Q': '3.47577851', 'B': '0'}}",bnbbtc@kline_1m
0,"{'e': 'kline', 'E': 1521847596412, 's': 'NEOBTC', 'k': {'t': 1521847560000, 'T': 1521847619999, 's': 'NEOBTC', 'i': '1m', 'f': 11202559, 'L': 11202604, 'o': '0.00765700', 'c': '0.00765900', 'h': '0.00766000', 'l': '0.00765400', 'v': '182.38000000', 'n': 46, 'x': False, 'q': '1.39674356', 'V': '131.09000000', 'Q': '1.00404646', 'B': '0'}}",neobtc@kline_1m
However, I'm looking for it to look like this (or even to get rid of the value in column 0...not sure what that is):
0, 'kline', 1521847596412, 'NEOBTC', 1521847560000, 1521847619999, 'NEOBTC', '1m', 11202559, 11202604, 0.00765700, 0.00765900, 0.00766000, 0.00765400, 182.38000000, 46, False, 1.39674356, 131.09000000, 1.00404646, 0,neobtc@kline_1m
0,'kline', 1521847596412, 'NEOBTC', 1521847560000, 1521847619999, 'NEOBTC', '1m', 11202559, 11202604, 0.00765700, 0.00765900, 0.00766000, 0.00765400, 182.38000000, 46, False, 1.39674356, 131.09000000, 1.00404646, 0,neobtc@kline_1m
1,'kline', 1521847597055, 'BNBBTC', 1521847560000, 1521847619999, 'BNBBTC', '1m', 12744199, 12744341, 0.00132050, 0.00131530, 0.00132200, 0.00131500, 5571.10000000, 143, False, 7.33546205, 2637.29000000, 3.47577851, 0,bnbbtc@kline_1m
0,'kline', 1521847596412, 'NEOBTC', 1521847560000, 1521847619999, 'NEOBTC', '1m', 11202559, 11202604,0.00765700, 0.00765900, 0.00766000, 0.00765400, 182.38000000, 46, False, 1.39674356, 131.09000000, 1.00404646, 0,neobtc@kline_1m
I've tried a bunch of things but can't get it to work: JSON to DataFrame, multiple dictionaries to DataFrame, and a few other things. Please let me know if I'm even looking in the right place. Some of the numbers also show up quoted, as 'xxxx.xxxx' instead of just xxxx.xxxx.
What I would eventually like to do is grab all the ticker data and save each ticker to an individual file from the stream above, i.e. NEOBTC and BNBBTC would each be saved in their own csv. I only want it to save when 'x': True, however.
Any help would be greatly appreciated on any of my struggles and thank you for taking the time to look at this.
json python-3.x dataframe websocket
add a comment |
Wondering if anyone can help. I'm trying to grab data from a websocket and put it into a DataFrame without it having multiple dictionaries in each line. The code I'm using to pull from binance is:
from binance.client import Client
from binance.websockets import BinanceSocketManager
from binance.enums import *
import datetime as dt
import matplotlib.pyplot as plt
from matplotlib import style
import pandas as pd
import pandas_datareader.data as web
from pandas.io.json import json_normalize
# Binance API client; the key and secret shown here are placeholders.
client = Client('api-key', 'api-secret')
# Ticker list fetched once at start-up; not referenced again in this snippet.
tickers = client.get_all_tickers()
# Accumulates every websocket message handled by process_message().
df = pd.DataFrame()
# Running total of messages handled by process_message().
count = 0
# Socket manager handle; assigned inside initiate().
bm = None
### Multiplex socket
# Save incoming data
def process_message(msg):
    """Handle one message from the multiplexed websocket.

    Prints the message's 'stream' and 'data' entries, appends the whole
    message as a new row of the module-level ``df``, increments ``count``,
    and then appends the *entire* DataFrame to 'klinesmultiplex_socket.csv'.

    NOTE: because the full DataFrame is written in append mode on every
    call, rows from earlier messages are written to the file again each time.
    """
    global count, df, bm
    stream_name = msg['stream']
    payload = msg['data']
    print("stream: {} data: {}".format(stream_name, payload))
    # Grow the in-memory frame by one row holding the raw message dict.
    df = df.append(msg, ignore_index=True)
    count += 1
    with open('klinesmultiplex_socket.csv', 'a') as csv_out:
        df.to_csv(csv_out, header=False)
    # Earlier attempts, left disabled:
    #df = pd.DataFrame(df)
    #df.to_csv('test.csv')
def initiate():
    """Create the socket manager and start streaming klines.

    Builds a Binance client, stores a new BinanceSocketManager in the
    module-level ``bm``, registers ``process_message`` as the callback for
    the 'bnbbtc@kline_1m' and 'neobtc@kline_1m' streams, and starts the
    manager.
    """
    global bm
    streams = ['bnbbtc@kline_1m', 'neobtc@kline_1m']
    api_client = Client('api-key', 'api-secret')
    bm = BinanceSocketManager(api_client)
    # The connection key is captured but not used again in this snippet.
    conn_key = bm.start_multiplex_socket(streams, process_message)
    bm.start()
initiate()
This is what saves in the csv:
0,"{'e': 'kline', 'E': 1521847596412, 's': 'NEOBTC', 'k': {'t': 1521847560000, 'T': 1521847619999, 's': 'NEOBTC', 'i': '1m', 'f': 11202559, 'L': 11202604, 'o': '0.00765700', 'c': '0.00765900', 'h': '0.00766000', 'l': '0.00765400', 'v': '182.38000000', 'n': 46, 'x': False, 'q': '1.39674356', 'V': '131.09000000', 'Q': '1.00404646', 'B': '0'}}",neobtc@kline_1m
0,"{'e': 'kline', 'E': 1521847596412, 's': 'NEOBTC', 'k': {'t': 1521847560000, 'T': 1521847619999, 's': 'NEOBTC', 'i': '1m', 'f': 11202559, 'L': 11202604, 'o': '0.00765700', 'c': '0.00765900', 'h': '0.00766000', 'l': '0.00765400', 'v': '182.38000000', 'n': 46, 'x': False, 'q': '1.39674356', 'V': '131.09000000', 'Q': '1.00404646', 'B': '0'}}",neobtc@kline_1m
1,"{'e': 'kline', 'E': 1521847597055, 's': 'BNBBTC', 'k': {'t': 1521847560000, 'T': 1521847619999, 's': 'BNBBTC', 'i': '1m', 'f': 12744199, 'L': 12744341, 'o': '0.00132050', 'c': '0.00131530', 'h': '0.00132200', 'l': '0.00131500', 'v': '5571.10000000', 'n': 143, 'x': False, 'q': '7.33546205', 'V': '2637.29000000', 'Q': '3.47577851', 'B': '0'}}",bnbbtc@kline_1m
0,"{'e': 'kline', 'E': 1521847596412, 's': 'NEOBTC', 'k': {'t': 1521847560000, 'T': 1521847619999, 's': 'NEOBTC', 'i': '1m', 'f': 11202559, 'L': 11202604, 'o': '0.00765700', 'c': '0.00765900', 'h': '0.00766000', 'l': '0.00765400', 'v': '182.38000000', 'n': 46, 'x': False, 'q': '1.39674356', 'V': '131.09000000', 'Q': '1.00404646', 'B': '0'}}",neobtc@kline_1m
However, I'm looking for it to look like this (or even to get rid of the value in column 0...not sure what that is):
0, 'kline', 1521847596412, 'NEOBTC', 1521847560000, 1521847619999, 'NEOBTC', '1m', 11202559, 11202604, 0.00765700, 0.00765900, 0.00766000, 0.00765400, 182.38000000, 46, False, 1.39674356, 131.09000000, 1.00404646, 0,neobtc@kline_1m
0,'kline', 1521847596412, 'NEOBTC', 1521847560000, 1521847619999, 'NEOBTC', '1m', 11202559, 11202604, 0.00765700, 0.00765900, 0.00766000, 0.00765400, 182.38000000, 46, False, 1.39674356, 131.09000000, 1.00404646, 0,neobtc@kline_1m
1,'kline', 1521847597055, 'BNBBTC', 1521847560000, 1521847619999, 'BNBBTC', '1m', 12744199, 12744341, 0.00132050, 0.00131530, 0.00132200, 0.00131500, 5571.10000000, 143, False, 7.33546205, 2637.29000000, 3.47577851, 0,bnbbtc@kline_1m
0,'kline', 1521847596412, 'NEOBTC', 1521847560000, 1521847619999, 'NEOBTC', '1m', 11202559, 11202604,0.00765700, 0.00765900, 0.00766000, 0.00765400, 182.38000000, 46, False, 1.39674356, 131.09000000, 1.00404646, 0,neobtc@kline_1m
I've tried a bunch of things but can't get it to work: JSON to DataFrame, multiple dictionaries to DataFrame, and a few other things. Please let me know if I'm even looking in the right place. Some of the numbers also show up quoted, as 'xxxx.xxxx' instead of just xxxx.xxxx.
What I would eventually like to do is grab all the ticker data and save each ticker to an individual file from the stream above, i.e. NEOBTC and BNBBTC would each be saved in their own csv. I only want it to save when 'x': True, however.
Any help would be greatly appreciated on any of my struggles and thank you for taking the time to look at this.
json python-3.x dataframe websocket
Is the end goal to produce a CSV file, or to just keep the data in a DataFrame in memory and stop writing the file?
– John Zwinck
Mar 24 '18 at 0:37
The end goal is to produce a csv file. However, I can produce the csv but the data is as above, ie dictionary within a dictionary but I'd like it to look like the bottom example
– Brian F
Mar 24 '18 at 0:47
add a comment |
Wondering if anyone can help. I'm trying to grab data from a websocket and put it into a DataFrame without it having multiple dictionaries in each line. The code I'm using to pull from binance is:
from binance.client import Client
from binance.websockets import BinanceSocketManager
from binance.enums import *
import datetime as dt
import matplotlib.pyplot as plt
from matplotlib import style
import pandas as pd
import pandas_datareader.data as web
from pandas.io.json import json_normalize
# Binance API client; the key and secret shown here are placeholders.
client = Client('api-key', 'api-secret')
# Ticker list fetched once at start-up; not referenced again in this snippet.
tickers = client.get_all_tickers()
# Accumulates every websocket message handled by process_message().
df = pd.DataFrame()
# Running total of messages handled by process_message().
count = 0
# Socket manager handle; assigned inside initiate().
bm = None
### Multiplex socket
# Save incoming data
def process_message(msg):
    """Handle one message from the multiplexed websocket.

    Prints the message's 'stream' and 'data' entries, appends the whole
    message as a new row of the module-level ``df``, increments ``count``,
    and then appends the *entire* DataFrame to 'klinesmultiplex_socket.csv'.

    NOTE: because the full DataFrame is written in append mode on every
    call, rows from earlier messages are written to the file again each time.
    """
    global count, df, bm
    stream_name = msg['stream']
    payload = msg['data']
    print("stream: {} data: {}".format(stream_name, payload))
    # Grow the in-memory frame by one row holding the raw message dict.
    df = df.append(msg, ignore_index=True)
    count += 1
    with open('klinesmultiplex_socket.csv', 'a') as csv_out:
        df.to_csv(csv_out, header=False)
    # Earlier attempts, left disabled:
    #df = pd.DataFrame(df)
    #df.to_csv('test.csv')
def initiate():
    """Create the socket manager and start streaming klines.

    Builds a Binance client, stores a new BinanceSocketManager in the
    module-level ``bm``, registers ``process_message`` as the callback for
    the 'bnbbtc@kline_1m' and 'neobtc@kline_1m' streams, and starts the
    manager.
    """
    global bm
    streams = ['bnbbtc@kline_1m', 'neobtc@kline_1m']
    api_client = Client('api-key', 'api-secret')
    bm = BinanceSocketManager(api_client)
    # The connection key is captured but not used again in this snippet.
    conn_key = bm.start_multiplex_socket(streams, process_message)
    bm.start()
initiate()
This is what saves in the csv:
0,"{'e': 'kline', 'E': 1521847596412, 's': 'NEOBTC', 'k': {'t': 1521847560000, 'T': 1521847619999, 's': 'NEOBTC', 'i': '1m', 'f': 11202559, 'L': 11202604, 'o': '0.00765700', 'c': '0.00765900', 'h': '0.00766000', 'l': '0.00765400', 'v': '182.38000000', 'n': 46, 'x': False, 'q': '1.39674356', 'V': '131.09000000', 'Q': '1.00404646', 'B': '0'}}",neobtc@kline_1m
0,"{'e': 'kline', 'E': 1521847596412, 's': 'NEOBTC', 'k': {'t': 1521847560000, 'T': 1521847619999, 's': 'NEOBTC', 'i': '1m', 'f': 11202559, 'L': 11202604, 'o': '0.00765700', 'c': '0.00765900', 'h': '0.00766000', 'l': '0.00765400', 'v': '182.38000000', 'n': 46, 'x': False, 'q': '1.39674356', 'V': '131.09000000', 'Q': '1.00404646', 'B': '0'}}",neobtc@kline_1m
1,"{'e': 'kline', 'E': 1521847597055, 's': 'BNBBTC', 'k': {'t': 1521847560000, 'T': 1521847619999, 's': 'BNBBTC', 'i': '1m', 'f': 12744199, 'L': 12744341, 'o': '0.00132050', 'c': '0.00131530', 'h': '0.00132200', 'l': '0.00131500', 'v': '5571.10000000', 'n': 143, 'x': False, 'q': '7.33546205', 'V': '2637.29000000', 'Q': '3.47577851', 'B': '0'}}",bnbbtc@kline_1m
0,"{'e': 'kline', 'E': 1521847596412, 's': 'NEOBTC', 'k': {'t': 1521847560000, 'T': 1521847619999, 's': 'NEOBTC', 'i': '1m', 'f': 11202559, 'L': 11202604, 'o': '0.00765700', 'c': '0.00765900', 'h': '0.00766000', 'l': '0.00765400', 'v': '182.38000000', 'n': 46, 'x': False, 'q': '1.39674356', 'V': '131.09000000', 'Q': '1.00404646', 'B': '0'}}",neobtc@kline_1m
However, I'm looking for it to look like this (or even to get rid of the value in column 0...not sure what that is):
0, 'kline', 1521847596412, 'NEOBTC', 1521847560000, 1521847619999, 'NEOBTC', '1m', 11202559, 11202604, 0.00765700, 0.00765900, 0.00766000, 0.00765400, 182.38000000, 46, False, 1.39674356, 131.09000000, 1.00404646, 0,neobtc@kline_1m
0,'kline', 1521847596412, 'NEOBTC', 1521847560000, 1521847619999, 'NEOBTC', '1m', 11202559, 11202604, 0.00765700, 0.00765900, 0.00766000, 0.00765400, 182.38000000, 46, False, 1.39674356, 131.09000000, 1.00404646, 0,neobtc@kline_1m
1,'kline', 1521847597055, 'BNBBTC', 1521847560000, 1521847619999, 'BNBBTC', '1m', 12744199, 12744341, 0.00132050, 0.00131530, 0.00132200, 0.00131500, 5571.10000000, 143, False, 7.33546205, 2637.29000000, 3.47577851, 0,bnbbtc@kline_1m
0,'kline', 1521847596412, 'NEOBTC', 1521847560000, 1521847619999, 'NEOBTC', '1m', 11202559, 11202604,0.00765700, 0.00765900, 0.00766000, 0.00765400, 182.38000000, 46, False, 1.39674356, 131.09000000, 1.00404646, 0,neobtc@kline_1m
I've tried a bunch of things but can't get it to work: JSON to DataFrame, multiple dictionaries to DataFrame, and a few other things. Please let me know if I'm even looking in the right place. Some of the numbers also show up quoted, as 'xxxx.xxxx' instead of just xxxx.xxxx.
What I would eventually like to do is grab all the ticker data and save each ticker to an individual file from the stream above, i.e. NEOBTC and BNBBTC would each be saved in their own csv. I only want it to save when 'x': True, however.
Any help would be greatly appreciated on any of my struggles and thank you for taking the time to look at this.
json python-3.x dataframe websocket
Wondering if anyone can help. I'm trying to grab data from a websocket and put it into a DataFrame without it having multiple dictionaries in each line. The code I'm using to pull from binance is:
from binance.client import Client
from binance.websockets import BinanceSocketManager
from binance.enums import *
import datetime as dt
import matplotlib.pyplot as plt
from matplotlib import style
import pandas as pd
import pandas_datareader.data as web
from pandas.io.json import json_normalize
# Binance API client; the key and secret shown here are placeholders.
client = Client('api-key', 'api-secret')
# Ticker list fetched once at start-up; not referenced again in this snippet.
tickers = client.get_all_tickers()
# Accumulates every websocket message handled by process_message().
df = pd.DataFrame()
# Running total of messages handled by process_message().
count = 0
# Socket manager handle; assigned inside initiate().
bm = None
### Multiplex socket
# Save incoming data
def process_message(msg):
    """Handle one message from the multiplexed websocket.

    Prints the message's 'stream' and 'data' entries, appends the whole
    message as a new row of the module-level ``df``, increments ``count``,
    and then appends the *entire* DataFrame to 'klinesmultiplex_socket.csv'.

    NOTE: because the full DataFrame is written in append mode on every
    call, rows from earlier messages are written to the file again each time.
    """
    global count, df, bm
    stream_name = msg['stream']
    payload = msg['data']
    print("stream: {} data: {}".format(stream_name, payload))
    # Grow the in-memory frame by one row holding the raw message dict.
    df = df.append(msg, ignore_index=True)
    count += 1
    with open('klinesmultiplex_socket.csv', 'a') as csv_out:
        df.to_csv(csv_out, header=False)
    # Earlier attempts, left disabled:
    #df = pd.DataFrame(df)
    #df.to_csv('test.csv')
def initiate():
    """Create the socket manager and start streaming klines.

    Builds a Binance client, stores a new BinanceSocketManager in the
    module-level ``bm``, registers ``process_message`` as the callback for
    the 'bnbbtc@kline_1m' and 'neobtc@kline_1m' streams, and starts the
    manager.
    """
    global bm
    streams = ['bnbbtc@kline_1m', 'neobtc@kline_1m']
    api_client = Client('api-key', 'api-secret')
    bm = BinanceSocketManager(api_client)
    # The connection key is captured but not used again in this snippet.
    conn_key = bm.start_multiplex_socket(streams, process_message)
    bm.start()
initiate()
This is what saves in the csv:
0,"{'e': 'kline', 'E': 1521847596412, 's': 'NEOBTC', 'k': {'t': 1521847560000, 'T': 1521847619999, 's': 'NEOBTC', 'i': '1m', 'f': 11202559, 'L': 11202604, 'o': '0.00765700', 'c': '0.00765900', 'h': '0.00766000', 'l': '0.00765400', 'v': '182.38000000', 'n': 46, 'x': False, 'q': '1.39674356', 'V': '131.09000000', 'Q': '1.00404646', 'B': '0'}}",neobtc@kline_1m
0,"{'e': 'kline', 'E': 1521847596412, 's': 'NEOBTC', 'k': {'t': 1521847560000, 'T': 1521847619999, 's': 'NEOBTC', 'i': '1m', 'f': 11202559, 'L': 11202604, 'o': '0.00765700', 'c': '0.00765900', 'h': '0.00766000', 'l': '0.00765400', 'v': '182.38000000', 'n': 46, 'x': False, 'q': '1.39674356', 'V': '131.09000000', 'Q': '1.00404646', 'B': '0'}}",neobtc@kline_1m
1,"{'e': 'kline', 'E': 1521847597055, 's': 'BNBBTC', 'k': {'t': 1521847560000, 'T': 1521847619999, 's': 'BNBBTC', 'i': '1m', 'f': 12744199, 'L': 12744341, 'o': '0.00132050', 'c': '0.00131530', 'h': '0.00132200', 'l': '0.00131500', 'v': '5571.10000000', 'n': 143, 'x': False, 'q': '7.33546205', 'V': '2637.29000000', 'Q': '3.47577851', 'B': '0'}}",bnbbtc@kline_1m
0,"{'e': 'kline', 'E': 1521847596412, 's': 'NEOBTC', 'k': {'t': 1521847560000, 'T': 1521847619999, 's': 'NEOBTC', 'i': '1m', 'f': 11202559, 'L': 11202604, 'o': '0.00765700', 'c': '0.00765900', 'h': '0.00766000', 'l': '0.00765400', 'v': '182.38000000', 'n': 46, 'x': False, 'q': '1.39674356', 'V': '131.09000000', 'Q': '1.00404646', 'B': '0'}}",neobtc@kline_1m
However, I'm looking for it to look like this (or even to get rid of the value in column 0...not sure what that is):
0, 'kline', 1521847596412, 'NEOBTC', 1521847560000, 1521847619999, 'NEOBTC', '1m', 11202559, 11202604, 0.00765700, 0.00765900, 0.00766000, 0.00765400, 182.38000000, 46, False, 1.39674356, 131.09000000, 1.00404646, 0,neobtc@kline_1m
0,'kline', 1521847596412, 'NEOBTC', 1521847560000, 1521847619999, 'NEOBTC', '1m', 11202559, 11202604, 0.00765700, 0.00765900, 0.00766000, 0.00765400, 182.38000000, 46, False, 1.39674356, 131.09000000, 1.00404646, 0,neobtc@kline_1m
1,'kline', 1521847597055, 'BNBBTC', 1521847560000, 1521847619999, 'BNBBTC', '1m', 12744199, 12744341, 0.00132050, 0.00131530, 0.00132200, 0.00131500, 5571.10000000, 143, False, 7.33546205, 2637.29000000, 3.47577851, 0,bnbbtc@kline_1m
0,'kline', 1521847596412, 'NEOBTC', 1521847560000, 1521847619999, 'NEOBTC', '1m', 11202559, 11202604,0.00765700, 0.00765900, 0.00766000, 0.00765400, 182.38000000, 46, False, 1.39674356, 131.09000000, 1.00404646, 0,neobtc@kline_1m
I've tried a bunch of things but can't get it to work: JSON to DataFrame, multiple dictionaries to DataFrame, and a few other things. Please let me know if I'm even looking in the right place. Some of the numbers also show up quoted, as 'xxxx.xxxx' instead of just xxxx.xxxx.
What I would eventually like to do is grab all the ticker data and save each ticker to an individual file from the stream above, i.e. NEOBTC and BNBBTC would each be saved in their own csv. I only want it to save when 'x': True, however.
Any help would be greatly appreciated on any of my struggles and thank you for taking the time to look at this.
json python-3.x dataframe websocket
json python-3.x dataframe websocket
asked Mar 24 '18 at 0:09


Brian FBrian F
84
84
Is the end goal to produce a CSV file, or to just keep the data in a DataFrame in memory and stop writing the file?
– John Zwinck
Mar 24 '18 at 0:37
The end goal is to produce a csv file. However, I can produce the csv but the data is as above, ie dictionary within a dictionary but I'd like it to look like the bottom example
– Brian F
Mar 24 '18 at 0:47
add a comment |
Is the end goal to produce a CSV file, or to just keep the data in a DataFrame in memory and stop writing the file?
– John Zwinck
Mar 24 '18 at 0:37
The end goal is to produce a csv file. However, I can produce the csv but the data is as above, ie dictionary within a dictionary but I'd like it to look like the bottom example
– Brian F
Mar 24 '18 at 0:47
Is the end goal to produce a CSV file, or to just keep the data in a DataFrame in memory and stop writing the file?
– John Zwinck
Mar 24 '18 at 0:37
Is the end goal to produce a CSV file, or to just keep the data in a DataFrame in memory and stop writing the file?
– John Zwinck
Mar 24 '18 at 0:37
The end goal is to produce a csv file. However, I can produce the csv but the data is as above, ie dictionary within a dictionary but I'd like it to look like the bottom example
– Brian F
Mar 24 '18 at 0:47
The end goal is to produce a csv file. However, I can produce the csv but the data is as above, ie dictionary within a dictionary but I'd like it to look like the bottom example
– Brian F
Mar 24 '18 at 0:47
add a comment |
1 Answer
1
active
oldest
votes
In your current code, each time you receive one message, you append it to a DataFrame (n.b. appending to a DataFrame is always slow and never a good idea). You then append the DataFrame to the file. So when you receive messages A, B, and C, you write A, A, B, A, B, C. That's a major bug.
The other problem with your code is that there is no reason to use Pandas for this. You should use the built-in csv
module. Something like this:
import csv
columns = ['e', 'E', 's', 'k'] # add whatever JSON keys you want
# Python 3's csv module writes str, so the file must be opened in text mode
# ('w', not 'wb'); newline='' lets the csv writer control line endings itself.
# The handle is kept in a name so it can be flushed/closed on shutdown.
csv_file = open('klines.csv', 'w', newline='')
out = csv.DictWriter(csv_file, columns)
Then for each message:
out.writerow(msg)
This will give you the output format you desire (assuming you add all the columns you need).
If all you need to do with each message is write it to the CSV, you can directly pass the writerow()
function as the API callback:
bm.start_multiplex_socket(['bnbbtc@kline_1m'], out.writerow)
This way you don't need process_message()
at all.
Thank you for your reply. I've entered your code and I think it works but I get an error: dict contains fields not in fieldnames: 'data', 'stream'. I think this is because I have the header=False in the original code. This gets rid of this in the data: ,data,stream 0,"{'e': 'kline', 'E': 1521848434118, 's': 'BNBBTC', 'k': {'t': 1521848400000, 'T': 1521848459999, 's': 'BNBBTC', 'i': '1m', 'f': 12747441, 'L': 12747497, 'o': '0.00130880', 'c': '0.00130890', 'h': '0.00131000', 'l': '0.00130860', 'v': '989.09000000', 'n': 57, 'x': False, 'q': '1.29497482', 'V': '757.98000000'.....
– Brian F
Mar 24 '18 at 3:47
add a comment |
Your Answer
StackExchange.ifUsing("editor", function () {
StackExchange.using("externalEditor", function () {
StackExchange.using("snippets", function () {
StackExchange.snippets.init();
});
});
}, "code-snippets");
StackExchange.ready(function() {
var channelOptions = {
tags: "".split(" "),
id: "1"
};
initTagRenderer("".split(" "), "".split(" "), channelOptions);
StackExchange.using("externalEditor", function() {
// Have to fire editor after snippets, if snippets enabled
if (StackExchange.settings.snippets.snippetsEnabled) {
StackExchange.using("snippets", function() {
createEditor();
});
}
else {
createEditor();
}
});
function createEditor() {
StackExchange.prepareEditor({
heartbeatType: 'answer',
autoActivateHeartbeat: false,
convertImagesToLinks: true,
noModals: true,
showLowRepImageUploadWarning: true,
reputationToPostImages: 10,
bindNavPrevention: true,
postfix: "",
imageUploader: {
brandingHtml: "Powered by u003ca class="icon-imgur-white" href="https://imgur.com/"u003eu003c/au003e",
contentPolicyHtml: "User contributions licensed under u003ca href="https://creativecommons.org/licenses/by-sa/3.0/"u003ecc by-sa 3.0 with attribution requiredu003c/au003e u003ca href="https://stackoverflow.com/legal/content-policy"u003e(content policy)u003c/au003e",
allowUrls: true
},
onDemand: true,
discardSelector: ".discard-answer"
,immediatelyShowMarkdownHelp:true
});
}
});
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
StackExchange.ready(
function () {
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fstackoverflow.com%2fquestions%2f49459883%2fpython3-grabbing-data-from-websocket-and-putting-it-into-a-dataframe%23new-answer', 'question_page');
}
);
Post as a guest
Required, but never shown
1 Answer
1
active
oldest
votes
1 Answer
1
active
oldest
votes
active
oldest
votes
active
oldest
votes
In your current code, each time you receive one message, you append it to a DataFrame (n.b. appending to a DataFrame is always slow and never a good idea). You then append the DataFrame to the file. So when you receive messages A, B, and C, you write A, A, B, A, B, C. That's a major bug.
The other problem with your code is that there is no reason to use Pandas for this. You should use the built-in csv
module. Something like this:
import csv
columns = ['e', 'E', 's', 'k'] # add whatever JSON keys you want
# Python 3's csv module writes str, so the file must be opened in text mode
# ('w', not 'wb'); newline='' lets the csv writer control line endings itself.
# The handle is kept in a name so it can be flushed/closed on shutdown.
csv_file = open('klines.csv', 'w', newline='')
out = csv.DictWriter(csv_file, columns)
Then for each message:
out.writerow(msg)
This will give you the output format you desire (assuming you add all the columns you need).
If all you need to do with each message is write it to the CSV, you can directly pass the writerow()
function as the API callback:
bm.start_multiplex_socket(['bnbbtc@kline_1m'], out.writerow)
This way you don't need process_message()
at all.
Thank you for your reply. I've entered your code and I think it works but I get an error: dict contains fields not in fieldnames: 'data', 'stream'. I think this is because I have the header=False in the original code. This gets rid of this in the data: ,data,stream 0,"{'e': 'kline', 'E': 1521848434118, 's': 'BNBBTC', 'k': {'t': 1521848400000, 'T': 1521848459999, 's': 'BNBBTC', 'i': '1m', 'f': 12747441, 'L': 12747497, 'o': '0.00130880', 'c': '0.00130890', 'h': '0.00131000', 'l': '0.00130860', 'v': '989.09000000', 'n': 57, 'x': False, 'q': '1.29497482', 'V': '757.98000000'.....
– Brian F
Mar 24 '18 at 3:47
add a comment |
In your current code, each time you receive one message, you append it to a DataFrame (n.b. appending to a DataFrame is always slow and never a good idea). You then append the DataFrame to the file. So when you receive messages A, B, and C, you write A, A, B, A, B, C. That's a major bug.
The other problem with your code is that there is no reason to use Pandas for this. You should use the built-in csv
module. Something like this:
import csv
columns = ['e', 'E', 's', 'k'] # add whatever JSON keys you want
# Python 3's csv module writes str, so the file must be opened in text mode
# ('w', not 'wb'); newline='' lets the csv writer control line endings itself.
# The handle is kept in a name so it can be flushed/closed on shutdown.
csv_file = open('klines.csv', 'w', newline='')
out = csv.DictWriter(csv_file, columns)
Then for each message:
out.writerow(msg)
This will give you the output format you desire (assuming you add all the columns you need).
If all you need to do with each message is write it to the CSV, you can directly pass the writerow()
function as the API callback:
bm.start_multiplex_socket(['bnbbtc@kline_1m'], out.writerow)
This way you don't need process_message()
at all.
Thank you for your reply. I've entered your code and I think it works but I get an error: dict contains fields not in fieldnames: 'data', 'stream'. I think this is because I have the header=False in the original code. This gets rid of this in the data: ,data,stream 0,"{'e': 'kline', 'E': 1521848434118, 's': 'BNBBTC', 'k': {'t': 1521848400000, 'T': 1521848459999, 's': 'BNBBTC', 'i': '1m', 'f': 12747441, 'L': 12747497, 'o': '0.00130880', 'c': '0.00130890', 'h': '0.00131000', 'l': '0.00130860', 'v': '989.09000000', 'n': 57, 'x': False, 'q': '1.29497482', 'V': '757.98000000'.....
– Brian F
Mar 24 '18 at 3:47
add a comment |
In your current code, each time you receive one message, you append it to a DataFrame (n.b. appending to a DataFrame is always slow and never a good idea). You then append the DataFrame to the file. So when you receive messages A, B, and C, you write A, A, B, A, B, C. That's a major bug.
The other problem with your code is that there is no reason to use Pandas for this. You should use the built-in csv
module. Something like this:
import csv
columns = ['e', 'E', 's', 'k'] # add whatever JSON keys you want
# Python 3's csv module writes str, so the file must be opened in text mode
# ('w', not 'wb'); newline='' lets the csv writer control line endings itself.
# The handle is kept in a name so it can be flushed/closed on shutdown.
csv_file = open('klines.csv', 'w', newline='')
out = csv.DictWriter(csv_file, columns)
Then for each message:
out.writerow(msg)
This will give you the output format you desire (assuming you add all the columns you need).
If all you need to do with each message is write it to the CSV, you can directly pass the writerow()
function as the API callback:
bm.start_multiplex_socket(['bnbbtc@kline_1m'], out.writerow)
This way you don't need process_message()
at all.
In your current code, each time you receive one message, you append it to a DataFrame (n.b. appending to a DataFrame is always slow and never a good idea). You then append the DataFrame to the file. So when you receive messages A, B, and C, you write A, A, B, A, B, C. That's a major bug.
The other problem with your code is that there is no reason to use Pandas for this. You should use the built-in csv
module. Something like this:
import csv
columns = ['e', 'E', 's', 'k'] # add whatever JSON keys you want
# Python 3's csv module writes str, so the file must be opened in text mode
# ('w', not 'wb'); newline='' lets the csv writer control line endings itself.
# The handle is kept in a name so it can be flushed/closed on shutdown.
csv_file = open('klines.csv', 'w', newline='')
out = csv.DictWriter(csv_file, columns)
Then for each message:
out.writerow(msg)
This will give you the output format you desire (assuming you add all the columns you need).
If all you need to do with each message is write it to the CSV, you can directly pass the writerow()
function as the API callback:
bm.start_multiplex_socket(['bnbbtc@kline_1m'], out.writerow)
This way you don't need process_message()
at all.
answered Mar 24 '18 at 1:14


John ZwinckJohn Zwinck
153k16177293
153k16177293
Thank you for your reply. I've entered your code and I think it works but I get an error: dict contains fields not in fieldnames: 'data', 'stream'. I think this is because I have the header=False in the original code. This gets rid of this in the data: ,data,stream 0,"{'e': 'kline', 'E': 1521848434118, 's': 'BNBBTC', 'k': {'t': 1521848400000, 'T': 1521848459999, 's': 'BNBBTC', 'i': '1m', 'f': 12747441, 'L': 12747497, 'o': '0.00130880', 'c': '0.00130890', 'h': '0.00131000', 'l': '0.00130860', 'v': '989.09000000', 'n': 57, 'x': False, 'q': '1.29497482', 'V': '757.98000000'.....
– Brian F
Mar 24 '18 at 3:47
add a comment |
Thank you for your reply. I've entered your code and I think it works but I get an error: dict contains fields not in fieldnames: 'data', 'stream'. I think this is because I have the header=False in the original code. This gets rid of this in the data: ,data,stream 0,"{'e': 'kline', 'E': 1521848434118, 's': 'BNBBTC', 'k': {'t': 1521848400000, 'T': 1521848459999, 's': 'BNBBTC', 'i': '1m', 'f': 12747441, 'L': 12747497, 'o': '0.00130880', 'c': '0.00130890', 'h': '0.00131000', 'l': '0.00130860', 'v': '989.09000000', 'n': 57, 'x': False, 'q': '1.29497482', 'V': '757.98000000'.....
– Brian F
Mar 24 '18 at 3:47
Thank you for your reply. I've entered your code and I think it works but I get an error: dict contains fields not in fieldnames: 'data', 'stream'. I think this is because I have the header=False in the original code. This gets rid of this in the data: ,data,stream 0,"{'e': 'kline', 'E': 1521848434118, 's': 'BNBBTC', 'k': {'t': 1521848400000, 'T': 1521848459999, 's': 'BNBBTC', 'i': '1m', 'f': 12747441, 'L': 12747497, 'o': '0.00130880', 'c': '0.00130890', 'h': '0.00131000', 'l': '0.00130860', 'v': '989.09000000', 'n': 57, 'x': False, 'q': '1.29497482', 'V': '757.98000000'.....
– Brian F
Mar 24 '18 at 3:47
Thank you for your reply. I've entered your code and I think it works but I get an error: dict contains fields not in fieldnames: 'data', 'stream'. I think this is because I have the header=False in the original code. This gets rid of this in the data: ,data,stream 0,"{'e': 'kline', 'E': 1521848434118, 's': 'BNBBTC', 'k': {'t': 1521848400000, 'T': 1521848459999, 's': 'BNBBTC', 'i': '1m', 'f': 12747441, 'L': 12747497, 'o': '0.00130880', 'c': '0.00130890', 'h': '0.00131000', 'l': '0.00130860', 'v': '989.09000000', 'n': 57, 'x': False, 'q': '1.29497482', 'V': '757.98000000'.....
– Brian F
Mar 24 '18 at 3:47
add a comment |
Thanks for contributing an answer to Stack Overflow!
- Please be sure to answer the question. Provide details and share your research!
But avoid …
- Asking for help, clarification, or responding to other answers.
- Making statements based on opinion; back them up with references or personal experience.
To learn more, see our tips on writing great answers.
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
StackExchange.ready(
function () {
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fstackoverflow.com%2fquestions%2f49459883%2fpython3-grabbing-data-from-websocket-and-putting-it-into-a-dataframe%23new-answer', 'question_page');
}
);
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Is the end goal to produce a CSV file, or to just keep the data in a DataFrame in memory and stop writing the file?
– John Zwinck
Mar 24 '18 at 0:37
The end goal is to produce a csv file. However, I can produce the csv but the data is as above, ie dictionary within a dictionary but I'd like it to look like the bottom example
– Brian F
Mar 24 '18 at 0:47