# Source code for bandl.nse_data

import os
import concurrent.futures
import io
import math
import json

from datetime import datetime,timedelta
import pandas as pd
from requests.exceptions import HTTPError
from bs4 import BeautifulSoup

from bandl.nse_urls import NseUrls
from bandl.helper import get_formated_date,get_date_range
from bandl.request import RequestUrl

# Connection defaults handed to RequestUrl by NseData.__init__.
DEFAULT_TIMEOUT = 5  # seconds
MAX_RETRIES = 3

# Longest span, in days, that NseData.get_data asks for in a single request;
# longer ranges are split into several requests.
INDEX_DATA_LIMIT = 99
STOCK_DATA_LIMIT = 240

# Number of business days of participant OI fetched when no period is given
# (about three months of 22 trading days each).
PART_OI_DAYS = 3 * 22

# Silence pandas' chained-assignment warning for the whole process.
pd.set_option("mode.chained_assignment", None)

class NseData:
    """Fetch option-chain, participant open-interest and historical data from NSE.

    Every request goes through a ``RequestUrl`` instance and uses the URLs and
    headers supplied by ``NseUrls``.
    """

    def __init__(self, timeout=DEFAULT_TIMEOUT, max_retries=MAX_RETRIES):
        """Create the URL helper and the request wrapper.

        :param timeout: forwarded to ``RequestUrl``, defaults to DEFAULT_TIMEOUT
        :type timeout: int, optional
        :param max_retries: forwarded to ``RequestUrl``, defaults to MAX_RETRIES
        :type max_retries: int, optional
        """
        self.__nse_urls = NseUrls()
        self.__headers = self.__nse_urls.header
        # create request
        self.__request = RequestUrl(timeout, max_retries)

    def __get_oc_json(self, symbol):
        """Download and decode the option-chain JSON for ``symbol``.

        :param symbol: stock/index symbol
        :type symbol: string
        :return: decoded JSON document
        :rtype: dict
        """
        # OC_FIRST_TRY is requested first and its response discarded.
        # NOTE(review): presumably this primes the session (cookies) so the
        # real request is accepted -- confirm against RequestUrl.
        self.__request.get(self.__nse_urls.OC_FIRST_TRY, headers=self.__headers)
        oc_url = self.__nse_urls.get_oc_url(symbol)
        page = self.__request.get(oc_url, headers=self.__headers)
        return json.loads(page.text)

    def get_underlying_val(self, symbol):
        """get value of underlying asset

        :param symbol: stock/index symbol
        :type symbol: string
        :raises Exception: NSE connection related
        :return: underlying value
        :rtype: integer
        """
        try:
            return self.__get_oc_json(symbol)['records']['underlyingValue']
        except Exception as err:
            raise Exception("something went wrong while reading nse URL :", str(err)) from err

    def get_indices(self):
        """To get list of NSE indices

        :raises Exception: NSE connection or page parsing related
        :return: every option value containing "NIFTY", plus "INDIA VIX"
        :rtype: list
        """
        try:
            index_page = self.__request.get(self.__nse_urls.INDEX_URL, headers=self.__headers)
            soup = BeautifulSoup(index_page.text, 'lxml')
            table = soup.find("select", {"id": "indexType"})
            indices = []
            for option in table.find_all("option"):
                value = option.get("value")
                # an <option> without a value attribute yields None; skip it
                if value and "NIFTY" in value:
                    indices.append(value)
            # lets append india vix as well
            indices.append("INDIA VIX")
            return indices
        except Exception as err:
            raise Exception("Error occurred while getting NSE indices :", str(err)) from err

    @staticmethod
    def __format_strikes(strike_prices, underlying_val, level):
        """Group strikes around the one closest to ``underlying_val``.

        :param strike_prices: strike prices in the order delivered by NSE
        :type strike_prices: list
        :param underlying_val: current value of the underlying
        :param level: how many strikes to take on each side of the ATM strike
        :type level: int
        :return: ``{"ATM": strike, "ITM": [...], "OTM": [...]}``; "OTM" holds the
            strikes following the ATM one, "ITM" the strikes preceding it
            (nearest first)
        :rtype: dict
        """
        strike_count = len(strike_prices)
        atm_index = min(range(strike_count),
                        key=lambda i: abs(strike_prices[i] - underlying_val))
        strikes = {"ATM": strike_prices[atm_index], "ITM": [], "OTM": []}
        for offset in range(1, level + 1):
            if atm_index + offset < strike_count:
                strikes["OTM"].append(strike_prices[atm_index + offset])
            # >= 0 so that the very first strike in the list can be returned too
            if atm_index - offset >= 0:
                strikes["ITM"].append(strike_prices[atm_index - offset])
        return strikes

    def get_oc_strike_prices(self, symbol, format=True, level=3):
        """To get options strike prices or in OTM,ITM,ATM format

        :param symbol: stock/index symbol
        :type symbol: string
        :param format: format in OTM,ITM,ATM, defaults to True
        :type format: bool, optional
        :param level: level of strike prices for ITM/OTM, defaults to 3
        :type level: int, optional
        :raises Exception: NSE connection related
        :return: strike prices
        :rtype: list/dictionary
        """
        try:
            records = self.__get_oc_json(symbol)['records']
            underlying_val = records['underlyingValue']
            strike_prices = records['strikePrices']
            # if plain strike prices are requested then return as it is
            if not format:
                return strike_prices
            # else lets format in OTM/ITM/ATM
            return self.__format_strikes(strike_prices, underlying_val, level)
        except Exception as err:
            raise Exception("something went wrong while fetching nse :", str(err)) from err

    def get_oc_exp_dates(self, symbol):
        """get current available expiry dates

        :param symbol: stock/index symbol
        :type symbol: string
        :raises Exception: NSE connection related
        :return: expiry dates
        :rtype: list
        """
        try:
            return self.__get_oc_json(symbol)['records']['expiryDates']
        except Exception as err:
            raise Exception("something went wrong while fetching nse :", str(err)) from err

    def get_option_data(self, symbol, expiry_date=None, strikes=None):
        """get option data

        :param symbol: stock/index symbol
        :type symbol: string
        :param expiry_date: expiry date (parsed with ``pandas.to_datetime``),
            defaults to the data NSE returns under ``filtered``
        :type expiry_date: string, optional
        :param strikes: Dictionary having OTM,ITM,ATM strikes, defaults to None
        :type strikes: Dictionary, optional
        :raises Exception: Connection related
        :return: underlying_val, option data keyed by strike price
        :rtype: pair
        """
        try:
            oc_json = self.__get_oc_json(symbol)
            underlying_val = oc_json['records']['underlyingValue']
            if not expiry_date:
                oc_data = oc_json['filtered']['data']
            else:
                # Previously this branch left oc_data undefined and always failed.
                # NOTE(review): assumes every row under records.data carries an
                # "expiryDate" string in the same form as records.expiryDates --
                # confirm against a live response.
                wanted = pd.to_datetime(expiry_date)
                matching = {
                    exp for exp in oc_json['records']['expiryDates']
                    if pd.to_datetime(exp) == wanted
                }
                oc_data = [
                    row for row in oc_json['records']['data']
                    if row.get("expiryDate") in matching
                ]

            wanted_strikes = None
            if strikes:
                wanted_strikes = {strikes["ATM"], *strikes["ITM"], *strikes["OTM"]}

            oc_mapped_data = {}
            for each_data in oc_data:
                strike_price = each_data.get("strikePrice")
                if wanted_strikes is not None and strike_price not in wanted_strikes:
                    continue
                oc_mapped_data[strike_price] = {
                    "CE": each_data.get("CE"),
                    "PE": each_data.get("PE"),
                }
            return underlying_val, oc_mapped_data
        except Exception as err:
            raise Exception("something went wrong while fetching nse :", str(err)) from err

    def get_option_chain_df(self, symbol, expiry_date=None, dayfirst=False):
        """ This function fetches option chain data from NSE and returns in pandas.DataFrame

        :param symbol: stock/index symbol
        :type symbol: string
        :param expiry_date: expiry date (all date formats accepted), defaults to next near
        :type expiry_date: string
        :param dayfirst: True if date format is european style DD/MM/YYYY, defaults to False
        :type dayfirst: bool, optional
        :raises Exception: NSE connection related
        :raises Exception: In html parsing
        :return: option chain
        :rtype: pandas.DataFrame
        """
        try:
            if not expiry_date:
                expiry_date = self.get_oc_exp_dates(symbol)[0]
            oc_url = self.__nse_urls.get_option_chain_url(symbol, expiry_date, dayfirst)
            oc_page = self.__request.get(oc_url, headers=self.__headers)
        except Exception as err:
            raise Exception("Error occurred while connecting NSE :", str(err)) from err

        try:
            # read_html wants a file-like object; literal HTML strings are deprecated
            dfs = pd.read_html(io.StringIO(oc_page.text))
            # the option chain is the second table on the page
            return dfs[1]
        except Exception as err:
            raise Exception("Error occurred while reading html :", str(err)) from err

    def __get_file_path(self, file_name, file_path=None, is_use_default_name=True):
        """Build the path of the excel file to write.

        :param file_name: file name (without extension)
        :type file_name: string
        :param file_path: file directory or file path, defaults to the current
            working directory
        :type file_path: string, optional
        :param is_use_default_name: append a timestamp to ``file_name`` and use
            it as the file name; when False and ``file_path`` is an existing
            file, that file path is returned unchanged. Defaults to True
        :type is_use_default_name: bool, optional
        :raises Exception: when the path cannot be built
        :return: file path
        :rtype: string
        """
        try:
            if not file_path:
                file_path = os.getcwd()
            if os.path.isfile(file_path):
                if not is_use_default_name:
                    return file_path
                # a default name is wanted, so only keep the parent directory
                file_path = os.path.dirname(file_path)
            # day_MonthName_hour_minute of the current local time
            dt_string = datetime.now().strftime("%d_%B_%H_%M")
            return os.path.join(file_path, file_name + "_" + dt_string + ".xlsx")
        except Exception as err:
            # raising is clearer than returning None and failing later in to_excel
            raise Exception("Error while naming file. Error: ", str(err)) from err

    def get_option_chain_excel(self, symbol, expiry_date=None, dayfirst=False,
                               file_path=None, is_use_default_name=True):
        """Fetches NSE option chain data and returns in the form of excel (.xlsx)

        :param symbol: stock/index symbol
        :type symbol: string
        :param expiry_date: expiry date (all date formats accepted), defaults to next near
        :type expiry_date: string
        :param dayfirst: True if date format is european style DD/MM/YYYY, defaults to False
        :type dayfirst: bool, optional
        :param file_path: file/folder path, defaults to None
        :type file_path: string, optional
        :param is_use_default_name: to get filename as current timestamp, defaults to True
        :type is_use_default_name: bool, optional
        :raises Exception: NSE connection related
        """
        try:
            if not expiry_date:
                expiry_date = self.get_oc_exp_dates(symbol)[0]
            df = self.get_option_chain_df(symbol, expiry_date, dayfirst)
            file_name = symbol + "_" + expiry_date
            excel_path = self.__get_file_path(file_name, file_path, is_use_default_name)
            # sheet name must be passed by keyword on current pandas
            df.to_excel(excel_path, sheet_name=file_name)
        except Exception as err:
            raise Exception("Error occurred while getting excel :", str(err)) from err

    def __join_part_oi_dfs(self, df_join, df_joiner):
        """will append joiner to join for oi_dfs (``df_join`` is updated in place)

        :param df_join: Dictionary of participants
        :type df_join: dict
        :param df_joiner: Dictionary of participants
        :type df_joiner: dict
        """
        for client in df_join:
            df_join[client] = self.__join_dfs(df_join[client], df_joiner[client]).sort_index()

    def __join_dfs(self, join, joiner):
        """will append joiner to join for oi_dfs

        :param join: df which will be appended
        :type join: pandas.DataFrame
        :param joiner: df which we want to append
        :type joiner: pandas.DataFrame
        :return: merged data frame
        :rtype: pandas.DataFrame
        """
        # DataFrame.append no longer exists in pandas 2.x; concat is the replacement
        return pd.concat([join, joiner])

    def get_part_oi_df(self, start=None, end=None, periods=None, dayfirst=False, workers=None):
        """Return dictionary of participants containing data frames

        :param start: start date , defaults to None
        :type start: string, optional
        :param end: end date, defaults to None
        :type end: string, optional
        :param periods: number of days, defaults to PART_OI_DAYS; ignored when
            both ``start`` and ``end`` are given
        :type periods: integer, optional
        :param dayfirst: True if date format is european style DD/MM/YYYY, defaults to False
        :type dayfirst: bool, optional
        :param workers: Number of threads for requesting nse, defaults to None
        :type workers: integer, optional
        :raises Exception: NSE Connection/Request overload
        :return: participant wise open interest, one data frame per participant
            ("Client", "DII", "FII", "Pro", "TOTAL")
        :rtype: dict
        """
        try:
            # format date just in case
            if start:
                start = get_formated_date(start, dayfirst=dayfirst)
            if end:
                end = get_formated_date(end, dayfirst=dayfirst)
            # if both none, we set end to today
            if not start and not end:
                end = get_formated_date()
            if not periods:
                periods = PART_OI_DAYS

            # date_range accepts exactly three of start/end/periods/freq, so a
            # fully bounded request must not pass periods as well
            bounded = bool(start and end)
            dates = pd.date_range(start=start, end=end,
                                  periods=None if bounded else periods, freq='B')
            url_date = [(self.__nse_urls.get_participant_oi_url(date), date) for date in dates]

            oi_clm = self.__nse_urls.PART_OI_CLM
            participants = ("Client", "DII", "FII", "Pro", "TOTAL")
            # one NaN-filled frame per participant, a row for every requested day
            oi_dfs = {name: pd.DataFrame(columns=oi_clm, index=dates) for name in participants}

            if not workers:
                # cpu_count() may return None
                workers = (os.cpu_count() or 1) * 2

            with concurrent.futures.ThreadPoolExecutor(max_workers=workers) as executor:
                # headers go by keyword, like every other request in this class
                responses = {
                    executor.submit(self.__request.get, url, headers=self.__headers): date
                    for url, date in url_date
                }
                for res in concurrent.futures.as_completed(responses):
                    date = responses[res]
                    try:
                        csv = res.result()
                    except Exception:
                        # might be holiday
                        continue
                    raw = pd.read_csv(io.StringIO(csv.content.decode('utf-8')))
                    # the first data row carries the real column names
                    df = raw.iloc[1:].copy()
                    df.columns = raw.iloc[0]
                    df = df.set_index('Client Type')
                    # fill this day's row for every participant
                    for name in participants:
                        oi_dfs[name].loc[date] = df.loc[name]

            if not oi_dfs['Client'].empty:
                # remove nan row (days for which nothing was downloaded)
                for client in oi_dfs:
                    oi_dfs[client].dropna(inplace=True)

                # if start and end are both given, the range is fixed
                if bounded:
                    return oi_dfs

                # if holiday occurred in business day, lets retrieve more data
                # equivalent to holidays.
                if oi_dfs['Client'].shape[0] < periods:
                    new_periods = periods - oi_dfs['Client'].shape[0]
                    try:
                        if start:
                            # only start: continue after the last day we have
                            s_from = oi_dfs['Client'].index[-1] + timedelta(1)
                            e_till = None
                        else:
                            # only end: go further into the past
                            s_from = None
                            e_till = oi_dfs['Client'].index[0] - timedelta(1)
                    except IndexError as err:
                        # nothing at all was downloaded
                        raise Exception(
                            "NSE Access error.size down/clean cookies to resolve the issue."
                        ) from err
                    except Exception as exc:
                        raise Exception("participant OI error: ", str(exc)) from exc

                    oi_dfs_new = self.get_part_oi_df(start=s_from, end=e_till,
                                                     periods=new_periods, workers=workers)
                    self.__join_part_oi_dfs(oi_dfs, oi_dfs_new)

            return oi_dfs

        except Exception as err:
            raise Exception("Error occurred while getting part_oi :", str(err)) from err

    def __parse_indexdata(self, res_txt, symbol):
        """Turn the HTML table returned for an index into a data frame indexed by "Date".

        :param res_txt: HTML text of the response
        :type res_txt: string
        :param symbol: index symbol (contains "NIFTY", or is "INDIA VIX")
        :type symbol: string
        :raises Exception: when the table has fewer than two rows
        :raises ValueError: when no column layout is known for ``symbol``
        :return: parsed table without its last row
        :rtype: pandas.DataFrame
        """
        # read_html wants a file-like object; literal HTML strings are deprecated
        dfs = pd.read_html(io.StringIO(res_txt))[0]
        if dfs.shape[0] < 2:
            raise Exception("No record found")

        if "NIFTY" in symbol:
            fined_dfs = dfs.iloc[0:].copy()
            fined_dfs.columns = self.__nse_urls.INDEX_DATA_CLM
        elif symbol == "INDIA VIX":
            # the VIX table has two extra leading rows: skip one, drop the next
            fined_dfs = dfs.iloc[1:].copy()
            fined_dfs = fined_dfs.drop(fined_dfs.index[0])
            fined_dfs.columns = self.__nse_urls.VIX_DATA_CLM
        else:
            raise ValueError("No index table layout known for symbol: " + str(symbol))

        # the last row of the table is not a data row
        fined_dfs = fined_dfs.drop(fined_dfs.index[-1])
        fined_dfs = fined_dfs.set_index("Date")
        return fined_dfs

    def __get_datarange_intv(self, start, end, intv):
        """Split ``start``..``end`` into consecutive ranges of ``intv`` days.

        :param start: first day
        :type start: date/datetime
        :param end: last day
        :type end: date/datetime
        :param intv: length of each range in days
        :type intv: int
        :return: list of ``(range_start, range_end)`` pairs; the last pair ends
            at ``end``
        :rtype: list
        """
        chunk_count = math.ceil((end - start).days / intv)
        # always at least one chunk, even when start == end
        starts = [start + timedelta(intv * i) for i in range(chunk_count)] or [start]
        # every chunk ends the day before the next one begins
        date_ranges = [(begin, nxt - timedelta(1)) for begin, nxt in zip(starts, starts[1:])]
        date_ranges.append((starts[-1], end))
        return date_ranges

    def __get_data_adjusted(self, dfs, symbol, series="EQ", start=None, end=None, periods=None):
        """Top up ``dfs`` when it holds fewer rows than ``periods``.

        ``dfs`` is expected newest-first: the missing rows are requested either
        after its first index entry (only ``start`` given) or before its last
        one (every other case).

        :param dfs: data fetched so far
        :type dfs: pandas.DataFrame
        :param symbol: stock/index symbol
        :type symbol: string
        :param series: segment, defaults to "EQ"
        :type series: string, optional
        :param start: start date of the original request, defaults to None
        :param end: end date of the original request, defaults to None
        :param periods: number of rows wanted, defaults to None (no top up)
        :type periods: integer, optional
        :raises Exception: when ``dfs`` is empty
        :return: ``dfs``, extended when more data could be fetched
        :rtype: pandas.DataFrame
        """
        if not periods or dfs.shape[0] >= periods:
            return dfs

        new_periods = periods - dfs.shape[0]
        try:
            if start and not end:
                # only start: look after the newest row
                s_from, e_till = dfs.index[0] + timedelta(1), None
            else:
                # otherwise go further into the past
                s_from, e_till = None, dfs.index[-1] - timedelta(1)
        except IndexError as err:
            raise Exception("NSE Access error.") from err
        except Exception as exc:
            raise Exception("Stock data error: ", str(exc)) from exc

        try:
            dfs_new = self.get_data(symbol, series, start=s_from, end=e_till, periods=new_periods)
            dfs = self.__join_dfs(dfs, dfs_new).sort_index(ascending=False)
        except Exception:
            # best effort: data may not be available, keep what we have
            pass
        return dfs

    def __format_df(self, dfs):
        """Title-case the column names of ``dfs`` in place (no-op when empty)."""
        if not dfs.empty:
            dfs.columns = dfs.columns.str.title()

    def get_data(self, symbol, series="EQ", start=None, end=None, periods=None, dayfirst=False):
        """To get NSE stock data

        :param symbol: stock/index symbol
        :type symbol: string
        :param series: segment, defaults to "EQ"
        :type series: string, optional
        :param start: start date, defaults to None
        :type start: string, optional
        :param end: end date, defaults to None
        :type end: string, optional
        :param periods: number of days, defaults to None
        :type periods: integer, optional
        :param dayfirst: True if date format is european style DD/MM/YYYY, defaults to False
        :type dayfirst: bool, optional
        :raises Exception: NSE Connection Related
        :return: stock data
        :rtype: pandas.DataFrame
        """
        try:
            # Step1: get the date range
            s_from, e_till = get_date_range(start=start, end=end, periods=periods,
                                            dayfirst=dayfirst)
            if s_from > e_till:
                raise ValueError("End should grater than start.")

            is_index = self.__nse_urls.is_index(symbol)
            data_limit = INDEX_DATA_LIMIT if is_index else STOCK_DATA_LIMIT
            data_days = e_till - s_from

            # response is not used.
            # NOTE(review): presumably primes the session before the data
            # request -- confirm against RequestUrl.
            self.__request.get(self.__nse_urls.BASE_URL, headers=self.__headers)

            if data_days.days > data_limit:
                # range too long for one request: fetch it in parallel chunks
                date_ranges = self.__get_datarange_intv(s_from, e_till, data_limit)
                workers = len(date_ranges)
                dfs = []
                with concurrent.futures.ThreadPoolExecutor(max_workers=workers) as executor:
                    # series must travel with every chunk, otherwise it falls
                    # back to "EQ"
                    responses = [
                        executor.submit(self.get_data, symbol=symbol, series=series,
                                        start=start_, end=end_, dayfirst=dayfirst)
                        for start_, end_ in date_ranges
                    ]
                    for res in concurrent.futures.as_completed(responses):
                        try:
                            dfs.append(res.result())
                        except Exception:
                            # might be holiday/no record
                            pass
                if not dfs:
                    # pd.concat([]) would only report "No objects to concatenate"
                    raise Exception("No record found")
                all_dfs = pd.concat(dfs).sort_index(ascending=False)
                return self.__get_data_adjusted(all_dfs, symbol, series=series,
                                                start=start, end=end, periods=periods)

            data_url = self.__nse_urls.get_stock_data_url(
                symbol, series=series, start=s_from, end=e_till
            )
            csv = self.__request.get(data_url, headers=self.__headers)

            # if it is index, we need to read the html table instead of a csv
            if is_index:
                dfs = self.__parse_indexdata(csv.text, symbol)
            else:
                dfs = pd.read_csv(io.StringIO(csv.content.decode('utf-8')))
                # the column is addressed with a trailing space in its name
                dfs.set_index('Date ', inplace=True)
            # Converting the index as date
            dfs.index = pd.to_datetime(dfs.index)
            dfs = self.__get_data_adjusted(dfs, symbol, series=series,
                                           start=start, end=end, periods=periods)
            # format dataframe
            self.__format_df(dfs)
            return dfs

        except Exception as err:
            raise Exception("Error occurred while fetching stock data :", str(err)) from err