# Source code for bandl.nse_data

import os
import concurrent.futures
import io
import math
import json

from datetime import datetime,timedelta
import pandas as pd
from requests.exceptions import HTTPError
from bs4 import BeautifulSoup

from bandl.nse_urls import NseUrls
from bandl.helper import get_formated_date,get_date_range
from bandl.request import RequestUrl

# Default parameters for the URL connection (passed to RequestUrl by NseData).
DEFAULT_TIMEOUT = 5 # seconds
MAX_RETRIES = 3
# Largest date span, in days, that get_data requests in a single call;
# longer ranges are split into chunks of this size (index vs. stock symbols).
INDEX_DATA_LIMIT = 99
STOCK_DATA_LIMIT = 240
# Default number of business days fetched by get_part_oi_df when no range is
# given: roughly the last three months (22 business days per month).
PART_OI_DAYS = 22*3

# Disable pandas' chained-assignment warning for the whole process.
pd.set_option('mode.chained_assignment', None)

class NseData:
    def __init__(self,timeout=DEFAULT_TIMEOUT,max_retries=MAX_RETRIES):
        """Prepare the URL catalogue, the default headers and the requester.

        :param timeout: forwarded to RequestUrl, defaults to DEFAULT_TIMEOUT
        :type timeout: int, optional
        :param max_retries: forwarded to RequestUrl, defaults to MAX_RETRIES
        :type max_retries: int, optional
        """
        url_catalogue = NseUrls()
        # headers come from the URL catalogue and are reused for every request
        self.__nse_urls, self.__headers = url_catalogue, url_catalogue.header
        self.__request = RequestUrl(timeout,max_retries)

[docs]    def get_underlying_val(self,symbol):
        """get value of underlying asset
        :param symbol: stock/index symbol
        :type symbol: string
        :raises Exception: NSE connection related
        :return: underlying value
        :rtype: integer
        """
        try:
            hack = self.__request.get(self.__nse_urls.OC_FIRST_TRY,headers=self.__headers)
            base_oc_url = self.__nse_urls.get_oc_url(symbol)
            page = self.__request.get(base_oc_url,headers=self.__headers)
            oc_json = json.loads(page.text)
            underlying_val = oc_json['records']['underlyingValue']
            return underlying_val

        except Exception as err:
            raise Exception("something went wrong while reading nse URL :", str(err))

[docs]    def get_indices(self):
        """To get list of NSE indices
        """
        try:
            index_page = self.__request.get(self.__nse_urls.INDEX_URL,headers=self.__headers)
            soup = BeautifulSoup(index_page.text,'lxml')
            table = soup.find("select",{"id":"indexType"})
            indices_data = table.find_all("option")
            indices = [index.get("value") for index in indices_data if "NIFTY" in index.get("value")]

            #lets append india vix as well
            indices.append("INDIA VIX")
            return indices
        except Exception as err:
            raise Exception("Error occurred while getting NSE indices :", str(err))

[docs]    def get_oc_strike_prices(self,symbol,format=True,level=3):
        """To get options strike prices or in OTM,ITM,ATM format

        :param symbol: stock/index symbol
        :type symbol: string
        :param format: format in OTM,ITM,ATM, defaults to True
        :type format: bool, optional
        :param level: level of strike prices for ITM/OTM, defaults to 3
        :type level: int, optional
        :raises Exception: NSE connection related
        :return: strike prices
        :rtype: list/dictionary
        """
        try:
            hack = self.__request.get(self.__nse_urls.OC_FIRST_TRY,headers=self.__headers)
            base_oc_url = self.__nse_urls.get_oc_url(symbol)
            page = self.__request.get(base_oc_url,headers=self.__headers)
            oc_json = json.loads(page.text)
            underlying_val = oc_json['records']['underlyingValue']
            strikePrices = oc_json['records']['strikePrices']
            #if normat strike prices are requested then return as it is
            if not format:
                return strikePrices

            strike_count = len(strikePrices)
            #else lets format in OTM/ITM/ATM
            atm_index= min(range(strike_count), key = lambda i: abs(strikePrices[i]-underlying_val))
            strikes = {"ATM":strikePrices[atm_index],"ITM":[],"OTM":[]}
            for index in range(level):
                if index + atm_index < strike_count -1:
                    strikes["OTM"].append(strikePrices[index+atm_index+1])
                if atm_index - index > 1:
                    strikes["ITM"].append(strikePrices[atm_index-index-1])
            return strikes

        except Exception as err:
            raise Exception("something went wrong while fetching nse :", str(err))

[docs]    def get_oc_exp_dates(self,symbol):

        """get current  available expiry dates

        :param symbol: stock/index symbol
        :type symbol: string
        :raises Exception: NSE connection related
        :return: expiry dates
        :rtype: list
        """
        try:
            hack = self.__request.get(self.__nse_urls.OC_FIRST_TRY,headers=self.__headers)
            base_oc_url = self.__nse_urls.get_oc_url(symbol)
            page = self.__request.get(base_oc_url,headers=self.__headers)
            oc_json = json.loads(page.text)
            expiry_dates = oc_json['records']['expiryDates']
            return expiry_dates

        except Exception as err:
            raise Exception("something went wrong while fetching nse :", str(err))

[docs]    def get_option_data(self,symbol,expiry_date=None,strikes=None):
        """get option data

        :param symbol: stock/index symbol
        :type symbol: string
        :param expiry_date: expiry date (all date formats accepted), defaults to next near
        :type expiry_date: string, optional
        :param strikes: Dictionary having OTM,ITM,ATM strikes, defaults to None
        :type strikes: Dictionary, optional
        :raises Exception: Connection related
        :return: underlying_val, option data
        :rtype: pair
        """
        try:
            hack = self.__request.get(self.__nse_urls.OC_FIRST_TRY,headers=self.__headers)
            base_oc_url = self.__nse_urls.get_oc_url(symbol)
            page = self.__request.get(base_oc_url,headers=self.__headers)
            oc_json = json.loads(page.text)
            underlying_val = oc_json['records']['underlyingValue']
            if not expiry_date:
                oc_data = oc_json['filtered']['data']
                oc_mapped_data = {}
                for eachData in oc_data:
                    strikePrice = eachData.get("strikePrice")
                    if(strikes):
                        if (strikePrice == strikes["ATM"]
                            or strikePrice in strikes["ITM"]
                            or strikePrice in strikes["OTM"]):
                            oc_mapped_data[strikePrice] = {"CE":eachData.get("CE"),"PE":eachData.get("PE")}
                    else:
                        oc_mapped_data[strikePrice] = {"CE":eachData.get("CE"),"PE":eachData.get("PE")}
                return underlying_val,oc_mapped_data
        except Exception as err:
            raise Exception("something went wrong while fetching nse :", str(err))



[docs]    def get_option_chain_df(self, symbol, expiry_date=None,dayfirst=False):
        """ This function fetches option chain data from NSE and returns in pandas.DataFrame

        :param symbol: stock/index symbol
        :type symbol: string
        :param expiry_date: expiry date (all date formats accepted), defaults to next near
        :type expiry_date: string
        :param dayfirst: True if date format is european style DD/MM/YYYY, defaults to False
        :type dayfirst: bool, optional
        :raises Exception: NSE connection related
        :raises Exception: In html parsing
        :return: option chain
        :rtype: pandas.DataFrame
        """
        try:
            if not expiry_date:
                expiry_date = self.get_oc_exp_dates(symbol)[0]

            oc_url = self.__nse_urls.get_option_chain_url(symbol, expiry_date,dayfirst)
            # If the response was successful, no Exception will be raised
            oc_page = self.__request.get(oc_url, headers = self.__headers)

        except Exception as err:
             raise Exception("Error occurred while connecting NSE :", str(err))
        else:
            try:
                dfs = pd.read_html(oc_page.text)
                return dfs[1]
            except Exception as err:
                raise Exception("Error occurred while reading html :", str(err))

    def __get_file_path(self, file_name, file_path = None, is_use_default_name = True):
        """[summary]

        :param file_name: file name
        :type file_name: string
        :param file_path: file directory or file path , defaults to None
        :type file_path: string, optional
        :param is_use_default_name: to get filename in current timestamp, defaults to True
        :type is_use_default_name: bool, optional
        :return: file path
        :rtype: string
        """
        try:
            if not file_path:
                file_path = os.getcwd()

            if os.path.isfile(file_path):
                if (not is_use_default_name):
                    return file_path
                # if need to use default file path, we get parent path
                else:
                    file_path = os.path.dirname(file_path)

            # datetime object containing current date and time
            now = datetime.now()
            # dd/mm/YY H:M:S
            dt_string = now.strftime("%d_%B_%H_%M")
            file_name = file_name + "_" + dt_string + ".xlsx"

            excel_path = os.path.join(file_path, file_name)
            return excel_path
        except Exception as err:
            print("Error while naming file. Error: ", str(err))

[docs]    def get_option_chain_excel(self, symbol, expiry_date=None,dayfirst=False,file_path = None, is_use_default_name = True):
        """Fetches NSE option chain data and returns in the form of excel (.xlsx)

        :param symbol: stock/index symbol
        :type symbol: string
        :param expiry_date: expiry date (all date formats accepted), defaults to next near
        :type expiry_date: string
        :param dayfirst: True if date format is european style DD/MM/YYYY, defaults to False
        :type dayfirst: bool, optional
        :param file_path: file/folder path, defaults to None
        :type file_path: string, optional
        :param is_use_default_name:  to get filename as current timestamp, defaults to True
        :type is_use_default_name: bool, optional
        :raises Exception:  NSE connection related
        """
        try:
            if not expiry_date:
                expiry_date = self.get_oc_exp_dates(symbol)[0]

            df = self.get_option_chain_df(symbol, expiry_date,dayfirst)
            file_name = symbol + "_" + expiry_date
            excel_path = self.__get_file_path(file_name, file_path, is_use_default_name)

            df.to_excel(excel_path, file_name)
        except Exception as err:
            raise Exception("Error occurred while getting excel :", str(err))

    def __join_part_oi_dfs(self,df_join,df_joiner):
        """will append joiner to join for oi_dfs

        :param df_join: Dictionary of participants
        :type df_join: dict
        :param df_joiner: Dictionary of participants
        :type df_joiner: dict
        """
        for client in df_join:
            df_join[client] = self.__join_dfs(df_join[client],df_joiner[client]).sort_index()

    def __join_dfs(self,join,joiner):
        """will append joiner to join for oi_dfs

        :param join: df which will be appended
        :type join: pandas.DataFrame
        :param joiner: df which we want to append
        :type joiner: pandas.DataFrame
        :return: merged data frame
        :rtype: pandas.DataFrame
        """
        return join.append(joiner)

[docs]    def get_part_oi_df(self,start=None,end=None,periods=None,dayfirst=False,workers=None):
        """Return dictionary of participants containing data frames

        :param start: start date , defaults to None
        :type start: string, optional
        :param end: end date, defaults to None
        :type end: string, optional
        :param periods: number of days, defaults to None
        :type periods: interger, optional
        :param dayfirst: True if date format is european style DD/MM/YYYY, defaults to False
        :type dayfirst: bool, optional
        :param workers: Number of threads for requesting nse, defaults to None
        :type workers: interger, optional
        :raises Exception: NSE Connection/Request overload
        :return: participant wise open interest
        :rtype: pandas.DataFrame
        """
        try:
            #format date just in case
            if start:
                start = get_formated_date(start,dayfirst=dayfirst)
            if end:
                end = get_formated_date(end,dayfirst=dayfirst)

            #if both none, we set end to today
            if not start and not end:
                end = get_formated_date()
                if not periods:
                    periods = PART_OI_DAYS
            #get urls for these days
            dates = pd.date_range(start=start,end=end, periods=periods,freq='B')
            url_date = [(self.__nse_urls.get_participant_oi_url(date),date) for date in dates]#

            oi_clm = self.__nse_urls.PART_OI_CLM
            #lets preinitialize, better readability
            oi_dfs = {  "Client":pd.DataFrame(columns=oi_clm,index=dates),
                        "DII":pd.DataFrame(columns=oi_clm,index=dates),
                        "FII":pd.DataFrame(columns=oi_clm,index=dates),
                        "Pro":pd.DataFrame(columns=oi_clm,index=dates),
                        "TOTAL":pd.DataFrame(columns=oi_clm,index=dates)
                        }

            if not workers:
                workers = os.cpu_count() * 2

            with concurrent.futures.ThreadPoolExecutor(max_workers=workers) as executor:
                responses = {executor.submit(self.__request.get, url,self.__headers): date for url,date in url_date}
                for res in concurrent.futures.as_completed(responses):
                    date = responses[res]
                    try:
                        csv = res.result()
                    except Exception as exc:
                        #might be holiday
                        pass
                    else:
                        df = pd.read_csv(io.StringIO(csv.content.decode('utf-8')))
                        #drop the first header
                        df_header = df.iloc[0]
                        #is there any implace way?
                        df = df[1:]
                        df.columns = df_header
                        df.set_index('Client Type',inplace=True)
                        #lets us create data frome for all client type
                        oi_dfs['Client'].loc[date] = df.loc['Client']
                        oi_dfs['FII'].loc[date] = df.loc['FII']
                        oi_dfs['DII'].loc[date] = df.loc['DII']
                        oi_dfs['Pro'].loc[date] = df.loc['Pro']
                        oi_dfs['TOTAL'].loc[date] = df.loc['TOTAL']

            if not oi_dfs['Client'].empty:
                #remove nan row
                for client in oi_dfs:
                    oi_dfs[client].dropna(inplace=True)

                #if holiday occurred in business day, lets retrive more data equivalent to holdidays.
                if oi_dfs['Client'].shape[0] < periods:
                    new_periods = periods - oi_dfs['Client'].shape[0]
                    try:
                        #if only start, find till today
                        if start and (not end):
                            s_from = oi_dfs['Client'].index[-1] + timedelta(1)
                            e_till = None
                        #if not start, can go to past
                        elif(end and (not start)):
                            s_from = None
                            e_till = oi_dfs['Client'].index[0] - timedelta(1)
                        #if start and end, no need to change
                        else:
                            return oi_dfs
                    except IndexError as err:
                        raise Exception("NSE Access error.size down/clean cookies to resolve the issue.")
                    except Exception as exc:
                        raise Exception("participant OI error: ",str(exc))

                    oi_dfs_new = self.get_part_oi_df(start = s_from,end = e_till,periods = new_periods)
                    self.__join_part_oi_dfs(oi_dfs,oi_dfs_new)

                return oi_dfs

        except Exception as err:
            raise Exception("Error occurred while getting part_oi :", str(err))

    def __parse_indexdata(self,res_txt,symbol):
        dfs = pd.read_html(res_txt)[0]
        if dfs.shape[0] <2:
            raise Exception("No record found")
        if "NIFTY" in symbol:
            fined_dfs = dfs.iloc[0:]
            fined_dfs.columns = self.__nse_urls.INDEX_DATA_CLM
        elif symbol == "INDIA VIX":
            fined_dfs = dfs.iloc[1:]
            fined_dfs.drop(fined_dfs.index[0],inplace=True)
            fined_dfs.columns = self.__nse_urls.VIX_DATA_CLM
        fined_dfs.drop(fined_dfs.index[-1],inplace=True)
        fined_dfs.set_index("Date",inplace=True)
        return fined_dfs

    def __get_datarange_intv(self,start,end,intv):
        diff = math.ceil((end  - start).days / intv)
        date_ranges = []
        curr_start = prev_start = start
        for i in range(diff):
            curr_start =  (start + timedelta(intv * i))
            if i !=0:
                start_ = prev_start
                end_ = curr_start - timedelta(1)
                date_ranges.append((start_,end_))
            prev_start = curr_start
        date_ranges.append((curr_start,end))
        return date_ranges

    def __get_data_adjusted(self,dfs,symbol,series="EQ",start=None,end=None,periods=None):
        if periods and (dfs.shape[0] < periods):
            new_periods = periods - dfs.shape[0]
            try:
                s_from = e_till = None
                #if only start, find till today
                if start and (not end):
                    s_from = dfs.index[0] + timedelta(1)
                    e_till = None
                #if not start, can go to past
                elif((end and (not start)) or periods):
                    s_from = None
                    e_till = dfs.index[-1] - timedelta(1)
            except IndexError as err:
                raise Exception("NSE Access error.")
            except Exception as exc:
                raise Exception("Stock data error: ",str(exc))
            try:
                dfs_new = self.get_data(symbol,series,start = s_from,end = e_till,periods = new_periods)
                dfs = self.__join_dfs(dfs,dfs_new).sort_index(ascending=False)
            except Exception as exc:
                #data may not be available
                pass
        return dfs

    def __format_df(self,dfs):
        if not dfs.empty:
            dfs.columns = dfs.columns.str.title()

[docs]    def get_data(self,symbol,series="EQ",start=None,end=None,periods=None,dayfirst=False):
        """To get NSE stock data

        :param symbol: stock/index symbol
        :type symbol: string
        :param series: segment, defaults to "EQ"
        :type series: string, optional
        :param start: start date, defaults to None
        :type start: string, optional
        :param end: end date, defaults to None
        :type end: string, optional
        :param periods: number of days, defaults to None
        :type periods: interger, optional
        :param dayfirst: True if date format is european style DD/MM/YYYY, defaults to False
        :type dayfirst: bool, optional
        :raises Exception: NSE Connection Related
        :return: stock data
        :rtype: pandas.DataFrame
        """
        try:

            #Step1: get the date range
            s_from,e_till = get_date_range(start=start,end=end,periods=periods,dayfirst=dayfirst)

            if s_from > e_till:
                raise ValueError("End should grater than start.")

            data_limit = None
            if self.__nse_urls.is_index(symbol):
                data_limit = INDEX_DATA_LIMIT
            else:
                data_limit = STOCK_DATA_LIMIT

            data_days = e_till - s_from

            hack = self.__request.get(self.__nse_urls.BASE_URL,headers=self.__headers)
            if (data_days.days) > data_limit:
                date_ranges = self.__get_datarange_intv(s_from,e_till,data_limit)
                workers = len(date_ranges)
                with concurrent.futures.ThreadPoolExecutor(max_workers=workers) as executor:
                    responses = [executor.submit(self.get_data, symbol=symbol,start=start_,end=end_,dayfirst=dayfirst)\
                                    for start_,end_ in date_ranges]
                    dfs = []
                    for res in concurrent.futures.as_completed(responses):
                        try:
                            df = res.result()
                            dfs.append(df)
                        except Exception as exc:
                            #might be holiday/no record
                            pass
                    all_dfs = pd.concat(dfs).sort_index(ascending=False)
                    adjusted_dfs = self.__get_data_adjusted(all_dfs,symbol,start=start,end=end,periods=periods)
                    return adjusted_dfs

            data_url = self.__nse_urls.get_stock_data_url\
                                                        (
                                                        symbol,series=series,start=s_from,
                                                        end=e_till
                                                        )

            csv = self.__request.get(data_url,headers=self.__headers)

            #if it is index, wee need to read table
            # Why the heck, We are doing so much handling? Is there any other way?
            # Suggestions are welcome. ping me on github
            if self.__nse_urls.is_index(symbol):
                dfs = self.__parse_indexdata(csv.text,symbol)
            else:
                dfs = pd.read_csv(io.StringIO(csv.content.decode('utf-8')))
                dfs.set_index('Date ',inplace=True)
            # Converting the index as date
            dfs.index = pd.to_datetime(dfs.index)
            dfs = self.__get_data_adjusted(dfs,symbol,start=start,end=end,periods=periods)
            #format dataframe
            self.__format_df(dfs)
            return dfs

        except Exception as err:
            raise Exception("Error occurred while fetching stock data :", str(err))