import urllib.parse
import requests                #import Python requests library
#import the BeautifulSoup functions to parse the data returned from the website
from bs4 import BeautifulSoup

scripFile = open("stocksList.txt")
scripArray = scripFile.read()
scripList = scripArray.split("\n")
print("Total number of scrips in list = ", len(scripList))

i = 0
while i < len(scripList):
    urlNSE = "https://www.nseindia.com/live_market/"
    str1 = "dynaContent/live_watch/get_quote/GetQuote.jsp?symbol="
    str2 = "&illiquid=0&smeFlag=0&itpFlag=0"
    #urllib.parse.quote_plus avoids malformed URLs for scrips like M&M
    url = urlNSE + str1 + urllib.parse.quote_plus(scripList[i]) + str2

    #htmlFile = urllib.urlopen(url)   #Python 2 idiom, replaced by requests
    htmlFile = requests.get(url)
    #Status code 200 indicates the page was downloaded successfully
    #htmlFile.status_code

    htmlText = BeautifulSoup(htmlFile.content, 'html.parser')
    #search for elements by id / class / tag
    respText = htmlText.find('div', attrs={'id': 'responseDiv'}).text
    splitText = respText.split('closePrice\":\"')[1]
    clPrice = splitText.split('\",\"')[0]
    print("Closing price of ", '{:>11}'.format(scripList[i]), " = ", '{:>8}'.format(clPrice))
    i = i + 1
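  • The string splits above are brittle if the key order inside the payload changes. The responseDiv text appears to hold a JSON payload (an assumption about the page's markup), so the standard json module can parse it instead; a minimal sketch, reusing respText from the loop above and assuming the quote record sits in a "data" array:

    import json

    quote = json.loads(respText.strip())   #parse the responseDiv payload as JSON
    record = quote["data"][0]              #the "data" array is an assumption about the payload
    clPrice = record["closePrice"]         #key name taken from the string split above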
  • Limitation of 'requests': it cannot execute JavaScript; 'requests' only fetches the markup one sees with the browser's "view source" utility. Hence, for dynamic web pages, or pages where a form must be filled in to retrieve data, the Selenium WebDriver is required, as demonstrated below. The advantage of 'requests' is that it does not open a web browser and can work in the background (a middle ground, running Chrome without a visible window, is sketched after the script below).
  • Python script to retrieve the historical OHLC prices of the stocks listed in a file named stocksList.txt. The code checks neither internet connectivity nor the correctness of the symbols. The output can either be printed to the Python console or written to a text file.
    #Python 3.5.2, Windows 10
    import os
    import requests                #import Python requests library
    from bs4 import BeautifulSoup  #BeautifulSoup functions to parse the data
    
    # Download chromedriver.exe (or IEDriverServer.exe for IE) into the same folder as this script
    from selenium import webdriver
    from selenium.webdriver.common.keys import Keys
    
    scripFile = open("stocksList.txt")
    scripArray = scripFile.read()
    scripList = scripArray.split("\n")
    print("Total number of scrips in list = ", len(scripList))
    
    urlNSE = "https://www.nseindia.com/products/"
    str1 = "content/equities/equities/eq_security.htm"
    url = urlNSE + str1

    # get the path of the driver executable; os.path.join avoids the
    # backslash-escape pitfalls of dir + "\chromedriver.exe"
    dir = os.path.dirname(__file__)
    driver_path = os.path.join(dir, "chromedriver.exe")   #IEDriverServer.exe for IE

    # create a new Chrome session once, outside the loop (webdriver.Ie for IE)
    driver = webdriver.Chrome(driver_path)
    driver.implicitly_wait(30)    #wait up to 30 s for elements to appear
    driver.maximize_window()

    i = 0
    while i < len(scripList):
        driver.get(url)    #reload the query form so the fields start empty
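        #implicitly_wait above applies a blanket timeout to every find_element_* call;
        #an alternative for one specific condition is an explicit wait, e.g.:
        #from selenium.webdriver.support.ui import WebDriverWait
        #from selenium.webdriver.support import expected_conditions as EC
        #from selenium.webdriver.common.by import By
        #WebDriverWait(driver, 30).until(EC.presence_of_element_located((By.ID, 'symbol')))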
       
        symbol = driver.find_element_by_id('symbol')
        symbol.send_keys(scripList[i])
    
        series = driver.find_element_by_id('series')
        series.send_keys('EQ')
    
        dateRange = driver.find_element_by_id('dateRange')
        #Options: "1 Day", "7 Days", "2 weeks", "1 month", "3 months", "365 Days"
        dateRange.send_keys("7 Days") 
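        #If the dateRange field is a <select> element (an assumption about the page),
        #Selenium's Select helper is the more robust way to pick an option:
        #from selenium.webdriver.support.ui import Select
        #Select(dateRange).select_by_visible_text("7 Days")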
    
        getButton = driver.find_element_by_id('get')
        getButton.submit()
    
        fileData = []
        fileName = scripList[i]+".txt"
        f = open(fileName, "a")
        
        #Check for a thead tag in the table, if present
        #headerStr = driver.find_element_by_tag_name('thead')
        tableBody = driver.find_element_by_tag_name('tbody')

        tableRows = tableBody.find_elements_by_tag_name('tr')
        nRow = len(tableRows)    #number of rows returned
    
        tableHeader = tableRows[0]
        headers = tableHeader.find_elements_by_tag_name('th')
        headerStr = []
        for th in headers:
            headerText = th.text.replace('\n', ' ')   #flatten multi-line headers
            headerStr.append(headerText)
        fileData.append(",".join(headerStr))

        print(','.join(fileData))
        #f.write(','.join(fileData) + '\n')   #write() needs a string; uncomment to write the header to file
        
        for tabRow in tableRows[1:]:    #skip the header row handled above
            rowData = []
            for td in tabRow.find_elements_by_tag_name('td'):
                tdTxt = td.text.strip()
                #list.append(e) adds 'e' at the end, modifying the list in place
                rowData.append(tdTxt)
            print(','.join(rowData))
            #f.write(','.join(rowData) + '\n')   #uncomment to write the row to file
        f.close()    #close this scrip's output file before moving on
        i = i + 1
    driver.quit()   #Close the browser window
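  • As noted above, 'requests' works without opening a browser window. Selenium can come close with Chrome's headless mode; a minimal sketch, assuming a Chrome/chromedriver pairing recent enough to support the --headless flag and Selenium 3.8+ for the options keyword:

    from selenium import webdriver

    options = webdriver.ChromeOptions()
    options.add_argument('--headless')    #run Chrome without a visible window
    driver = webdriver.Chrome("chromedriver.exe", options=options)
    driver.get("https://www.nseindia.com/")
    print(driver.title)                   #the page is still rendered, JavaScript included
    driver.quit()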