Snippets

Andrew 7nBe8n: Untitled snippet

Created by Andrew
import csv
import json
import time
import urllib.parse

# Today's date as M/D/YYYY with no zero padding (the site's query string
# expects this form). Built from struct_time fields because strftime's
# '%-d' is a glibc extension that fails on Windows; composing the fields
# directly is portable and needs no post-hoc lstrip/replace stripping.
_today = time.localtime()
inlinetime = f"{_today.tm_mon}/{_today.tm_mday}/{_today.tm_year}"

CSV_FILE_PATH = './list/list.csv'
# Column positions in the CSV, named so call sites read as BRAND_INDEX
# instead of a bare magic number like row[0].
BRAND_INDEX = 0
CODE_INDEX = 1
CATEGORY_INDEX = 2


def get_list_of_dicts_from_csv_file(csv_file_path):
    """Read a CSV file and return its rows as a list of dicts keyed by header.

    csv.DictReader already consumes the first line as the column names, so
    no manual header skip is needed — the original ``next(csv_reader)`` was
    silently dropping the first DATA row of the file.

    :param csv_file_path: path to a CSV file whose first line is the header
    :return: list of dicts, one per data row
    """
    # newline='' is the csv-module-documented way to open CSV files so that
    # embedded newlines inside quoted fields are handled correctly.
    with open(csv_file_path, newline='') as csv_file:
        return list(csv.DictReader(csv_file, delimiter=','))


class UrlCreator:
    """Builds a listings-search URL for one brand / category-code pair.

    The brand is percent-encoded on construction; the category code is
    used verbatim. The ``caed`` parameter embeds the module-level
    ``inlinetime`` date string.
    """

    def __init__(self, brand, code):
        # Percent-encode the brand so spaces and punctuation are URL-safe.
        self.brand = urllib.parse.quote(brand)
        self.code = code

    def create_url(self):
        """Return the full search URL for this brand and category code."""
        base = "https://www.site.com.com/Listings"
        query = (
            f"st={self.brand}&sg=Ending&c={self.code}"
            "&s=&lp=0&hp=999999&sbn=false&spo=false&snpo=false&socs=false"
            "&sd=false&sca=false"
            f"&caed={inlinetime}%2012:00:00%20AM"
            "&cadb=7&scs=False&sis=False&col=0&p=1&ps=40&desc=False&ss=0&Us"
        )
        return f"{base}?{query}"


def url_encoded():
    """Build one listings URL per CSV row and return them as a JSON array string.

    Fixes the original early return: ``return`` sat inside the ``for`` loop,
    so only the URL for the FIRST CSV row was ever produced. The JSON dump
    now happens once, after all rows are processed.

    :return: JSON-encoded (indent=4) list of URL strings
    """
    rows = get_list_of_dicts_from_csv_file(CSV_FILE_PATH)
    urls = [UrlCreator(row['brand'], row['code']).create_url() for row in rows]
    return json.dumps(urls, indent=4)
    #print(json.dumps(urls, indent=4))


if __name__ == "__main__":
    # Script entry point: build one URL per CSV row and print the whole
    # batch as a JSON array. The JSON dump is done once after the loop —
    # the original re-serialized the (unused) growing list every iteration.
    data_from_csv = get_list_of_dicts_from_csv_file(CSV_FILE_PATH)
    urls = []
    for data in data_from_csv:
        url_creator = UrlCreator(data['brand'], data['code'])
        urls.append(url_creator.create_url())
    print(json.dumps(urls, indent=4))
from bs4 import BeautifulSoup
import requests
import logging as logger
#import MySQLdb
import sys, os, csv


# NOTE(review): an explicit relative import only works when this file lives
# inside a package and is imported as part of it; run directly as a script,
# this line raises ImportError — confirm the intended package layout.
from .main import url_encoded
url_encoded()  # return value (a JSON string of URLs) is discarded here

'''
I want to start working with the URLs from main.py's url_encoded(), which is
just a copy of the `if __name__ == "__main__":` block.
I'm probably going about this wrong... How can I get the URLs as a list or
JSON to loop through in this file?
Below is pseudo-code — it doesn't run yet, but it shows the idea for what's next.
'''


def pagenum():
    """Expand each search URL into one URL per result page.

    For every URL produced by url_encoded(), fetch the page, look for the
    pagination widget, read the last page number, and emit a copy of the
    URL for each page.

    NOTE(review): the original body was self-described pseudo-code (an
    ``except`` nested inside a ``for`` with no matching ``try``, iteration
    over the builtin ``list``, undefined ``proxies``/``header``/``link``/
    ``brands``). This is a best-guess reconstruction of the stated intent —
    confirm against the author, especially the proxy/header handling and
    the intended return shape.

    :return: list of per-page URL strings
    """
    page_urls = []
    # url_encoded() returns a JSON array string; decode it back to a list.
    urls = json.loads(url_encoded())
    for url in urls:
        html = requests.get(url).text
        soup = BeautifulSoup(html, features='lxml')
        pagination = soup.find('ul', {'class': 'pagination'})
        try:
            # data-page on the "last" link holds the highest page number.
            pages = int(pagination.find('a', {'id': 'last'})['data-page'])
        except (AttributeError, TypeError, KeyError, ValueError):
            # No pagination widget (or unexpected markup): single page.
            pages = 1
        for page in range(1, pages + 1):
            # The generated URLs always carry "&p=1&"; substitute the real
            # page number for each page of results.
            page_urls.append(url.replace('&p=1&', f'&p={page}&'))
    return page_urls

Comments (0)

HTTPS SSH

You can clone a snippet to your computer for local editing. Learn more.