From 26fb56c669d28142f84a4a72c7975eb6cc9106a2 Mon Sep 17 00:00:00 2001 From: Llloooggg Date: Tue, 21 Apr 2020 23:44:10 +0300 Subject: [PATCH] =?UTF-8?q?=D0=94=D0=BE=D0=B1=D0=B0=D0=B2=D0=BB=D0=B5?= =?UTF-8?q?=D0=BD=D0=B0=20=D0=BF=D1=80=D0=BE=D0=B2=D0=B5=D1=80=D0=BA=D0=B0?= =?UTF-8?q?=20=D1=81=D0=BB=D0=BE=D0=B2=20=D0=B8=D0=B7=20=D0=B1=D0=B0=D0=BD?= =?UTF-8?q?-=D0=BB=D0=B8=D1=81=D1=82=D0=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- __init__.py | 37 +++++++++++++++++++++++++++++++++---- ban_list.txt | 3 +++ 2 files changed, 36 insertions(+), 4 deletions(-) create mode 100644 ban_list.txt diff --git a/__init__.py b/__init__.py index 627fba4..fcc6d61 100644 --- a/__init__.py +++ b/__init__.py @@ -4,10 +4,20 @@ import requests import random import re import progressbar +from os import path from bs4 import BeautifulSoup from datetime import datetime +global banList +banList = [] +if path.exists('./ban_list.txt'): + f = open('./ban_list.txt') + for line in f.readlines(): + banList.append(line.lower()) + f.close() + + def button_by_text(text): # получение кнопки по тексту на ней try: @@ -21,6 +31,8 @@ def button_by_text(text): # получение кнопки по тексту def get_profession(): # получение случайной профессии в РФ, где 1 и 2 диапазон - профессии рабочих, 2 и 3 - должности служащих + global banList + coin = (1, 2, 3, 4) coin = random.choices(coin, [0.1, 0.1, 0.4, 0.4], k=1)[0] @@ -40,11 +52,27 @@ def get_profession(): # получение случайной професси try: profession = soup.findAll("div", {"class": "my_col2"})[1] profession = re.sub(r'\([^()]*\)', '', profession.get_text()) - return profession + + if not banList: + return profession + if not_in_ban_list(profession): + return profession + except: pass +def not_in_ban_list(word): + + global banList + + wordLowReg = word.lower() # проверка на наличие слов из бан-листа + for badWord in banList: + if badWord in wordLowReg: + return False + return True + + def profile_maker(): global areas_of_activity @@ -127,14 +155,15 @@ def bulldozer(): if __name__ == '__main__': url = 'https://docs.google.com/forms/d/1f716YOLUrKhtjTlR4hYiEWkgwjqylR5fCPxWsHQKJqY' - resnondents = int(input('Введите желаемое число респондентов: ')) + # url = int(input('Введите ссылку на форму: ')) + respondents = int(input('Введите желаемое число респондентов: ')) options = webdriver.firefox.options.Options() options.headless = True driver = webdriver.Firefox(options=options) - with progressbar.ProgressBar(max_value=resnondents) as bar: - for i in range(resnondents): + with progressbar.ProgressBar(max_value=respondents) as bar: + for i in range(respondents): bar.update(i) driver.get(url) diff --git a/ban_list.txt b/ban_list.txt new file mode 100644 index 0000000..bab8c29 --- /dev/null +++ b/ban_list.txt @@ -0,0 +1,3 @@ +секрет +суд +федерал \ No newline at end of file