python

当前位置:首页 > SEO工具 > 当前文章

SEO工具

selenium贴吧顶帖(多贴多账号)

2020-11-13 59赞 老董笔记
每篇文章致力于解决一个问题!更多精品内容可移步文章底部。

  数据一多,就需要排序!排在第一,机会才多。selenium贴吧顶帖脚本如下:

  注意

  回帖按钮用的js提交,如果发布过程出现输入验证码,会提示回复成功但实际上未发布上去

  功能:多账号多帖子顶帖,默认每60-70秒回复1次。

  原理:脚本会循环检测每个帖子在贴吧的排名,如果不在第一位则随机一个账号回帖顶帖!

  用法:

  1、准备cookie_zh.txt一行一个cookie

  2、准备tieba_tie.txt,一行一个帖子地址(https://tieba.baidu.com/p/5205473810),会根据帖子地址来得出所在贴吧。

  3、准备content.txt 一行一个回复的句子,随机1个回复

  4、随机1个账号,随机1条内容去回帖!有异常会写入log.txt

# -*- coding: utf-8 -*-
"""
注意:
用的js提交,如果发布过程出现输入验证码,会提示发布成功但实际上未发布上去
功能:多账号多贴子顶帖
默认60-70秒回复1次
准备cookie_zh.txt一行一个cookie
准备tieba_tie.txt,一行一个帖子地址
准备content.txt 一行一个回复的句子,随机1个回复
有异常会写入log.txt
"""

from pyquery import PyQuery as pq
import threading
import queue
import time
from urllib.parse import urlparse
from openpyxl import load_workbook
from openpyxl import Workbook
import time
import gc
import json
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
import random
from selenium.webdriver.common.action_chains import ActionChains
from pykeyboard import PyKeyboard
import traceback


def get_driver(chromedriver_path,chrome_path,ua):
    """Build and return a Chrome WebDriver configured for this script.

    Args:
        chromedriver_path: Path of the chromedriver executable.
        chrome_path: Path of the Chrome browser binary to launch.
        ua: User-Agent string the browser is started with.

    Returns:
        The ``webdriver.Chrome`` instance created from the options below.
    """
    option = Options()
    option.binary_location = chrome_path
    option.add_argument("user-agent=" + ua)
    option.add_argument("--no-sandbox")
    option.add_argument("--disable-dev-shm-usage")
    option.add_argument("--disable-gpu")
    option.add_argument("--disable-features=NetworkService")
    option.add_argument("--disable-features=VizDisplayCompositor")
    # Uncomment to run without a visible browser window.
    # option.add_argument('headless')
    option.add_argument('log-level=3')  # silence browser logging
    option.add_argument('--ignore-certificate-errors-spki-list')  # suppress SSL errors
    # NOTE(review): passed as ONE argument containing a space; presumably meant
    # to suppress SSL errors as well -- confirm it has any effect.
    option.add_argument('-ignore -ssl-errors')
    option.add_experimental_option("excludeSwitches", ["enable-automation"])
    option.add_experimental_option('useAutomationExtension', False)
    # NOTE(review): the original variable name implied images are disabled, but
    # the value is 1; presumably 2 would be the blocking value -- confirm.
    prefs = {"profile.managed_default_content_settings.images": 1}
    option.add_experimental_option("prefs", prefs)
    # Hide the usual webdriver fingerprints.
    option.add_argument("--disable-blink-features")
    option.add_argument("--disable-blink-features=AutomationControlled")
    # The options object is passed once; the original also passed the same
    # object through the deprecated ``chrome_options`` keyword.
    driver = webdriver.Chrome(options=option, executable_path=chromedriver_path)
    return driver

def get_cookie(filepath):
    """Read all account cookies from *filepath*, one cookie string per line.

    Every line is stripped of surrounding whitespace. Blank lines are skipped
    so a caller picking a random entry never receives an empty cookie.

    Args:
        filepath: Path of a UTF-8 encoded text file.

    Returns:
        A list of non-empty cookie strings, in file order.
    """
    # ``with`` guarantees the handle is closed; the original left it open.
    with open(filepath, 'r', encoding='utf-8') as cookie_file:
        stripped_lines = (line.strip() for line in cookie_file)
        return [line for line in stripped_lines if line]


def to_dict(cookie_str):
    """Convert a ``name=value; name=value`` cookie string into a dict.

    Args:
        cookie_str: Cookie header text with pairs separated by ``;``.

    Returns:
        A dict mapping each cookie name to its value. Segments that are empty
        or contain no ``=`` (for example after a trailing ``;``) are skipped.
    """
    cookie = {}
    for segment in cookie_str.split(';'):
        # Split on the FIRST '=' only, so values that themselves contain '='
        # (such as base64 padding) are kept intact.
        name, separator, value = segment.strip().partition('=')
        if not separator:
            continue
        cookie[name] = value
    return cookie


def auto_login(cookie_dict):
    """Log in to Tieba by injecting cookies and report whether it worked.

    Uses the module-level ``driver``. Existing cookies are cleared, every entry
    of *cookie_dict* is added, and the Tieba home page is reloaded. The login
    counts as successful when a ``li`` under ``#com_userbar > ul`` has a class
    attribute equal to ``u_username``.

    Args:
        cookie_dict: Mapping of cookie name to cookie value.

    Returns:
        A ``(num, user_name)`` tuple. ``num`` is 1 when the logged-in marker
        was found, otherwise 0. ``user_name`` is the text of the user-name
        element, or 0 when it was not read.
    """
    num = user_name = 0
    teiba_index = 'https://tieba.baidu.com/'
    # The page is opened before the cookies are added; presumably cookies can
    # only be set for the currently loaded domain -- confirm.
    driver.get(teiba_index)
    driver.delete_all_cookies()
    for k, v in cookie_dict.items():
        driver.add_cookie({'name': k, 'value': v})
    try:
        driver.get(teiba_index)  # reload, now carrying the cookies
        # Wait for the user bar in the page header to become visible.
        WebDriverWait(driver, 30).until(
            EC.visibility_of_element_located((By.ID, "com_userbar"))
        )
        li_list = driver.find_elements(By.CSS_SELECTOR, '#com_userbar > ul >li')
        li_classnames = [li.get_attribute('class') for li in li_list]
        if 'u_username' in li_classnames:
            num = 1
            user = WebDriverWait(driver, 30).until(
                EC.visibility_of_element_located(
                    (By.CSS_SELECTOR, "#j_u_username > div.u_menu_item.u_menu_username > a > span"))
            )
            user_name = user.text
    except Exception as e:
        # Best effort: a failed login is reported through the return value.
        print('登陆过程异常',e)
    # Returned after the try block rather than from ``finally`` so that
    # non-Exception errors such as KeyboardInterrupt are no longer swallowed.
    return num,user_name


def get_tieba(tie_url):
    """Open a thread page and return the URL of the forum it belongs to.

    Uses the module-level ``driver``. Waits up to 30 seconds for the element
    with class ``card_title_fname`` to become clickable and returns its
    ``href`` attribute.
    """
    driver.get(tie_url)
    forum_link_locator = (By.CLASS_NAME, "card_title_fname")
    forum_link = WebDriverWait(driver, 30).until(
        EC.element_to_be_clickable(forum_link_locator)
    )
    return forum_link.get_attribute('href')


def check_rank(tieba_url):
    """Return the ``data-tid`` of the first thread listed on a forum page.

    Uses the module-level ``driver``. The title input, the reply editor and the
    thread list are each awaited before the first ``j_thread_list`` element is
    read. Returns ``None`` when the list is empty.
    """
    driver.get(tieba_url)

    # Wait for the title input of the posting form.
    title_locator = (By.XPATH, '//*[@id="tb_rich_poster"]/div[3]/div[1]/div[2]/input')
    WebDriverWait(driver, 20).until(EC.visibility_of_element_located(title_locator))

    # Wait for the content editor.
    editor_locator = (By.ID, "ueditor_replace")
    WebDriverWait(driver, 30).until(EC.visibility_of_element_located(editor_locator))

    # Wait for the thread list itself.
    thread_locator = (By.CLASS_NAME, 'j_thread_list')
    threads = WebDriverWait(driver, 20).until(
        EC.visibility_of_all_elements_located(thread_locator)
    )

    if not threads:
        return None
    top_thread = threads[0]
    if not top_thread:
        return None
    return top_thread.get_attribute('data-tid')

def huifu(tie_url,content,bottom_timeout=30):
    """Post *content* as a reply to the thread at *tie_url*.

    Uses the module-level ``driver`` and the module-level scroll script ``js``.
    The reply is submitted by clicking the submit link through JavaScript.

    Args:
        tie_url: URL of the thread to reply to.
        content: Reply text written into the editor.
        bottom_timeout: Seconds to wait for the page to reach its bottom after
            the reply link is clicked.

    Returns:
        1 once the submit script has been executed. Failures surface as
        exceptions, for example a wait timing out.
    """
    driver.get(tie_url)
    # Scroll once, the way a human visitor would.
    driver.execute_script(js)
    # Wait for the "follow" button.
    guanzhu = WebDriverWait(driver, 30).until(
        EC.visibility_of_element_located((By.ID, "j_head_focus_btn"))
    )
    # Move the mouse onto the follow button.
    ActionChains(driver).move_to_element(guanzhu).perform()
    # Wait for the reply editor. The original waited a second time on this
    # same locator under the name "button"; that duplicate is dropped.
    WebDriverWait(driver, 30).until(EC.element_to_be_clickable((By.ID, "ueditor_replace")))
    huifu_first = WebDriverWait(driver, 30).until(EC.element_to_be_clickable((By.CLASS_NAME, "p_reply_first")))
    # Move the mouse from the follow button to the reply link, then click it.
    ActionChains(driver).move_to_element(guanzhu).move_to_element(huifu_first).perform()
    huifu_first.click()
    # Wait until the remaining scroll distance is zero. WebDriverWait polls at
    # intervals and gives up after bottom_timeout seconds, replacing the
    # original unthrottled and unbounded ``while True`` loop.
    js_to_bottom = 'var height = document.documentElement.scrollHeight-document.documentElement.scrollTop-document.documentElement.clientHeight;return(height)'
    WebDriverWait(driver, bottom_timeout).until(
        lambda d: int(d.execute_script(js_to_bottom)) == 0
    )
    print('页面到达底部,需额外等10s')
    # Click into the editor.
    js_content_position = "document.getElementById('ueditor_replace').click();"
    driver.execute_script(js_content_position)
    # Pass the text as a script argument instead of formatting it into the
    # JavaScript source, so quotes, backslashes or newlines cannot break it.
    js_content = "document.getElementById('ueditor_replace').innerText = arguments[0];"
    driver.execute_script(js_content, content)
    time.sleep(10)
    # Submit through JavaScript so a popup overlay cannot block the click.
    button_js = 'document.querySelector("#tb_rich_poster > div.poster_body.editor_wrapper > div.poster_component.editor_bottom_panel.clearfix > div > a").click()'
    driver.execute_script(button_js)
    return 1


def main(tie_urls,contents,cookie_list,time1,time2):
    """Keep every thread in *tie_urls* at the top of its forum.

    Loops over the threads indefinitely. For each one it looks up the forum,
    reads the id of the first listed thread, and, when that is a different
    thread, logs in with a random cookie and posts a random reply.

    Args:
        tie_urls: Thread URLs; the last path segment is used as the thread id.
        contents: Candidate reply texts.
        cookie_list: Cookie strings, one per account.
        time1: Lower bound, in seconds, of the pause after a reply attempt.
        time2: Upper bound, in seconds, of the pause after a reply attempt.

    Returns:
        None, and only when *tie_urls* is empty; otherwise it never returns.
    """
    if not tie_urls:
        # Nothing to watch: the loop below would spin forever doing no work.
        return
    while True:
        for tie in tie_urls:
            try:
                tie_id = tie.split('/')[-1]
                tie_ba = get_tieba(tie)
                first_id = check_rank(tie_ba)
                print('当前帖id:{0},所在吧:{1},排名第1贴id:{2}'.format(tie_id,tie_ba,first_id))
                if int(tie_id) == int(first_id):
                    print('在第一位,本次无需回帖')
                else:
                    cookie = random.choice(cookie_list)
                    cookie_dict = to_dict(cookie)
                    num_auto, user_name = auto_login(cookie_dict)  # log in with the cookie
                    if num_auto == 1:
                        print(user_name, '自动登录成功,开始顶帖')
                        content = random.choice(contents)
                        num = huifu(tie, content)
                        if num == 1:
                            print(tie, '--顶帖回复成功')
                        else:
                            print(tie, '--顶帖回复失败')
                        time.sleep(random.randint(time1, time2))
                    else:
                        print('登录失败,无法顶帖,检查账号')
            except Exception as e:
                # Bind the exception: the original printed an undefined ``e``,
                # which raised NameError here and aborted the whole loop.
                print(e,'异常写入文件log.txt')
                with open('log.txt', 'a', encoding='utf-8') as log_file:
                    traceback.print_exc(file=log_file)
            finally:
                print('循环检测下一个贴')

if __name__ == "__main__":
    # Script entry point: set up the module-level names the functions above
    # rely on (``js``, ``driver``), load the input files and start the loop.
    # The failure file is opened in append mode as before; nothing visible in
    # this script writes to it. ``with`` ensures it is closed on exit.
    with open('tieba_huifu_fail.txt','a',encoding='utf-8') as f:
        js = 'window.scrollBy(0,{0})'.format('document.body.scrollHeight')
        chromedriver_path = 'D:/python3/install/chromedriver.exe'
        chrome_path = 'C:/Program Files (x86)/Google/Chrome/Application/chrome.exe'
        ua = 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.125 Safari/537.36'
        # Module-level driver shared by every function above.
        driver = get_driver(chromedriver_path,chrome_path,ua)
        try:
            # Account cookies, one per line.
            cookie_path = './cookie_zh.txt'
            cookie_list = get_cookie(cookie_path)
            # Reply texts; blank lines are skipped so no empty reply is posted.
            with open('./content.txt','r',encoding='utf-8') as content_file:
                contents = [i.strip() for i in content_file if i.strip()]
            # Threads to bump; blank lines are skipped so no empty URL is opened.
            with open('./tieba_tie.txt', 'r', encoding='utf-8') as tie_file:
                tie_urls = [i.strip() for i in tie_file if i.strip()]
            # Bounds, in seconds, of the random pause after each reply attempt.
            sleep_min, sleep_max = 60, 70
            main(tie_urls,contents,cookie_list,sleep_min,sleep_max)
        finally:
            # Always shut the browser down, including when main() raises or
            # the run is interrupted from the keyboard.
            driver.quit()

文章评论

selenium贴吧顶帖(多贴多账号)文章写得不错,值得赞赏