来源:python中国网 时间:2019-07-18

  Cookies

  如果一个 HTTP 响应中包含了 cookie,那么我们可以通过响应对象的 cookies 属性拿到:

# -*- coding: utf-8 -*-
import requests


def get_html(url, retry=2):
    """Request *url* with GET and return the cookies set by the response.

    The request is sent with the module-level ``headers`` dict, which must
    be defined before this function is called.

    Args:
        url: Address to request.
        retry: Number of additional attempts to make after a failed request.

    Returns:
        The response's cookie jar (``r.cookies``), or ``None`` when the
        request still fails after the retries are used up.
    """
    try:
        r = requests.get(url=url, headers=headers, stream=True)
    except Exception as e:
        print(e)
        if retry > 0:
            # Hand the retry's result back to the caller; without ``return``
            # a successful retry would still produce ``None``.
            return get_html(url, retry - 1)
        return None
    else:
        cookiejar = r.cookies
        # With stream=True the body is never consumed here, so close the
        # response explicitly to release the underlying connection.
        r.close()
        return cookiejar


if __name__ == "__main__":
    # Custom request headers; get_html() reads this module-level name.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36',
    }
    url = 'https://www.baidu.com/'
    cookiejar = get_html(url)  # CookieJar object, or None if the request failed
    # get_html() returns None on failure; converting None would raise.
    if cookiejar is not None:
        cookiedict = requests.utils.dict_from_cookiejar(cookiejar)  # convert to a dict
        print(cookiedict)
D:\python3\install\python.exe D:/python/py3script/test.py
{'BAIDUID': '8C6CC54A0BE5DB56B2393B3441F127BC:FG=1', 'BIDUPSID': '8C6CC54A0BE5DB56B2393B3441F127BC', 'H_PS_PSSID': '1448_21103_29522_29520_28518_29099_28839_29221_29071', 'PSTM': '1563449386', 'delPer': '0', 'BDSVRTM': '0', 'BD_HOME': '0'}

Process finished with exit code 0

  Session

  在 requests 里,session对象是一个非常常用的对象,这个对象代表一次用户会话:从客户端浏览器连接服务器开始,到客户端浏览器与服务器断开。

  会话能让我们在跨请求时保持某些参数,比如在同一个 Session 实例发出的所有请求之间保持 cookie。这样就可以实现自动登录。以人人网为例,实现思路如下:

# -*- coding: utf-8 -*-
import requests
import re

# 1. Create a session object; it keeps cookie values across requests.
ssion = requests.session()
# 2. Custom headers
headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36"}
# 3. User name and password used to log in
# NOTE(review): credentials are hard-coded in plain text; real code should
# load them from the environment or a config file.
data = {"email":"614863843@qq.com", "password":"772483200dong"}
# 4. Send the login request with the credentials; the cookies set by the
#    response are stored in ssion. The custom headers defined above are
#    sent with it (they were previously defined but never used).
ssion.post("http://www.renren.com/PLogin.do", headers=headers, data=data)
# 5. The session now carries the login cookies, so pages that require a
#    logged-in user can be requested directly.
r = ssion.get("http://www.renren.com/410043129/profile")
# 6. Read the returned content.
html = r.text
# The pattern needs a capture group for group(1) to work.
title = re.search('<title>(.*?)</title>', html)
# re.search() returns None when the page has no <title>.
if title:
    print(title.group(1))  # expected output: 人人网 - 邓永洁❤


  完整的自动登录人人网的代码

# -*- coding: utf-8 -*-
import requests
import re

def login_auto(url, data, retry=2):
    """Log in automatically by POSTing *data* to *url*.

    The request goes through the module-level ``ssion`` session, so cookies
    set by the response stay available for later requests on that session.
    The module-level ``headers`` dict is sent with the request.

    Args:
        url: Login endpoint.
        data: Form fields to submit (the login credentials).
        retry: Number of additional attempts to make after a failed request.

    Returns:
        The response body as text, or ``None`` when the request still fails
        after the retries are used up.
    """
    try:
        r = ssion.post(url=url, headers=headers, data=data)
    except Exception as e:
        print(e)
        if retry > 0:
            # Pass ``data`` through and decrement ``retry`` explicitly: the
            # earlier call passed the counter in the ``data`` position, so
            # ``retry`` never decreased and the form payload was lost.
            return login_auto(url, data, retry - 1)
        return None
    else:
        return r.text

def check_login(html=''):
    """Check whether the login succeeded by inspecting the page title.

    Args:
        html: HTML text returned by the login request.

    Returns:
        ``'success'`` when the page's ``<title>`` contains ``'老董'``;
        ``None`` otherwise, including when there is no ``<title>`` element
        or *html* is not a string.
    """
    try:
        # Capture the text between the <title> tags. A bare '(.*?)' matches
        # the empty string at position 0 and can never contain the name.
        title_re = re.search('<title>(.*?)</title>', html)
    except TypeError as e:
        # html is not text, e.g. None after a failed login request.
        print(e)
        return None
    if title_re is None:
        return None
    title = title_re.group(1)
    return 'success' if '老董' in title else None


if __name__ == "__main__":
    # Module-level names read by login_auto(): headers and ssion.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36"}
    url = 'http://www.renren.com/PLogin.do'
    # NOTE(review): credentials are hard-coded in plain text; real code should
    # load them from the environment or a config file.
    data = {"email": "614863843@qq.com", "password": "772483200dong"}
    ssion = requests.session()
    html = login_auto(url,data)
    status = check_login(html)
    if status:
        print('登陆成功')
        # Request a page that is only visible to a logged-in user.
        r = ssion.get("http://www.renren.com/410043129/profile")
        html = r.text
        title = re.search('<title>(.*?)</title>', html)
        # re.search() returns None when the page has no <title>.
        if title:
            print(title.group(1))  # expected output: 人人网 - 邓永洁❤

D:\python3\install\python.exe D:/python/py3script/python66.py
登陆成功
人人网 - 邓永洁❤

Process finished with exit code 0