Python Requests

来自linux中国网wiki
Evan讨论 | 贡献2024年11月6日 (三) 04:24的版本 →‎get
跳到导航 跳到搜索

install and usage

sudo apt install python3-pip
sudo pip3 install requests

>>> import requests
>>> dir(requests)
['ConnectTimeout', 'ConnectionError', 'DependencyWarning', 'FileModeWarning', 'HTTPError', 'JSONDecodeError', 'NullHandler', 'PreparedRequest', 'ReadTimeout', 'Request', 'RequestException', 'RequestsDependencyWarning', 'Response', 'Session', 'Timeout', 'TooManyRedirects', 'URLRequired', '__author__', '__author_email__', '__build__', '__builtins__', '__cached__', '__cake__', '__copyright__', '__description__', '__doc__', '__file__', '__license__', '__loader__', '__name__', '__package__', '__path__', '__spec__', '__title__', '__url__', '__version__', '_check_cryptography', '_internal_utils', 'adapters', 'api', 'auth', 'certs', 'chardet_version', 'charset_normalizer_version', 'check_compatibility', 'codes', 'compat', 'cookies', 'delete', 'exceptions', 'get', 'head', 'hooks', 'logging', 'models', 'options', 'packages', 'patch', 'post', 'put', 'request', 'session', 'sessions', 'ssl', 'status_codes', 'structures', 'urllib3', 'utils', 'warnings']
>>> 


>>> r = requests.get("http://google.com")
>>> r.cookies
<RequestsCookieJar[Cookie(version=0, name='AEC', value='AVYB7cqAFnkbEnNvpWvGBA9ve1M_JSdKgo7mLU4GK1p1KZigxhf-Bipu2g', port=None, port_specified=False, domain='.google.com', domain_specified=True, domain_initial_dot=True, path='/', path_specified=True, secure=True, expires=1745391827, discard=False, comment=None, comment_url=None, rest={'HttpOnly': None, 'SameSite': 'lax'}, rfc2109=False), Cookie(version=0, name='NID', value='518=Ba-UW487XfcHFf6eSMcF1lioSHTcC3Zt2kzulMnC207wDs17OrwbxveIppcZ4u_B4Tcm9ZLO9IQguWhIzH_-zRU9GMPhf66LsSatS4dLTIP8CfexMbf0oJh3T2phHKsdZjyqIocWHNcKoyLqwuWO_YAmQYIDvI_jGx7zaoB31LKU2UjAU4EJpk-Bhht3vo3pVlpL', port=None, port_specified=False, domain='.google.com', domain_specified=True, domain_initial_dot=True, path='/', path_specified=True, secure=False, expires=1745651027, discard=False, comment=None, comment_url=None, rest={'HttpOnly': None}, rfc2109=False)]>

>>> r.cookies
<RequestsCookieJar[Cookie(version=0, name='AEC', value='AVYB7cqAFnkbEnNvpWvGBA9ve1M_JSdKgo7mLU4GK1p1KZigxhf-Bipu2g', port=None, port_specified=False, domain='.google.com', domain_specified=True, domain_initial_dot=True, path='/', path_specified=True, secure=True, expires=1745391827, discard=False, comment=None, comment_url=None, rest={'HttpOnly': None, 'SameSite': 'lax'}, rfc2109=False), Cookie(version=0, name='NID', value='518=Ba-UW487XfcHFf6eSMcF1lioSHTcC3Zt2kzulMnC207wDs17OrwbxveIppcZ4u_B4Tcm9ZLO9IQguWhIzH_-zRU9GMPhf66LsSatS4dLTIP8CfexMbf0oJh3T2phHKsdZjyqIocWHNcKoyLqwuWO_YAmQYIDvI_jGx7zaoB31LKU2UjAU4EJpk-Bhht3vo3pVlpL', port=None, port_specified=False, domain='.google.com', domain_specified=True, domain_initial_dot=True, path='/', path_specified=True, secure=False, expires=1745651027, discard=False, comment=None, comment_url=None, rest={'HttpOnly': None}, rfc2109=False)]>
>>> r.headers
{'Date': 'Fri, 25 Oct 2024 07:03:47 GMT', 'Expires': '-1', 'Cache-Control': 'private, max-age=0', 'Content-Type': 'text/html; charset=ISO-8859-1', 'Content-Security-Policy-Report-Only': "object-src 'none';base-uri 'self';script-src 'nonce-GYqphV2LY_t_XuymXSKByw' 'strict-dynamic' 'report-sample' 'unsafe-eval' 'unsafe-inline' https: http:;report-uri https://csp.withgoogle.com/csp/gws/other-hp", 'P3P': 'CP="This is not a P3P policy! See g.co/p3phelp for more info."', 'Content-Encoding': 'gzip', 'Server': 'gws', 'Content-Length': '9134', 'X-XSS-Protection': '0', 'X-Frame-Options': 'SAMEORIGIN', 'Set-Cookie': 'AEC=AVYB7cqAFnkbEnNvpWvGBA9ve1M_JSdKgo7mLU4GK1p1KZigxhf-Bipu2g; expires=Wed, 23-Apr-2025 07:03:47 GMT; path=/; domain=.google.com; Secure; HttpOnly; SameSite=lax, NID=518=Ba-UW487XfcHFf6eSMcF1lioSHTcC3Zt2kzulMnC207wDs17OrwbxveIppcZ4u_B4Tcm9ZLO9IQguWhIzH_-zRU9GMPhf66LsSatS4dLTIP8CfexMbf0oJh3T2phHKsdZjyqIocWHNcKoyLqwuWO_YAmQYIDvI_jGx7zaoB31LKU2UjAU4EJpk-Bhht3vo3pVlpL; expires=Sat, 26-Apr-2025 07:03:47 GMT; path=/; domain=.google.com; HttpOnly'}


>>> r.encoding 
'ISO-8859-1'
>>> r.status_code 
200
>>> 


post


构造url

我们常常将http请求的参数以url的query string的形式进行发送,传统的做法是我们使用拼凑的方式构造这个url。例如我们需要构造以下这个url:

    http://httpbin.org/get?key1=value1&key2=value2

使用requests,你可以方便地构造这个url,而不用手工拼凑。你只需要将这些参数和值构造一个字典,然后将这个字典传给params参数即可。

传递一个字典给data参数

>>> import requests 
>>> payload={"key1":"value1","key2":"value2"}
>>> r=requests.post("https://httpbin.org/post")
>>> r1=requests.post("https://httpbin.org/post",data=payload)
>>> 

有没有参数的不同(r 未传 data,r1 传了 data):
>>> r.text
'{\n  "args": {}, \n  "data": "", \n  "files": {}, \n  "form": {}, \n  "headers": {\n    "Accept": "*/*", \n    "Accept-Encoding": "gzip, deflate, br", \n    "Content-Length": "0", \n    "Host": "httpbin.org", \n    "User-Agent": "python-requests/2.28.1", \n    "X-Amzn-Trace-Id": "Root=1-671b4668-1f54cdbc43c5b947731a92b5"\n  }, \n  "json": null, \n  "origin": "146.190.165.134", \n  "url": "https://httpbin.org/post"\n}\n'
>>> r1.text 
'{\n  "args": {}, \n  "data": "", \n  "files": {}, \n  "form": {\n    "key1": "value1", \n    "key2": "value2"\n  }, \n  "headers": {\n    "Accept": "*/*", \n    "Accept-Encoding": "gzip, deflate, br", \n    "Content-Length": "23", \n    "Content-Type": "application/x-www-form-urlencoded", \n    "Host": "httpbin.org", \n    "User-Agent": "python-requests/2.28.1", \n    "X-Amzn-Trace-Id": "Root=1-671b46b8-2b9d0e294d20bcc104da3fb3"\n  }, \n  "json": null, \n  "origin": "146.190.165.134", \n  "url": "https://httpbin.org/post"\n}\n'
>>> 

==http header==
<pre>
>>> r.headers['content-type']
'application/json'
>>> r.headers['content-type']='ada'
>>> r.headers['content-type']
'ada'
</pre>

get

In [20]: url3 ='https://www.weather.com.cn/data/sk/10113010.html'

In [21]: r3 = requests.get(url3)


In [32]: ku_url='https://www.kuaidi100.com/query'

In [33]: params = {'type': 'youzhengguonei','postid': '9893442769997'}

In [35]: r = requests.get(ku_url,params=params)

In [36]: r.json()
Out[36]: 
{'message': 'ok',
 'nu': '9893442769997',
 'ischeck': '1',
 'com': 'youzhengguonei',
 'status': '200',
 'condition': 'F00',
 'state': '3',
 'data': [{'time': '2024-11-05 11:44:13',
   'context': '查无结果',
   'ftime': '2024-11-05 11:44:13'}]}

#加头
In [37]: js_url = 'http://www.jianshu.com'

In [38]: r=requests.get(js_url)

In [39]: r.text
Out[39]: '<html>\r\n<head><title>403 Forbidden</title></head>\r\n<body>\r\n<center><h1>403 Forbidden</h1></center>\r\n<hr><center>openresty</center>\r\n</body>\r\n</html>\r\n'

In [40]: headers = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:133.0) Gecko/20100101 Firefox/133.0'}

In [41]: r=requests.get(js_url,headers=headers)
#出东西
r.text


img

 url2 = 'https://img02.sogoucdn.com/app/a/100520021/46b805167ca269bbfc35922057a14665'
r2 = requests.get(url2)

In [18]: with open('/tmp/i.jpg','wb') as fobj:
    ...:     fobj.write(r2.content)


see also

Python Requests库简明使用教程

python requests用法总结

Python爬虫利器一之Requests库的用法

requests 快速上手

Python 使用requests发送POST请求