Skip to main content

urllib

简介

urllib 是 Python 自带的标准库,无需安装,可以直接使用。如果想系统性地学习 urllib 库,可以直接阅读它的官方文档。

主要子模块

  • urllib.request - 请求模块
  • urllib.error - 异常处理模块
  • urllib.parse - 解析模块
  • urllib.robotparser - robots.txt 文件解析模块

urllib.request

语法:

urllib.request.urlopen(
url,
data=None, # 向服务器提交的数据,须为 bytes 等字节类型(字典需先用 urllib.parse.urlencode() 编码再 .encode());不为 None 时以 POST 方式请求
[timeout, ] # 超时时间
)

常用功能

文件下载(带进度显示)

# -*- coding: utf-8 -*-
#
# @Author: CPS
# @email: 373704015@qq.com
# @Date: 2022-11-22 22:41:11.106612
# @Last Modified by: CPS
# @Last Modified time: 2022-11-22 22:41:11.106612
# @file_path "D:\CPS\IDE\JS_SublmieText\Data\Packages\testt_update_channel\core"
# @Filename "download_channel_by_url.py"
# @Description: 下载最新的channel_v3.json
#
import shutil
from os import path
from urllib.request import urlretrieve, urlopen, urlcleanup

# url = "https://packagecontrol.io/channel_v3.json"
# header = {
# "User-Agent": "Mozilla/5.0 (X11; Fedora; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36"
# }


def check_url(url, timeout: float = 10.0) -> bool:
    """Return True when *url* answers with HTTP status 200.

    Args:
        url: URL to probe; passed straight to ``urlopen``.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        True if the response status code is exactly 200. False for any
        other status, and when the request fails or the URL is malformed.
    """
    try:
        # The context manager closes the response; previously it was leaked.
        with urlopen(url, timeout=timeout) as res:
            return res.getcode() == 200
    except (OSError, ValueError):
        # URLError/HTTPError and socket timeouts all derive from OSError;
        # ValueError is raised for a URL without a recognised scheme.
        return False


def run(output: str, url: str = "https://packagecontrol.io/channel_v3.json") -> str:
    """Download *url* to *output*, printing progress along the way.

    The payload is first fetched into ``./channel_v3.json.temp`` (resolved
    against the current working directory) and copied to *output* only after
    the download has finished, so a failed transfer does not overwrite an
    existing *output* file. The temporary file is removed afterwards.

    Args:
        output: Destination path for the downloaded file.
        url: Address to download.

    Returns:
        *output* on success, or an empty string when the URL check or the
        download fails.
    """
    import os  # local import: only ``os.path`` is imported at module level

    # Guard clause: bail out early when the URL is not reachable.
    if not check_url(url):
        print("检查url不通过,请稍后再试")
        return ""

    temp_file = path.abspath("./channel_v3.json.temp")
    print("开始下载channel_v3.json文件:", temp_file)

    def download_report(count, block_size, total_size):
        """Progress hook for ``urlretrieve``: print bytes fetched so far."""
        downloaded = count * block_size
        if total_size <= 0:
            # No usable Content-Length (urlretrieve passes -1): a percentage
            # cannot be computed, so report the raw byte count only.
            print(f"downloaded: {downloaded} bytes")
            return
        # The final block usually overshoots the real size; clamp it.
        downloaded = min(downloaded, total_size)
        percent = round(100.0 * downloaded / total_size, 2)
        print(f"downloaded: {downloaded}/{total_size}, {percent}% completed")

    try:
        saved_path, _headers = urlretrieve(
            url, temp_file, reporthook=download_report
        )
        shutil.copyfile(saved_path, output)
    except Exception as e:
        # Deliberately broad: this is a best-effort download that reports the
        # failure to the user instead of crashing.
        print("下载失败: ", e)
        return ""
    else:
        print("下载完成: ", output)
        return output
    finally:
        urlcleanup()
        # urlcleanup() does not remove a file that was given an explicit
        # name, so delete the temporary download ourselves (best effort).
        if path.exists(temp_file):
            try:
                os.remove(temp_file)
            except OSError as cleanup_error:
                print("清理临时文件失败: ", cleanup_error)


# if __name__ == "__main__":
# run("./channel_v3.json")