Python 从入门到放弃 - Lesson 5 常用模块

文章目录

模块导入方法
动态加载模块
time与datetime
random模块
os模块
sys模块
shutil模块
zipfile模块
tarfile模块
shelve模块
xml模块
PyYAML模块
ConfigParser模块
hashlib模块和hmac模块
re模块

模块和包导入

模块导入方法

import XXX

根据sys.path（由脚本所在目录、环境变量PYTHONPATH以及标准库/第三方库的安装路径组成）的内容查找模块并导入

# test.py文件
def func1():
    print("Hello1")
    

def func2():
    print("Hello2")

# test.py文件

def func1():

print("Hello1")

def func2():

print("Hello2")

# 引入同目录下的test模块（即test.py文件）
import test  # 相当于将模块中所有的代码执行一遍，并将模块代码保存到test变量中

# 调用模块内容
test.func1()
test.func2()

# 引入同目录下的test模块（即test.py文件）

import test # 相当于将模块中所有的代码执行一遍，并将模块代码保存到test变量中

# 调用模块内容

test.func1()

test.func2()

from YYY import XXX

1. from dir import module

# 从文件夹引入模块
from . import test  # 作用与import test一样

# 调用模块内容
test.func1()

# 从文件夹引入模块

from . import test # 作用与import test一样

# 调用模块内容

test.func1()

2. from module import [function or var or *]

# 从模块中引入函数
from test import func1

# 调用函数
func1()

# 从模块中引入函数

from test import func1

# 调用函数

func1()

包导入

python包导入等于导入包下面的__init__.py文件，即将包中的代码拷贝到当前文件中执行一次

假设目录结构如下

package_test
   |- __init__.py
   |- test.py
import_package.py

# __init__.py in package_test
import os

print("package_test init file")
print(os.getcwd())  # prints the process's current working directory (this equals import_package.py's directory only when the script is launched from there)

# __init__.py in package_test

import os

print("package_test init file")

print(os.getcwd()) # 获取到导入目的文件的路径（即下面import_package.py所在目录）

# import_package.py中引入包
import package_test

# import_package.py中引入包

import package_test

如果要引入包下面的模块，要么使用前面说的from dir import module的方式，要么直接在包的__init__文件中写上import
导入包本质上是导入包内的__init__文件，即把__init__文件内容放到当前文件中执行一次

# __init__.py in package_test

from . import test  # must be a relative import here; a plain `import test` cannot be used
print("package_test init file")

# __init__ in.py package_test

from . import test # 不能用import test

print("package_test init file")

# import_package.py中引入包
import package_test

package_test.test.func1()

# import_package.py中引入包

import package_test

package_test.test.func1()

动态加载模块

import importlib

# 解释器内部方法
__import__("classmethod")

# 使用importlib模块导入
importlib.import_module("staticmethod")

import importlib

# 解释器内部方法

__import__("classmethod")

# 使用importlib模块导入

importlib.import_module("staticmethod")

常用模块

python中模块一般分为以下三类：

1. 标准库
2. 第三方模块
3. 自定义模块

time与datetime

时间格式

python中的三种时间格式：

1. 时间戳(timestamp格式)：自1970-01-01 00:00:00 UTC（即北京时间1970-01-01 08:00:00）以来的秒数

import time

print(time.time())

import time

print(time.time())

2. 时间元组(struct_time格式)：将时间拆分成一个包含九个元素的元组

import time

print(time.gmtime())  # 返回UTC时间
print(time.localtime())  # 返回本地时间

import time

print(time.gmtime()) # 返回UTC时间

print(time.localtime()) # 返回本地时间

返回值格式

time.struct_time(tm_year=2019, tm_mon=2, tm_mday=17, tm_hour=21, tm_min=27, tm_sec=50, tm_wday=6, tm_yday=48, tm_isdst=0)

3. 字符串时间格式

import time
print(time.asctime())
print(time.ctime())

import time

print(time.asctime())

print(time.ctime())

返回值格式

Sun Feb 17 22:04:32 2019

时间格式互转

import time

# timestamp -> struct_time
x = time.localtime(1111111111)  # broken down in the local time zone
y = time.gmtime(1111111111)  # broken down in UTC
print(x, "\n", y)
print(x.tm_year)  # year
print(x.tm_yday)  # day of the year

# struct_time -> timestamp (mktime interprets its argument as local time,
# so it is the inverse of localtime(), not of gmtime())
print(time.mktime(x))

# struct_time -> string in an explicit format
str_time = time.strftime("%Y-%m-%d %H:%M:%S", x)
print(str_time)

# string -> struct_time
print(time.strptime(str_time, "%Y-%m-%d %H:%M:%S"))

# struct_time and timestamp -> string in the default format
print(time.asctime(x))
print(time.ctime(1111111111))

import time

# timestamp -> struct_time
x = time.localtime(1111111111)  # broken down in the local time zone
y = time.gmtime(1111111111)  # broken down in UTC
print(x, "\n", y)
print(x.tm_year)  # year
print(x.tm_yday)  # day of the year

# struct_time -> timestamp (mktime interprets its argument as local time,
# so it is the inverse of localtime(), not of gmtime())
print(time.mktime(x))

# struct_time -> string in an explicit format
str_time = time.strftime("%Y-%m-%d %H:%M:%S", x)
print(str_time)

# string -> struct_time
print(time.strptime(str_time, "%Y-%m-%d %H:%M:%S"))

# struct_time and timestamp -> string in the default format
print(time.asctime(x))
print(time.ctime(1111111111))

datetime模块

Python

# datetime类的now方法返回当前时间的datetime格式 print(datetime.datetime.now()) print(datetime.datetime.now()+datetime.timedelta(3)) # 获取三天以后的时间 print(datetime.datetime.now()+datetime.timedelta(-3)) # 获取三天以前的时间 print(datetime.datetime.now()+datetime.timedelta(hours=3)) # 获取三小时以后的时间 # 另外两个常用类date和time print(datetime.date) print(datetime.time)

1
2
3
4
5
6
7
8
9

import datetime

# datetime.datetime.now() returns the current local time as a datetime object.
# Take it once so every offset below is computed from the same instant.
now = datetime.datetime.now()
print(now)
print(now + datetime.timedelta(3))  # three days from now (first positional argument is days)
print(now + datetime.timedelta(-3))  # three days ago
print(now + datetime.timedelta(hours=3))  # three hours from now

# The two other commonly used classes, date and time
# (these lines print the class objects themselves)
print(datetime.date)
print(datetime.time)

random模块

用于产生随机数

# 随机返回一个0-1之间的浮点数
print("random: ", random.random())

# 随机返回一个指定数值之间的整数
print("randint: ", random.randint(0, 5))

# 随机返回一个range范围内的数值
print("randrange: ", random.randrange(5))  # 0 ~ 4
print("randrange: ", random.randrange(1, 3))   # 1，2

# 随机返回序列中的一个值，可以是字符串，列表，元组等
print("choice: ",random.choice("random"))

# 随机返回序列中的指定个数值
print("sample: ",random.sample("random", 2))

# 随机返回一个指定区间内的浮点数
print("uniform: ", random.uniform(1, 5))

# 打乱一个列表的排序
list1 = [1, 2, 3, 4, 5]
print("Before shuffle: ", list1)
random.shuffle(list1)
print("After shuffle: ", list1)

# 随机返回一个0-1之间的浮点数

print("random: ", random.random())

# 随机返回一个指定数值之间的整数

print("randint: ", random.randint(0, 5))

# 随机返回一个range范围内的数值

print("randrange: ", random.randrange(5)) # 0 ~ 4

print("randrange: ", random.randrange(1, 3)) # 1，2

# 随机返回序列中的一个值，可以是字符串，列表，元组等

print("choice: ",random.choice("random"))

# 随机返回序列中的指定个数值

print("sample: ",random.sample("random", 2))

# 随机返回一个指定区间内的浮点数

print("uniform: ", random.uniform(1, 5))

# 打乱一个列表的排序

list1 = [1, 2, 3, 4, 5]

print("Before shuffle: ", list1)

random.shuffle(list1)

print("After shuffle: ", list1)

os模块

目录操作

import os

# Full path of the current working directory
print("getcwd: ", os.getcwd())

# Change the working directory, like the `cd` command
os.chdir(r"D:\PycharmProjects")
print("After_chdir: ", os.getcwd())

# curdir attribute: the string that denotes the current directory (".")
print("curdir: ", os.curdir)

# pardir attribute: the string that denotes the parent directory ("..")
print("pardir: ", os.pardir)

# Create a directory together with any missing parent directories
os.makedirs(r"C:\testa\testb\testc")

# Remove the leaf directory, then each parent in turn; stops at the first
# directory that is not empty
os.removedirs(r"C:\testa\testb\testc")

# Create a single directory
os.mkdir(r"C:\testa")

# Remove a single (empty) directory
os.rmdir(r"C:\testa")

# List the files and sub-directories of a directory
print("listdir: ", os.listdir(r"C:\Users\Public"))

# Delete a file; raises an error if the file does not exist or the path is a directory
#os.remove(r"C:\temp\test.txt")

# Rename a file or directory
#os.rename(r"C:\temp", r"C:\tmp")

# Status information (os.stat_result) of a file or directory
print("stat: ", os.stat(r"C:\Users\Public"))

# 返回当前目录完整路径

print("getcwd: ", os.getcwd())

# 切换目录，相当于cd命令

os.chdir(r"D:\PycharmProjects")

print("After_chdir: ", os.getcwd())

# curdir属性，当前目录.

print("curdir: ", os.curdir)

# pardir属性，父目录..

print("padir: ", os.pardir)

# 递归创建目录

os.makedirs(r"C:\testa\testb\testc")

os.removedirs(r"C:\testa\testb\testc")

# 创建目录

os.mkdir(r"C:\testa")

# 删除目录

os.rmdir(r"C:\testa")

print("listdir: ", os.listdir(r"C:\Users\Public"))

# 删除指定文件，如果文件为空则报错

#os.remove(r"C:\temp\test.txt")

# 重命名文件或目录

#os.rename(r"C:\temp", r"C:\tmp")

# 返回文件或目录信息

print("stat: ", os.stat(r"C:\Users\Public"))

系统信息相关的属性

# sep属性,当前系统的路径分隔符
print("sep: ", os.sep)

# linesep属性,当前系统的行结尾符
print("linesep: ", os.linesep.encode())

# pathsep属性,当前系统变量PATH所用的分隔符
print("pathsep: ", os.pathsep)

# environ属性,当前系统的环境变量(字典)
print("environ: ", os.environ)

# sep属性,当前系统的路径分隔符

print("sep: ", os.sep)

# linesep属性,当前系统的行结尾符

print("linesep: ", os.linesep.encode())

# pathsep属性,当前系统变量PATH所用的分隔符

print("pathsep: ", os.pathsep)

# environ属性,当前系统的环境变量(字典)

print("environ: ", os.environ)

路径操作

# 返回指定路径的绝对路径
print("path.abspath: ", os.path.abspath(__file__))

# 将路径拆分成目录和文件名两部分
print("path.split: ", os.path.split(r"C:\temp\test"))

# 返回路径的目录部分
print("path.dirname: ", os.path.dirname(r"C:\temp\test"))

# 返回路径的文件部分
print("path.basename: ", os.path.basename(r"C:\temp\test"))

# 判断路径是否是绝对路径
print("path.isabs: ", os.path.isabs(r"C:\temp\test"))

# 判断文件或目录是否存在
print("path.exists: ", os.path.exists(r"C:\temp\test"))

# 判断目标是否存在并且是文件
print("path.isfile: ", os.path.isfile(r"C:\Windows"))
print("path.isfile: ", os.path.isfile(r"C:\Windows\notepad.exe"))

# 判断目标是否存在并且是目录
print("path.isdir: ", os.path.isdir(r"C:\Windows"))
print("path.isdir: ", os.path.isdir(r"C:\Windows\notepad.exe"))

# 路径拼接，丢弃绝对路径前的参数(格式“C:”除外）
print("path.join: ", os.path.join(r"C:\Windows", r"system32", r"notepad.exe"))
print("path.join: ", os.path.join(r"Windows", r"\system32", r"notepad.exe"))  # 丢弃最后一个\前的参数
print("path.join: ", os.path.join(r"C:", r"\system32", r"\notepad.exe"))  # 丢弃最后一个\前的参数,盘符除外
print("path.join: ", os.path.join(r"/C", r"/system32", r"/notepad.exe"))  # 丢弃最后一个/前的参数
print("path.join: ", os.path.join(r"\C", r"\system32", r"\notepad.exe"))  # 丢弃最后一个\前的参数
print("path.join: ", os.path.join(r"Windows", r"C:", r"/notepad.exe"))  # 丢弃最后一个/前的参数,盘符除外

# 返回指定路径的绝对路径

print("path.abspath: ", os.path.abspath(__file__))

# 将路径拆分成目录和文件名两部分

print("path.split: ", os.path.split(r"C:\temp\test"))

# 返回路径的目录部分

print("path.dirname: ", os.path.dirname(r"C:\temp\test"))

# 返回路径的文件部分

print("path.basename: ", os.path.basename(r"C:\temp\test"))

# 判断路径是否是绝对路径

print("path.isabs: ", os.path.isabs(r"C:\temp\test"))

# 判断文件或目录是否存在

print("path.exists: ", os.path.exists(r"C:\temp\test"))

# 判断目标是否存在并且是文件

print("path.isfile: ", os.path.isfile(r"C:\Windows"))

print("path.isfile: ", os.path.isfile(r"C:\Windows\notepad.exe"))

# 判断目标是否存在并且是目录

print("path.isdir: ", os.path.isdir(r"C:\Windows"))

print("path.isdir: ", os.path.isdir(r"C:\Windows\notepad.exe"))

# 路径拼接，丢弃绝对路径前的参数(格式“C:”除外）

print("path.join: ", os.path.join(r"C:\Windows", r"system32", r"notepad.exe"))

print("path.join: ", os.path.join(r"Windows", r"\system32", r"notepad.exe")) # 丢弃最后一个\前的参数

print("path.join: ", os.path.join(r"C:", r"\system32", r"\notepad.exe")) # 丢弃最后一个\前的参数,盘符除外

print("path.join: ", os.path.join(r"/C", r"/system32", r"/notepad.exe")) # 丢弃最后一个/前的参数

print("path.join: ", os.path.join(r"\C", r"\system32", r"\notepad.exe")) # 丢弃最后一个\前的参数

print("path.join: ", os.path.join(r"Windows", r"C:", r"/notepad.exe")) # 丢弃最后一个/前的参数,盘符除外

文件信息

import os

# Last access time of a file or directory (timestamp)
print("path.getatime: ", os.path.getatime(r"C:\Windows"))

# Last modification time of a file or directory (timestamp)
print("path.getmtime: ", os.path.getmtime(r"C:\Windows"))

# System ctime (timestamp): the creation time on Windows, the time of the
# last metadata change on Unix
print("path.getctime: ", os.path.getctime(r"C:\Windows"))

import os

# Last access time of a file or directory (timestamp)
print("path.getatime: ", os.path.getatime(r"C:\Windows"))

# Last modification time of a file or directory (timestamp)
print("path.getmtime: ", os.path.getmtime(r"C:\Windows"))

# System ctime (timestamp): the creation time on Windows, the time of the
# last metadata change on Unix
print("path.getctime: ", os.path.getctime(r"C:\Windows"))

sys模块

import sys

# Command-line arguments; the first item is the script itself
print("argv:", sys.argv)

# Version string of the running Python interpreter
print("version:", sys.version)

# Module search path (sys.path): the list of locations `import` looks in
print("path:", sys.path)

# Platform identifier
print("platform:", sys.platform)

# Write to standard output
sys.stdout.write("Stdout message\n")
sys.stdout.flush()  # flush the output buffer so the message appears immediately

# Read one line from standard input
print("stdin.readline".center(20,"="))
# rstrip("\n") removes only the trailing newline; slicing with [:-1] would
# drop a real character when the input ends without a newline (EOF)
val = sys.stdin.readline().rstrip("\n")
print(val)

import sys

# 获取程序的运行参数，第一个参数默认是程序本身

print("argv:", sys.argv)

# 获取当前Python版本

print("version:", sys.version)

# 获取python环境变量Path的内容

print("path:", sys.path)

# 获取系统平台名称

print("platform:", sys.platform)

# 打印内容到标准输出

sys.stdout.write("Stdout message\n")

sys.stdout.flush() # 立即刷新输出缓存，打印消息

# 从标准输入读取内容

print("stdin.readline".center(20,"="))

val = sys.stdin.readline()[:-1] # [:-1] 去掉末尾的换行符

print(val)

shutil模块

高级文件、文件夹、压缩包处理模块

文件操作

import shutil
"""
shutil.copyfileobj(fsrc, fdst[, length])
从源文件拷贝内容到目标文件，可以选择指定单次读取的内容长度以分次读取大文件
"""

with open("src_file", "r", encoding="utf-8") as fs, open("dest_file1", "w", encoding="utf-8") as fw1:
    shutil.copyfileobj(fs, fw1, 1000)


# 通过指定文件名完成复制操作，会打开文件进行读写完成复制操作
shutil.copyfile("src_file", "dest_file2")

# 拷贝文件权限
shutil.copymode("src_file", "dest_file1")

# 拷贝文件状态信息到一个已存在的文件，包括：mode bits（权限）, atime（访问时间）, mtime（修改时间）, flags（不知道什么东东）
shutil.copystat("src_file", "dest_file1")

# 同时拷贝文件内容和文件权限到新文件
shutil.copy("src_file", "dest_file3")

# 同时拷贝文件内容和文件状态到新文件
shutil.copy2("src_file", "dest_file4")

import shutil

"""

shutil.copyfileobj(fsrc, fdst[, length])

从源文件拷贝内容到目标文件，可以选择指定单次读取的内容长度以分次读取大文件

"""

with open("src_file", "r", encoding="utf-8") as fs, open("dest_file1", "w", encoding="utf-8") as fw1:

shutil.copyfileobj(fs, fw1, 1000)

# 通过指定文件名完成复制操作，会打开文件进行读写完成复制操作

shutil.copyfile("src_file", "dest_file2")

# 拷贝文件权限

shutil.copymode("src_file", "dest_file1")

# 拷贝文件状态信息到一个已存在的文件，包括：mode bits（权限）, atime（访问时间）, mtime（修改时间）, flags（不知道什么东东）

shutil.copystat("src_file", "dest_file1")

# 同时拷贝文件内容和文件权限到新文件

shutil.copy("src_file", "dest_file3")

# 同时拷贝文件内容和文件状态到新文件

shutil.copy2("src_file", "dest_file4")

目录操作

# 递归复制目录到目标（如果目标已存在则报错）
shutil.copytree("src_dir", "dest_dir")
shutil.copytree("src_dir", "dest_dir_del")

# 移动文件或目录
shutil.move("dest_dir", "dest_dir_moved")
shutil.move("dest_file4", "dest_file4_moved")

# 递归删除目录
shutil.rmtree("dest_dir_del")

# 递归复制目录到目标（如果目标已存在则报错）

shutil.copytree("src_dir", "dest_dir")

shutil.copytree("src_dir", "dest_dir_del")

# 移动文件或目录

shutil.move("dest_dir", "dest_dir_moved")

shutil.move("dest_file4", "dest_file4_moved")

# 递归删除目录

shutil.rmtree("dest_dir_del")

文件打包和压缩

"""
第一个参数为压缩包存放路径和名字，没路径默认当前目录
第二个参数为打包模式，可以是zip,tar,bztar,gztar
第三个参数为要打包的目录，不填默认当前目录
make_archive实际是调用zipfile和tarfile两个模块进行文件归档压缩
"""
shutil.make_archive("dest_archive", "zip", "src_dir")

"""

第一个参数为压缩包存放路径和名字，没路径默认当前目录

第二个参数为打包模式，可以是zip,tar,bztar,gztar

第三个参数为要打包的目录，不填默认当前目录

make_archive实际是调用zipfile和tarfile两个模块进行文件归档压缩

"""

shutil.make_archive("dest_archive", "zip", "src_dir")

zipfile模块

zipfile模块用于创建、读取和解压zip归档文件（默认使用ZIP_STORED，只打包不压缩；需要压缩时可传入compression=zipfile.ZIP_DEFLATED）

import zipfile

# 以写入方式创建压缩包文件
zz = zipfile.ZipFile("dest.zip", "w")
zz.write("src_dir/echo.sh")
zz.write("src_dir/src_dir2")  # 只会压缩目录本身，不包括其内容
zz.close()

# 以读方式打开压缩包
zx = zipfile.ZipFile("dest.zip", "r")
zx.extract("src_dir/echo.sh")  # 解压指定文件到当前目录
zx.extractall()  # 解压所有文件,默认当前目录
zx.close()

import zipfile

# 以写入方式创建压缩包文件

zz = zipfile.ZipFile("dest.zip", "w")

zz.write("src_dir/echo.sh")

zz.write("src_dir/src_dir2") # 只会压缩目录本身，不包括其内容

zz.close()

# 以读方式打开压缩包

zx = zipfile.ZipFile("dest.zip", "r")

zx.extract("src_dir/echo.sh") # 解压指定文件到当前目录

zx.extractall() # 解压所有文件,默认当前目录

zx.close()

tarfile模块

tarfile模块对文件和目录进行打包（但不压缩）

import tarfile
import shutil

# tar归档文件和目录
tt = tarfile.open('dest.tar', 'w')
tt.add('src_dir/echo.sh')
tt.add('src_dir/src_dir2')  # 会递归打包子目录和文件
tt.close()

# tar解压
tx = tarfile.open('dest.tar','r')
tx.extractall()  # 解压所有文件,默认当前目录
tx.close()

import tarfile

import shutil

# tar归档文件和目录

tt = tarfile.open('dest.tar', 'w')

tt.add('src_dir/echo.sh')

tt.add('src_dir/src_dir2') # 会递归打包子目录和文件

tt.close()

# tar解压

tx = tarfile.open('dest.tar','r')

tx.extractall() # 解压所有文件,默认当前目录

tx.close()

shelve模块

以K/V方式保存内存中的数据到文件，支持所有pickle支持的数据类型

# 导入模块
import shelve


# 定义一个函数
def func1():
    print("Message from func1")


# 使用shelve打开文件
d = shelve.open("shelve_dump")
# 以K/V形式存储值到对象d中
d["name"] = "Bob"  # 存储字符串类型
d["info"] = {"age":20, "addr": "CN"}  # 存储字典类型
d["func"] = func1  # 存储函数
d.close()


# 还原数据
r = shelve.open("shelve_dump")
# .get方法获取指定元素
print(r.get("name"))
# .items方法获取所有元素
for item in r.items():
    print(item)
    # 还原函数
    if item[0] == "func":
        item[1]()
r.close()

# 导入模块

import shelve

# 定义一个函数

def func1():

print("Message from func1")

# 使用shelve打开文件

d = shelve.open("shelve_dump")

# 以K/V形式存储值到对象d中

d["name"] = "Bob" # 存储字符串类型

d["info"] = {"age":20, "addr": "CN"} # 存储字典类型

d["func"] = func1 # 存储函数

d.close()

# 还原数据

r = shelve.open("shelve_dump")

# .get方法获取指定元素

print(r.get("name"))

# .items方法获取所有元素

for item in r.items():

print(item)

# 还原函数

if item[0] == "func":

item[1]()

r.close()

xml模块

XML格式

<info lange="xml">123456</info>
<标签(tag) 属性(attrib)>"文本(text)"</标签(tag)>

<name state="1" />
<标签(tag) 属性(attrib) />

<标签(tag) 属性(attrib)>"文本(text)"</标签(tag)>

<标签(tag) 属性(attrib) />"

读取xml

import xml.etree.ElementTree as E

# 读取xml文档
xml_tree = E.parse("data_example.xml")

# 获取xml文档的根元素
root = xml_tree.getroot()
print(root)
print("root.tag:", root.tag)

# 遍历xml文档
for leaf in root:
    # 获取节点tag名称
    print("leaf.tag:", leaf.tag)
    # 获取节点属性
    print("leaf.attrib:", leaf.attrib)
    # 获取节点值
    print("leaf.text:", leaf.text)
    # 遍历下一层
    for n in leaf:
        print(n)
        print(n.text)

import xml.etree.ElementTree as E

# 读取xml文档

xml_tree = E.parse("data_example.xml")

# 获取xml文档的根元素

root = xml_tree.getroot()

print(root)

print("root.tag:", root.tag)

# 遍历xml文档

for leaf in root:

# 获取节点tag名称

print("leaf.tag:", leaf.tag)

# 获取节点属性

print("leaf.attrib:", leaf.attrib)

# 获取节点值

print("leaf.text:", leaf.text)

# 遍历下一层

for n in leaf:

print(n)

print(n.text)

修改xml内容

# 循环查找出所有year节点
for leaf in root.iter("year"):
    print(leaf.text)
    # 修改节点内容（要转换成字符串）
    leaf.text = str(int(leaf.text) + 1)
    # 增加属性
    leaf.set("mod", "true")
# 写入到文件
xml_tree.write("data_example_mod.xml")

### 删除节点
# 查找所有country节点
for leaf in root.findall("country"):
    # 从country节点查找year子节点
    year = int(leaf.find("year").text)
    if year < 2017:
        # remove 方法删除节点
        root.remove(leaf)
# 写入到文件
xml_tree.write("data_example_mod2.xml")

# 循环查找出所有year节点

for leaf in root.iter("year"):

print(leaf.text)

# 修改节点内容（要转换成字符串）

leaf.text = str(int(leaf.text) + 1)

# 增加属性

leaf.set("mod", "true")

# 写入到文件

xml_tree.write("data_example_mod.xml")

### 删除节点

# 查找所有country节点

for leaf in root.findall("country"):

# 从country节点查找year子节点

year = int(leaf.find("year").text)

if year < 2017:

# remove 方法删除节点

root.remove(leaf)

# 写入到文件

xml_tree.write("data_example_mod2.xml")

创建xml文件

# 创建根元素
info = E.Element("info")
# 创建子元素，指定上级元素，指定元素属性
name = E.SubElement(info, "person", name="Bob")
age = E.SubElement(name, "age")
# 为子元素赋值
age.text = "20"
sex = E.SubElement(name, "sex")
sex.text = "Male"

# 第二个子元素
name2 = E.SubElement(info, "person", name="Tiny")
age2 = E.SubElement(name2, "age")
age2.text = "30"
sex2 = E.SubElement(name2, "sex")
sex2.text = "Female"

# 生成元素树
el = E.ElementTree(info)
# 写入文件
el.write("new_xml.xml")

# 创建根元素

info = E.Element("info")

# 创建子元素，指定上级元素，指定元素属性

name = E.SubElement(info, "person", name="Bob")

age = E.SubElement(name, "age")

# 为子元素赋值

age.text = "20"

sex = E.SubElement(name, "sex")

sex.text = "Male"

# 第二个子元素

name2 = E.SubElement(info, "person", name="Tiny")

age2 = E.SubElement(name2, "age")

age2.text = "30"

sex2 = E.SubElement(name2, "sex")

sex2.text = "Female"

# 生成元素树

el = E.ElementTree(info)

# 写入文件

el.write("new_xml.xml")

PyYAML模块

略，详见

http://pyyaml.org/wiki/PyYAMLDocumentation

ConfigParser模块

用于生成和修改配置文件,例如MySQL配置文件

[client]
port = 3306
socket = /tmp/mysql.sock

[mysqld]
port = 3306
socket = /tmp/mysql.sock

basedir = /usr/local/mysql
datadir = /data/mysql
pid-file = /data/mysql/mysql.pid
user = mysql
bind-address = 0.0.0.0
server-id = 1

[mysqldump]
max_allowed_packet = 16M

[myisamchk]
key_buffer_size = 8M
sort_buffer_size = 8M
read_buffer = 4M
write_buffer = 4M

创建配置

import configparser

# Create a ConfigParser object
config = configparser.ConfigParser()

# Each section is assigned a dict of option -> value (all values are strings).
# The values mirror the sample MySQL configuration shown above.
config["mysqld"] = {
    "port": "3306",
    "socket": "/tmp/mysql.sock",
    "basedir": "/usr/local/mysql",
    "datadir": "/data/mysql",
    "pid-file": "/data/mysql/mysql.pid",
    "user": "mysql",
    "bind-address": "0.0.0.0",
    "server-id": "1",
}

config["client"] = {
    "port": "3306",
    "socket": "/tmp/mysql.sock",
}

config["mysqldump"] = {"max_allowed_packet": "16M"}

config["myisamchk"] = {
    "key_buffer_size": "8M",
    "sort_buffer_size": "8M",
    "read_buffer": "4M",
    "write_buffer": "4M",
}

# Open a file handle and write the ConfigParser object to it
with open("my.cnf", "w") as cnf:
    config.write(cnf)

import configparser

# 定义一个configparser对象

config = configparser.ConfigParser()

# 定义对象的段以及其内容（一个字典）

config["mysqld"] = {"port": "3306",

"socket": "/tmp/mysql.sock",

"basedir": "data/mysql/mysql.pid",

"user": "mysql",

"bind-address": "0.0.0.0",

"server-id": "1"}

config["client"] = {"port": "3306",

"socket": "/tmp/mysql.sock"}

config["mysqldump"] = {"max_allowed_packet": "16M"}

config["myisamchk"] = {"key_buffer_size": "8M",

"sort_buffer_size": "8M",

"read_buffer": "4M",

"write_buffer": "4M"}

# 打开一个文件句柄，将configparser对象写入

with open("my.cnf", "w") as cnf:

config.write(cnf)

读取配置

import configparser

# 定义一个configparser对象用于读取
config_reader = configparser.ConfigParser()

# 读取配置文件
config_reader.read("my.cnf")

# 返回所有配置段
print(config_reader.sections())

# 返回指定段中的所有配置选项
print(config_reader.options("mysqld"))

# 以元组方式返回指定段中的所有配置项和值
print(config_reader.items("mysqld"))

# 取出指定配置项的值
print(config_reader["client"]["port"])
print(config_reader.get("client", "socket"))

# 遍历配置段内的字段和值
for key in config_reader["mysqld"]:
    print("%s : %s" % (key, config_reader["mysqld"][key]))

import configparser

# 定义一个configparser对象用于读取

config_reader = configparser.ConfigParser()

# 读取配置文件

config_reader.read("my.cnf")

# 返回所有配置段

print(config_reader.sections())

# 返回指定段中的所有配置选项

print(config_reader.options("mysqld"))

# 以元组方式返回指定段中的所有配置项和值

print(config_reader.items("mysqld"))

# 取出指定配置项的值

print(config_reader["client"]["port"])

print(config_reader.get("client", "socket"))

# 遍历配置段内的字段和值

for key in config_reader["mysqld"]:

print("%s : %s" % (key, config_reader["mysqld"][key]))

修改配置

import configparser

# Create a ConfigParser object
config_reader = configparser.ConfigParser()

# Load the existing configuration first. Without this the parser is empty and
# config_reader["client"] below raises KeyError. Note that read() silently
# skips files that do not exist.
config_reader.read("my.cnf")

# Change individual options
config_reader["client"]["port"] = "3307"
config_reader.set("client", "socket", "/var/run/mysql.sock")

# Remove a section
config_reader.remove_section("mysqldump")

# Remove an option
config_reader.remove_option("mysqld", "server-id")

# Add a section
if not config_reader.has_section("backup"):  # check whether the section exists
    config_reader.add_section("backup")

# Add an option
if not config_reader.has_option("backup", "user"):  # check whether the option exists
    config_reader.set("backup", "user", "backup_user")

# Write the result to a new file
with open("my_new.cnf", "w") as new:
    config_reader.write(new)

import configparser

# Create a ConfigParser object
config_reader = configparser.ConfigParser()

# Load the existing configuration first. Without this the parser is empty and
# config_reader["client"] below raises KeyError. Note that read() silently
# skips files that do not exist.
config_reader.read("my.cnf")

# Change individual options
config_reader["client"]["port"] = "3307"
config_reader.set("client", "socket", "/var/run/mysql.sock")

# Remove a section
config_reader.remove_section("mysqldump")

# Remove an option
config_reader.remove_option("mysqld", "server-id")

# Add a section
if not config_reader.has_section("backup"):  # check whether the section exists
    config_reader.add_section("backup")

# Add an option
if not config_reader.has_option("backup", "user"):  # check whether the option exists
    config_reader.set("backup", "user", "backup_user")

# Write the result to a new file
with open("my_new.cnf", "w") as new:
    config_reader.write(new)

hashlib模块和hmac模块

用于计算MD5值或者SHA值

hashlib

import hashlib

# Build an md5 object that is seeded with the first chunk of data
m1 = hashlib.md5("Nice to me you.".encode())
# Hex digest of everything fed in so far
print(m1.hexdigest())
# Feed more data; the digest now covers both chunks concatenated
m1.update("你好".encode(encoding="utf-8"))
print(m1.hexdigest())

# Same idea with the sha256 algorithm, all data passed in one go
m2 = hashlib.sha256("Nice to me you.你好".encode())
print(m2.hexdigest())

import hashlib

# 定义一个md5对象

m1 = hashlib.md5()

# 更新对象内容

m1.update("Nice to me you.".encode())

# 打印16进制的md5值

print(m1.hexdigest())

# 追加内容

m1.update("你好".encode(encoding="utf-8"))

print(m1.hexdigest())

# 使用sha256摘要算法

m2 = hashlib.sha256()

m2.update("Nice to me you.你好".encode())

print(m2.hexdigest())

hmac

加盐计算MD5

import hmac

# Compute a keyed MD5 digest (HMAC-MD5) with the hmac module.
# digestmod must be given explicitly: it is a required argument since
# Python 3.8, and omitting it raises TypeError.
m3 = hmac.new("天王盖地虎".encode(), "Nice to me you.你好".encode(), digestmod="md5")
print(m3.hexdigest())

import hmac

# Compute a keyed MD5 digest (HMAC-MD5) with the hmac module.
# digestmod must be given explicitly: it is a required argument since
# Python 3.8, and omitting it raises TypeError.
m3 = hmac.new("天王盖地虎".encode(), "Nice to me you.你好".encode(), digestmod="md5")
print(m3.hexdigest())

re模块

正则表达式基础

'.'     默认匹配除\n之外的任意一个字符，若指定flag DOTALL,则匹配任意字符，包括换行
'^'     匹配字符开头，若指定flags MULTILINE，则还匹配每一行的开头，例如re.search(r"^a","\nabc\neee",flags=re.MULTILINE)也可以匹配上
'$'     匹配字符结尾，若指定flags MULTILINE，则还匹配每一行的结尾，例如re.search("foo$","bfoo\nsdfsf",flags=re.MULTILINE).group()也可以匹配上
'*'     匹配*号前的字符0次或多次，re.findall("ab*","cabb3abcbbac")  结果为['abb', 'ab', 'a']
'+'     匹配前一个字符1次或多次，re.findall("ab+","ab+cd+abb+bba") 结果['ab', 'abb']
'?'     匹配前一个字符1次或0次
'{m}'   匹配前一个字符m次
'{n,m}' 匹配前一个字符n到m次，re.findall("ab{1,3}","abb abc abbcbbb") 结果['abb', 'ab', 'abb']
'|'     匹配|左或|右的字符，re.search("abc|ABC","ABCBabcCD").group() 结果'ABC'
'(...)' 分组匹配，re.search("(abc){2}a(123|456)c", "abcabca456c").group() 结果 abcabca456c
 
 
'\A'    只从字符开头匹配，re.search("\Aabc","alexabc") 是匹配不到的
'\Z'    匹配字符结尾，同$
'\d'    匹配数字0-9
'\D'    匹配非数字
'\w'    匹配[A-Za-z0-9_]（str模式下还包括Unicode中的字母和数字）
'\W'    匹配非[A-Za-z0-9_]
'\s'    匹配空白字符、\t、\n、\r , re.search("\s+","ab\tc1\n3").group() 结果 '\t'
 
'(?P<name>...)' 命名分组匹配 re.search("(?P<province>[0-9]{4})(?P<city>[0-9]{2})(?P<birthday>[0-9]{4})","371481199306143242").groupdict() 结果{'province': '3714', 'city': '81', 'birthday': '1993'}

re匹配

import re

# match: only matches at the beginning of the string
print(re.match("[A-Z]+", "superMan666Super"))  # no match -> None
print(re.match("[A-Z]+", "SSuperMan666Super"))  # matches "SS"
print(re.match("[A-Z]+", "SSuperMan666").group())  # the matched text

# search: scans the string and returns the first match
print(re.search("[A-Z]+", "superMan666Super"))

# findall: returns every matching substring
print(re.findall("[A-Z]+", "superMan666Super"))

# split: splits the string at each match and returns a list
print(re.split("[A-Z]+", "superMan666Super"))

# sub: replaces each match with the given string
print(re.sub("[A-Z]+", "||", "superMan666Super"))

# Flag re.I: ignore case
print("re.I".center(20, "="))
print(re.findall("[A-Z]+", "superMan666Super", re.I))

# Flag re.M: multi-line mode, "^" also matches at the start of every line.
# The class is [a-zA-Z]; the range A-z would also match [ \ ] ^ _ and `.
print("re.M".center(20, "="))
print(re.findall("[a-zA-Z]+", "superMan\nSuper666"))
print(re.findall("^[a-zA-Z]+", "superMan\nSuper666"))
print(re.findall("^[a-zA-Z]+", "superMan\nSuper666", re.M))

# Flag re.S (DOTALL): "." also matches the newline character
print("No re.S".center(20, "="))
print(re.search(".+", "superMan\n666Super").group())
print("re.S".center(20, "="))
print(re.search(".+", "superMan\n666Super", re.S).group())

# Matching a literal backslash
print(re.search("\\\\", "123\\456").group())  # four backslashes become two after Python string parsing, and re reads those two as one literal backslash
print(re.search(r"\\", "123\\456").group())  # raw string: the two backslashes reach re unchanged and mean one literal backslash
print(re.search(r"\\", r"123\456").group())  # raw strings for both the pattern and the text

# Named groups: each group's match is stored under its name, groupdict() returns them as a dict
print(re.search("(?P<area>[0-9]{3,4})-(?P<number>[0-9]{7,8})", "010-12345678").groupdict())

import re

# match方法：匹配开头

print(re.match("[A-Z]+", "superMan666Super")) # 无匹配项

print(re.match("[A-Z]+", "SSuperMan666Super")) # 匹配SS

print(re.match("[A-Z]+", "SSuperMan666").group()) # 返回匹配到的字符串

# search方法：在字符串中搜索,返回第一个匹配到的字符串

print(re.search("[A-Z]+", "superMan666Super"))

# findall方法：返回所有匹配到的字符串

print(re.findall("[A-Z]+", "superMan666Super"))

# split方法：以匹配到的字符串分割字符串，返回一个列表

print(re.split("[A-Z]+", "superMan666Super"))

# sub方法：将匹配到的字符串替换为指定字符串

print(re.sub("[A-Z]+", "||", "superMan666Super"))

# 匹配模式re.I：忽略大小写

print("re.I".center(20, "="))

print(re.findall("[A-Z]+", "superMan666Super", re.I))

# 匹配模式re.M：多行匹配，

print("re.M".center(20, "="))

print(re.findall("[a-zA-z]+", "superMan\nSuper666"))

print(re.findall("^[a-zA-z]+", "superMan\nSuper666"))

print(re.findall("^[a-zA-z]+", "superMan\nSuper666", re.M))

# 匹配模式re.S：点任意匹配，贪婪模式

print("No re.S".center(20, "="))

print(re.search(".+", "superMan\n666Super").group())

print("re.S".center(20, "="))

print(re.search(".+", "superMan\n666Super", re.S).group())

# 反斜杠匹配

print(re.search("\\\\", "123\\456").group()) # 四个放斜杠两两一组python经解析器后成两个反斜杠，最后再经一次re转义后还原成一个反斜杠

print(re.search(r"\\", "123\\456").group()) # 使用python原生字符串模式，两个反斜杠由re转义成为一个字符串类型的反斜杠

print(re.search(r"\\", r"123\456").group()) # 前后都使用原生字符串

# 分组匹配：将匹配到的字符串分组存为一个K/V对，然后组合成一个字典

print(re.search("(?P<area>[0-9]{3,4})-(?P<number>[0-9]{7,8})", "010-12345678").groupdict())

原文链接：Python 从入门到放弃 - Lesson 5 常用模块，转载请注明来源！

2025 年 4 月
一	二	三	四	五	六	日
	1	2	3	4	5	6
7	8	9	10	11	12	13
14	15	16	17	18	19	20
21	22	23	24	25	26	27
28	29	30

模块和包导入

模块导入方法

import XXX

from YYY import XXX

包导入

动态加载模块

常用模块

time与datetime

时间格式

时间格式互转

random模块

os模块

目录操作

系统信息相关的属性

路径操作

文件信息

sys模块

shutil模块

文件操作

目录操作

文件打包和压缩

zipfile模块

tarfile模块

shelve模块

xml模块

XML格式

读取xml

修改xml内容

创建xml文件

PyYAML模块

ConfigParser模块

创建配置

读取配置

修改配置

hashlib模块和hmac模块

hashlib

hmac

re模块

正则表达式基础

re匹配

发表回复 取消回复

发表回复取消回复