文件操作
大约 7 分钟
环境准备
通过 pip 安装如下组件(pandas 读写 .xlsx 文件依赖 openpyxl,需要一并安装)。
> pip install pandas
> pip install numpy
> pip install matplotlib
> pip install opencv-python
> pip install openpyxl
创建、复制与删除
import shutil
import os
# Copy one file to a new name.
source, target = 'resources/dir1/fire.jpg', 'resources/dir2/fire_copy.jpg'
shutil.copyfile(source, target)

# Copy a whole directory tree, leaving out every file that matches *.txt.
# copytree is used here on the premise that the destination does not exist yet.
source, target = 'resources/dir1', 'resources/dir3'
skip_text_files = shutil.ignore_patterns("*.txt")
shutil.copytree(source, target, ignore=skip_text_files)
print(f"Directory copied from {source} to {target}")

# Move a file to a new location and name.
source, target = 'resources/dir1/fire.jpg', 'resources/dir2/fire_move.jpg'
shutil.move(source, target)

# Delete the file that was just moved.
source = 'resources/dir2/fire_move.jpg'
os.remove(source)
# Create a single folder, reporting whether it was already there.
source = "resources/dir4"
if os.path.exists(source):
    print("文件夹已经存在!")
else:
    os.mkdir(source)
    print("文件夹已经创建完毕!")

# Create a chain of nested folders in one call.
# exist_ok=True keeps the script re-runnable: without it a second run raises
# FileExistsError because the innermost folder already exists.
os.makedirs(r"resources/dir5/dir6/dir7", exist_ok=True)
# Walk a directory tree and collect the path of every file beneath it.
# The root lives in `walk_root` rather than `dir`, which would shadow the
# builtin dir() for the rest of the script.
walk_root = "resources/dir5"
full_path = []
for root, _subdirs, files in os.walk(walk_root):
    # os.walk yields bare file names; join them with the folder being visited.
    full_path.extend(os.path.join(root, name) for name in files)
print(full_path)
# Delete an empty folder (os.rmdir refuses to remove a folder that still has
# contents).
source = 'resources/dir8'
if os.path.exists(source):
    os.rmdir(source)
    print("删除文件夹" + source + "成功")
else:
    print("文件夹" + source + "不存在")

# Delete a folder together with everything inside it.
source = "resources/dir5"
if os.path.exists(source):
    shutil.rmtree(source)
    print("文件夹已经删除")
else:
    # Only report the missing folder. The earlier version also called
    # os.mkdir here, which created the very folder it reported as missing
    # and crashed when the parent directory was absent.
    print("文件夹不存在!")
读写文件内容
# read(): load the whole file into a single string.
with open('training.log', 'r') as log_file:
    content = log_file.read()
print(content)

# readline(): fetch one line per call. An empty string marks end of file.
# Lines keep their own newline, hence end='' when printing.
with open('training.log', 'r') as log_file:
    while True:
        line = log_file.readline()
        if not line:
            break
        print(line, end = '')

# readlines(): load every line into a list. Printing the joined list emits
# exactly the same text as printing the lines one by one with end=''.
with open('training.log', 'r') as log_file:
    lines = log_file.readlines()
print(''.join(lines), end = '')
# write(): store the string exactly as given (no newline is appended).
with open('example01.txt', 'w') as out_file:
    out_file.write("Hello, World!")

# Write several lines, terminating each one with a newline.
lines = ["Hello, World!", "Welcome to Python programming."]
with open('example02.txt', 'w') as out_file:
    for text in lines:
        out_file.write(text + '\n')
pandas表格处理
import pandas as pd
# Load one CSV file and one Excel workbook into DataFrames.
file1 = r'test.csv'
file2 = r'sales.xlsx'
data1 = pd.read_csv(file1)
data2 = pd.read_excel(file2)

# The banners are built from the path variables so they always name the file
# that was actually loaded (the hand-typed banner used to say "testl.csv").
print(f"<================ 打印{file1}文件内容 ================>")
print(data1)
print(f"<================ 打印{file2}文件内容 ================>")
print(data2)
# describe() prints summary statistics of the numeric columns. It does not
# sort anything, so the banner now says "statistical description".
print("<================ 对sales.xlsx内容进行统计描述 ================>")
print(data2.describe())

# head()/tail() default to five rows each.
print("<================ 读取头5行和尾5行 ================>")
print(data2.head())
print(data2.tail())

print("<================ 读取第一行 ================>")
print(data2.loc[0])

# The first row carries index label 0 (same label as the lookup just above).
# Label 1, used previously, is the second row.
print("<================ 读取第一行大类编码中的值 ================>")
print(data2.loc[0, "大类编码"])

# .loc slices are label based and include both end points.
print("<================ 对行和列进行切片 ================>")
print(data2.loc[1:3, '大类编码':'中类名称'])

print("<================ 读取“大类编码”列的所有行 ================>")
print(data2.loc[:, '大类编码'])

# Boolean mask selects the rows, the list selects the columns.
print("<================ 根据条件提取,并进行切片 ================>")
print(data2.loc[ data2["销售数量"] > 10, ["中类编码", "中类名称"]])

print("<================ 读取指定列的值 ================>")
box_loss = data1['train/box_loss']
print(box_loss)
# Total the sales amount per product type. reset_index() turns the group key
# back into an ordinary column.
print("<================ 按照分类对销售金额汇总求和 ================>")
result = data2.groupby('商品类型')['销售金额'].sum()
result = result.reset_index()
print(result)

# Save the summary. index=False drops the 0..n-1 row index, which would
# otherwise be written as an extra unnamed first column.
print("<================ 将数据保存为csv或excel文件 ================>")
result.to_csv("处理好的表格.csv", index=False)
result.to_excel("处理好的表格.xlsx", index=False)
matplotlib绘图
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib
import numpy as np
# Draw a single line chart of the box loss.
# The loss column is read from the training CSV under the name
# 'train/box_loss', the same file and column name the pandas section uses.
# The previous version looked it up in the sales workbook with a leading
# space in the name. NOTE(review): confirm 'test.csv' is the intended log file.
data = pd.read_csv('test.csv')
box_loss = data['train/box_loss']
x_list = list(range(len(box_loss)))
plt.plot(x_list, box_loss)
plt.show()
# Draw the box loss of three runs on one chart.
file1 = pd.read_csv('test1.csv')
file2 = pd.read_csv('test2.csv')
file3 = pd.read_csv('test3.csv')
data1 = file1['train/box_loss']
data2 = file2['train/box_loss']
data3 = file3['train/box_loss']

# One curve per run. Previously data2 was drawn twice and data1 never, so the
# 'test1' legend entry showed test2's curve. Each curve also gets its own
# x range, so runs of different length no longer raise a shape mismatch.
for run_label, series in (('test1', data1), ('test2', data2), ('test3', data3)):
    plt.plot(range(len(series)), series, label=run_label)

plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.title("Train box_loss")
plt.grid()
# Legend entries come from the label given to each plot() call above.
plt.legend()
plt.show()
# Draw a bar chart of total sales per top-level category.
# SimHei lets Matplotlib render Chinese labels; disabling unicode_minus keeps
# the minus sign displayable with that font.
matplotlib.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})

data = pd.read_excel('sales.xlsx')
# Sum per category, then order the categories from smallest to largest total
# and renumber the rows.
totals = data.groupby('大类名称')['销售金额'].sum()
totals = totals.reset_index()
totals = totals.sort_values('销售金额', ascending = True)
result = totals.reset_index(drop = True)
print(result)

# Category names double as the x positions and the tick labels.
x_labels = result['大类名称']
bars = plt.bar(x_labels, result['销售金额'], tick_label = x_labels)
# Tilt the labels so long names do not overlap.
plt.xticks(rotation = 45)

# Write each bar's height just above it, centred horizontally on the bar.
for bar in bars:
    height = bar.get_height()
    center_x = bar.get_x() + bar.get_width() / 2
    plt.text(center_x, height, round(height, 2), ha='center', va='bottom')
plt.show()
# Draw a pie chart of each product type's share of total sales.
matplotlib.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})

data = pd.read_excel('sales.xlsx')
# Total per product type, ordered from smallest to largest.
per_type = data.groupby('商品类型')['销售金额'].sum().reset_index()
result = per_type.sort_values('销售金额', ascending = True).reset_index(drop = True)
print(result)

sales_amounts = result['销售金额']
category_names = result['商品类型']
# Share of the overall total contributed by each type.
grand_total = sales_amounts.sum()
sales_proportions = sales_amounts / grand_total

fig1, ax1 = plt.subplots()
pie_options = dict(labels = category_names, autopct = '%1.1f%%', startangle = 90)
ax1.pie(sales_proportions, **pie_options)
plt.show()
# Draw a scatter plot of random points with per-point colour, size and
# transparency.
num_points = 100
x = np.random.rand(num_points)
y = np.random.rand(num_points)
# Colour value of each point (mapped through the colormap).
colors = np.random.rand(num_points)
# Marker area of each point.
sizes = 1000 * np.random.rand(num_points)
# Transparency of each point.
alphas = np.random.rand(num_points)
# `alphas` used to be computed and then ignored in favour of a fixed 0.5.
# Passing the array gives every point its own transparency.
# NOTE(review): array-valued alpha needs Matplotlib 3.4 or newer.
plt.scatter(x, y, c = colors, s = sizes, alpha = alphas, cmap = 'viridis')
plt.colorbar()
plt.show()
opencv图像处理
import cv2
import numpy as np
# Load an image and display it.
image = cv2.imread('food.png')
# The None check is how a failed load is detected here.
if image is None:
    print("Error: Could not load image.")
    # exit() is a convenience meant for the interactive shell and exits with
    # status 0. Raising SystemExit works in every context and reports failure.
    raise SystemExit(1)

# Read the picture in colour mode (the default).
image_color = cv2.imread('food.png')
# Read the picture again as a single-channel grayscale image.
image_gray = cv2.imread('food.png', cv2.IMREAD_GRAYSCALE)

# Only the grayscale version is shown; pass image_color to cv2.imshow to view
# the colour version as well.
cv2.imshow('Grayscale Image', image_gray)
# Wait for a key press, then close the window.
cv2.waitKey(0)
cv2.destroyAllWindows()
# Shrink the image to half its size along both axes.
# Only the first two entries of shape (height, then width) are needed.
original_height, original_width = image.shape[:2]
new_width, new_height = original_width // 2, original_height // 2

# cv2.resize takes the target size as (width, height).
resized_image = cv2.resize(image, (new_width, new_height), interpolation = cv2.INTER_AREA)

# Show the original and the resized picture in separate windows.
for window_title, picture in (('Original Image', image), ('Resized Image', resized_image)):
    cv2.imshow(window_title, picture)

# Wait for a key press, then close the windows.
cv2.waitKey(0)
cv2.destroyAllWindows()
# Rotate the image in 90-degree steps.
rotated_90 = cv2.rotate(image, cv2.ROTATE_90_CLOCKWISE)  # 90 degrees clockwise
rotated_180 = cv2.rotate(image, cv2.ROTATE_180)  # 180 degrees
# 90 degrees counter-clockwise is the same as 270 degrees clockwise.
rotated_270 = cv2.rotate(image, cv2.ROTATE_90_COUNTERCLOCKWISE)

cv2.imshow('original', image)
cv2.imshow('90 degree', rotated_90)
cv2.imshow('180 degree', rotated_180)
cv2.imshow('270 degree', rotated_270)
cv2.waitKey(0)
# Close the four windows after the key press, as every other demo does;
# otherwise they stay open while the following demos run.
cv2.destroyAllWindows()
# Save the image to disk.
if image is None:
    print("无法读取图像")
elif not cv2.imwrite('output_image.png', image):
    # imwrite's return value was previously ignored, so a save that did not
    # succeed went unnoticed. A falsy result is now reported.
    print("无法保存图像")
# Play a video file frame by frame.
# VideoCapture is given a video file path here.
cap = cv2.VideoCapture("test.mp4")
while True:
    # Grab the next frame.
    ret, frame = cap.read()
    # ret is falsy when no frame could be read (end of the video, or the file
    # could not be opened). Without this break the loop never terminated.
    if not ret:
        break
    cv2.imshow('Frame', frame)
    # Wait 15 ms between frames; pressing 'q' stops playback early.
    if cv2.waitKey(15) & 0xFF == ord('q'):
        break
# Release the capture and close the window.
cap.release()
cv2.destroyAllWindows()
# Record the camera feed to a video file while previewing it.
# Index 0 selects the default camera.
cap = cv2.VideoCapture(0)
# Stop if the camera could not be opened.
if not cap.isOpened():
    print("Error: Could not open camera.")
    # exit() is meant for the interactive shell and exits with status 0;
    # SystemExit works everywhere and reports the failure.
    raise SystemExit(1)

# Frame size reported by the camera; the writer is created with the same size.
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
# Codec and output file (20 frames per second).
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter('output.mp4', fourcc, 20.0, (frame_width, frame_height))

# try/finally guarantees the camera and the writer are released even when the
# loop is interrupted (for example by Ctrl+C), so the output file gets closed.
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            print("Failed to grab frame.")
            break
        # Append the frame to the output file and show it.
        out.write(frame)
        cv2.imshow('frame', frame)
        # Press 'q' to stop recording.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    cap.release()
    out.release()
    cv2.destroyAllWindows()
# Add Gaussian noise to the video output (noise, not a blur).
def add_gaussian_noise(image, mean=0, sigma=15):
    """Return a copy of *image* with additive Gaussian noise.

    The noise array is drawn with the same shape as *image*, so both
    single-channel (rows, cols) frames and multi-channel
    (rows, cols, channels) frames are accepted. The earlier version unpacked
    ``image.shape`` into exactly two values and raised ValueError for colour
    frames.

    Args:
        image: NumPy array of pixel values.
        mean: Mean of the noise distribution. Defaults to 0.
        sigma: Standard deviation of the noise distribution. Defaults to 15.

    Returns:
        A ``uint8`` array with the shape of *image*. Values are clipped to
        the range 0..255 before the conversion.
    """
    noise = np.random.normal(mean, sigma, image.shape)
    # The sum is floating point; clip before casting so out-of-range values
    # saturate instead of wrapping around.
    noisy = np.clip(image + noise, 0, 255)
    return noisy.astype(np.uint8)
# Convert a video to a resized, grayscale, mirrored and noisy copy.
input_video = 'input.mp4'
output_video = 'output.mp4'

# Open the input video. Stop early if it cannot be opened: the frame height
# would then be 0 and the aspect-ratio computation below would divide by zero.
cap = cv2.VideoCapture(input_video)
if not cap.isOpened():
    print("Error: Could not open video.")
    raise SystemExit(1)

# Frame rate and frame size of the input. The frame rate is kept as reported
# instead of being truncated with int(), which turned e.g. 29.97 into 29 and
# changed the playback speed of the output.
fps = cap.get(cv2.CAP_PROP_FPS)
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
# Scale to a height of 540 pixels, keeping the aspect ratio.
new_height = 540
new_width = int((new_height / frame_height) * frame_width)

# Writer for single-channel (isColor = False) frames of the new size.
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_video, fourcc, fps, (new_width, new_height), isColor = False)

# try/finally guarantees both handles are released even if a frame fails to
# process, so the output file gets closed.
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        # Resize the frame.
        frame = cv2.resize(frame, (new_width, new_height))
        # Convert to grayscale.
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        # Flip code 1 mirrors the frame left-to-right (around the vertical
        # axis). NOTE(review): the old comment said "vertical flip"; use flip
        # code 0 if a top-to-bottom flip was intended.
        frame = cv2.flip(frame, 1)
        # Add Gaussian noise.
        frame = add_gaussian_noise(frame)
        # Append the processed frame to the output video.
        out.write(frame)
finally:
    cap.release()
    out.release()
    cv2.destroyAllWindows()
pickle读写
import pickle
import time
import os
import numpy as np
# Load a pickled object from disk and show it.
# NOTE: unpickling can execute arbitrary code; only open files you trust.
with open('data.pkl', 'rb') as pickle_file:
    data = pickle.load(pickle_file)
print(data)
# Weight computation
def calculate_weights(shape=(10, 10), delay=5):
    """Simulate an expensive weight computation.

    Args:
        shape: Dimensions of the weight array to create. Defaults to
            (10, 10), the size that used to be hard-coded.
        delay: Seconds to sleep to imitate a long computation. Defaults to 5,
            the pause that used to be hard-coded.

    Returns:
        A NumPy array of the requested shape produced by ``np.random.rand``.
    """
    print("开始计算权重...")
    time.sleep(delay)
    weights = np.random.rand(*shape)
    print("权重计算完成")
    return weights
# Save the weights and the epoch to a file
def save_weights(weights, epoch, filename='weights.pkl'):
    """Pickle *weights* and *epoch* into *filename*.

    The data is first written to ``<filename>.tmp`` and then moved over the
    target with ``os.replace``. Writing the target in place would leave a
    truncated, unloadable checkpoint if the process were stopped mid-write,
    which is exactly the situation a checkpoint is meant to survive.

    Args:
        weights: Object to store under the 'weights' key.
        epoch: Object to store under the 'epoch' key.
        filename: Destination path. Defaults to 'weights.pkl'.
    """
    data = {'weights': weights, 'epoch': epoch}
    tmp_name = f"{filename}.tmp"
    try:
        with open(tmp_name, 'wb') as f:
            pickle.dump(data, f)
        os.replace(tmp_name, filename)
    finally:
        # After a successful replace the temp file is gone; this only cleans
        # up a partial file left behind by a failed write.
        if os.path.exists(tmp_name):
            os.remove(tmp_name)
    print(f"权重和epoch已保存到{filename}")
# Load the weights and the epoch from a file
def load_weights(filename='weights.pkl'):
    """Read a pickled checkpoint and return its weights and epoch.

    Args:
        filename: Path of the pickle file to read. Defaults to 'weights.pkl'.

    Returns:
        A ``(weights, epoch)`` tuple taken from the 'weights' and 'epoch'
        keys of the stored dictionary.
    """
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open(filename, 'rb') as checkpoint_file:
        checkpoint = pickle.load(checkpoint_file)
    print(f"权重和epoch已从{filename}加载")
    weights, epoch = checkpoint['weights'], checkpoint['epoch']
    return weights, epoch
# Main program
def main():
    """Run the simulated training loop, resuming from a checkpoint if present.

    When 'weights.pkl' exists, the weights and the last completed epoch are
    loaded from it and training continues with the following epoch.
    Otherwise the weights are computed and training starts at epoch 0. The
    checkpoint is rewritten after every epoch.
    """
    weights_file = 'weights.pkl'
    total_epochs = 100

    if os.path.exists(weights_file):
        # The checkpoint stores the epoch that had just finished when it was
        # written, so resume with the next one. Starting at the stored value
        # repeated an already completed epoch on every restart.
        weights, last_epoch = load_weights(weights_file)
        start_epoch = last_epoch + 1
    else:
        # No checkpoint: compute the weights and start from the first epoch.
        weights = calculate_weights()
        start_epoch = 0

    # Train the remaining epochs.
    for epoch in range(start_epoch, total_epochs):
        print(f"开始训练epoch {epoch}...")
        # Placeholder for the actual training work.
        time.sleep(1)
        print(f"完成训练epoch {epoch}")
        # Record progress after every epoch so an interrupted run can resume.
        save_weights(weights, epoch, weights_file)


if __name__ == '__main__':
    main()
感谢支持
更多内容,请移步《超级个体》。