txt版本
import pandas as pd
from openpyxl import load_workbook
from collections import defaultdict
import os
import re
# 这个函数用来从单元格内容中提取日期
# 如果 raw=True,会返回“2025/07/21”这种格式,否则返回“7月21日”
def extract_date_from_cell(cell_value, raw=False):
if pd.isna(cell_value):
return ""
# 尝试把数字或数字字符串都转成日期(Excel日期其实是个数字)
try:
num = float(cell_value)
date = pd.to_datetime(num, unit='d', origin='1899-12-30')
if raw:
return date.strftime("%Y/%m/%d")
return f"{date.month}月{date.day}日"
except Exception:
pass
# 如果不是数字,就直接当字符串处理
date_str = str(cell_value)
if raw:
return date_str
# 尝试从字符串中提取“几月几日”
date_str = re.sub(r'[^\d月日]', '', date_str)
match = re.search(r'(\d{1,2})月(\d{1,2})日', date_str)
if match:
month = int(match.group(1))
day = int(match.group(2))
return f"{month}月{day}日"
match = re.search(r'(\d{1,2})[\./](\d{1,2})', date_str)
if match:
month = int(match.group(1))
day = int(match.group(2))
return f"{month}月{day}日"
return date_str.strip()
# 这个函数用来获取单元格的背景色
# 主要识别“黄色填充”和“白底”,其它颜色会标记为“有填充颜色”
def get_cell_color(ws, row, col):
# openpyxl 的行和列都是从1开始的
cell = ws.cell(row=row, column=col)
fill = cell.fill
# 如果没有填充色,直接认为是白底
if fill.patternType is None or fill.fgColor is None:
return "白底"
fg = fill.fgColor
# 如果是RGB类型的颜色
if hasattr(fg, 'type') and fg.type == 'rgb' and isinstance(fg.rgb, str):
rgb = fg.rgb[-6:].upper() # 取后6位,得到真正的颜色值
# 常见的黄色
if rgb in ["FFFF00", "FFF200"]:
return "黄色填充"
# 判断是不是黄色(红高绿高蓝低)
try:
r = int(rgb[0:2], 16)
g = int(rgb[2:4], 16)
b = int(rgb[4:6], 16)
if r > 200 and g > 200 and b < 150:
return "黄色填充"
except Exception:
pass
return "有填充颜色"
else:
# 不是rgb类型,通常为白底或主题色
return "白底"
# 这个函数是整个程序的核心,负责读取Excel、分析数据、输出结果
# 增加start_row, end_row参数,支持只处理部分行(北区/南区)
def process_excel_schedule(df, ws, start_row, end_row):
# D=3, F=5, H=7, J=9, L=11, N=13, P=15
date_columns = [3, 5, 7, 9, 11, 13, 15] # 注意:列号从0开始
col_to_date = {} # 记录每一列对应的日期
for col_idx in date_columns:
if col_idx < df.shape[1]:
date_value = df.iloc[2, col_idx] # 第3行是日期
formatted_date = extract_date_from_cell(date_value)
col_to_date[col_idx] = formatted_date
# 每个日期列的下一个列也归到同一天(比如D和E都算同一天)
if col_idx + 1 < df.shape[1]:
col_to_date[col_idx + 1] = formatted_date
results = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))
# 遍历指定的行区间
for row_idx in range(start_row, end_row):
if row_idx >= df.shape[0]:
break
c_value = df.iloc[row_idx, 2] # 第3列是班次(1/2)
if pd.isna(c_value) or str(c_value).strip() == '':
continue # 跳过空行如果有的话
# 获取这一行的颜色(主要看第3列)
color_type = get_cell_color(ws, row_idx + 1, 3) # openpyxl行列从1开始
for col_idx in col_to_date:
if col_idx >= df.shape[1]:
continue
date = col_to_date[col_idx]
cell_value = df.iloc[row_idx, col_idx]
if pd.notna(cell_value) and str(cell_value).strip():
# 把内容按日期-班次-颜色分类收集
results[date][c_value][color_type].append(str(cell_value))
output_lines = [] # 用来存储最终要输出的每一行
# 下面开始整理输出内容
sorted_dates = sorted(set(col_to_date.values()), key=lambda x: min([k for k, v in col_to_date.items() if v == x]))
for date in sorted_dates:
# 班次排序,先按字符串长度再按内容排序
sorted_positions = sorted(results[date].keys(), key=lambda x: (len(str(x)), str(x)))
for position in sorted_positions:
# 把1和2替换成“白班”“夜班”
pos_str = str(position)
if pos_str == '1':
pos_str = '白班'
elif pos_str == '2':
pos_str = '夜班'
# 颜色替换为时间点
for color in ["黄色填充", "有填充颜色", "白底"]:
if color in results[date][position]:
values = results[date][position][color]
if color == "黄色填充":
color_str = "7:00"
elif color == "白底":
color_str = "7:30"
else:
color_str = color
# 拼接成一行,格式:日期,班次,时间:内容1、内容2...
line = f"{date},{pos_str},{color_str}:{'、'.join(values)}"
output_lines.append(line)
# 用两个换行隔开每条数据,让结果更清晰
return "\n\n".join(output_lines)
file_path = "222.xlsx"
excel = pd.ExcelFile(file_path)
wb = load_workbook(file_path, data_only=True)
all_results = []
for sheet_name in excel.sheet_names:
df = pd.read_excel(file_path, header=None, dtype=object, sheet_name=sheet_name)
ws = wb[sheet_name]
# 北区(前26行,Python索引0-25)
north_result = process_excel_schedule(df, ws, 0, 26)
all_results.append(f"【{sheet_name}】北区中晚班数据\n" + north_result)
# 南区(27行及以后,Python索引26及以后)
south_result = process_excel_schedule(df, ws, 26, df.shape[0])
all_results.append(f"【{sheet_name}】南区中晚班数据\n" + south_result)
# 合并所有结果
final_result = "\n\n".join(all_results)
print(final_result)
# 写入txt文件
output_file = os.path.join(os.path.dirname(file_path), "定点岗位表分析结果.txt")
with open(output_file, "w", encoding="utf-8") as f:
f.write(final_result)
print(f"\n结果已保存至: {output_file}")
json版本
import pandas as pd
from openpyxl import load_workbook
from collections import defaultdict
import os
import re
import json
def extract_date_from_cell(cell_value, raw=False):
if pd.isna(cell_value):
return ""
try:
num = float(cell_value)
date = pd.to_datetime(num, unit='d', origin='1899-12-30')
if raw:
return date.strftime("%Y/%m/%d")
return f"{date.month}月{date.day}日"
except Exception:
pass
date_str = str(cell_value)
if raw:
return date_str
date_str = re.sub(r'[^\d月日]', '', date_str)
match = re.search(r'(\d{1,2})月(\d{1,2})日', date_str)
if match:
month = int(match.group(1))
day = int(match.group(2))
return f"{month}月{day}日"
match = re.search(r'(\d{1,2})[\./](\d{1,2})', date_str)
if match:
month = int(match.group(1))
day = int(match.group(2))
return f"{month}月{day}日"
return date_str.strip()
def get_cell_color(ws, row, col):
cell = ws.cell(row=row, column=col)
fill = cell.fill
if fill.patternType is None or fill.fgColor is None:
return "白底"
fg = fill.fgColor
if hasattr(fg, 'type') and fg.type == 'rgb' and isinstance(fg.rgb, str):
rgb = fg.rgb[-6:].upper()
if rgb in ["FFFF00", "FFF200"]:
return "黄色填充"
try:
r = int(rgb[0:2], 16)
g = int(rgb[2:4], 16)
b = int(rgb[4:6], 16)
if r > 200 and g > 200 and b < 150:
return "黄色填充"
except Exception:
pass
return "有填充颜色"
else:
return "白底"
def process_excel_schedule_json(df, ws, start_row, end_row):
date_columns = [3, 5, 7, 9, 11, 13, 15]
col_to_date = {}
for col_idx in date_columns:
if col_idx < df.shape[1]:
date_value = df.iloc[2, col_idx]
formatted_date = extract_date_from_cell(date_value)
col_to_date[col_idx] = formatted_date
if col_idx + 1 < df.shape[1]:
col_to_date[col_idx + 1] = formatted_date
results = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))
for row_idx in range(start_row, end_row):
if row_idx >= df.shape[0]:
break
c_value = df.iloc[row_idx, 2]
if pd.isna(c_value) or str(c_value).strip() == '':
continue
color_type = get_cell_color(ws, row_idx + 1, 3)
for col_idx in col_to_date:
if col_idx >= df.shape[1]:
continue
date = col_to_date[col_idx]
cell_value = df.iloc[row_idx, col_idx]
if pd.notna(cell_value) and str(cell_value).strip():
results[date][c_value][color_type].append(str(cell_value))
# 构建json结构
json_result = {}
sorted_dates = sorted(set(col_to_date.values()), key=lambda x: min([k for k, v in col_to_date.items() if v == x]))
for date in sorted_dates:
date_dict = {}
sorted_positions = sorted(results[date].keys(), key=lambda x: (len(str(x)), str(x)))
for position in sorted_positions:
pos_str = str(position)
if pos_str == '1':
pos_str = '白班'
elif pos_str == '2':
pos_str = '夜班'
for color in ["黄色填充", "有填充颜色", "白底"]:
if color in results[date][position]:
values = results[date][position][color]
if color == "黄色填充":
color_str = "7:00"
elif color == "白底":
color_str = "7:30"
else:
color_str = color
# 结构:date -> pos_str -> color_str -> list
if pos_str not in date_dict:
date_dict[pos_str] = {}
date_dict[pos_str][color_str] = values
json_result[date] = date_dict
return json_result
# 主程序入口,支持多sheet和分区,输出json
file_path = "222.xlsx" # 这里改成你要分析的Excel文件名
excel = pd.ExcelFile(file_path)
wb = load_workbook(file_path, data_only=True)
all_results = {}
for sheet_name in excel.sheet_names:
df = pd.read_excel(file_path, header=None, dtype=object, sheet_name=sheet_name)
ws = wb[sheet_name]
# 北区(前26行,Python索引0-25)
north_result = process_excel_schedule_json(df, ws, 0, 26)
# 南区(27行及以后,Python索引26及以后)
south_result = process_excel_schedule_json(df, ws, 26, df.shape[0])
all_results[sheet_name] = {
"北区中晚班数据": north_result,
"南区中晚班数据": south_result
}
# 写入json文件
output_file = os.path.join(os.path.dirname(file_path), "定点岗位表分析结果.json")
with open(output_file, "w", encoding="utf-8") as f:
json.dump(all_results, f, ensure_ascii=False, indent=2)
print(f"\n结果已保存至: {output_file}")
© 版权声明
声明
博客使用须知
- 1本博客内容仅供个人学习、研究和欣赏,未经授权禁止用于任何商业用途。
- 2博客中的代码示例仅用于教育目的,使用时请遵循相关开源协议和授权规定。
- 3转载或引用本站内容请注明出处,尊重原创,共同维护良好的创作环境。
- 4博客评论区欢迎理性讨论,请勿发表违反法律法规的言论,共建和谐社区。
- 5如有内容侵犯您的权益,请通过博客联系方式告知,将立即核实并处理。
- 6使用本站资源时产生的任何问题与后果需自行承担,请谨慎操作。
THE END
暂无评论内容