0%

python的一些工具类

使用 pandas 读取 excel,返回 Json 格式

1
2
3
4
5
6
7
8
import pandas as pd
import numpy as np

def read_excel_to_dict(filename, sheet_name=0):
df = pd.read_excel(filename, sheet_name=sheet_name)
df.replace(np.nan, "", inplace=True)
res = df.to_dict(orient="records")
return res

使用 pandas 保存字典到 excel

1
2
3
4
5
import pandas as pd

def dict_list_to_excel(dict_list, filePath, index=False):
df = pd.DataFrame(dict_list)
df.to_excel(filePath, encoding='utf-8', index=index)

读取 csv 文件,返回 Json 格式

1
2
3
4
5
6
7
8
9
10
11
def csv2Json(path):
fo = open(path, encoding="utf-8")
ls = []
for line in fo:
line = line.replace("\n", "")
ls.append(line.split(","))
fo.close()
results = []
for i in range(1, len(ls)):
results.append(dict(zip(ls[0], ls[i])))
return results

字典保存 csv 文件

1
2
3
4
5
6
7
8
9
10
11
12
13
import csv
import os

def toCsv(filename, item):
if not os.path.exists(filename):
with open(filename, 'a', newline='', encoding='utf-8') as csvfile:
f = csv.writer(csvfile)
f.writerow(sorted(item.keys()))
with open(filename, 'a', newline='', encoding='utf-8') as csvfile:
f = csv.writer(csvfile)
f.writerow([item[key] for key in sorted(item.keys())])
return item

字典列表根据某个字段排序

1
2
def sort_dict_list(dict_list, key, reverse=False):
return sorted(dict_list, key=lambda item: item[key], reverse=reverse)

字典列表根据某个字段分组

1
2
3
4
5
6
7
8
9
from operator import itemgetter
from itertools import groupby

def group_by_dict_list(dict_list, key):
dict_list.sort(key=itemgetter(key))
result = {}
for kw, items in groupby(dict_list, key=itemgetter((key))):
result[value[0][key]] = list(items)
return result

合并多个 excel 文件

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
import xlrd
import xlsxwriter
import os

def merge_xlsx(source_xls_list, target_xls):
# 读取数据
data = []

wb = xlrd.open_workbook(source_xls_list[0])
header = wb.sheets()[0].row_values(0)
data.append(header)

for i in source_xls_list:
wb = xlrd.open_workbook(i)
for sheet in wb.sheets():
for rownum in range(1, sheet.nrows):
data.append(sheet.row_values(rownum))
print(len(data))
# 写入数据
workbook = xlsxwriter.Workbook(target_xls)
worksheet = workbook.add_worksheet()
# font = workbook.add_format({"font_size": 14})
for i in range(len(data)):
for j in range(len(data[i])):
worksheet.write(i, j, data[i][j])
# 关闭文件流
workbook.close()

一行代码读取文件每一行到列表

1
kws = [line.strip() for line in open('keywords.txt', encoding='UTF-8').readlines()]

时间转换工具

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
# -*- coding:utf-8 -*-
import datetime
import time


# 日期时间字符串
st = "2017-11-23 16:10:10"
# 当前日期时间
dt = datetime.datetime.now()
# 当前时间戳
sp = time.time()

# 1.把datetime转成字符串
def datetime_toString(dt):
print("1.把datetime转成字符串: ", dt.strftime("%Y-%m-%d %H:%M:%S"))


# 2.把字符串转成datetime
def string_toDatetime(st):
print("2.把字符串转成datetime: ", datetime.datetime.strptime(st, "%Y-%m-%d %H:%M:%S"))


# 3.把字符串转成时间戳形式
def string_toTimestamp(st):
print("3.把字符串转成时间戳形式:", time.mktime(time.strptime(st, "%Y-%m-%d %H:%M:%S")))


# 4.把时间戳转成字符串形式
def timestamp_toString(sp):
print("4.把时间戳转成字符串形式: ", time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(sp)))


# 5.把datetime类型转外时间戳形式
def datetime_toTimestamp(dt):
print("5.把datetime类型转外时间戳形式:", time.mktime(dt.timetuple()))


# 1.把datetime转成字符串
datetime_toString(dt)
# 2.把字符串转成datetime
string_toDatetime(st)
# 3.把字符串转成时间戳形式
string_toTimestamp(st)
# 4.把时间戳转成字符串形式
timestamp_toString(sp)
# 5.把datetime类型转外时间戳形式
datetime_toTimestamp(dt)

时间工具

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
import datetime
from dateutil.relativedelta import relativedelta

# 格式化时间
now_str = datetime.datetime.now().strftime("%Y-%m-%d %H:%M")
# 字符串格式时间转时间类型
date = datetime.datetime.strptime('20180131', '%Y%m%d')
# 日期加减
# days, seconds, microseconds, milliseconds, minutes,hours, weeks
date = (datetime.datetime.now() + datetime.timedelta(days=1))
# 日期加减1年
date = datetime.datetime.now() - relativedelta(years=1)

# 获取两个日期之间的日期列表
def get_datelist(begin_date, end_date):
begin_date = begin_date.replace('-', '')
end_date = end_date.replace('-', '')
date_list = []
begin_date = datetime.datetime.strptime(begin_date, "%Y%m%d")
end_date = datetime.datetime.strptime(end_date, "%Y%m%d")
while begin_date <= end_date:
date_str = begin_date.strftime("%Y%m%d")
date_list.append(date_str)
begin_date += datetime.timedelta(days=1)
return date_list

import pandas as pd

# 获取两个日期之间的日期列表,pandas版本
def get_date_list(begin_date, end_date):
  date_list = [x.strftime('%Y-%m-%d') for x in list(pd.date_range(start=begin_date, end=end_date))]
  return date_list

cookies 格式转换

1
2
3
4
5
6
7
def get_cookies():
cookies = {}
with open('cookies/baidu_cookies.txt', encoding='utf-8') as f:
items = f.read().split(';')
for item in items:
cookies[item.split('=')[0]] = item.split('=')[1]
return cookies