8 个 Python 实用脚本,收藏备用( 二 )

< now - days * 86400: # 判断是否超过240天 if os.path.isfile(f): # 检查是否是文件 shutil.move(f, dst) # 移动文件5.扫描脚本目录 , 并给出不同类型脚本的计数 。import osimport shutilfrom time import strftimelogsdir="c:logsputtylogs"zipdir="c:logsputtylogszipped_logs"zip_program="zip.exe"for files in os.listdir(logsdir):if files.endswith(".log"):files1=files+"."+strftime("%Y-%m-%d")+".zip"os.chdir(logsdir)os.system(zip_program + " " + files1 +" "+ files)shutil.move(files1, zipdir)os.remove(files)6.下载Leetcode的算法题 。import sysimport reimport osimport argparseimport requestsfrom lxml import html as lxml_htmltry: import htmlexcept ImportError: import HTMLParser html = HTMLParser.HTMLParser()try: import cPickle as pkexcept ImportError: import pickle as pkclass LeetcodeProblems(object): def get_problems_info(self): leetcode_url = 'https://leetcode.com/problemset/algorithms' res = requests.get(leetcode_url) if not res.ok: print('request error') sys.exit() cm = res.text cmt = cm.split('tbody>')[-2] indexs = re.findall(r'<td>(d+)</td>', cmt) problem_urls = ['https://leetcode.com' + urlfor url in re.findall( r'<a href=https://www.isolves.com/it/cxkf/yy/Python/2019-10-09/"(/problems/.+?)"', cmt)] levels = re.findall(r"(.+?)", cmt) tinfos = zip(indexs, levels, problem_urls) assert (len(indexs) == len(problem_urls) == len(levels)) infos = [] for info in tinfos: res = requests.get(info[-1]) if not res.ok: print('request error') sys.exit() tree = lxml_html.fromstring(res.text) title = tree.xpath('//meta[@property="og:title"]/@content')[0] description = tree.xpath('//meta[@property="description"]/@content') if not description: description = tree.xpath('//meta[@property="og:description"]/@content')[0] else: description = description[0] description = html.unescape(description.strip()) tags = tree.xpath('//div[@id="tags"]/following::a[@class="btn btn-xs btn-primary"]/text()') infos.append( { 'title': title, 'level': info[1], 'index': int(info[0]), 'description': description, 'tags': tags } ) with open('leecode_problems.pk', 'wb') as g: pk.dump(infos, g) return infos def to_text(self, pm_infos): if self.args.index: key = 'index' elif self.args.title: key = 'title' elif self.args.tag: key = 'tags' elif self.args.level: key = 'level' else: key = 'index' infos = sorted(pm_infos, key=lambda i: i[key]) text_template = '## {index} - {title}n''~{level}~ {tags}n''{description}n' + 'n' * self.args.line text = '' for info in infos: if self.args.rm_blank: info['description'] = re.sub(r'[nr]+', r'n', info['description']) text += text_template.format(**info) with open('leecode problems.txt', 'w') as g: g.write(text) def run(self): if os.path.exists('leecode_problems.pk') and not self.args.redownload: with open('leecode_problems.pk', 'rb') as f: pm_infos = pk.load(f) else: pm_infos = self.get_problems_info() print('find %s problems.' % len(pm_infos)) self.to_text(pm_infos)def handle_args(argv): p = argparse.ArgumentParser(description='extract all leecode problems to location') p.add_argument('--index', action='store_true', help='sort by index') p.add_argument('--level', action='store_true', help='sort by level') p.add_argument('--tag', action='store_true', help='sort by tag') p.add_argument('--title', action='store_true', help='sort by title') p.add_argument('--rm_blank', action='store_true', help='remove blank') p.add_argument('--line', action='store', type=int, default=10, help='blank of two problems') p.add_argument('-r', '--redownload', action='store_true', help='redownload data') args = p.parse_args(argv[1:]) return argsdef main(argv): args = handle_args(argv) x = LeetcodeProblems() x.args = args x.run()if __name__ == '__main__': argv = sys.argv main(argv)7.将 Markdown 转换为 HTML 。import sysimport osfrom bs4 import BeautifulSoupimport markdownclass MarkdownToHtml: headTag = '<head><meta charset="utf-8" /></head>' def __init__(self,cssFilePath = None): if cssFilePath != None: self.genStyle(cssFilePath) def genStyle(self,cssFilePath): with open(cssFilePath,'r') as f: cssString = f.read() self.headTag = self.headTag[:-7] + '<style type="text/css">{}</style>'.format(cssString) + self.headTag[-7:] def markdownToHtml(self, sourceFilePath, destinationDirectory = None, outputFileName = None): if not destinationDirectory: # 未定义输出目录则将源文件目录(注意要转换为绝对路径)作为输出目录 destinationDirectory = os.path.dirname(os.path.abspath(sourceFilePath)) if not outputFileName: # 未定义输出文件名则沿用输入文件名 outputFileName = os.path.splitext(os.path.basename(sourceFilePath))[0] + '.html' if destinationDirectory[-1] != '/': destinationDirectory += '/' with open(sourceFilePath,'r', encoding='utf8') as f: markdownText = f.read() # 编译出原始 HTML 文本 rawHtml = self.headTag + markdown.markdown(markdownText,output_format='html5') # 格式化 HTML 文本为可读性更强的格式 beautifyHtml = BeautifulSoup(rawHtml,'html5lib').prettify() with open(destinationDirectory + outputFileName, 'w', encoding='utf8') as f: f.write(beautifyHtml)if __name__ == "__main__": mth = MarkdownToHtml() # 做一个命令行参数列表的浅拷贝 , 不包含脚本文件名 argv = sys.argv[1:] # 目前列表 argv 可能包含源文件路径之外的元素(即选项信息) # 程序最后遍历列表 argv 进行编译 markdown 时 , 列表中的元素必须全部是源文件路径 outputDirectory = None if '-s' in argv: cssArgIndex = argv.index('-s') +1 cssFilePath = argv[cssArgIndex] # 检测样式表文件路径是否有效 if not os.path.isfile(cssFilePath): print('Invalid Path: '+cssFilePath) sys.exit() mth.genStyle(cssFilePath) # pop 顺序不能随意变化 argv.pop(cssArgIndex) argv.pop(cssArgIndex-1) if '-o' in argv: dirArgIndex = argv.index('-o') +1 outputDirectory = argv[dirArgIndex] # 检测输出目录是否有效 if not os.path.isdir(outputDirectory): print('Invalid Directory: ' + outputDirectory) sys.exit() # pop 顺序不能随意变化 argv.pop(dirArgIndex) argv.pop(dirArgIndex-1) # 至此 , 列表 argv 中的元素均是源文件路径 # 遍历所有源文件路径 for filePath in argv: # 判断文件路径是否有效 if os.path.isfile(filePath): mth.markdownToHtml(filePath, outputDirectory) else: print('Invalid Path: ' + filePath)


推荐阅读