新人報到

1 主题	0 好友	207 积分

Rank: 3 Rank: 3

发消息

31^#

发表于 2016-4-9 13:11:40 |显示全部楼层

本帖最后由 manhong2112 于 2016-12-8 21:44 编辑

之前寫的一堆排序算法...

import random
import timeit
def printf(string, *obj):
    """Print ``string`` after filling its ``{}`` placeholders with ``obj``.

    Returns whatever ``print`` returns, i.e. ``None``.
    """
    rendered = string.format(*obj)
    return print(rendered)
def bubble_sort(arr):
    """Return a sorted copy of ``arr`` using bubble sort.

    The input is sliced first, so the caller's sequence is left untouched.
    """
    result = arr[:]
    unsorted_end = len(result) - 1
    # After each pass the largest remaining item sits at ``unsorted_end``.
    while unsorted_end > 0:
        for left in range(unsorted_end):
            right = left + 1
            if result[left] > result[right]:
                result[left], result[right] = result[right], result[left]
        unsorted_end -= 1
    return result
def quick_sort(arr):
    """Return the items of ``arr`` in ascending order (out-of-place quicksort).

    Sequences of length 0 or 1 are returned as-is (the same object); longer
    inputs produce a new list. The first element is used as the pivot.
    """
    if len(arr) <= 1:
        return arr
    pivot, rest = arr[0], arr[1:]
    smaller = [item for item in rest if item < pivot]
    not_smaller = [item for item in rest if not item < pivot]
    return quick_sort(smaller) + [pivot] + quick_sort(not_smaller)
def swap(arr, x, y):
    """Exchange the items stored at indexes ``x`` and ``y`` of ``arr`` in place."""
    first, second = arr[x], arr[y]
    arr[x] = second
    arr[y] = first
def quicksort_inplace_iter(arr):
    """Sort ``arr`` in place with an iterative (explicit-stack) quicksort.

    Each stack entry is a half-open range ``(start, end)`` still to be sorted.
    The last element of the range is the pivot; a Lomuto partition moves every
    item that is not greater than the pivot in front of it, then places the
    pivot at its final index. The previous partition loop stopped one element
    early and lost track of the pivot after swapping it, which left inputs
    such as ``[3, 1, 2]`` unsorted.

    Returns ``None``; the list is modified in place.
    """
    stack = [(0, len(arr))]
    while stack:
        start, end = stack.pop()
        if end - start <= 1:
            # Empty or single-item ranges are already sorted.
            continue
        last = end - 1
        pivot = arr[last]
        boundary = start  # first index of the "greater than pivot" region
        for k in range(start, last):
            if not arr[k] > pivot:
                arr[boundary], arr[k] = arr[k], arr[boundary]
                boundary += 1
        # Drop the pivot between the two regions: its final position.
        arr[boundary], arr[last] = arr[last], arr[boundary]
        stack.append((start, boundary))
        stack.append((boundary + 1, end))
def random_arr(length):
    """Return ``length`` random integers drawn from ``[0, length ** 2]``."""
    upper = length ** 2
    return [random.randint(0, upper) for _ in range(length)]
def merge_sort(arr):
    """Return the items of ``arr`` in ascending order using a stable merge sort.

    Sequences of length 0 or 1 are returned unchanged (same object); longer
    inputs yield a new list. Only ``<`` is used to compare items.
    """

    def merge(left, right):
        """Merge two sorted lists, preferring ``left`` on ties (stability)."""
        merged = []
        i = j = 0
        while i < len(left) and j < len(right):
            # Take from the right half only when it is strictly smaller, so
            # equal items keep their original relative order.
            if right[j] < left[i]:
                merged.append(right[j])
                j += 1
            else:
                merged.append(left[i])
                i += 1
        merged.extend(left[i:])
        merged.extend(right[j:])
        return merged

    length = len(arr)
    if length <= 1:
        return arr
    middle = length // 2  # integer division: no float round-trip
    return merge(merge_sort(arr[:middle]), merge_sort(arr[middle:]))
def select_sort(arr):
    """Sort ``arr`` in place (exchange-style selection sort) and return it."""
    size = len(arr)
    for slot in range(size):
        # Any later item smaller than the one in ``slot`` is swapped into it.
        for probe in range(slot, size):
            if arr[probe] < arr[slot]:
                held = arr[slot]
                arr[slot] = arr[probe]
                arr[probe] = held
    return arr
def time(fun, *arr):
    """Call ``fun(*arr)`` once and return the elapsed time in milliseconds."""
    started = timeit.default_timer()
    fun(*arr)
    finished = timeit.default_timer()
    elapsed_seconds = finished - started
    return elapsed_seconds * 1000

复制代码

1 主题	0 好友	207 积分

Rank: 3 Rank: 3

发消息

32^#

发表于 2016-6-26 19:38:46 |显示全部楼层

筆記本

import os
import math
import pickle
def sum(x):
    """Return the sum of the items of sequence ``x`` (``0`` when empty).

    Replaces the recursive lambda, which raised ``RecursionError`` beyond
    roughly a thousand items and copied the tail on every step. Items are
    folded from the right (``x[0] + (x[1] + (... + 0))``), the same
    association order the recursive version used.
    """
    total = 0
    for item in reversed(x):
        total = item + total
    return total
class Reader(object):
    """Read/write facade over a single note obtained from ``NoteManager``."""

    def __init__(self, path=None):
        """Look up the note registered at ``path`` and load its content."""
        self.Note = NoteManager.get_note(path)
        self.Note.load()

    def read(self):
        """Return the note's current in-memory text."""
        return self.Note.Context

    def write(self, context):
        """Replace the note's in-memory text with ``context``."""
        self.Note.Context = context

    def write_line(self, context):
        """Append ``context`` followed by a newline."""
        self.append(context)
        self.append("\n")

    def append(self, context):
        """Append ``context`` to the note's in-memory text."""
        self.Note.Context += context

    def save(self):
        """Pickle ``(Context, Type)`` to the note's file."""
        note = self.Note
        with open(note.nPath, "wb+") as f:
            pickle.dump((note.Context, note.Type), f)
class Note(object):
    """A single note whose content is pickled under ``storageLoc``."""

    # Directory that holds the pickled note files.
    storageLoc = "./note"

    def __init__(self, path, name, file_type="Text"):
        """Create an empty in-memory note.

        ``path`` is the note's id and its location relative to ``storageLoc``;
        all three arguments must be plain ``str`` (checked with ``assert``).
        """
        for value in (name, path, file_type):
            assert type(value) is str
        self.Name = name
        self.Type = file_type
        self.id = path
        self.nPath = os.path.join(self.storageLoc, path)
        self.Context = ""

    def load(self):
        """Load ``Context`` and ``Type`` from disk, creating the file if absent."""
        target = self.nPath
        os.makedirs(os.path.dirname(target), exist_ok=True)
        if not os.path.isfile(target):
            # First access: persist an empty note so the read below succeeds.
            with open(target, "wb+") as f:
                pickle.dump(("", self.Type), f)
        with open(target, "rb+") as f:
            stored = pickle.load(f)
        self.Context, self.Type = stored
class NoteManager(object):
    """Registry of notes arranged as a tree of nested dicts.

    Keys ending in ``"/"`` are sub-directories (nested dicts); every other key
    maps a note name to ``{"Tag": <int bitmask>, "Note": <Note>}``. The
    registry is loaded from ``note_pool_loc`` when the class is defined and
    written back by ``save()``.

    NOTE: the registry file is read with ``pickle.load``; only use files from
    a trusted source, since unpickling can execute arbitrary code.
    """

    note_pool_loc = "./NoteList.db"
    __note_pool = {}

    @staticmethod
    def _tag_mask(tags):
        """OR the given tag bit values into a single mask."""
        mask = 0
        for bit in tags:
            mask |= bit
        return mask

    @staticmethod
    def get_note(path):
        """Return the ``Note`` registered at ``path`` (``KeyError`` if absent)."""
        pool, name = NoteManager.parse_path(path)
        return pool[name]["Note"]

    @staticmethod
    def new_note(path, name, file_type="Text"):
        """Create a ``Note`` and register it at ``path``."""
        NoteManager.add_note(path, Note(path, name, file_type))

    @staticmethod
    def filter(dir, f):
        """Return the sorted paths under ``dir`` whose tags include mask ``f``.

        ``dir`` is ``""`` for the root or a directory path ending in ``"/"``.
        """
        assert type(dir) is str
        result = []
        pool, _ = NoteManager.parse_path(dir)
        for key in pool:
            if key.endswith("/"):
                result.extend(NoteManager.filter(dir + key, f))
            elif (pool[key]["Tag"] & f) == f:
                result.append(dir + key)
        return sorted(result)

    @staticmethod
    def save():
        """Pickle ``{full_path: {"Tag", "Name"}}`` to ``note_pool_loc``."""
        flat = {}

        def collect(prefix, pool):
            for key in pool:
                if key.endswith("/"):
                    collect(prefix + key, pool[key])
                else:
                    entry = pool[key]
                    # Key by the full path: the loader splits it on "/" to
                    # rebuild the directory tree.
                    flat[prefix + key] = {"Tag": entry["Tag"],
                                          "Name": entry["Note"].Name}

        collect("", NoteManager.__note_pool)
        with open(NoteManager.note_pool_loc, "wb+") as f:
            pickle.dump(flat, f)

    @staticmethod
    def add_note(path, note):
        """Register ``note`` at ``path`` with no tags."""
        pool, name = NoteManager.parse_path(path)
        pool[name] = {"Tag": 0, "Note": note}

    @staticmethod
    def del_note(path):
        """Delete the note's file and remove it from the registry."""
        os.remove(os.path.join(Note.storageLoc, path))
        pool, name = NoteManager.parse_path(path)
        del pool[name]

    @staticmethod
    def add_tag(path, *tag):
        """Set the given tag bits on the note at ``path``."""
        pool, name = NoteManager.parse_path(path)
        # OR instead of "+": bits that are already set must not carry over.
        pool[name]["Tag"] |= NoteManager._tag_mask(tag)

    @staticmethod
    def del_tag(path, *tag):
        """Clear the given tag bits on the note at ``path``."""
        pool, name = NoteManager.parse_path(path)
        pool[name]["Tag"] &= ~NoteManager._tag_mask(tag)

    @staticmethod
    def get_tag(path):
        """Return the set of tag names attached to the note at ``path``."""
        pool, name = NoteManager.parse_path(path)
        remaining = pool[name]["Tag"]
        tags = set()
        while remaining > 0:
            bit = remaining & -remaining  # lowest set bit
            tags.add(TagManager.tag(bit))
            remaining ^= bit
        return tags

    @staticmethod
    def parse_path(path):
        """Split ``path`` on "/" and return ``(containing_dict, leaf_name)``.

        Missing intermediate directories are created on the way down.
        """
        parts = path.split("/")
        pool = NoteManager.__note_pool
        for part in parts[:-1]:
            pool = pool.setdefault(part + "/", {})
        return pool, parts[-1]

    # Executed once at class-definition time: create an empty registry file,
    # or rebuild the in-memory tree from the existing one.
    if not os.path.isfile(note_pool_loc):
        with open(note_pool_loc, "wb+") as f:
            pickle.dump(__note_pool, f)
    else:
        with open(note_pool_loc, "rb") as f:
            tmp = pickle.load(f)
        for full_path in tmp:
            parts = full_path.split("/")
            pool = __note_pool
            for part in parts[:-1]:
                pool = pool.setdefault(part + "/", {})
            pool[parts[-1]] = {"Tag": tmp[full_path]["Tag"],
                               "Note": Note(full_path, tmp[full_path]["Name"])}
class TagManager(object):
    """Ordered pool of tag names; the tag at index ``i`` has bit value ``2**i``.

    The pool is loaded from ``tag_pool_loc`` when the class is defined (or an
    empty pool file is created) and written back by ``save()``.

    NOTE: the pool file is read with ``pickle.load``; only use files from a
    trusted source, since unpickling can execute arbitrary code.
    """

    tag_pool_loc = "TagList.db"
    __tag_pool = []

    @staticmethod
    def mk_filter(*tag):
        """Combine tag bit values into one filter mask."""
        return sum(tag)

    @staticmethod
    def new_tag(tag_name):
        """Append ``tag_name`` to the pool unless it is already present."""
        if tag_name not in TagManager.__tag_pool:
            TagManager.__tag_pool.append(tag_name)

    @staticmethod
    def tag(x):
        """Convert between a tag name and its bit value.

        A ``str`` returns ``2 ** index`` of that name; an ``int`` that is a
        single set bit returns the name at that bit position. Other types
        return ``None``.

        Raises:
            ValueError: ``x`` is an ``int`` that is not a positive power of
                two, or a name that is not in the pool.
            IndexError: the bit position is beyond the pool.
        """
        if type(x) is str:
            return 2 ** TagManager.__tag_pool.index(x)
        elif type(x) is int:
            if x <= 0 or x & (x - 1):
                raise ValueError("tag value must be a single set bit: %r" % (x,))
            # bit_length() - 1 is the exact integer log2; math.log returned a
            # float, which cannot index a list.
            return TagManager.__tag_pool[x.bit_length() - 1]

    @staticmethod
    def save():
        """Pickle the tag pool to ``tag_pool_loc``."""
        with open(TagManager.tag_pool_loc, "wb+") as f:
            pickle.dump(TagManager.__tag_pool, f)

    # Executed once at class-definition time.
    if os.path.isfile(tag_pool_loc):
        with open(tag_pool_loc, "rb") as f:
            __tag_pool = pickle.load(f)
    else:
        with open(tag_pool_loc, "wb+") as f:
            pickle.dump(__tag_pool, f)

复制代码

1 主题	0 好友	207 积分

Rank: 3 Rank: 3

发消息

33^#

发表于 2016-8-15 20:25:00 |显示全部楼层

本帖最后由 manhong2112 于 2016-8-25 18:34 编辑

Brainfuck 解釋器(+半個編譯器)

import msvcrt
def interp(expr):
    """Interpret the Brainfuck program ``expr`` directly.

    Memory is a dict keyed by pointer position. Before every instruction the
    current cell is created if missing and reset to 0 when it is outside
    ``[-128, 127]``. Output goes to stdout; input is read with
    ``msvcrt.getch``.
    """
    ptr = 0
    ram = {}
    loop_starts = []  # positions of the "[" of the loops currently running
    skip_depth = 0    # > 0 while skipping a loop body whose cell was zero
    pos = 0
    while pos < len(expr):
        op = expr[pos]
        if ptr not in ram or not (-128 <= ram[ptr] <= 127):
            ram[ptr] = 0
        if skip_depth > 0:
            # Only brackets matter while skipping: track the nesting depth.
            if op == "[":
                skip_depth += 1
            elif op == "]":
                skip_depth -= 1
        elif op == "[":
            if ram[ptr] != 0:
                loop_starts.append(pos)
            else:
                skip_depth = 1
        elif op == "]":
            if ram[ptr] != 0:
                # Jump back to the "["; the increment below steps past it.
                pos = loop_starts[-1]
            else:
                loop_starts.pop()
        elif op == "+":
            ram[ptr] += 1
        elif op == "-":
            ram[ptr] -= 1
        elif op == ">":
            ptr += 1
        elif op == "<":
            ptr -= 1
        elif op == ".":
            print(chr(ram[ptr]), end="")
        elif op == ",":
            ram[ptr] = ord(msvcrt.getch())
        pos += 1
# Format version stored in the header of every compiled program.
version = 0x01
# Header layout: null byte, three id bytes (0x10 0x26 0xBF), version byte and
# a memory-length slot (index 5) that compile() overwrites.
header = [0x00, 0x10, 0x26, 0xBF, version, 0x00]


class ID():
    """Opcodes of the compiled form.

    START_LOOP, END_LOOP, ADD and MOVE are followed by one operand; the other
    opcodes stand alone.
    """
    START_LOOP = 0
    END_LOOP = 1
    INC = 2
    DEC = 3
    NEXT = 4
    BACK = 5
    GETC = 6
    PUTC = 7
    ADD = 8
    MOVE = 9


def compile(expr):
    """Compile the Brainfuck program ``expr`` into a list of ints.

    Output layout: a copy of ``header`` (index 5 holds the highest pointer
    position seen while scanning the source linearly), the instruction
    stream, and a trailing ``0xFF``.

    Runs of ``+``/``-`` collapse into INC, DEC, ``ADD n`` or nothing (net
    zero); runs of ``>``/``<`` likewise into NEXT, BACK, ``MOVE n`` or
    nothing. Any other character ends a run and is otherwise ignored.

    Jump operands are relative to the opcode's own index, and the executor
    adds 2 after every two-cell instruction:

    * ``START_LOOP`` holds ``end - start`` so a skipped loop resumes at the
      instruction after ``END_LOOP``'s operand. The previous value was one
      less, which resumed on the operand itself.
    * ``END_LOOP`` holds ``end - start + 2`` so a repeated loop re-enters at
      ``START_LOOP``.

    A program ending in ``+ - > <`` no longer raises ``TypeError`` (the old
    end-of-input sentinel was an int tested with ``in`` against a str).

    Raises:
        IndexError: ``expr`` contains a "]" without a matching "[".
    """
    result = header.copy()
    loop_starts = []  # indexes in ``result`` of the open START_LOOP opcodes
    memloc = 0        # pointer position, tracked by a linear scan
    max_memloc = 0
    length = len(expr)
    i = 0
    while i < length:
        x = expr[i]
        if x == "+" or x == "-":
            delta = 0
            while i < length and (expr[i] == "+" or expr[i] == "-"):
                delta += 1 if expr[i] == "+" else -1
                i += 1
            if delta == 1:
                result.append(ID.INC)
            elif delta == -1:
                result.append(ID.DEC)
            elif delta != 0:
                result.append(ID.ADD)
                result.append(delta)
            continue
        if x == ">" or x == "<":
            delta = 0
            while i < length and (expr[i] == ">" or expr[i] == "<"):
                delta += 1 if expr[i] == ">" else -1
                i += 1
            if delta == 1:
                result.append(ID.NEXT)
            elif delta == -1:
                result.append(ID.BACK)
            elif delta != 0:
                result.append(ID.MOVE)
                result.append(delta)
            memloc += delta
            max_memloc = max(max_memloc, memloc)
            continue
        if x == "[":
            loop_starts.append(len(result))
            result.append(ID.START_LOOP)
            result.append(0)  # patched when the matching "]" is reached
            # NOTE(review): kept from the original; presumably reserves one
            # extra cell for loops opened at position 0 - confirm intent.
            if memloc == 0:
                memloc = 1
                max_memloc = max(max_memloc, memloc)
        elif x == "]":
            start = loop_starts.pop()
            result[start + 1] = len(result) - start
            result.append(ID.END_LOOP)
            result.append(len(result) - start + 1)
        elif x == ".":
            result.append(ID.PUTC)
        elif x == ",":
            result.append(ID.GETC)
        i += 1
    result[5] = max_memloc
    result.append(0xFF)
    return result
class Env(object):
    """Fixed-size memory tape plus a data pointer for compiled programs."""

    def __init__(self, mem_len):
        """Allocate ``mem_len + 1`` zeroed cells with the pointer at cell 0."""
        self.ptr = 0
        self.data = [0 for _ in range(mem_len + 1)]

    def move(self, val):
        """Shift the data pointer by ``val`` cells."""
        self.ptr = self.ptr + val

    def read(self):
        """Return the value of the cell under the pointer."""
        return self.data[self.ptr]

    def write(self, val):
        """Store ``val`` in the current cell; values outside [-128, 127] become 0."""
        if not (-128 <= val <= 127):
            val = 0
        self.data[self.ptr] = val
def execute(compiledExpr):
    """Run a program produced by ``compile``.

    Execution starts right after the header and stops before the final
    element. Element 5 of the program sizes the memory. Instructions that
    carry an operand advance by two cells, the others by one; unrecognised
    values are treated as two-cell instructions and skipped.
    """
    code = compiledExpr
    pc = len(header)
    env = Env(code[5])
    stop = len(code) - 1
    while pc < stop:
        op = code[pc]
        if op == ID.INC:
            env.write(env.read() + 1)
        elif op == ID.DEC:
            env.write(env.read() - 1)
        elif op == ID.NEXT:
            env.move(1)
        elif op == ID.BACK:
            env.move(-1)
        elif op == ID.PUTC:
            print(chr(env.read()), end="")
        elif op == ID.GETC:
            env.write(ord(msvcrt.getch()))
        else:
            if op == ID.START_LOOP:
                if env.read() == 0:
                    pc += code[pc + 1]
            elif op == ID.END_LOOP:
                if env.read() != 0:
                    pc -= code[pc + 1]
            elif op == ID.ADD:
                env.write(env.read() + code[pc + 1])
            elif op == ID.MOVE:
                env.move(code[pc + 1])
            pc += 1  # step over the operand
        pc += 1
import timeit
def time(fun, *arr):
    """Return how long one call of ``fun(*arr)`` takes, in milliseconds."""
    begin = timeit.default_timer()
    fun(*arr)
    duration = timeit.default_timer() - begin
    return duration * 1000
# Benchmark: average run time (ms) over 100 runs of the compiled program
# versus the direct interpreter, using the same Brainfuck source.
expr = "++++++++++[>+++++++>++++++++++>+++>+<<<<-]>++.>+.+++++++..+++.>++.<<+++++++++++++++.>.+++.------.--------.>+.>."
cexpr = compile(expr)
x1 = 0
x2 = 0
for i in range(100):
    x1 = x1 + time(execute, cexpr)
for i in range(100):
    x2 = x2 + time(interp, expr)
print(x1 / 100)
print(x2 / 100)

复制代码

1 主题	0 好友	207 积分

Rank: 3 Rank: 3

发消息

34^#

发表于 2016-8-22 15:28:39 |显示全部楼层

本帖最后由 manhong2112 于 2016-8-22 16:00 编辑

寫了一個爬蟲, 一開始用lxml總是報錯(貌似是網站的HTML有問題)...就轉用BeautifulSoup了...
雖然能用, 但總感覺哪裡不太對勁..
EDIT: 相對路徑沒被簡化, 組成了一個迴圈....
EDIT: 找到了urljoin這函數, 希望不再出bug

# from lxml import etree
from bs4 import BeautifulSoup
import urllib as request
from urlparse import urljoin
import re
import os
def getHtml(url):
    """Download ``url`` and return ``(url, text)``.

    The body is read once and decoded as UTF-8, falling back to Big5. The
    previous version called ``res.read()`` a second time for the fallback;
    the stream was already exhausted by then, so Big5 pages came back empty.

    Raises:
        UnicodeDecodeError: the body is neither valid UTF-8 nor valid Big5.
    """
    res = request.urlopen(url)
    try:
        raw = res.read()
    finally:
        res.close()
    try:
        return url, raw.decode("utf-8")
    except UnicodeDecodeError:
        return url, raw.decode("big5")
def extractLink(url, html):
    """Return the set of absolute link targets found in ``html``.

    Each ``href`` is resolved against ``url`` with ``urljoin`` and its
    ``#fragment`` is removed. Anchors without an ``href`` attribute are
    ignored; they used to raise ``KeyError`` and abort the whole page.
    """
    soup = BeautifulSoup(html, "html.parser")
    links = set()
    for anchor in soup.findAll('a', href=True):
        absolute = urljoin(url, anchor["href"])
        links.add(re.sub("#.*$", "", absolute))
    return links
    # lxml alternative: set(etree.HTML(html).xpath("//a/@href"))
# Site mirror: starting from ``startAt``, download every page reachable through
# links that stay under ``target`` and store it below ``output``.
target = "http://example.com/"  # target site
startAt = "http://example.com/index.html"  # start page
output = "output"  # output directory
downloadedLink = set()
toBeDownload = set()
toBeDownload.add(startAt)
# Group 1 is everything after the host part of the URL.
p = re.compile("https?://.*?/(.*)")
while len(toBeDownload) != 0:
    try:
        url = toBeDownload.pop()
        downloadedLink.add(url)
        # Skip links that leave the target site ("https" also starts with "http").
        if url.startswith("http") and not url.startswith(target):
            continue
        print("Downloading " + url)
        _, content = getHtml(url)
        m = p.match(url).group(1)
        # Directory-style URLs have no file name; store them as index.html
        # instead of failing when the path turns out to be a directory.
        if not m or m.endswith("/"):
            m += "index.html"
        path = os.path.dirname(m)
        directory = os.path.join(output, path)
        if not os.path.isdir(directory):
            os.makedirs(directory)
        # Binary mode: the content is written as encoded bytes.
        with open(os.path.join(output, m), "wb") as f:
            f.write(content.encode('utf8'))
        newUrl = extractLink(url, content).difference(downloadedLink)
        toBeDownload = toBeDownload.union(newUrl)
    except Exception:
        # Best effort: report the page and carry on with the remaining queue.
        print("Failed to Download " + url)

复制代码

1 主题	0 好友	207 积分

Rank: 3 Rank: 3

发消息

35^#

发表于 2016-10-10 21:00:38 |显示全部楼层

本帖最后由 manhong2112 于 2016-10-21 17:32 编辑

原本想弄個筆記本, 然後架個站自用...然後寫着寫着寫成了vcs(算嗎?算吧)......
話說混亂到自己都看不太下去了XDDD
EDIT: 換成if-else...感覺不想再看到python的lambda了....
EDIT: 大改了一下, 和把index的rollback功能刪了

import hashlib
import os
import json
import datetime
import time
# Root directory of the object store; getPath() places each object at
# <dataLoc>/<first two characters of the hashcode>/<hashcode>.
dataLoc = "data"
class Enitiy(object):
    """One entry of an index: a name, a type tag, the hash of its current
    content and its history of earlier versions."""

    def __init__(self, name, dtype, hashcode, history):
        self.name, self.dtype = name, dtype
        self.hashcode, self.history = hashcode, history
class Index(Enitiy):
    """A directory: a named mapping from entry name to ``Enitiy``/``Index``."""

    def __init__(self, name, jsons=None, hashcode=None):
        """Build an empty index, or rebuild one from its parsed JSON form.

        Nested indexes listed in ``jsons`` are loaded recursively from the
        object store through ``read``.
        """
        self.name = name
        self.index = dict()
        if jsons is not None:
            for record in jsons["index"]:
                child_name = record["name"]
                if record["type"] == "Index":
                    child = Index(
                        child_name,
                        json.loads(read(record["hashcode"])),
                        record["hashcode"])
                else:
                    child = Enitiy(
                        child_name,
                        record["type"],
                        record["hashcode"],
                        record.get("history", dict()))
                self.index[child_name] = child
        super().__init__(name, "Index", hashcode, None)

    def __str__(self):
        return str(self.index)

    def __getitem__(self, k):
        return self.index[k]

    def __setitem__(self, k, v):
        self.index[k] = v

    def __contains__(self, item):
        return item in self.index

    def __len__(self):
        return len(self.index)

    def items(self):
        """Return the ``(name, entry)`` pairs of this index."""
        return self.index.items()

    def dumpJsons(self):
        """Serialise this index (one level deep) to a JSON string.

        Key order is fixed because the resulting text is hashed elsewhere.
        """
        entries = [
            {"name": entry.name, "type": entry.dtype,
             "hashcode": entry.hashcode, "history": entry.history}
            for entry in self.index.values()
        ]
        return json.dumps({"index": entries, "name": self.name})
# Keep a handle on the builtin before the name is rebound below.
_open = open


def open(hashcode, type="w+"):
    """Open the object-store file for ``hashcode`` as UTF-8 text.

    The parent directory is created when missing. The ``type`` argument is
    ignored: an existing file is opened with ``"r+"``, a new one with ``"w"``.
    """
    path = getPath(hashcode)
    os.makedirs(os.path.dirname(path), exist_ok=True)
    mode = "r+" if os.path.isfile(path) else "w"
    return _open(path, mode, encoding="UTF-8")
def getPath(hashcode):
    """Return the storage path of ``hashcode``: ``dataLoc/<first 2 chars>/<hashcode>``."""
    bucket = hashcode[:2]
    return os.path.join(dataLoc, bucket, hashcode)
def hash(str):
    """Return the hexadecimal SHA-1 digest of the bytes ``str``."""
    digest = hashlib.sha1(str)
    return digest.hexdigest()
def hashStr(str):
    """Hash the UTF-8 encoding of the text ``str`` with ``hash``."""
    encoded = str.encode("utf-8")
    return hash(encoded)
def hashFile(name, content):
    """Hash the UTF-8 encoding of ``name`` and ``content`` joined by "|"."""
    keyed = "|".join((name, content))
    return hash(keyed.encode("utf-8"))
def read(hashcode):
    """Return the full text stored for ``hashcode``."""
    f = open(hashcode)
    try:
        return f.read()
    finally:
        f.close()
def init():
    """Load the root index, creating an empty store on first use.

    Returns a one-element list ``[root]`` used as the directory stack.
    """
    name = "/"
    if os.path.isfile(getPath("root")):
        # Existing store: "root" points at the hash of the root index.
        with open("root") as f:
            accessPt = json.loads(f.read())
        hashcode = accessPt["hashcode"]
        with open(hashcode) as f:
            root = Index(name, json.loads(f.read()), hashcode)
        return [root]

    # Fresh store: write an empty root index and the pointer to it.
    root = Index(name)
    root_json = root.dumpJsons()
    hashcode = hashFile(name, root_json)
    with open(hashcode) as f:
        f.write(root_json)
    pointer = {"hashcode": hashcode, "name": name, "type": "Index"}
    with open("root") as f:
        f.write(json.dumps(pointer))
    return [root]
def end(indexStack):
    """Persist the whole index tree and repoint "root" at the new root hash.

    Every nested ``Index`` is written bottom-up and its ``hashcode`` is
    refreshed before the parent is serialised. Previously only the root was
    written, so entries added inside sub-directories still referenced the
    stale (empty) snapshot created by ``mkdir`` and were lost on restart.
    """

    def persist(index):
        """Write ``index`` and its sub-indexes; return the new hashcode."""
        for _, child in index.items():
            if isinstance(child, Index):
                persist(child)
        content = index.dumpJsons()
        hashcode = hashFile(index.name, content)
        with open(hashcode) as f:
            f.write(content)
        index.hashcode = hashcode
        return hashcode

    root_hash = persist(indexStack[0])
    with open("root") as f:
        accessPt = json.loads(f.read())
        accessPt["hashcode"] = root_hash
        # Rewrite the pointer file in place, then drop any leftover tail.
        f.seek(0)
        f.write(json.dumps(accessPt))
        f.truncate()
# helpCmd maps a command name to the lines printed for it at the interactive
# "Help>> " prompt of the main loop.
helpCmd = {
"update": ["Usage>> update <name> <type> <content>",
"Usage>> Update or create a file"],
"rollback": ["Usage>> rollback <name> <id>",
"Usage>> Rollback to the specified id"],
"ls": ["Usage>> ls [name]",
"Usage>> list all file in current dir or specified dir"],
"cd": ["Usage>> cd <dir|'..'>",
"Usage>> cd to specified dir"],
"mkdir": ["Usage>> mkdir <dir>",
"Usage>> create a new dir at current dir"],
"read": ["Usage>> read <name>",
"Usage>> read the content of specified file"],
"history": ["Usage>> history <name>",
"Usage>> List modifiy history of specified file"],
"help": ["Usage>> <cmd>", "Usage>> Print the usage of cmd",
"Usage>> all cmd[update, rollback, ls, cd, mkdir, read, history, help]"]
}
# mainCmd is the set of command names the main loop dispatches on; "exit" and
# "quit" are handled separately by that loop.
mainCmd = {"update", "rollback", "ls", "cd",
"mkdir", "read", "history", "help", }
def update(indexStack, name, dtype, content):
    """Store ``content`` and create or update entry ``name`` in the current index.

    For an existing entry the previous hashcode is recorded in its history
    under a 6-character id derived from the current timestamp; ``dtype`` is
    only used when the entry is new.
    """
    current = indexStack[-1]
    stamp = datetime.datetime.now().strftime("%Y/%m/%d %H:%M:%S")
    stamp_id = hashStr(stamp)[:6]
    new_hash = hashFile(name, content)
    with open(new_hash) as f:
        f.write(content)
    if name in current:
        existing = current[name]
        existing.history[stamp_id] = (stamp, existing.hashcode)
        existing.hashcode = new_hash
        return
    if dtype == "Index":
        current[name] = Index(name, hashcode=new_hash)
    else:
        current[name] = Enitiy(name, dtype, new_hash, dict())
def rollback(indexStack, name, datetimeid):
    """Restore entry ``name`` to the version recorded under ``datetimeid``.

    indexStack -- directory stack; the last item is the current index
    name       -- name of a file entry (directories are rejected)
    datetimeid -- history id of the version to restore

    The old content is re-submitted through ``update``, so the rollback is
    itself recorded in the history.
    """
    current = indexStack[-1]
    entry = current[name]
    assert type(entry) is not Index
    versions = entry.history
    assert datetimeid in versions
    old_hash = versions[datetimeid][1]
    with open(old_hash) as f:
        update(indexStack, name, current[name].dtype, f.read())
def ls(indexStack, name=None):
    """Return ``(index_name, sorted [(entry_name, entry_type), ...])``.

    Without ``name`` the current index is listed; with ``name`` the stored
    snapshot of that sub-directory is read from disk and listed.
    """
    if name is None:
        index = indexStack[-1]
    else:
        stored = read(indexStack[-1][name].hashcode)
        index = Index(name, json.loads(stored))
    assert type(index) is Index
    listing = [(entry.name, entry.dtype) for _, entry in index.items()]
    listing.sort()
    return index.name, listing
def cd(indexStack, name):
    """Change the current directory by editing ``indexStack`` in place.

    indexStack -- directory stack; the first item is the root index
    name       -- sub-directory name, or ".." for the parent

    ``".."`` at the root is a no-op: popping the root used to leave the stack
    empty, which broke every later command and the final save.
    """
    assert type(indexStack) is list
    assert type(name) is str
    if name == "..":
        if len(indexStack) > 1:
            indexStack.pop()
        return
    index = indexStack[-1]
    assert type(index[name]) is Index
    indexStack.append(index[name])
def getHistory(indexStack, name):
    """Return the history mapping of file entry ``name`` in the current index.

    indexStack -- directory stack; the last item is the current index
    name       -- name of a file entry (directories are rejected)
    """
    assert type(indexStack) is list
    assert type(name) is str
    entry = indexStack[-1][name]
    assert type(entry) is not Index
    return entry.history
def mkdir(indexStack, name):
    """Create an empty sub-directory ``name`` in the current index."""
    current = indexStack[-1]
    assert name not in current
    empty_index_json = Index(name).dumpJsons()
    update(indexStack, name, "Index", empty_index_json)
if __name__ == '__main__':
    # Interactive shell. A failing command is reported and the loop goes on,
    # so one typo no longer terminates the session; the index is always saved
    # on the way out.
    indexStack = init()
    try:
        while True:
            cmd = input("/".join([i.name for i in indexStack]) + ">> ").split(" ")
            args = cmd[1:]
            command = cmd[0]
            if command == "exit" or command == "quit":
                break
            if command not in mainCmd:
                print("E> Invalid command")
                continue
            try:
                if command == "update":
                    if len(args) < 3:
                        print("E>")
                    else:
                        # Everything after <type> is the content, so content
                        # containing spaces is accepted.
                        update(indexStack, args[0], args[1], " ".join(args[2:]))
                elif command == "rollback":
                    if len(args) != 2:
                        print("E>")
                    else:
                        rollback(indexStack, args[0], args[1])
                elif command == "ls":
                    if len(args) == 1:
                        listing = ls(indexStack, args[0])
                    else:
                        listing = ls(indexStack)
                    print(listing[0] + " :")
                    print("\n".join(["\t" + n + " | " + t for n, t in listing[1]]))
                elif command == "cd":
                    if len(args) != 1:
                        print("E>")
                    elif args[0] == ".." and len(indexStack) <= 1:
                        # Never leave the root: the final save needs it.
                        print("E>")
                    else:
                        cd(indexStack, args[0])
                elif command == "mkdir":
                    if len(args) != 1:
                        print("E>")
                    else:
                        mkdir(indexStack, args[0])
                elif command == "read":
                    if len(args) != 1:
                        print("E>")
                    else:
                        print(read(indexStack[-1][args[0]].hashcode))
                elif command == "history":
                    if len(args) == 1:
                        history = getHistory(indexStack, args[0])
                        print(args[0] + " :")
                        print("id\t|date")
                        rows = sorted(
                            [(dateid, v[0]) for dateid, v in history.items()],
                            key=lambda row: row[1])
                        print("\n".join(
                            ["{}\t|{}".format(row[0], row[1]) for row in rows]))
                    else:
                        print("E>")
                elif command == "help":
                    while True:
                        topic = input("Help>> ")
                        if topic == "exit" or topic == "quit":
                            break
                        if topic in helpCmd:
                            for line in helpCmd[topic]:
                                print(line)
                        else:
                            print("E> Invalid command")
            except Exception as err:
                # Top-level boundary of the shell: report and keep running.
                print("E> {}: {}".format(type(err).__name__, err))
    finally:
        end(indexStack)

复制代码

1 主题	0 好友	207 积分

Rank: 3 Rank: 3

发消息

36^#

发表于 2016-10-14 19:58:04 |显示全部楼层

和之前的筆記本一系列的, 用來抽取指定網址的內容!
目前只做了知乎, 以後再慢慢拓展
#註解掉的是md轉html的

from bs4 import BeautifulSoup
import urllib.request as urllib
import re
#import markdown
def getContext(url):
    """Fetch ``url`` and return its extracted fields as a Markdown document.

    The four lines are title, author, link and content; a field that was not
    extracted yields an empty line.
    """
    data = parse(url, getRule(url))

    def part(key, prefix, suffix=""):
        """Return ``prefix + data[key] + suffix``, or "" when ``key`` is absent."""
        if key not in data:
            return ""
        return prefix + data[key] + suffix

    md = "{title}\n{author}\n{url}\n{content}".format(
        title=part("title", "#"),
        author=part("author", "###"),
        url=part("url", "#####[Link](", ")"),
        content=part("content", "<hr><br>"))
    # Optional HTML rendering (needs the third-party ``markdown`` package):
    # return "<meta charset='UTF-8'>\n" + markdown.markdown(md, output_format="html5")
    return md
# Extraction rules: a list of (compiled URL pattern, field specification).
# getRule() returns the specification of the first pattern that matches a URL.
# Each field maps to a dict read by parse():
#   "selector" - CSS selector handed to soup.select(); the first match is used
#   "attr"     - optional; read this attribute instead of the element's text
#   "matching" - optional; regex applied to the value, group 1 is kept
rule = [
(re.compile(r"https?://www.zhihu.com/question/\d*?/answer/\d*?#?.*"),
{"content": {"selector": "div#zh-question-answer-wrap .zm-editable-content"},
"title": {"selector": "div#zh-question-title > h2.zm-item-title > a"},
"author": {"selector": "span.author-link-line > a.author-link"},
"url": {"selector": "meta['http-equiv'='mobile-agent']", "attr": "content", "matching": re.compile(".*url=(.*)")}
})
]
def getRule(url):
    """Return the field specification of the first rule whose pattern matches
    ``url``, or ``None`` when no rule applies."""
    matching_specs = (spec for pattern, spec in rule if pattern.match(url))
    return next(matching_specs, None)
def getHtml(url, encoding="utf-8"):
    """Download ``url`` and return the raw response body as ``bytes``.

    The response is closed once it has been read (it used to be left open).
    ``encoding`` is accepted for backward compatibility but unused: the body
    is returned undecoded.
    """
    with urllib.urlopen(url) as res:
        return res.read()
def parse(url, rule):
    """Download ``url`` and extract the fields described by ``rule``.

    ``rule`` maps a field name to a dict with a CSS ``"selector"`` and
    optionally ``"attr"`` (attribute to read instead of the text) and
    ``"matching"`` (regex whose group 1 is kept).

    Returns a dict of the fields that could be extracted. A selector that
    matches nothing, a missing attribute or a regex that does not match
    leaves the field out instead of aborting the whole page with
    ``IndexError``.

    Raises:
        ValueError: ``rule`` is ``None`` (no extraction rule for this URL).
    """
    if rule is None:
        raise ValueError("no extraction rule matches " + url)
    result = dict()
    soup = BeautifulSoup(getHtml(url), "html.parser")
    for k, v in rule.items():
        matches = soup.select(v["selector"])
        if not matches:
            continue
        node = matches[0]
        if "attr" in v:
            raw = node.get(v["attr"])
            if raw is None:
                continue
        else:
            raw = node.get_text()
        content = str(raw).strip(' \t\n\r')
        if "matching" in v:
            m = v["matching"].match(content)
            if m:
                result[k] = m.group(1)
        else:
            result[k] = content
    return result
# Demo run executed when the file is run or imported: fetch one Zhihu answer
# and print the Markdown produced by getContext().
print(getContext("http://www.zhihu.com/question/51266789/answer/125952575"))

复制代码