#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""CGI script: character search for the Ocrat animated Chinese characters mirror.

Reads the ``string`` form field, decodes it with the first encoding in
ENCODINGS that accepts it, and prints TEMPLATE with every character above
LINK_THRESHOLD wrapped in a link built from its hexadecimal code point.
"""

import cgi
#import cgitb; cgitb.enable();
from xml.sax.saxutils import escape

# NOTE(review): the HTML markup of this page appears to be missing from the
# copy of the file under review; only the text content below is visible and
# it is reproduced verbatim. Confirm against the deployed template.
TEMPLATE = """ Ocrat Mirror - Animated Chinese Characters - Character search

Ocrat Mirror → Animated Chinese Characters → Character search

Character search

Click on any Chinese character to see how to write it.

Introduction
Any Char
Cities
Compass
Countries
Numbers
Politics
Pronouns
Provinces
Surnames
Zodiac

Here is the result of your search:

%(content)s

Not what you expected?

Please make sure that your browser is set to use the Unicode (UTF-8) encoding. Any strings you copy and paste into the search box should be UTF-8 strings.

http://lost-theory.org/ocrat/chargif

"""

# Encodings tried, in order, when decoding the submitted search string.
ENCODINGS = ["utf-8", "big5", "gb2312"]
#ENCODINGS = ["utf-8"]

# Shown in place of the search result when no encoding in ENCODINGS fits.
DECODE_ERROR = ("Unable to decode input string: %s, please send this message "
                "to the email address below.")

# Characters whose code point is strictly above this value are linked.
LINK_THRESHOLD = 256

# Filled with (hex code point, character).
# NOTE(review): the original format string lost its markup (it had two
# arguments but a single visible placeholder). This anchor is a minimal
# reconstruction -- confirm the real href pattern before deploying.
CHAR_LINK = '<a href="%s">%s</a>'


def start_response():
    """Print the CGI header followed by the blank line that ends the headers."""
    print("Content-type: text/html\n")


def end_response():
    """Hook called after the page has been printed; currently does nothing."""
    pass


def decode_search_string(raw):
    """Decode the submitted bytes with the first encoding that accepts them.

    Decoding stops at the first clean decode; trying every encoding gives
    ambiguous results, e.g. for '\\xe6\\xb1\\xbd\\xe8\\xbd\\xa6' (2009-10-12).

    Returns the decoded text, or the DECODE_ERROR message (containing the
    repr of *raw*) when no encoding in ENCODINGS can decode the input.
    """
    for enc in ENCODINGS:
        try:
            return raw.decode(enc)
        except UnicodeError:
            continue
    return DECODE_ERROR % repr(raw)


def render_chars(text):
    """Return *text* as HTML, linking each character above LINK_THRESHOLD.

    Linked characters use their four-digit lowercase hexadecimal code point.
    Everything else -- including surrogates and characters outside the Basic
    Multilingual Plane, which have no four-digit code point -- is emitted as
    HTML-escaped text, so user input can never inject markup into the page.
    """
    pieces = []
    for char in text:
        code = ord(char)
        is_surrogate = 0xD800 <= code <= 0xDFFF
        if LINK_THRESHOLD < code <= 0xFFFF and not is_surrogate:
            pieces.append(CHAR_LINK % ("%04x" % code, char))
        else:
            pieces.append(escape(char))
    return "".join(pieces)


def main():
    """Handle one CGI request: print the header and the search result page."""
    form = cgi.FieldStorage()
    start_response()
    # getfirst() returns None when the field is absent and tolerates a
    # repeated "string" parameter, where form["string"] would be a list.
    raw = form.getfirst("string")
    if raw is None:
        content = "\n\nNo search string specified\n\n"
    else:
        rendered = render_chars(decode_search_string(raw))
        content = ("\n\n" + rendered + "\n\n").encode("utf-8")
    print(TEMPLATE % dict(content=content))
    end_response()


if __name__ == "__main__":
    main()