#!/usr/bin/env python

import cgi
#import cgitb; cgitb.enable();

# HTML skeleton of the search result page.  It is filled in with %-formatting
# using a mapping that supplies the "content" key (the already-rendered result
# markup).  Because the whole string goes through %-formatting, any literal
# percent sign in the markup has to be doubled (see WIDTH="100%%" below).
TEMPLATE = """<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/DTD/loose.dtd">
<html lang="zh">
    <head>
        <meta http-equiv="content-type" content="text/html;charset=utf-8">
        <title>Ocrat Mirror - Animated Chinese Characters - Character search</title>
        <style type="text/css">
        .result { font-size: 16pt; }
        </style>
    <link href="style.css" rel="stylesheet" type="text/css" />
</head>
    <BODY BGCOLOR="#FFFFFF" TEXT="#000000">
        <p><A HREF="/ocrat/">Ocrat Mirror</A> &rarr; <a href="/ocrat/chargif/">Animated Chinese Characters</a> &rarr; Character search</p>
        <h1>Character search</h1>
        <p>Click on any Chinese character to see how to write it.</p>
        <TABLE CELLSPACING="0" CELLPADDING="0">
            <COLGROUP><COL WIDTH="100"><COL WIDTH="20"><COL SPAN="1"></COLGROUP>
            <TR>
                <TD WIDTH="100" BGCOLOR="#ffffcc" VALIGN="TOP">
                    <P>
                    <FONT SIZE="-1">
                        <STRONG><a href="/ocrat/chargif">Introduction</a></STRONG><BR>
                        <A HREF="anychar.html">Any Char</A><BR>
                        <A HREF="cities.html">Cities</A><BR>
                        <A HREF="compass.html">Compass</A><BR>
                        <A HREF="country.html">Countries</A><BR>
                        <A HREF="numbers.html">Numbers</A><BR>
                        <A HREF="people.html">Politics</A><BR>
                        <A HREF="pronouns.html">Pronouns</A><BR>
                        <A HREF="province.html">Provinces</A><BR>
                        <A HREF="surnames.html">Surnames</A><BR>
                        <A HREF="zodiac.html">Zodiac</A><BR>
                    </FONT>
                    </P>
                </TD>
                <TD WIDTH="20">
                </TD>
                <TD VALIGN="TOP" ALIGN="left">
                <h2>Here is the result of your search:</h2>
                %(content)s
                <h2>Not what you expected?</h2>
                <p>Please make sure that your browser is set to use the Unicode (UTF-8) encoding.  Any strings you copy and paste into the search box should be UTF-8 strings.</p>
                </TD>
            </TR>
        </TABLE>
        <HR>
        <TABLE WIDTH="100%%">
            <TR>
                <TD ALIGN="LEFT" VALIGN="TOP">
                    <TT>
                        <FONT SIZE="-1">
                            http://lost-theory.org/ocrat/chargif<br>
                        </FONT>
                    </TT>
                </TD>
            </TR>
        </TABLE>
    </BODY>
</HTML>"""

def start_response():
    print "Content-type: text/html\n"

def end_response():
    """Finish the response; nothing needs to be done at the moment."""
    return None

# Candidate encodings for the submitted search string.  They are tried in the
# order listed and the first one that decodes cleanly is used, so the order
# determines how ambiguous byte sequences are interpreted.
ENCODINGS = ["utf-8", "big5", "gb2312"]
#ENCODINGS = ["utf-8"]

if __name__ == "__main__":
    form = cgi.FieldStorage()
    start_response()
    if "string" in form:
        string = form["string"].value
        dec_dict = {}
        dec_dict["original"] = string
        good = "utf-8"
        for enc in ENCODINGS:
            try:
                dec_dict[enc] = string.decode(enc)
                good = enc
                break #stop at the first clean decode, otherwise there will be some ambiguous results e.g. '\xe6\xb1\xbd\xe8\xbd\xa6' (2009-10-12)
            except:
                dec_dict[enc] = "Unable to decode input string: %s, please send this message to the email address below." % `string`
        dec = dec_dict[good]
        chars = []
        for char in dec:
            if ord(char) > 256:
               #i'm pretty sure it's a UCN (like \u1234?)
               codepoint = repr(char)[4:-1]
               if len(codepoint) != 4:
                   raise Exception("This should be a UCN (e.g. \u1234):  %s" % codepoint)
               chars.append("""<a href="/ocrat/chargif/char/unicode.py?codepoint=%s">%s</a>""" % (codepoint, char))
            else:
               chars.append(char)
        content = ("""<p class="result">""" + "".join(chars) + """</p>""").encode("utf-8")
        print TEMPLATE % dict(content=content)
    else:
        content = "<p>No search string specified</p>"
        print TEMPLATE % dict(content=content)
    end_response()
