Ahmad said:
I am a Python newbie and I want to print UTF-8 Arabic characters on the
console. They print OK with
print text.encode("UTF-8")
BUT, the characters are printed LTR, not RTL (right to left). How can
I change the printing direction??
Are all Arabic characters 2 bytes long in UTF-16? If so, the following RTLStream
class should work in an otherwise left-to-right environment.
Call the script with a -d or --delay parameter to see it working.
<rtl.py>
import sys, time
def utfReverse(s):
    """Return the UTF-8 byte string *s* with its characters reversed.

    *s* is decoded as UTF-8, reversed and encoded as UTF-8 again.  The
    reversal works on small clusters instead of single code units so that
    the result still displays correctly:

    * a combining mark (an accent, an Arabic vowel sign, ...) stays
      directly behind the base character it belongs to, and
    * the two halves of a UTF-16 surrogate pair, which is how interpreters
      with 16-bit unicode strings store characters that need more than
      2 bytes in UTF-16, keep their original order.

    Raises UnicodeDecodeError if *s* is not valid UTF-8.
    """
    # Local import: the function stays usable without touching the
    # module-level import line.
    import unicodedata

    clusters = []
    for ch in s.decode("utf-8"):
        code = ord(ch)
        # A combining mark, or a low surrogate that directly follows a high
        # surrogate, must not be separated from the code unit before it.
        if clusters and (
                unicodedata.combining(ch)
                or (0xDC00 <= code <= 0xDFFF
                    and 0xD800 <= ord(clusters[-1][-1]) <= 0xDBFF)):
            clusters[-1] += ch
        else:
            clusters.append(ch)
    clusters.reverse()
    return u"".join(clusters).encode("utf-8")
class RTLStream:
    """Emulate a right-to-left printing console in a left-to-right
    environment.

    Everything written to the stream is reversed with utfReverse() and
    redrawn right-justified in a field of `wrapwidth` columns on the
    wrapped stream `out`.  Input is expected to be UTF-8 encoded byte
    strings.
    """

    def __init__(self, out=sys.stdout, wrapwidth=40):
        """Wrap the stream *out*; lines are at most *wrapwidth* characters."""
        self.out = out
        self.wrapwidth = wrapwidth
        # The line currently being built, stored as a UTF-8 byte string.
        self.curline = ""

    def _write(self, s):
        """Append the reversed form of *s* (which must not contain a
        newline) to the current line and redraw that line."""
        if len(s) == 0:
            return
        # Do all width arithmetic on decoded text: counting, slicing and
        # padding the UTF-8 bytes would wrap too early, misalign the
        # right-justification and could cut a multi-byte character in half.
        text = (self.curline + utfReverse(s)).decode("utf-8")
        width = self.wrapwidth
        # Carriage return: redraw the line from its first column.
        self.out.write("\r")
        # Loop (not a single `if`) so that one long write is broken into as
        # many full lines as needed; a non-positive width disables wrapping.
        while 0 < width < len(text):
            self.out.write(text[:width].encode("utf-8"))
            self.out.write("\n")
            text = text[width:]
        self.out.write(text.rjust(width).encode("utf-8"))
        self.curline = text.encode("utf-8")

    def _nl(self):
        """Finish the current line and start an empty one."""
        self.out.write("\n")
        self.curline = ""

    def write(self, s):
        """File-like write(): output *s*, which may span several lines.

        The lines contained in a single call are emitted in reverse order,
        each one but the last followed by a line break.
        """
        lines = s.split("\n")
        lines.reverse()
        for line in lines[:-1]:
            self._write(line)
            self._nl()
        # The last piece stays open so later writes can extend it.
        self._write(lines[-1])
class SlowStream:
    """Throttled wrapper around a target stream.

    Each element of a written string is forwarded to the target on its
    own, preceded by a pause of `delay` seconds and followed by a flush,
    so the output can be watched as it is produced.
    """

    def __init__(self, out=sys.stdout, delay=0.01):
        """Remember the target stream *out* and the per-item *delay*."""
        self.out = out
        self.delay = delay

    def write(self, s):
        """Forward *s* to the target stream one item at a time."""
        pause = self.delay
        target = self.out
        for item in s:
            time.sleep(pause)
            target.write(item)
            # Flush right away; otherwise buffering would hide the delay.
            target.flush()
if __name__ == "__main__":
rtlstream = RTLStream(wrapwidth=36)
args = sys.argv[1:]
if "--delay" in args or "-d" in args:
rtlstream.out = SlowStream()
# always save a copy of the original stdout
orig_stdout = sys.stdout
# redirect stdout
sys.stdout = rtlstream
print "sella ow",
print "tsieweb kc\xc3\xbclg hcrud hcis"
print "dnu", "thcsuat",
print "kcilb ned", "hcuregniew mi\negnir eid thcsuat dnu",
print "egnid red hcsuar mi"
print "med kc\xc3\xbclgnegeg med ud tsneid"
# restore stdout
sys.stdout = orig_stdout
# explicit redirection with
# print >> rtlstream, some_text
# is usually preferable
print
print "back to normal"
print >> rtlstream, "a saner way to use it"
print "that's all folks"
</rtl.py>
Disclaimer: As I know nothing about right-to-left printing languages, it's
likely that I have got it at least partially wrong.
Can anybody point me to a way to iterate over characters with a varying
number of bytes? Something like
    for c in "Gru\xc3\x9f".characters("utf-8"):
        print repr(c),
    # should print 'G' 'r' 'u' '\xc3\x9f'
Peter