problems with base64

K

Karl Pech

Hi all,

I'm trying to write a program which can read in files in the following
format:
sos_encoded.txt:
---
begin-base64 644 sos.txt
UGxlYXNlLCBoZWxwIG1lIQ==
---

and convert them to "clear byte code". For example if you take the file
sos_encoded.txt and use my program on it you should get the following:
sos.txt:
---
Please, help me!
---

Unfortunately if I try to convert files which didn't have any "human
readable text" when they were encoded and if these files are large (> 1.5MB)
I get back corrupted files.

This is the source of my program:
---
import string

def extract_base64(source):
    """Return ``source`` with every non-base64 character removed.

    The characters kept are the 64 alphabet characters (A-Z, a-z, 0-9,
    '+', '/') and the padding character '='.  Everything else, such as
    the line breaks between the encoded lines, is dropped.  The order of
    the remaining characters is unchanged.

    source  -- string read from the encoded file
    returns -- string consisting only of base64 characters (may be empty)
    """
    # string.letters depends on the current locale and may contain
    # non-ASCII letters; string.ascii_letters is always exactly A-Z, a-z.
    base64_chars = string.ascii_letters + string.digits + "+/="

    # keep the base64 characters directly instead of first collecting the
    # unwanted characters and deleting them in a second pass
    return ''.join([char for char in source if char in base64_chars])

def convert_to_8bits(source):
    """Decode base64 characters into a list of one-character strings.

    ``source`` is processed in groups of four characters.  Each group
    carries 24 bits and yields up to three decoded characters.  The
    padding character '=' counts as zero bits and one decoded character
    is dropped for every '=' in the group, so "IQ==" yields a single
    character and an all-padding group such as "====" yields none.

    source  -- base64 characters only; the length has to be a multiple
               of four (an empty string gives an empty list)
    returns -- list of decoded one-character strings
    raises  -- IndexError if the last group has fewer than four
               characters, KeyError if a character is not a base64
               character
    """
    alphabet = (string.ascii_uppercase + string.ascii_lowercase
                + string.digits + "+/")
    # character -> 6-bit value: 'A' is 0, 'a' is 26, '0' is 52, '/' is 63
    base64_table = dict(zip(alphabet, range(64)))
    base64_table['='] = 0

    result_ = []

    for start in range(0, len(source), 4):
        group = source[start:start + 4]
        if len(group) < 4:
            raise IndexError("incomplete base64 group: %r" % (group,))

        # fill an integer with the four 6-bit-blocks from left to right
        box_ = 0
        for char in group:
            box_ = (box_ << 6) | base64_table[char]

        # cut the 24 bits into three 8-bit-blocks, highest block first
        decoded = [chr((box_ >> 16) & 255),
                   chr((box_ >> 8) & 255),
                   chr(box_ & 255)]

        # strip the zeros that stem from padding; the number of kept
        # characters must not become negative, otherwise a "====" group
        # would leave two NUL characters in the result
        keep = max(0, 3 - group.count('='))
        result_ += decoded[:keep]

    return result_

#open source file in binary-mode
fsource = open(raw_input("Please specify the source file that should be decoded: "), "rb")

# read in first line of the file and split it in 2+n "whitespace-blocks"
_1stline = fsource.readline().split()

# delete the first two blocks ("begin ..." and "644 ...")
del _1stline[0:2]

# join the other blocks to the target-filename
targetname = string.join(_1stline)
ftarget = open(targetname, "wb")

# read in the remainder of the file in 4-byte-blocks and write the results in 3-byte-blocks
# into the target file

while 1 == 1:
source = ''
while len(source) < 4:
source += fsource.read(4)
if source == '':
break

# reduce byte-code to base64-chars
source = extract_base64(source)

if source == '':
break

# convert 6-bit-blocks to 8-bit-blocks
clear_text = convert_to_8bits(source)

ftarget.writelines(clear_text)

ftarget.close()
fsource.close()

print "file "+targetname+" has been written!"
---

Unfortunately I can't use python's standard base64-module since
this whole task is an exercise. :(

And I don't see any logical problems in my code. I think I really
need some more eyes to watch over this. So you are my "last hope"! ;)
Perhaps you can give me a hint.

Thank you very much!

Regards
Karl
 
B

Byron

Hi Karl,

I don't know if this is much help for you, but have you tried using the
following:

import base64
# decodes the sample line from the question; prints: Please, help me!
print base64.decodestring("UGxlYXNlLCBoZWxwIG1lIQ==")
# the reverse direction; prints the encoded form UGxlYXNlLCBoZWxwIG1lIQ==
print base64.encodestring("Please, help me!")

Byron
---



Karl said:
Hi all,

I'm trying to write a program which can read in files in the following
format:
sos_encoded.txt:
---
begin-base64 644 sos.txt
UGxlYXNlLCBoZWxwIG1lIQ==
---

and convert them to "clear byte code". For example if you take the file
sos_encoded.txt and use my program on it you should get the following:
sos.txt:
---
Please, help me!
---

Unfortunately if I try to convert files which didn't have any "human
readable text" when they were encoded and if these files are large (> 1.5MB)
I get back corrupted files.

This is the source of my program:
---
import string

def extract_base64(source):
    """Return ``source`` with every non-base64 character removed.

    The characters kept are the 64 alphabet characters (A-Z, a-z, 0-9,
    '+', '/') and the padding character '='.  Everything else, such as
    the line breaks between the encoded lines, is dropped.  The order of
    the remaining characters is unchanged.

    source  -- string read from the encoded file
    returns -- string consisting only of base64 characters (may be empty)
    """
    # string.letters depends on the current locale and may contain
    # non-ASCII letters; string.ascii_letters is always exactly A-Z, a-z.
    base64_chars = string.ascii_letters + string.digits + "+/="

    # keep the base64 characters directly instead of first collecting the
    # unwanted characters and deleting them in a second pass
    return ''.join([char for char in source if char in base64_chars])

def convert_to_8bits(source):
    """Decode base64 characters into a list of one-character strings.

    ``source`` is processed in groups of four characters.  Each group
    carries 24 bits and yields up to three decoded characters.  The
    padding character '=' counts as zero bits and one decoded character
    is dropped for every '=' in the group, so "IQ==" yields a single
    character and an all-padding group such as "====" yields none.

    source  -- base64 characters only; the length has to be a multiple
               of four (an empty string gives an empty list)
    returns -- list of decoded one-character strings
    raises  -- IndexError if the last group has fewer than four
               characters, KeyError if a character is not a base64
               character
    """
    alphabet = (string.ascii_uppercase + string.ascii_lowercase
                + string.digits + "+/")
    # character -> 6-bit value: 'A' is 0, 'a' is 26, '0' is 52, '/' is 63
    base64_table = dict(zip(alphabet, range(64)))
    base64_table['='] = 0

    result_ = []

    for start in range(0, len(source), 4):
        group = source[start:start + 4]
        if len(group) < 4:
            raise IndexError("incomplete base64 group: %r" % (group,))

        # fill an integer with the four 6-bit-blocks from left to right
        box_ = 0
        for char in group:
            box_ = (box_ << 6) | base64_table[char]

        # cut the 24 bits into three 8-bit-blocks, highest block first
        decoded = [chr((box_ >> 16) & 255),
                   chr((box_ >> 8) & 255),
                   chr(box_ & 255)]

        # strip the zeros that stem from padding; the number of kept
        # characters must not become negative, otherwise a "====" group
        # would leave two NUL characters in the result
        keep = max(0, 3 - group.count('='))
        result_ += decoded[:keep]

    return result_

#open source file in binary-mode
fsource = open(raw_input("Please specify the source file that should be decoded: "), "rb")

# read in first line of the file and split it in 2+n "whitespace-blocks"
_1stline = fsource.readline().split()

# delete the first two blocks ("begin ..." and "644 ...")
del _1stline[0:2]

# join the other blocks to the target-filename
targetname = string.join(_1stline)
ftarget = open(targetname, "wb")

# read in the remainder of the file in 4-byte-blocks and write the results in 3-byte-blocks
# into the target file

while 1 == 1:
source = ''
while len(source) < 4:
source += fsource.read(4)
if source == '':
break

# reduce byte-code to base64-chars
source = extract_base64(source)

if source == '':
break

# convert 6-bit-blocks to 8-bit-blocks
clear_text = convert_to_8bits(source)

ftarget.writelines(clear_text)

ftarget.close()
fsource.close()

print "file "+targetname+" has been written!"
---

Unfortunately I can't use python's standard base64-module since
this whole task is an exercise. :(

And I don't see any logical problems in my code. I think I really
need some more eyes to watch over this. So you are my "last hope"! ;)
Perhaps you can give me a hint.

Thank you very much!

Regards
Karl
 
K

Karl Pech

Hi Byron,
Byron said:
I don't know if this is much help for you, but have you tried using the
following:

import base64
# decodes the sample line from the question; prints: Please, help me!
print base64.decodestring("UGxlYXNlLCBoZWxwIG1lIQ==")
# the reverse direction; prints the encoded form UGxlYXNlLCBoZWxwIG1lIQ==
print base64.encodestring("Please, help me!")

Thank you very much for your answer! I'm glad that somebody answered me,
because my problem slowly but surely is getting worse. :((
I found the mistake in the previous code: it was a problem with read();
I think it should be read(1). Anyway, I deleted this old version of my
program since it was very slow for base64 - files > 0.5 MB. Now I coded the
program below. This program doesn't work at all. It seems to me that
some variables that read_in_data should use are somehow out of scope.
The idea of the program is: "Read in 5-byte blocks of base64 data. Since
5 % 4 != 0, we get an IndexError exception in the "while len(source) < 4" loop;
we catch this exception, set the source file pointer 1 byte back, because we
would lose one byte otherwise, and read in the next 4 bytes of the source file."
Unfortunately I don't know how to solve this Unbound...Error-Thing. |((
I think I'm really stuck now. @{

And as I said, since this task is an exercise for this weekend I'm not allowed to
use the standard base64-module. Well, I have to code the base64-decoder myself. :(((

Anyway, Thanks, Byron.

---
import string

def convert_to_8bits(source):
    """Decode the first four base64 characters of ``source``.

    The four 6-bit values are packed into 24 bits, which are returned as
    a list of three one-character strings.  The padding character '=' is
    treated as the value 0 and nothing is stripped here, so padded input
    still yields three characters.

    source  -- indexable sequence of at least four base64 characters
    returns -- list of three one-character strings
    """
    # position in this string == 6-bit value of the character
    alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
    sextet_of = dict(zip(alphabet, range(64)))
    sextet_of['='] = 0

    # shift the characters in one after the other, first character highest
    bits = 0
    for position in range(4):
        bits = (bits << 6) | sextet_of[source[position]]

    # read the 24 bits back as three 8-bit values, highest first
    return [chr((bits >> shift) & 255) for shift in (16, 8, 0)]

fsource = open(raw_input("Bitte geben Sie die zu dekodierende base64-Datei an: "), "rb")

_1stline = fsource.readline().split()

del _1stline[0:2]

targetname = string.join(_1stline)
ftarget = open(targetname, "wb")

global source_file_data
global target_file_buffer
global filling_counter

source_file_data = fsource.read(5)
target_file_buffer = []
filling_counter = 0

def read_in_data():
for i in range(0, len(source_file_data)):
print i
source = []
while len(source) < 4:
source += source_file_data

if source_file_data == '=':
filling_counter += 1

if not(ord('a') <= ord(source_file_data) <= ord('z')) and\
not(ord('A') <= ord(source_file_data) <= ord('Z')) and\
not(ord('0') <= ord(source_file_data) <= ord('9'))\
and not(ord(source_file_data) in [ord('+'), ord('/'), ord('=')]):
del source[len(source)-1]

i += 1

clear_text = convert_to_8bits(source)

target_file_buffer += clear_text

try:
read_in_data()
except IndexError:
print "exception"
fsource.seek(-1, 1)
source_file_data = fsource.read(5)
read_in_data()

ftarget.writelines(target_file_buffer)
ftarget.seek(filling_counter-1, 2)
ftarget.truncate()

ftarget.close()
fsource.close()

print "decoded file "+targetname+" has been written!"
---
 

Ask a Question

Want to reply to this thread or ask your own question?

You'll need to choose a username for the site, which only takes a couple of moments. After that, you can post your question and our members will help you out.

Ask a Question

Similar Threads


Members online

Forum statistics

Threads
473,744
Messages
2,569,482
Members
44,901
Latest member
Noble71S45

Latest Threads

Top