Steganografiaa Pythonilla

Akheron 20.03.08 20:04

Funktiot datan piilottamiseen ja lukemiseen kuvasta.

 Tekstiversio  Arvo: 4 (6 ääntä)  Äänestä: +  -
Avaimen perusteella generoidaan satunnaislukugeneraattorin siemenluku, ja satunnaislukugeneraattorilla
generoidaan permutaatio, jonka perusteella valitaan järjestys, jossa pikseleihin koodataan data. Tukee
vain 24-bittisiä kuvia. Data koodataan R-, G- ja B-arvojen vähiten merkitseviin bitteihin, joten lopputulos
ei ole silmällä havaittavissa. Tilastollisilla analysointimenetelmillä on kuitenkin helppo havaita tällä
tavoin kuvaan piilotetun datan olemassaolo.

Koska permutaatio generoidaan avaimen perusteella, ei koodausta voi purkaa ilman avainta, ei ainakaan
triviaalisti. En ole kryptanalyysin ekspertti, joten minulla ei ole hajuakaan kuinka helppoa purkaminen on
ammattilaiselle. Tässä on kuitenkin pyritty siihen, ettei avainavaruus ainakaan olisi liian pieni ottamalla
160-bittinen SHA1-tiiviste avaimesta ja käyttämällä sitä satunnaislukugeneraattorin siemenlukuna.
Pythonin Mersenne Twister -toteutus hyväksyy (nopean koodin vilkaisun perusteella) enintään 256-bittisiä
avaimia, ja koska Mersenne Twisterin tila on 19937-bittinen, tuntuisi intuitiivisesti siltä, että
purkaminen ei ole triviaalia. Näissä hommissa ei kuitenkaan kannata luottaa intuitioon :)

Huomaa, että jos kuva koodataan häviöllisellä pakkauksella (esim. JPEG), ei datan palauttaminen onnistu.
Suosittelen PNG:n käyttöä. Ohjelma vaatii toimiakseen Python Imaging Library (PIL) 1.1.6:n.


steg.py:

#!/usr/bin/env python

import math, random, sys, sha
from PIL import Image
from getpass import getpass


def _str2num(s):
    """Interpret the string s as a big-endian unsigned integer.

    The first character supplies the most significant byte. The empty
    string yields 0.
    """
    value = 0
    for ch in s:
        # Shift what has been read so far up by one byte and add the next
        value = (value << 8) + ord(ch)
    return value

def _num2str(n, m):
    """Render the integer n as a big-endian string of exactly m characters.

    Positions not needed by n are filled with NUL characters, so any
    n <= 0 gives m NULs. When n needs more than m bytes the write into
    the fixed-size buffer raises IndexError.
    """
    buf = [chr(0)] * m
    pos = 0
    while n > 0:
        # Peel off the lowest byte; buf holds the bytes little-endian
        low = n & 0xFF
        n >>= 8
        buf[pos] = chr(low)
        pos += 1
    # Flip into big-endian order for the result
    return "".join(buf[::-1])


def room(img):
    """Return the number of whole bytes that fit in the LSBs of img.

    One bit is stored per R, G and B value, so an image offers
    width * height * 3 bits; the result is that amount in bytes, rounded
    down. Images in any mode other than "RGB" yield 0. The bits that
    encode() spends on its length header are not subtracted.
    """
    if img.mode != "RGB":
        return 0
    width, height = img.size
    # Floor division keeps the byte count an integer even under true
    # division (Python 3 or "from __future__ import division").
    return 3 * width * height // 8


def encode(data, img, key):
    """Hide data in the least significant bits of img, keyed by key.

    The pixels of img are modified in place. A length header (the payload
    size in bits) is written first, followed by the payload one byte at a
    time. Every bit goes to a colour value picked by a pseudo-random
    permutation seeded with the SHA-1 digest of key.

    Returns True on success. Returns False, leaving img untouched, when
    img is not in "RGB" mode, when it has no pixels, when header plus
    payload need more bits than the image offers, or when the header
    would not end up where decode() looks for it.
    """

    # Only RGB images are supported at the moment
    if img.mode != "RGB":
        return False

    data_length = len(data)*8
    width = img.size[0]
    height = img.size[1]
    img_length = width*height*3     # one bit per colour value
    if img_length == 0:
        # Nothing to write to, and math.log(0) below would raise
        return False

    # Compute the number of bits needed to encode the data length
    size_length = int(math.ceil(math.log(img_length, 2)))
    total_length = data_length+size_length

    # Check that there's enough space in the image to encode the data.
    # Both sides are bit counts; room() reports bytes and must not be
    # compared against total_length.
    if total_length > img_length:
        return False

    # Compute a 160-bit PRNG seed using SHA1 digest. A private generator
    # is used so the state of the shared random module is not replaced
    # with key-derived state.
    seed = _str2num(sha.new(key).digest())

    # Generate a permutation of the length total_length from the
    # integers 0...img_length-1. These are the indices in the image in
    # which the header and the data are encoded.
    perm = random.Random(seed).sample(xrange(img_length), total_length)

    # decode() has to find the header before it knows total_length, so it
    # samples only size_length indices from the same seed. random.sample
    # does not promise that a shorter sample is a prefix of a longer one,
    # so refuse to write a payload that could not be read back.
    header_perm = random.Random(seed).sample(xrange(img_length), size_length)
    if perm[:size_length] != header_perm:
        return False

    img_data = img.load()

    # Encode the data length
    i = _encode(img_data, width, data_length, size_length, perm, 0)

    # Encode the data, one byte at a time
    for c in data:
        i = _encode(img_data, width, ord(c), 8, perm, i)

    return True

def _encode(img_data, img_width, data, data_length, perm, perm_index):
    """The low-level encoding function. Writes the data_length lowest
    bits of data, least significant bit first, to img_data at the
    positions perm[perm_index] ... perm[perm_index+data_length-1].
    img_width gives the image width in pixels.

    Each position p addresses colour channel p % 3 of pixel p // 3,
    with pixels counted row by row. Returns perm_index+data_length, the
    index in perm following the last one used.
    """

    for j in range(data_length):
        i = perm[perm_index+j]

        # Extract one bit
        bit = data & 1
        data >>= 1

        # Split the position into a pixel number and the channel to use
        # (R, G, or B), then the pixel number into image coordinates.
        # divmod keeps everything an integer regardless of how "/"
        # behaves.
        pixel, c = divmod(i, 3)
        y, x = divmod(pixel, img_width)

        # Load the RGB value from the image. Convert it to list to
        # make assignment possible.
        rgb = list(img_data[x, y])

        # Assign a bit from the data to the LSB of the colour value
        # being modified
        rgb[c] = (rgb[c] & 0xFE) | bit

        # Assign the obtained RGB value back to the image
        img_data[x, y] = tuple(rgb)

    return perm_index+data_length


def decode(img, key):
    """Decodes data from img using key. Returns the decoded data.

    The length header is read first, from positions derived from the
    SHA-1 digest of key, then that many bits of payload are read and
    returned as a string.

    Returns False when img is not in "RGB" mode, when it has no pixels,
    or when the header that was read cannot belong to a payload stored
    in this image. There is no integrity check on the payload itself, so
    a wrong key can still produce a string of meaningless bytes.
    """

    # Only RGB images are supported at the moment
    if img.mode != "RGB":
        return False

    width = img.size[0]
    height = img.size[1]
    img_length = width*height*3
    if img_length == 0:
        # Nothing to read from, and math.log(0) below would raise
        return False
    img_data = img.load()

    # Compute the number of bits needed to encode the data length
    size_length = int(math.ceil(math.log(img_length, 2)))

    # Derive the PRNG seed from the key. Private generators are used so
    # the state of the shared random module is left alone.
    seed = _str2num(sha.new(key).digest())

    # Generate a permutation of the length size_length from the
    # integers 0...img_length-1. These are the indices in the image in
    # which the data length is encoded.
    header_perm = random.Random(seed).sample(xrange(img_length), size_length)

    # Decode the data length (in bits)
    data_length, _ = _decode(img_data, width, size_length, header_perm, 0)

    # A header read with the wrong key, or from an image that carries no
    # payload, may announce more bits than the image has; sampling that
    # many indices would raise ValueError.
    total_length = size_length+data_length
    if total_length > img_length:
        return False

    # Regenerate the permutation from the same seed, now long enough to
    # cover the header and the data.
    perm = random.Random(seed).sample(xrange(img_length), total_length)

    # The header must sit at the start of the full permutation. If the
    # longer sample begins differently, the length was read from the
    # wrong positions and cannot be trusted.
    if perm[:size_length] != header_perm:
        return False

    # Decode the data, one byte at a time, starting right after the header
    i = size_length
    data = []
    for _ in range(data_length//8):
        byte, i = _decode(img_data, width, 8, perm, i)
        data.append(chr(byte))

    return "".join(data)

def _decode(img_data, img_width, data_length, perm, perm_index):
    """The low level decoding function. Reads data_length bits from
    img_data at the positions perm[perm_index] ...
    perm[perm_index+data_length-1]; the first bit read becomes the least
    significant bit of the result. img_width gives the image width in
    pixels.

    Each position p addresses colour channel p % 3 of pixel p // 3,
    with pixels counted row by row. Returns a pair: the integer that was
    read and perm_index+data_length, the index in perm following the
    last one used.
    """

    data = 0
    for j in range(data_length):
        i = perm[perm_index+j]

        # Split the position into a pixel number and the channel to use
        # (R, G, or B), then the pixel number into image coordinates.
        # divmod keeps everything an integer regardless of how "/"
        # behaves.
        pixel, c = divmod(i, 3)
        y, x = divmod(pixel, img_width)

        # Load the R, G or B colour value that carries the bit
        v = img_data[x, y][c]

        # Put its LSB into bit j of data
        data |= (v & 1) << j

    return data, perm_index+data_length


# Command-line entry point: hide the contents of a file in an image, save
# the result and check that the data can be read back from the saved file.
if __name__ == "__main__":
    if len(sys.argv) != 4:
        print("""\
Usage: %s [data] [src] [dest]

Reads the file `data', asks a key and encodes the data to the image
from file `src', producing an image to the file `dest'.
""" % sys.argv[0])
        sys.exit(1)

    # Load the image
    img = Image.open(sys.argv[2])
    print("Image: %s %s %s, %d bytes can be hidden" % (img.format, img.size, img.mode, room(img)))

    # Read the data. Binary mode keeps the payload byte-exact on platforms
    # that translate line endings in text mode; the file is closed even
    # if reading fails.
    data_file = open(sys.argv[1], "rb")
    try:
        data = data_file.read()
    finally:
        data_file.close()
    print("Data length: %d bytes" % len(data))

    # Quick capacity check before bothering the user for a key
    if room(img) < len(data):
        print("Error: Too much data to hide")
        sys.exit(1)

    # Query the key
    key = getpass("key    : ")
    confirm = getpass("confirm: ")

    if key != confirm:
        print("Error: Invalid key")
        sys.exit(1)

    # Encode. encode() signals failure by returning False; do not save an
    # image that carries no data.
    print("Encoding...")
    if encode(data, img, key) is False:
        print("Error: Too much data to hide")
        sys.exit(1)

    # Save
    img.save(sys.argv[3])
    print("done.")


    # Verify by decoding the file that was just written; this also
    # catches output formats that do not preserve the pixel values.
    print("Verifying...")
    img = Image.open(sys.argv[3])
    decoded_data = decode(img, key)

    if decoded_data == data:
        print("ok.")
    else:
        print("failed.")
        sys.exit(1)
 

Ztane 07:08 21.3.08 
Hyvähyvä. Meinasin kirjottaa tässä viikko taaksepäin itsekin, säästy vaiva :)