X
Xah Lee
The following is a program to generate thumbnail images for a website.
Useful, if you want to do that.
It is used to generate the thumbnails for my “Banners, Damsels, and
Mores” project gallery. (
http://xahlee.org/Periodic_dosage_dir/lanci/lanci.html )
Comments and versions in other lang welcome.
Xah
(e-mail address removed)
∑ http://xahlee.org/
# -*- coding: utf-8 -*-
# Python
# © 2006-04 by Xah Lee, ∑ http://xahlee.org/, 2006-04
# Given a website gallery with hundreds of photos, I want to generate a
# thumbnail page so that viewers can get a bird's-eye view of the
# images.
# Technically:
# Given a dir: e.g. /Users/xah/web/Periodic_dosage_dir/lanci/
# This dir has many html files inside it, maybe in sub dir.
# Any html file is a photo gallery, with inline images of photos.
# All the image files are under the given dir, or in a subdir.
# • The goal is to create thumbnail images of all the inline images
#   in all html files under that dir.
# • The destination of these thumbnail images can be specified, and is
#   unrelated to the given dir.
# • Thumbnail images must preserve the dir structure they are in. For
#   example, if an inline image's full path is /a/b/c/d/1.img, and the
#   root is given as /a/b, then the thumbnail image's path must retain
#   the c/d, as a sub dir under the specified thumbnail destination.
# • If the inline image's size is smaller than a certain given size
#   (specified as area), then skip it.
# Note: only inline images in a html file will be considered for
# thumbnails. Any other images in the given dir, or linked images, should
# be ignored.
###############################################
import re, subprocess, os.path
# Directory holding the html files and images, e.g. /a/b/c/d.
inPath = '/Users/xah/3D-XplorMath/Curves'  # no trailing slash

# A leading part of inPath. Thumbnails preserve the directory structure
# below it: if an image is at /a/b/c/d/e/f/1.png and rootDir is /a/b/c,
# then the thumbnail will be at /x/y/d/e/f/1.png.
rootDir = '/Users/xah/3D-XplorMath/Curves'  # no trailing slash

# Destination directory of the thumbnail images, e.g. /x/y. It will be
# created; existing files in it will be overwritten.
thumbnailDir = '/Users/xah/3D-XplorMath/Curves/tn'  # no trailing slash

# Thumbnail size, expressed as an area in pixels.
thumbnailSizeArea = 150 * 150

# If an image is smaller than this area, no thumbnail is generated for it.
minArea = 200 * 200

# Paths of the ImageMagick 'identify' and 'convert' programs.
identify = r'/sw/bin/identify'
convert = r'/sw/bin/convert'

# Depth of nested directories to dive into.
minLevel = 1  # files and dirs directly inside inPath are level 1
maxLevel = 9  # inclusive
###############################
## functions
def scaleFactor(A, size):
    """Return the linear scale factor that gives an image the area A.

    A is the desired thumbnail area. size is a (width, height) pair
    describing the original image. The returned number s satisfies
    (width*s) * (height*s) == A, so scaling the image by s in both
    dimensions produces a thumbnail of the requested area.

    Raises ZeroDivisionError if width*height is zero.
    """
    # Unpack in the body: tuple parameters such as "def f(A, (w, h))" are
    # a syntax error on Python 3. Callers still pass a single pair.
    w, h = size
    return (float(A) / float(w * h)) ** 0.5
def getInlineImg(file_full_path):
    """Return the values of all src="..." attributes found in an html file.

    file_full_path is the full path of a UTF-8 encoded html file. The
    result is a list of strings in document order; for example it may
    return ['xx.jpg', '../image.png']. Only double-quoted attribute
    values are recognised.
    """
    # Read bytes and decode explicitly so the result does not depend on the
    # platform's default encoding; "with" closes the file even on errors.
    with open(file_full_path, 'rb') as html_file:
        text = html_file.read().decode('utf-8')

    # The quoted value must directly follow "src" (optional whitespace
    # around "="). Searching all the text after each "src" instead would
    # pick up whatever quoted attribute comes next, e.g. an href that
    # follows the word "src" in ordinary text.
    links = re.findall(r'src\s*=\s*"([^"]+)"', text, re.U)

    # Return native strings: on Python 2 the decoded text is unicode and is
    # re-encoded to UTF-8 byte strings; on Python 3 it already is str.
    return [link if isinstance(link, str) else link.encode('utf-8')
            for link in links]
def linkFullPath(dir, locallink):
    """Return the normalised full path of a local link found in dir.

    dir is a directory path and locallink a link relative to it; both use
    '/' as separator. For example,
    linkFullPath('/Users/t/public_html/a/b', '../image/t.png') returns
    '/Users/t/public_html/a/image/t.png'. The returned path contains no
    repeated slashes and no './' or '../' segments.
    """
    import posixpath  # local import: these paths always use '/' separators

    # Collapse runs of slashes first; normpath on its own would preserve
    # exactly two leading slashes.
    joined = re.sub(r'//+', '/', dir + '/' + locallink)
    # normpath treats '..' as a parent reference only when it is a whole
    # path segment, so names such as '..foo' are left untouched.
    return posixpath.normpath(joined)
def _imageSize(imagePath):
    """Return (width, height) of an image via 'identify', or None on failure."""
    # Ask identify for just the two numbers. Parsing its default report by
    # column position breaks when the file name contains spaces.
    proc = subprocess.Popen(
        [identify, '-format', '%w %h\n', imagePath],
        stdout=subprocess.PIPE, universal_newlines=True)
    output = proc.communicate()[0]
    if proc.returncode != 0:
        return None
    # Multi-frame images print one line per frame; the first one is used.
    fields = output.split()
    try:
        return int(fields[0]), int(fields[1])
    except (IndexError, ValueError):
        return None


def buildThumbnails(dPath, fName, tbPath, rPath, areaA):
    """Generate thumbnails for the inline images of one html file.

    dPath  -- full path of the directory that contains the html file.
    fName  -- name of the html file inside dPath.
    tbPath -- destination directory of the thumbnail images.
    rPath  -- root directory; must be dPath or a leading part of it. The
              part of each image path below rPath is recreated under
              tbPath.
    areaA  -- desired thumbnail size, given as an area.

    For example, if
      dPath = '/Users/mary/Public/pictures'
      fName = 'trip.html'
      tbPath = '/Users/mary/Public/thumbs'
      rPath = '/Users/mary/Public'
    and trip.html contains <img src="Beijin/day1/img1.jpg">, then a
    thumbnail is generated at
    '/Users/mary/Public/thumbs/pictures/Beijin/day1/img1.jpg'.

    Links starting with 'http' or ending with 'icon_sum.gif' are ignored,
    as are images whose area is not larger than the global minArea,
    images that cannot be identified, and images outside rPath. This
    function runs the ImageMagick programs whose paths are set in the
    global variables 'identify' and 'convert'.
    """
    # Collect the full paths of the local inline images.
    imgPaths = [linkFullPath(dPath, link)
                for link in getInlineImg(dPath + '/' + fName)
                if not link.startswith('http')
                and not link.endswith('icon_sum.gif')]

    # Prefer the full sized image when it exists: a name ending in
    # '-s' (e.g. x-s.jpg) is replaced by the one without it (x.jpg).
    imgPaths2 = []
    for path in imgPaths:
        base, extension = os.path.splitext(path)
        if base.endswith('-s') and os.path.exists(base[:-2] + extension):
            path = base[:-2] + extension
        imgPaths2.append(path)

    # Find each image's width and height.
    # Each element of imgData has the form [image full path, [width, height]].
    imgData = []
    for path in imgPaths2:
        print("Identifying: %s" % (path,))
        size = _imageSize(path)
        if size is None:
            # A missing or unreadable image must not abort the whole run.
            print("Skipping (cannot identify): %s" % (path,))
            continue
        width, height = size
        if width * height > minArea:
            imgData.append([path, [width, height]])

    # Create the scaled images; the dir structure below rPath is replicated
    # under tbPath.
    rootPrefix = rPath + '/'
    for imp in imgData:
        print("Thumbnailing: %s" % (imp,))
        oriImgFullPath, (width, height) = imp
        if not oriImgFullPath.startswith(rootPrefix):
            # Slicing off len(rPath) characters is only meaningful for
            # paths that really are below rPath.
            print("Skipping (outside root dir): %s" % (oriImgFullPath,))
            continue
        thumbnailRelativePath = oriImgFullPath[len(rootPrefix):]
        thumbnailFullPath = tbPath + '/' + thumbnailRelativePath
        print('r %s' % (thumbnailRelativePath,))
        print('f %s' % (thumbnailFullPath,))
        sf = scaleFactor(areaA, (width, height))

        # Make the directories leading to the thumbnail.
        dirName = os.path.dirname(thumbnailFullPath)
        print("Creating thumbnail: %s" % (thumbnailFullPath,))
        try:
            os.makedirs(dirName, 0o775)
        except OSError:
            # An already existing directory is fine; anything else (e.g.
            # permission denied) is a real error.
            if not os.path.isdir(dirName):
                raise

        # Create the thumbnail. Waiting for convert avoids piling up one
        # unreaped child process per image and lets failures be reported.
        status = subprocess.call(
            [convert, '-scale', str(round(sf * 100, 2)) + '%',
             oriImgFullPath, thumbnailFullPath])
        if status != 0:
            print("convert failed (exit status %s): %s"
                  % (status, oriImgFullPath))
#################
# main
def dirHandler(dummy, curdir, filess):
    """Build thumbnails for every html file directly inside one directory.

    dummy is unused. curdir is the directory being handled and filess the
    names of the entries in it. Nothing is done unless the nesting level
    of those entries, counted relative to the global inPath, lies between
    the globals minLevel and maxLevel (inclusive).
    """
    # inPath itself is depth 0, so the entries directly inside it are
    # level 1.
    filessLevel = curdir.count('/') - inPath.count('/') + 1
    if not (minLevel <= filessLevel <= maxLevel):
        return

    for child in filess:
        childPath = curdir + '/' + child
        if os.path.splitext(child)[1] == '.html' and os.path.isfile(childPath):
            print("processing: %s" % (childPath,))
            buildThumbnails(curdir, child, thumbnailDir, rootDir,
                            thumbnailSizeArea)
# Get rid of trailing slashes. A path made only of slashes (or an empty
# one) is left alone rather than indexed, which would raise IndexError.
if inPath.rstrip('/'):
    inPath = inPath.rstrip('/')

# os.walk is used because os.path.walk no longer exists on Python 3. The
# handler is given every entry name of the directory (sub-directories and
# files), which is what the os.path.walk visitor received.
for curdir, subdirs, files in os.walk(inPath):
    dirHandler('dummy', curdir, subdirs + files)
Useful, if you want to do that.
It is used to generate the thumbnails for my “Banners, Damsels, and
Mores” project gallery. (
http://xahlee.org/Periodic_dosage_dir/lanci/lanci.html )
Comments and versions in other lang welcome.
Xah
(e-mail address removed)
∑ http://xahlee.org/
# -*- coding: utf-8 -*-
# Python
# © 2006-04 by Xah Lee, ∑ http://xahlee.org/, 2006-04
# Given a website gallery with hundreds of photos, I want to generate a
# thumbnail page so that viewers can get a bird's-eye view of the
# images.
# Technically:
# Given a dir: e.g. /Users/xah/web/Periodic_dosage_dir/lanci/
# This dir has many html files inside it, maybe in sub dir.
# Any html file is a photo gallery, with inline images of photos.
# All the image files are under the given dir, or in a subdir.
# • The goal is to create thumbnail images of all the inline images
#   in all html files under that dir.
# • The destination of these thumbnail images can be specified, and is
#   unrelated to the given dir.
# • Thumbnail images must preserve the dir structure they are in. For
#   example, if an inline image's full path is /a/b/c/d/1.img, and the
#   root is given as /a/b, then the thumbnail image's path must retain
#   the c/d, as a sub dir under the specified thumbnail destination.
# • If the inline image's size is smaller than a certain given size
#   (specified as area), then skip it.
# Note: only inline images in a html file will be considered for
# thumbnails. Any other images in the given dir, or linked images, should
# be ignored.
###############################################
import re, subprocess, os.path
# Directory holding the html files and images, e.g. /a/b/c/d.
inPath = '/Users/xah/3D-XplorMath/Curves'  # no trailing slash

# A leading part of inPath. Thumbnails preserve the directory structure
# below it: if an image is at /a/b/c/d/e/f/1.png and rootDir is /a/b/c,
# then the thumbnail will be at /x/y/d/e/f/1.png.
rootDir = '/Users/xah/3D-XplorMath/Curves'  # no trailing slash

# Destination directory of the thumbnail images, e.g. /x/y. It will be
# created; existing files in it will be overwritten.
thumbnailDir = '/Users/xah/3D-XplorMath/Curves/tn'  # no trailing slash

# Thumbnail size, expressed as an area in pixels.
thumbnailSizeArea = 150 * 150

# If an image is smaller than this area, no thumbnail is generated for it.
minArea = 200 * 200

# Paths of the ImageMagick 'identify' and 'convert' programs.
identify = r'/sw/bin/identify'
convert = r'/sw/bin/convert'

# Depth of nested directories to dive into.
minLevel = 1  # files and dirs directly inside inPath are level 1
maxLevel = 9  # inclusive
###############################
## functions
def scaleFactor(A, size):
    """Return the linear scale factor that gives an image the area A.

    A is the desired thumbnail area. size is a (width, height) pair
    describing the original image. The returned number s satisfies
    (width*s) * (height*s) == A, so scaling the image by s in both
    dimensions produces a thumbnail of the requested area.

    Raises ZeroDivisionError if width*height is zero.
    """
    # Unpack in the body: tuple parameters such as "def f(A, (w, h))" are
    # a syntax error on Python 3. Callers still pass a single pair.
    w, h = size
    return (float(A) / float(w * h)) ** 0.5
def getInlineImg(file_full_path):
    """Return the values of all src="..." attributes found in an html file.

    file_full_path is the full path of a UTF-8 encoded html file. The
    result is a list of strings in document order; for example it may
    return ['xx.jpg', '../image.png']. Only double-quoted attribute
    values are recognised.
    """
    # Read bytes and decode explicitly so the result does not depend on the
    # platform's default encoding; "with" closes the file even on errors.
    with open(file_full_path, 'rb') as html_file:
        text = html_file.read().decode('utf-8')

    # The quoted value must directly follow "src" (optional whitespace
    # around "="). Searching all the text after each "src" instead would
    # pick up whatever quoted attribute comes next, e.g. an href that
    # follows the word "src" in ordinary text.
    links = re.findall(r'src\s*=\s*"([^"]+)"', text, re.U)

    # Return native strings: on Python 2 the decoded text is unicode and is
    # re-encoded to UTF-8 byte strings; on Python 3 it already is str.
    return [link if isinstance(link, str) else link.encode('utf-8')
            for link in links]
def linkFullPath(dir, locallink):
    """Return the normalised full path of a local link found in dir.

    dir is a directory path and locallink a link relative to it; both use
    '/' as separator. For example,
    linkFullPath('/Users/t/public_html/a/b', '../image/t.png') returns
    '/Users/t/public_html/a/image/t.png'. The returned path contains no
    repeated slashes and no './' or '../' segments.
    """
    import posixpath  # local import: these paths always use '/' separators

    # Collapse runs of slashes first; normpath on its own would preserve
    # exactly two leading slashes.
    joined = re.sub(r'//+', '/', dir + '/' + locallink)
    # normpath treats '..' as a parent reference only when it is a whole
    # path segment, so names such as '..foo' are left untouched.
    return posixpath.normpath(joined)
def _imageSize(imagePath):
    """Return (width, height) of an image via 'identify', or None on failure."""
    # Ask identify for just the two numbers. Parsing its default report by
    # column position breaks when the file name contains spaces.
    proc = subprocess.Popen(
        [identify, '-format', '%w %h\n', imagePath],
        stdout=subprocess.PIPE, universal_newlines=True)
    output = proc.communicate()[0]
    if proc.returncode != 0:
        return None
    # Multi-frame images print one line per frame; the first one is used.
    fields = output.split()
    try:
        return int(fields[0]), int(fields[1])
    except (IndexError, ValueError):
        return None


def buildThumbnails(dPath, fName, tbPath, rPath, areaA):
    """Generate thumbnails for the inline images of one html file.

    dPath  -- full path of the directory that contains the html file.
    fName  -- name of the html file inside dPath.
    tbPath -- destination directory of the thumbnail images.
    rPath  -- root directory; must be dPath or a leading part of it. The
              part of each image path below rPath is recreated under
              tbPath.
    areaA  -- desired thumbnail size, given as an area.

    For example, if
      dPath = '/Users/mary/Public/pictures'
      fName = 'trip.html'
      tbPath = '/Users/mary/Public/thumbs'
      rPath = '/Users/mary/Public'
    and trip.html contains <img src="Beijin/day1/img1.jpg">, then a
    thumbnail is generated at
    '/Users/mary/Public/thumbs/pictures/Beijin/day1/img1.jpg'.

    Links starting with 'http' or ending with 'icon_sum.gif' are ignored,
    as are images whose area is not larger than the global minArea,
    images that cannot be identified, and images outside rPath. This
    function runs the ImageMagick programs whose paths are set in the
    global variables 'identify' and 'convert'.
    """
    # Collect the full paths of the local inline images.
    imgPaths = [linkFullPath(dPath, link)
                for link in getInlineImg(dPath + '/' + fName)
                if not link.startswith('http')
                and not link.endswith('icon_sum.gif')]

    # Prefer the full sized image when it exists: a name ending in
    # '-s' (e.g. x-s.jpg) is replaced by the one without it (x.jpg).
    imgPaths2 = []
    for path in imgPaths:
        base, extension = os.path.splitext(path)
        if base.endswith('-s') and os.path.exists(base[:-2] + extension):
            path = base[:-2] + extension
        imgPaths2.append(path)

    # Find each image's width and height.
    # Each element of imgData has the form [image full path, [width, height]].
    imgData = []
    for path in imgPaths2:
        print("Identifying: %s" % (path,))
        size = _imageSize(path)
        if size is None:
            # A missing or unreadable image must not abort the whole run.
            print("Skipping (cannot identify): %s" % (path,))
            continue
        width, height = size
        if width * height > minArea:
            imgData.append([path, [width, height]])

    # Create the scaled images; the dir structure below rPath is replicated
    # under tbPath.
    rootPrefix = rPath + '/'
    for imp in imgData:
        print("Thumbnailing: %s" % (imp,))
        oriImgFullPath, (width, height) = imp
        if not oriImgFullPath.startswith(rootPrefix):
            # Slicing off len(rPath) characters is only meaningful for
            # paths that really are below rPath.
            print("Skipping (outside root dir): %s" % (oriImgFullPath,))
            continue
        thumbnailRelativePath = oriImgFullPath[len(rootPrefix):]
        thumbnailFullPath = tbPath + '/' + thumbnailRelativePath
        print('r %s' % (thumbnailRelativePath,))
        print('f %s' % (thumbnailFullPath,))
        sf = scaleFactor(areaA, (width, height))

        # Make the directories leading to the thumbnail.
        dirName = os.path.dirname(thumbnailFullPath)
        print("Creating thumbnail: %s" % (thumbnailFullPath,))
        try:
            os.makedirs(dirName, 0o775)
        except OSError:
            # An already existing directory is fine; anything else (e.g.
            # permission denied) is a real error.
            if not os.path.isdir(dirName):
                raise

        # Create the thumbnail. Waiting for convert avoids piling up one
        # unreaped child process per image and lets failures be reported.
        status = subprocess.call(
            [convert, '-scale', str(round(sf * 100, 2)) + '%',
             oriImgFullPath, thumbnailFullPath])
        if status != 0:
            print("convert failed (exit status %s): %s"
                  % (status, oriImgFullPath))
#################
# main
def dirHandler(dummy, curdir, filess):
    """Build thumbnails for every html file directly inside one directory.

    dummy is unused. curdir is the directory being handled and filess the
    names of the entries in it. Nothing is done unless the nesting level
    of those entries, counted relative to the global inPath, lies between
    the globals minLevel and maxLevel (inclusive).
    """
    # inPath itself is depth 0, so the entries directly inside it are
    # level 1.
    filessLevel = curdir.count('/') - inPath.count('/') + 1
    if not (minLevel <= filessLevel <= maxLevel):
        return

    for child in filess:
        childPath = curdir + '/' + child
        if os.path.splitext(child)[1] == '.html' and os.path.isfile(childPath):
            print("processing: %s" % (childPath,))
            buildThumbnails(curdir, child, thumbnailDir, rootDir,
                            thumbnailSizeArea)
# Get rid of trailing slashes. A path made only of slashes (or an empty
# one) is left alone rather than indexed, which would raise IndexError.
if inPath.rstrip('/'):
    inPath = inPath.rstrip('/')

# os.walk is used because os.path.walk no longer exists on Python 3. The
# handler is given every entry name of the directory (sub-directories and
# files), which is what the os.path.walk visitor received.
for curdir, subdirs, files in os.walk(inPath):
    dirHandler('dummy', curdir, subdirs + files)