#!/usr/bin/perl

# Maximum number of input bytes buffered into the temp file before iconv is
# run on it.  Declared with "our" so it stays a package variable.
our $CHUNK_SIZE = 1024 * 1024 * 100;    # 100M

=head1 NAME

iconv-chunks - Process huge files with iconv

=head1 SYNOPSIS

  iconv-chunks <filename> [iconv-options]

=head1 DESCRIPTION

The standard iconv program reads the entire input file into
memory, which doesn't work for large files (such as database exports).

This script is just a wrapper that processes the input file
in manageable chunks and writes it to standard output.

The first argument is the input filename (use - to specify standard input).
Anything else is passed through to iconv.

The real iconv needs to be somewhere in your PATH.

=head1 EXAMPLES

  # Convert latin1 to utf-8:
  ./iconv-chunks database.txt -f latin1 -t utf-8 > out.txt

  # Input filename of - means standard input:
  ./iconv-chunks - -f iso8859-1 -t utf8 < database.txt > out.txt

  # More complex example, using compressed input/output to minimize disk use:
  zcat database.txt.gz | ./iconv-chunks - -f iso8859-1 -t utf8 | \
    gzip - > database-utf.dump.gz

=head1 AUTHOR

Maurice Aubrey <maurice.aubrey+iconv@gmail.com>

=cut

# $Id: iconv-chunks 6 2007-08-20 21:14:55Z mla $

use strict;
use warnings;
use bytes;                        # length() below must count bytes
use File::Temp qw/ tempfile /;
use IO::Handle;                   # provides $fh->flush

# iconv errors:
#   iconv: unable to allocate buffer for input: Cannot allocate memory
#   iconv: cannot open input file `database.txt': File too large

@ARGV >= 1 or die "Usage: $0 <inputfile> [iconv-options]\n";

# Everything after the input filename is handed to iconv untouched; the
# filename itself stays in @ARGV so that <> reads it ("-" selects STDIN).
my @options = splice @ARGV, 1;

# Scratch file that holds the current chunk.  UNLINK (not CLEANUP, which is
# a tempdir() option) is what asks File::Temp to remove the file at exit.
my ($oh, $tmp) = tempfile(UNLINK => 1);
# warn "Tempfile: $tmp\n";

# Kept as a list so system() runs iconv directly, without a shell that
# would re-split or interpret the options and the temp path.
my @iconv_cmd = ('iconv', @options, $tmp);

# Run iconv over the chunk currently stored in the temp file.  iconv inherits
# our STDOUT, so its converted output goes straight to standard output.
# Dies if the chunk cannot be flushed or if iconv does not exit with 0.
sub iconv {
    # Make sure every buffered byte is on disk before the child reads it.
    $oh->flush or die "flush of '$tmp' failed: $!";

    my $status = system @iconv_cmd;
    return if $status == 0;

    my $shown = "@iconv_cmd";

    # -1 means the program could not be started; only then is $! meaningful.
    die "command '$shown' failed: $!" if $status == -1;

    # Otherwise $? carries the signal number (low 7 bits) or the exit code.
    die sprintf "command '%s' died from signal %d", $shown, $? & 127
        if $? & 127;
    die sprintf "command '%s' exited with status %d", $shown, $? >> 8;
}

my $size = 0;

# Must read by line to ensure we don't split a multi-byte character.
# NOTE(review): this relies on the newline byte never occurring inside a
# multi-byte sequence, which holds for encodings such as latin1 or UTF-8 but
# not for UTF-16/UTF-32 input -- confirm against the encodings you convert.
while (<>) {
    $size += length $_;
    print {$oh} $_ or die "write to '$tmp' failed: $!";

    next if $size < $CHUNK_SIZE;

    # Chunk is full: convert it, then empty and rewind the temp file.
    iconv();
    truncate $oh, 0 or die "truncate '$tmp' failed: $!";
    seek $oh, 0, 0 or die "seek on '$tmp' failed: $!";
    $size = 0;
}

# Convert whatever is left over from the last (partial) chunk.
iconv() if $size > 0;
# Comments (0)
# HTTPS SSH
# You can clone a snippet to your computer for local editing.
# Learn more.