#!/usr/bin/perl
# -----------------------------------------------------------------------------
# zzzrecode.pl -- recode data received from IRC network
# Copyright (C) 2004 Fabian "zzznowman" Pietsch <fabian-irssi@zzznowman.dyndns.org>
#
# Distributed under the GNU GPL (General Public Licence)
# zzzrecode.pl is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# zzzrecode.pl is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# -----------------------------------------------------------------------------
#

use strict;
use vars qw($VERSION %IRSSI);

$VERSION = '0.1.1';

%IRSSI = (
	'authors'     => 'Fabian "zzznowman" Pietsch',
	'contact'     => 'fabian-irssi@zzznowman.dyndns.org',
	'name'        => 'zzzrecode',
	'description' => 'Recode data received from IRC network',
	'license'     => 'GPL',
	'url'         => 'http://zzz.deixu.net/software/raw/script/irssi/zzzrecode_pl',
	'changed'     => '2004-02-10'
);

use Irssi;


# utf8_to_latin1() -- decode UTF-8
sub utf8_to_latin1
{
	my (@input, @output);

	# join arguments into a single array of input bytes
	foreach my $arg (@_)
	{
		push(@input, split(//, $arg));
	}

	# process every input byte
	my $i = 0;
	while ($i <= $#input)
	{
		my $bfirst = $input[$i];

		# is this the first byte of a UTF-8 multibyte char?
		# (i.e., 11xxxxxx; 0xxxxxxx means normal char,
		#  10xxxxxx would mean second byte => erroneous char)
		if (vec($bfirst, 3, 2) == 3)
		{
			my ($count, $chrnum);

			# two-byte sequence?
			if (vec($bfirst, 5, 1) == 0)
			{
				$count = 2;
				$chrnum = ord(($bfirst & chr(0x1f))) << (6 * ($count - 1));
			}
			## three-byte sequence?
			#elsif (vec($byte, 4, 1) == 0)
			#{
			#	$chrnum = ($byte & 0x0f) * 0x1000;
			#	$count = 3;
			#}
			# [...]
			# ^ we couldn't transform this to latin1 anyway...

			# valid first byte of sequence found?
			goto PASS unless
			  (defined($count) && ($i + $count <= scalar(@input)));

			# process trailing bytes of sequence
			my $j = 0;
			while (++$j < $count)
			{
				my $byte = $input[$i + $j];

				# valid trailing byte in sequence?
				goto PASS unless (vec($byte, 3, 2) == 2);

				# extract this sequence byte's data bits
				# and put them at the right place
				$chrnum |=
				  ord($byte & chr(0x3f)) << (6 * ($count - 1 - $j));
			}

			# complete sequence processed
			# can the Unicode char be expressed in latin1?
			if ($chrnum < 256)
			{
				# append decoded char to output
				push(@output, chr($chrnum));
				$i += $count;
				next;
			}
		}

	PASS:	# simply append this char to output
		push(@output, $bfirst);
		$i++;
	}

	return(join('', @output));
}

# latin1_to_utf8() -- encode as UTF-8
sub latin1_to_utf8(@)
{
	die("latin1_to_utf8(): not implemented yet");
}

# recode() -- transform a string from one encoding to another
sub recode($$@)
{
	my ($from, $to, @data) = @_;

	if (($from eq 'utf8') && ($to eq 'iso8859-1'))
	{
		# decode UTF-8
		return utf8_to_latin1(@data);
	}
	elsif (($from eq 'iso8859-1') && ($to eq 'utf8'))
	{
		# encode as UTF-8
		return latin1_to_utf8(@data);
	}
	else
	{
		die("recode(): invalid encoding");
	}
}

my $lock = 0;

sub sig_print_text
{
	if ($lock)
	{
#		print(LOG "[sig_skip] lock set => skipping signal handler (\"",
#		      join('"; "', @_), "\")\n");
		return 0;
	}

	# dest, text, stripped
#	print(LOG "[sig] + entered signal handler (\"",
#	      join('"; "', @_), "\")\n");
	my $newtext     = utf8_to_latin1($_[1]);
	my $newstripped = utf8_to_latin1($_[2]);
	if (($newtext ne $_[1]) || ($newstripped ne $_[2]))
	{
#		print(LOG "[sig] # converted to \"$newtext\"/\"$newstripped\"\n");
		$lock = 1;
		Irssi::signal_emit('print text', $_[0], $newtext, $newstripped);
		$lock = 0;
		Irssi::signal_stop();
	}
#	print(LOG "[sig] - returning from signal handler.\n");
	return 0;
}

# open(LOG, '>>zzzrecode.log') or die("Can't open log: $!");
# my $oldfh = select(LOG); $| = 1; select($oldfh); undef $oldfh;
# print(LOG "-" x 80 . "\n");
# print(LOG "[main] adding signal handler\n");
Irssi::signal_add_first('print text', \&sig_print_text);
# print(LOG "[main] initialization complete\n");

