付記(7/13/2008):
はてなのスーパーpre記法(ほんと、しつこいな)でコードを載っけるのもいいかなと思い、
diary2hw.pl のコードをここに張り付けておく:
>|perl|
#!/usr/local/bin/perl
# converter from my diary.html to hw, hatena diary writer
Copyright (C) 1997-2018, Kengo Ichiki
# $Id: diary2hw.pl,v 1.1 2008/01/02 20:52:28 ichiki Exp $
use HTML::Parser;
use Jcode;
$topurl = "http://d.hatena.ne.jp/kichiki";
$oldurl = "http://kichiki.web.fc2.com/diary";
$year_start = 1997;
@list0 = (); # old blosxom entry files
# globals for check_category_in_link
$check_link_tag = '';
$check_link_li = 0;
$check_link_text = '';
# check for command-line argument
die "Usage: diary2blosxom.pl (HTML file)\n" unless @ARGV == 1;
# get the command-line argument
my $file = shift;
my $path, $file0;
if ($file =~ /^diary\d{4}\_\d{2}.shtml$/) {
$path = '';
}
elsif ($file =~ /(.*\/)(diary\d{4}\_\d{2}.shtml)$/) {
$path = $1;
$file0 = $2;
}
else {
print "Illeagal diary file!\n";
exit;
}
#print "PATH = $path\n";
# Create HTML Parser object
my $p = HTML::Parser->new
(api_version => 3,
start_h => [\&start, "tagname,attr"],
text_h => [\&text, "dtext"],
end_h => [\&end, "tagname"],
);
die "File $file does't exist.\n" unless -e $file;
open (HTML, "< $file") or die "Cannot open $file";
$h1flag = 0; # h1 tag
$h2flag = 0; # h2 tag
$preflag = 0; # pre tag
$nsaflag = 0; # not-shown anchor tag flag -- local copy, no-href tags
$ullevel = 0; # ul level
$year = 0;
$month = 0;
$day = 0;
$num = 1;
$name = "";
$title = "";
$body = "";
$daybody = "";
while (<HTML>) {
# convert input-line into utf8
Jcode::convert ( \$_, "utf8");
$p->parse($_);
}
close (HTML);
foreach $file (@list0) {
print "removed: $file\n";
unlink ($file);
}
exit;
sub start
{
my ($tagname, $attr) = @_;
my $i;
my $a, $c, $tmptag;
# h1 tag
if ($tagname eq "h1") {
$h1flag = 1;
}
# h2 tag
elsif ($tagname eq "h2") {
$h2flag = 1;
}
# a tag
elsif ($tagname eq "a") {
if ($ullevel == 0 && $h2flag == 1) {
$_ = $attr->{id};
if (/d(\d+)/) {
$day = $1;
$num = 1; # reset counter
}
}
if ($ullevel == 1 && $attr->{id} ne "") {
$_ = $attr->{id};
if (/d(\d+)-(.+)/) {
my $subday = $1;
if ($subday != $day) {
print "WRONG!! id= ",$attr->{id},"\n";
}
$name = $2;
}
}
if ($ullevel >= 1) {
if ($attr->{href} eq "") {
# do nothing
$nsaflag = 1;
}
else {
$_ = $attr->{href};
$_ =~ s/^images\/AMAZON/http:\/\/images.amazon.com\/images\/P/;
$_ =~ s/^theorems.html/http:\/\/kichiki.web.fc2.com\/diary\/theorems.html/;
$_ =~ s/^summaries.html/http:\/\/kichiki.web.fc2.com\/diary\/summaries.html/;
$_ =~ s/^images\//http:\/\/kichiki.web.fc2.com\/diary\/images\//;
if (/^\#d(\d+)-(.+)$/) {
if ($year >= $year_start) {
$_ = sprintf ("$topurl/%4d%02d%02d/%s",
$year, $month, $1, $2);
} else {
$_ = sprintf ("$oldurl/diary%4d_%02d.shtml\#d%d-%s",
$year, $month, $1, $2);
}
} elsif (/^diary(\d{4})\_(\d{2}).shtml$/) {
if ($1 >= $year_start) {
$_ = sprintf ("$topurl/archive/%4d%02d", $1, $2);
} else {
$_ = sprintf ("$oldurl/diary%4d_%02d.shtml", $1, $2);
}
} elsif (/^diary(\d{4})\_(\d{2}).shtml\#d(\d+)$/) {
if ($1 >= $year_start) {
$_ = sprintf ("$topurl/%4d%02d%02d", $1, $2, $3);
} else {
$_ = sprintf ("$oldurl/diary%4d_%02d.shtml\#d%d",
$1, $2, $3);
}
} elsif (/^diary(\d{4})\_(\d{2}).shtml\#d(\d+)-(.+)$/) {
if ($1 >= $year_start) {
$_ = sprintf ("$topurl/%4d%02d%02d/%s",
$1, $2, $3, $4);
} else {
$_ = sprintf ("$oldurl/diary%4d_%02d.shtml\#d%d-%s",
$1, $2, $3, $4);
}
}
if ($_ eq "") {
# do nothing
$nsaflag = 1;
}
elsif (/^LOCAL\//) {
# do nothing
$nsaflag = 1;
}
else {
$nsaflag = 0; # to show (for sure)
if ($ullevel == 1) {
$title .= "<a href=\"$_\">";
}
else {
$body .= "<a href=\"$_\">";
}
}
}
}
}
# ul tag
elsif ($tagname eq "ul") {
if ($ullevel >= 1) {
#$body .= "\n";
for ($i = 1; $i < $ullevel; $i ++) {
$body .= " ";
}
$body .= "<ul>\n";
}
$ullevel ++;
}
# li tag
elsif ($tagname eq "li") {
if ($ullevel == 1) {
# start of the entry
$name = "";
$title = "";
$body = "";
}
elsif ($ullevel > 1) {
for ($i = 2; $i < $ullevel; $i ++) {
$body .= " ";
}
$body .= "<li>\n";
}
}
# pre tag
elsif ($tagname eq "pre") {
$preflag = 1;
if ($ullevel == 1) {
$title .= "<pre>";
}
elsif ($ullevel > 1) {
$body .= "<pre>";
}
}
# other tags
else {
if ($ullevel >= 1) {
$tmptag = "<$tagname";
while (($a, $c) = each (%{$attr})) {
# for amazon images
if (($tagname eq "img")
and ($a eq "src")) {
if ($c =~ /^images\/AMAZON/) {
$c =~ s/^images\/AMAZON/http:\/\/images.amazon.com\/images\/P/;
}
elsif ($c =~ /^images/) {
$c =~ s/^images\//http:\/\/kichiki.web.fc2.com\/diary\/images\//;
}
}
if ($a ne "/") {
$tmptag .= " $a=\"$c\"";
}
}
if (($tagname eq "img")
or ($tagname eq "hr")
or ($tagname eq "br")) {
$tmptag .= " />";
}
else {
$tmptag .= ">";
}
if ($ullevel == 1) {
$title .= $tmptag;
}
else {
$body .= $tmptag;
}
}
}
}
sub text
{
my ($dtext) = shift;
$dtext =~ s/&/&/g;
if ($preflag != 1) {
$dtext =~ s/ +/ /g;
$dtext =~ s/^\n//g;
}
# else {
# $dtext =~ s/</</g;
# $dtext =~ s/>/>/g;
# $dtext =~ s/"/"/g;
# }
if ($h1flag == 1) {
#print "dtext = $dtext\n";
$_ = $dtext;
if (/(\d+)年(\d+)月/) {
$year = $1;
$month = $2;
#print "year = $year\nmonth = $month\n";
@list0 = getentries (".", $year, $month);
}
}
if ($preflag == 1) {
$dtext =~ s/\*\*\*/***/g;
$dtext =~ s/\-\-\-/ーーー/g;
$dtext =~ s/\+/+/g;
}
if ($nsaflag == 0) {
if ($ullevel == 1) {
$title .= $dtext;
}
elsif ($ullevel > 1) {
$body .= $dtext;
}
}
}
sub end
{
my ($tagname) = shift;
my $i;
my $blosxomfile;
my $localtitle;
# h1 tag
if ($tagname eq "h1") {
$h1flag = 0;
}
# h2 tag
elsif ($tagname eq "h2") {
$h2flag = 0;
}
# ul tag
elsif ($tagname eq "ul") {
$ullevel --;
if ($ullevel >= 1) {
#$body .= "\n";
for ($i = 1; $i < $ullevel; $i ++) {
$body .= " ";
}
$body .= "</ul>\n";
} else {
# end of the day
my $dayfile = sprintf ("%4d-%02d-%02d",
$year, $month, $day);
print_day_entry ($dayfile,
$daybody, $year, $month, $day);
# for sure
$daybody = "";
}
}
# a tag
elsif ($tagname eq "a") {
if ($ullevel >= 1 && $nsaflag == 0) {
if ($ullevel == 1) {
$title .= "</a>";
}
elsif ($ullevel > 1) {
$body .= "</a>";
}
}
else {
$nsaflag = 0;
}
}
# li tag
elsif ($tagname eq "li") {
if ($ullevel == 1) {
$blosxomfile = sprintf ("%4d-%02d-%02d-%02d",
$year, $month, $day, $num);
# end of the entry
#print_entry ($blosxomfile,
# $name, $title, $body,
# $year, $month, $day, $num);
if ($name ne "") {
$daybody .= "*$name*$title\n";
} else {
$daybody .= "*$title\n";
}
# cut extra space in front of [] if exist
$daybody =~ s/\*( +)\[/\*\[/;
$daybody .= "$body\n";
# for sure
$num ++;
$name = "";
$title = "";
$body = "";
}
elsif ($ullevel > 1) {
#$body .= "\n";
for ($i = 1; $i < $ullevel; $i ++) {
$body .= " ";
}
$body .= "</li>\n";
}
}
# pre tag
elsif ($tagname eq "pre") {
$preflag = 0;
if ($ullevel == 1) {
$title .= "</pre>";
}
elsif ($ullevel > 1) {
$body .= "</pre>";
}
}
# other tags
else {
if ($ullevel >= 1) {
if ($ullevel == 1) {
$title .= "</$tagname>";
}
else {
$body .= "</$tagname>";
}
}
}
}
sub print_day_entry {
my ($f, # file
$b, # body
$y, # year (4 digits)
$m, # month (1 - 12)
$d, # day (1 - 31)
) = @_;
my @txtmon = ("Jan", "Feb", "Mar", "Apr", "May", "Jun",
"Jul", "Aug", "Sep", "Oct", "Nov", "Dec", "13");
my $filename = '';
$t =~ s/\n//g;
$t =~ s/^ +//g;
$t =~ s/ +$//g;
$t =~ s/ / /g;
$t =~ s/\Q()\E//g;
if ($t eq "") {
$t = "-";
}
$b =~ s/\Q()\E//g;
$filename = "./$f.txt";
open (FH, "> tmp.txt");
print (FH "\n");
print (FH "$b\n");
close (FH);
my $flag_match = 0;
my $count = 0;
foreach my $file (@list0) {
if ($file eq $filename) {
$flag_match = 1;
if (!compare_files ("tmp.txt", $file)) {
# modified
print ("updated: $filename\n");
rename ("tmp.txt", $filename);
}
else {
# same
unlink ("tmp.txt");
}
# remove the file from @list0
splice (@list0, $count, 1);
return;
}
$count ++;
}
if ($flag_match == 0) {
# new entry
print ("new: $filename\n");
rename ("tmp.txt", $filename);
}
}
# "getfiles" is borrowed from
# Blosxom Plugin: recentwritebacks_tree
# Author(s): typester <typester@unknownplace.org>
# Version: 1.0
# Blosxom Home/Docs/Licensing: http://www.blosxom.com/
sub getfiles {
my $dir = $_[0];
my @files = ();
my @ret = ();
$dir .= "/" if ($dir =~ /[^\/]$/);
if (opendir (DIR, $dir)) {
@files = readdir (DIR);
closedir (DIR);
}
foreach my $file (@files) {
next if ($file eq '.' or $file eq '..');
if (-d "$dir$file" and $file ne '') {
my @subdir = &getfiles("$dir$file");
@ret = (@ret, @subdir);
}
else {
push (@ret, "$dir$file");
}
}
return @ret;
}
sub getentries {
my ($dir, $year, $month) = @_;
my @ret = ();
my @files = getfiles ($dir);
my $str_ym = sprintf ("%4d-%02d", $year, $month);
foreach my $file (@files) {
if ($file =~ /$str_ym.+\.txt/) {
push (@ret, "$file");
}
}
return @ret;
}
# return true when two files are same
sub compare_files {
my ($f1, $f2) = @_;
my $l1 = '';
my $l2 = '';
my $ret = 1; # true
my $s1 = -s $f1;
my $s2 = -s $f2;
if ($s1 != $s2) {
$ret = 0; # false
}
else {
open (F1, "< $f1");
open (F2, "< $f2");
COMPARE: while ($l1 = <F1> and
$l2 = <F2>) {
if ($l1 ne $l2) {
$ret = 0; # false
last COMPARE;
}
}
close (F1);
close (F2);
}
return $ret;
}
# check link
sub check_link_start
{
my ($tagname, $attr) = @_;
# $check_link_li and $check_link_tag are global.
# a tag
if ($tagname eq "a") {
if ($attr->{id} eq $check_link_tag) {
$check_link_li = 1;
}
}
}
sub check_link_text
{
my ($dtext) = shift;
# $check_link_li and $check_link_text are global.
$dtext =~ s/&/&/g;
$dtext =~ s/ +/ /g;
$dtext =~ s/^\n//g;
if ($check_link_li == 1) {
$check_link_text .= $dtext;
}
}
sub check_link_end
{
my ($tagname) = shift;
# $check_link_li is global.
# li tag
if ($tagname eq "li" and $check_link_li == 1) {
$check_link_li = 0;
}
}
||<
cf. 7/12/2008:
はてなの名前付きタグの件。