付記(7/13/2008):
script は一応ここに置いてあるけれど、
はてなのスーパーpre記法(しつこいか)でコードを載っけるのもいいかな、と思って、
ここに張り付けておく(最新版):
>|perl|
#!/usr/local/bin/perl
# converter from my diary.html to blosxom
Copyright (C) 1997-2018, Kengo Ichiki
# $Id: diary2blosxom.pl,v 1.21 2008/07/13 15:41:53 ichiki Exp $
use HTML::Parser;
use Jcode;
$topurl = "http://kichiki.web.fc2.com/cgi-bin/blosxom.cgi/diary";
$flavour = "html";
@list0 = (); # old blosxom entry files
# globals for check_category_in_link
$check_link_tag = '';
$check_link_li = 0;
$check_link_text = '';
# check for command-line argument
die "Usage: diary2blosxom.pl (HTML file)\n" unless @ARGV == 1;
# get the command-line argument
my $file = shift;
my $path, $file0;
if ($file =~ /^diary\d{4}\_\d{2}.shtml$/) {
$path = '';
}
elsif ($file =~ /(.*\/)(diary\d{4}\_\d{2}.shtml)$/) {
$path = $1;
$file0 = $2;
}
else {
print "Illeagal diary file!\n";
exit;
}
#print "PATH = $path\n";
# Create HTML Parser object
my $p = HTML::Parser->new
(api_version => 3,
start_h => [\&start, "tagname,attr"],
text_h => [\&text, "dtext"],
end_h => [\&end, "tagname"],
);
die "File $file does't exist.\n" unless -e $file;
open (HTML, "< $file") or die "Cannot open $file";
$h1flag = 0; # h1 tag
$h2flag = 0; # h2 tag
$preflag = 0; # pre tag
$nsaflag = 0; # not-shown anchor tag flag -- local copy, no-href tags
$ullevel = 0; # ul level
$year = 0;
$month = 0;
$day = 0;
$num = 1;
$title = "";
$body = "";
$subn = 0;
$subday = 0;
$subname = "";
@subfile;
@subtitle;
@subbody;
@subullevel;
while (<HTML>) {
# convert input-line into utf8
Jcode::convert ( \$_, "utf8");
$p->parse($_);
}
close (HTML);
foreach $file (@list0) {
print "removed: $file\n";
unlink ($file);
}
exit;
sub start
{
my ($tagname, $attr) = @_;
my $i;
my $a, $c, $tmptag;
my $link_category;
# h1 tag
if ($tagname eq "h1") {
$h1flag = 1;
}
# h2 tag
elsif ($tagname eq "h2") {
$h2flag = 1;
}
# a tag
elsif ($tagname eq "a") {
if ($ullevel == 0 && $h2flag == 1) {
$_ = $attr->{id};
if (/d(\d+)/) {
$day = $1;
$num = 1; # reset counter
}
}
if ($ullevel >= 1) {
if ($attr->{href} eq "") {
# do nothing
$nsaflag = 1;
}
else {
$_ = $attr->{href};
$_ =~ s/^images\/AMAZON/http:\/\/images.amazon.com\/images\/P/;
$_ =~ s/^theorems.html/http:\/\/kichiki.web.fc2.com\/diary\/theorems.html/;
$_ =~ s/^summaries.html/http:\/\/kichiki.web.fc2.com\/diary\/summaries.html/;
$_ =~ s/^images\//http:\/\/kichiki.web.fc2.com\/diary\/images\//;
if (/^\#(.+)/) {
$_ = sprintf ("diary%4d_%02d.shtml\#%s",
$year, $month, $1);
}
if (/^diary(\d{4})\_(\d{2}).shtml\#d(\d+)-(.+)/) {
$link_category = check_category_in_link ($_);
if ($link_category eq "") {
$_ = sprintf ("$topurl/%4d%02d%02d-%s.$flavour",
$1, $2, $3, $4);
}
else {
$_ = sprintf
("$topurl/$link_category/%4d%02d%02d-%s.$flavour",
$1, $2, $3, $4);
}
}
elsif (/^diary(\d{4})\_(\d{2}).shtml\#d(\d+)/) {
$_ = "$topurl/$1/$2/$3";
}
elsif (/^diary(\d{4})\_(\d{2}).shtml/) {
$_ = "$topurl/$1/$2";
}
if ($_ eq "") {
# do nothing
$nsaflag = 1;
}
elsif (/^LOCAL\//) {
# do nothing
$nsaflag = 1;
}
else {
$nsaflag = 0; # to show (for sure)
if ($subn == 0) {
if ($ullevel == 1) {
$title .= "<a href=\"$_\">";
}
else {
$body .= "<a href=\"$_\">";
}
}
else {
if ($subullevel[$subn-1] == $ullevel) {
$subtitle[$subn-1] .= "<a href=\"$_\">";
}
else {
$subbody[$subn-1] .= "<a href=\"$_\">";
}
}
}
}
if ($attr->{id} ne "") {
$_ = $attr->{id};
if (/d(\d+)-(.+)/) {
$subn ++;
$subday = $1;
$subname = $2;
if ($subday != $day) {
print "WRONG!! id= ",$attr->{id},"\n";
}
$subfile [$subn-1]
= sprintf ("%4d%02d%02d-%s",
$year, $month, $day, $subname);
$subullevel [$subn-1] = $ullevel;
$subtitle [$subn-1] = "";
$subbody [$subn-1] = "";
}
}
}
}
# ul tag
elsif ($tagname eq "ul") {
if ($ullevel >= 1) {
if ($subn == 0) {
$body .= "\n";
for ($i = 1; $i < $ullevel; $i ++) {
$body .= " ";
}
$body .= "<ul>\n";
}
else {
$subbody [$subn-1] .= "\n";
for ($i = 1; $i < ($ullevel - $subullevel[$subn-1]); $i ++) {
$subbody [$subn-1] .= " ";
}
$subbody [$subn-1] .= "<ul>\n";
}
}
$ullevel ++;
}
# li tag
elsif ($tagname eq "li") {
if ($subn == 0) {
if ($ullevel == 1) {
# start of the entry
$title = "";
$body = "";
}
elsif ($ullevel > 1) {
for ($i = 2; $i < $ullevel; $i ++) {
$body .= " ";
}
$body .= "<li>\n";
}
}
else {
for ($i = 2; $i < ($ullevel - $subullevel[$subn-1]); $i ++) {
$subbody [$subn-1] .= " ";
}
$subbody [$subn-1] .= "<li>\n";
}
}
# pre tag
elsif ($tagname eq "pre") {
$preflag = 1;
if ($subn == 0) {
if ($ullevel == 1) {
$title .= "<pre>";
}
elsif ($ullevel > 1) {
$body .= "<pre>";
}
}
else {
if ($ullevel == $subullevel[$subn-1]) {
$subtitle[$subn-1] .= "<pre>";
}
elsif ($ullevel > $subullevel[$subn-1]) {
$subbody[$subn-1] .= "<pre>";
}
}
}
# other tags
else {
if ($ullevel >= 1) {
$tmptag = "<$tagname";
while (($a, $c) = each (%{$attr})) {
# for amazon images
if (($tagname eq "img")
and ($a eq "src")) {
if ($c =~ /^images\/AMAZON/) {
$c =~ s/^images\/AMAZON/http:\/\/images.amazon.com\/images\/P/;
}
elsif ($c =~ /^images/) {
$c =~ s/^images\//http:\/\/kichiki.web.fc2.com\/diary\/images\//;
}
}
if ($a ne "/") {
$tmptag .= " $a=\"$c\"";
}
}
if (($tagname eq "img")
or ($tagname eq "hr")
or ($tagname eq "br")) {
$tmptag .= " />";
}
else {
$tmptag .= ">";
}
if ($subn == 0) {
if ($ullevel == 1) {
$title .= $tmptag;
}
else {
$body .= $tmptag;
}
}
else {
if ($subullevel[$subn-1] == $ullevel) {
$subtitle[$subn-1] .= $tmptag;
}
else {
$subbody[$subn-1] .= $tmptag;
}
}
}
}
}
sub text
{
my ($dtext) = shift;
$dtext =~ s/&/&/g;
if ($preflag != 1) {
$dtext =~ s/ +/ /g;
$dtext =~ s/^\n//g;
}
else {
$dtext =~ s/</</g;
$dtext =~ s/>/>/g;
$dtext =~ s/"/"/g;
}
if ($h1flag == 1) {
#print "dtext = $dtext\n";
$_ = $dtext;
if (/(\d+)年(\d+)月/) {
$year = $1;
$month = $2;
#print "year = $year\nmonth = $month\n";
@list0 = getentries (".", $year, $month);
}
}
if ($nsaflag == 0) {
if ($subn == 0) {
if ($ullevel == 1) {
$title .= $dtext;
}
elsif ($ullevel > 1) {
$body .= $dtext;
}
}
elsif ($subullevel[$subn-1] == $ullevel) {
$subtitle[$subn-1] .= $dtext;
}
else {
$subbody[$subn-1] .= $dtext;
}
}
}
sub end
{
my ($tagname) = shift;
my $i;
my $blosxomfile;
my $category;
my $localtitle;
# h1 tag
if ($tagname eq "h1") {
$h1flag = 0;
}
# h2 tag
elsif ($tagname eq "h2") {
$h2flag = 0;
}
# ul tag
elsif ($tagname eq "ul") {
$ullevel --;
if ($subn == 0) {
if ($ullevel >= 1) {
$body .= "\n";
for ($i = 1; $i < $ullevel; $i ++) {
$body .= " ";
}
$body .= "</ul>\n";
}
}
else {
$subbody[$subn-1] .= "\n";
for ($i = 1; $i < ($ullevel-$subullevel[$subn-1]); $i ++) {
$subbody[$subn-1] .= " ";
}
$subbody[$subn-1] .= "</ul>\n";
}
}
# a tag
elsif ($tagname eq "a") {
if ($ullevel >= 1 && $nsaflag == 0) {
if ($subn == 0) {
if ($ullevel == 1) {
$title .= "</a>";
}
elsif ($ullevel > 1) {
$body .= "</a>";
}
}
else {
if ($ullevel == $subullevel[$subn-1]) {
$subtitle[$subn-1] .= "</a>";
}
elsif ($ullevel > $subullevel[$subn-1]) {
$subbody[$subn-1] .= "</a>";
}
}
}
else {
$nsaflag = 0;
}
}
# li tag
elsif ($tagname eq "li") {
if ($subn == 0) {
if ($ullevel == 1) {
$blosxomfile = sprintf ("%4d%02d%02d%02d",
$year, $month, $day, $num);
# end of the entry
print_entry ($blosxomfile,
$title,
$body,
$year, $month, $day, $num);
# for sure
$num ++;
$title = "";
$body = "";
}
elsif ($ullevel > 1) {
$body .= "\n";
for ($i = 1; $i < $ullevel; $i ++) {
$body .= " ";
}
$body .= "</li>\n";
}
}
else {
if ($ullevel == $subullevel[$subn-1]) {
# end of the entry
print_entry ($subfile[$subn-1],
$subtitle[$subn-1],
$subbody[$subn-1],
$year, $month, $day, $num);
# make a link
# categorize
#$category = categorize ($subtitle[$subn-1]);
($category, $subtitle[$subn-1])
= categorize ($subtitle[$subn-1]);
# remove anchor tag in the tiltke
$localtitle = $subtitle[$subn-1];
$localtitle =~ s/<a .+?>//g;
$localtitle =~ s/<\/a>//g;
if ($subn == 1) {
if ($category eq "") {
$body .=
"<a href=\"$topurl/$subfile[$subn-1].$flavour\">$localtitle</a>";
}
else {
$body .=
"<a href=\"$topurl/$category/$subfile[$subn-1].$flavour\">$localtitle</a>";
}
}
else {
if ($category eq "") {
$subbody [$subn - 2] .=
"<a href=\"$topurl/$subfile[$subn-1].$flavour\">$localtitle</a>";
}
else {
$subbody [$subn - 2] .=
"<a href=\"$topurl/$category/$subfile[$subn-1].$flavour\">$localtitle</a>";
}
}
# for sure
$num ++;
$subfile [$subn-1] = "";
$subtitle [$subn-1] = "";
$subbody [$subn-1] = "";
$subullevel[$subn-1] = 0;
$subn --;
}
else {
$subbody[$subn-1] .= "\n";
for ($i = 1; $i < ($ullevel-$subullevel[$sun-1]); $i ++) {
$subbody[$subn-1] .= " ";
}
$subbody[$subn-1] .= "</li>\n";
}
}
}
# pre tag
elsif ($tagname eq "pre") {
$preflag = 0;
if ($subn == 0) {
if ($ullevel == 1) {
$title .= "</pre>";
}
elsif ($ullevel > 1) {
$body .= "</pre>";
}
}
else {
if ($ullevel == $subullevel[$subn-1]) {
$subtitle[$subn-1] .= "</pre>";
}
elsif ($ullevel > $subullevel[$subn-1]) {
$subbody[$subn-1] .= "</pre>";
}
}
}
# other tags
else {
if ($ullevel >= 1) {
if ($subn == 0) {
if ($ullevel == 1) {
$title .= "</$tagname>";
}
else {
$body .= "</$tagname>";
}
}
else {
if ($ullevel == $subullevel[$subn-1]) {
$subtitle[$subn-1] .= "</$tagname>";
}
else {
$subbody[$subn-1] .= "</$tagname>";
}
}
}
}
}
sub print_entry {
my ($f, # file
$t, # title
$b, # body
$y, # year (4 digits)
$m, # month (1 - 12)
$d, # day (1 - 31)
$n # number (1 - )
) = @_;
my $category;
my @txtmon = ("Jan", "Feb", "Mar", "Apr", "May", "Jun",
"Jul", "Aug", "Sep", "Oct", "Nov", "Dec", "13");
my $filename = '';
$t =~ s/\n//g;
$t =~ s/^ +//g;
$t =~ s/ +$//g;
$t =~ s/ / /g;
$t =~ s/\Q()\E//g;
if ($t eq "") {
$t = "-";
}
$b =~ s/\Q()\E//g;
# categorize
#$category = categorize ($t);
($category, $t) = categorize ($t);
if ($category ne "") {
unless (-e $category) {
mkdir "$category", 0755
or die "Cannot make dir $category\n";
}
$filename = "./$category/$f.txt";
}
else {
$filename = "./$f.txt";
}
open (FH, "> tmp.txt");
print (FH "$t\n");
printf (FH "meta-creation_date: %s %d, %d 00:%02d\n",
$txtmon[int($m-1)], $d, $y, $n);
print (FH "$b\n");
close (FH);
my $flag_match = 0;
my $count = 0;
foreach my $file (@list0) {
if ($file eq $filename) {
$flag_match = 1;
if (!compare_files ("tmp.txt", $file)) {
# modified
print ("updated: $filename\n");
rename ("tmp.txt", $filename);
}
else {
# same
unlink ("tmp.txt");
}
# remove the file from @list0
splice (@list0, $count, 1);
return;
}
$count ++;
}
if ($flag_match == 0) {
# new entry
print ("new: $filename\n");
rename ("tmp.txt", $filename);
}
}
# "getfiles" is borrowed from
# Blosxom Plugin: recentwritebacks_tree
# Author(s): typester <typester@unknownplace.org>
# Version: 1.0
# Blosxom Home/Docs/Licensing: http://www.blosxom.com/
sub getfiles {
my $dir = $_[0];
my @files = ();
my @ret = ();
$dir .= "/" if ($dir =~ /[^\/]$/);
if (opendir (DIR, $dir)) {
@files = readdir (DIR);
closedir (DIR);
}
foreach my $file (@files) {
next if ($file eq '.' or $file eq '..');
if (-d "$dir$file" and $file ne '') {
my @subdir = &getfiles("$dir$file");
@ret = (@ret, @subdir);
}
else {
push (@ret, "$dir$file");
}
}
return @ret;
}
sub getentries {
my ($dir, $year, $month) = @_;
my @ret = ();
my @files = getfiles ($dir);
my $str_ym = sprintf ("%4d%02d", $year, $month);
foreach my $file (@files) {
if ($file =~ /$str_ym.+\.txt/) {
push (@ret, "$file");
}
}
return @ret;
}
# return true when two files are same
sub compare_files {
my ($f1, $f2) = @_;
my $l1 = '';
my $l2 = '';
my $ret = 1; # true
my $s1 = -s $f1;
my $s2 = -s $f2;
if ($s1 != $s2) {
$ret = 0; # false
}
else {
open (F1, "< $f1");
open (F2, "< $f2");
COMPARE: while ($l1 = <F1> and
$l2 = <F2>) {
if ($l1 ne $l2) {
$ret = 0; # false
last COMPARE;
}
}
close (F1);
close (F2);
}
return $ret;
}
# check link
sub check_category_in_link {
my $link = shift;
my ($year, $month, $day);
my $link_parser;
my $category;
# globals: $path, $check_link_tag, $check_link_li, $check_link_text.
#print "CHECK : ".$link." => ";
$category = '';
if ($link =~ /^diary(\d{4})\_(\d{2}).shtml\#d(\d+)-(.+)/) {
$year = $1;
$month = $2;
$day = $3;
$check_link_tag = "d$3-$4";
#print $check_link_tag." => ";
$file = sprintf ("diary%4d_%02d.shtml", $1, $2);
#print $file." => ";
open (LINK, "< $path$file") or return $category;
$link_parser = HTML::Parser->new
(api_version => 3,
start_h => [\&check_link_start, "tagname,attr"],
text_h => [\&check_link_text, "dtext"],
end_h => [\&check_link_end, "tagname"],
);
$check_link_li = 0;
$check_link_text = '';
while (<LINK>) {
# convert input-line into utf8
Jcode::convert ( \$_, "utf8");
$link_parser->parse($_);
}
close (LINK);
#$category = categorize ($check_link_text);
($category, $check_link_text) = categorize ($check_link_text);
}
#print $category."\n";
return $category;
}
sub check_link_start
{
my ($tagname, $attr) = @_;
# $check_link_li and $check_link_tag are global.
# a tag
if ($tagname eq "a") {
if ($attr->{id} eq $check_link_tag) {
$check_link_li = 1;
}
}
}
sub check_link_text
{
my ($dtext) = shift;
# $check_link_li and $check_link_text are global.
$dtext =~ s/&/&/g;
$dtext =~ s/ +/ /g;
$dtext =~ s/^\n//g;
if ($check_link_li == 1) {
$check_link_text .= $dtext;
}
}
sub check_link_end
{
my ($tagname) = shift;
# $check_link_li is global.
# li tag
if ($tagname eq "li" and $check_link_li == 1) {
$check_link_li = 0;
}
}
# check category : news, phys, job, blog, FreeBSD
sub categorize {
my $t = shift;
my $category = '';
#print "CATEGORY: ".$t." => ";
$t =~ s/^ +//g;
$t =~ s/^\n//g;
if ($t =~ /^\Q[\Enews\Q]\E/) {
$category = "news";
$t =~ s/\Q[\Enews\Q]\E//;
}
elsif ($t =~ /^\Q[\Ejob\Q]\E/) {
$category = "job";
$t =~ s/\Q[\Ejob\Q]\E//;
}
elsif ($t =~ /^\Q[\Ephys\Q]\E/) {
$category = "phys";
$t =~ s/\Q[\Ephys\Q]\E//;
}
elsif ($t =~ /^\Q[\Eblog\Q]\E/) {
$category = "blog";
$t =~ s/\Q[\Eblog\Q]\E//;
}
elsif ($t =~ /^\Q[\EFreeBSD\Q]\E/) {
$category = "FreeBSD";
$t =~ s/\Q[\EFreeBSD\Q]\E//;
}
elsif ($t =~ /^\Q[\Een\Q]\E/) {
$category = "en";
$t =~ s/\Q[\Een\Q]\E//;
}
elsif ($t =~ /^\Q[\ESF\Q]\E/) {
$category = "SF";
$t =~ s/\Q[\ESF\Q]\E//;
}
elsif ($t =~ /^\Q[\Emonton\Q]\E/) {
$category = "monton";
$t =~ s/\Q[\Emonton\Q]\E//;
}
#elsif ($t =~ /^\Q[\E.+\Q]\E/) {
# $category = "phys";
#}
#print $category."\n";
#return $category;
return ($category, $t);
}
||<
cf. 7/12/2008:
はてなの名前付きタグの件。