#!/usr/bin/env perl

# SPDX-FileCopyrightText: © 2016 Stefano Zacchiroli <zack@upsilon.cc>
# SPDX-FileCopyrightText: © 2018 Martin Michlmayr <tbm@cyrius.com>
# SPDX-FileCopyrightText: © 2020 Software in the Public Interest, Inc.

# SPDX-License-Identifier: GPL-3.0-or-later

# convert a ledger-cli file to beancount format

use warnings;
use strict;

use experimental 'smartmatch';
use utf8;
use feature 'unicode_strings';
use open qw/:std :locale/;

use Config::Onion;
use Date::Calc qw/Add_Delta_Days/;
use DateTime::Format::Strptime qw/strftime/;
use File::BaseDir qw/config_home/;
use Getopt::Long::Descriptive;
use List::MoreUtils qw(uniq);
use Memoize;
use POSIX qw/ceil/;
use Regexp::Common;
use String::Interpolate qw(safe_interpolate);
use Unicode::Normalize;

memoize('map_account');
memoize('map_commodity');
memoize('map_metadata');
memoize('pp_date');

use enum qw(DEPTH TYPE BLANK COMMENT POSTING FLAG ACCOUNT WHITESPACE AMOUNT
            COST PRICE ASSERTION NUMBER CURRENCY DATE NOTE FIXATED);

my ($opt, $usage) = describe_options(
    "ledger2beancount %o <ledger-file>",
    [ "config|c=s", "configuration file", ],
    [ "help|h",     "print usage message and exit", { shortcircuit => 1 } ],
);

print($usage->text), exit if $opt->help;

my @config_files = ('ledger2beancount.yml', config_home('ledger2beancount', 'config.yml'));
if (defined $opt->config) {
    if (! -e $opt->config) {
	print "Config file ", $opt->config, " doesn't exist\n";
	exit 1;
    }
    if ($opt->config !~ /\.(yml|yaml)$/) {
	print "Config file must end in .yml or .yaml\n";
	exit 1;
    }
    unshift @config_files, $opt->config;
}
my $config = Config::Onion->new;
foreach my $config_file (@config_files) {
    next if ! -e $config_file;
    # Config::Onion expects filename without extension
    $config_file =~ s/.(yml|yaml)$//;
    $config = Config::Onion->load($config_file);
    # We don't actually use the Config::Onion feature to load several
    # config files.  We merely use it to set some defaults.
    last;
}

$config->set_default(date_format => "%Y-%m-%d");
$config->set_default(date_format_no_year => "%m-%d");
$config->set_default(account_open_date => "1970-01-01");
$config->set_default(commodities_date => "1970-01-01");
$config->set_default(payee_tag => "");
$config->set_default(payer_tag => "");
$config->set_default(payee_match => []);
$config->set_default(ledger_indent => 4);
$config->set_default(beancount_indent => 2);
$config->set_default(automatic_declarations => 1);
$config->set_default(decimal_comma => 0);
$config->set_default(convert_virtual => 0);
$config->set_default(commodity_map => {"\$" => "USD", "£" => "GBP", "€" => "EUR", "¥" => "JPY"});

# Make config variables easier to access
$config = $config->get;

if (ref $config->{payee_match} ne ref []) {
    die "Config variable payee_match has to be a Yaml list";
}

# regular expression snippets used for ledger parsing
my $date_RE = qr/\d+[^ =]+/;
my $hledger_date_RE = qr#(\d{4}[./-])?\d{1,2}[./-]\d{1,2}#;
my $flags_RE = qr/[*!]/;
my $txn_header_RE = qr/^(?<date>$date_RE)(=(?<auxdate>$date_RE))?(\s+|$)(?<flag>$flags_RE)?(\s*\((?<code>[^)]*)\))?\s*(?<narration>.*)/;
my $hledger_payee_narration_RE = qr/(?<payee>[^|]+?)\s*\|\s*(?<narration>.*)/;
my $tags_RE = qr/(?<tags>[\w:-]+)/;
my $hledger_tags_RE = qr/(?<tags>[^ ]+:.*)/;
# An account can be pretty much anything but in a posting it has to be
# followed by two spaces, a tab or new line.
# To keep the regex simpler, we parse the () and [] for virtual accounts
# as part of the account name and strip these brackets later.
my $account_RE = qr/[^\s;][^\t]*?/;
my $number_RE = qr/([\d,]*\d(\.\d+)?|\.\d+)/;
my $number_decimal_comma_RE = qr/([\d.]*\d(,\d+)?|,\d+)/;
if ($config->{decimal_comma}) {
    $number_RE = $number_decimal_comma_RE;
}
# A quoted commodity ("LU0274208692") can contain anything
# between quotes.
my $commodity_quoted_RE = qr/(["'])(?:(?=(\\?))\g{-1}.)*?\g{-2}/;
# An unquoted commodity may not contain certain characters
my $commodity_unquoted_RE = qr/(?!-)[^;\s0-9=)("'{}@*\/+,.-]+/;
my $commodity_RE = qr/$commodity_quoted_RE|$commodity_unquoted_RE/;
my $comment_top_level_RE = qr/[;#%|*]\s?(?<comment>.*?)/;
my $comment_RE = qr/;\s*(?<comment>.*)/;
my $metadata_RE = qr/;\s*(?<key>[^\h:][^\h]*?):(?<typed>:)?(\s*$|\s+(?<value>.*))/;
my $hledger_metadata_RE = qr/(?<key>[^:]+):\s*(?<value>.+)/;
# Postings: this does not match everything, just ensures a line consists of
# a posting
my $posting_RE = qr/((?<flag>$flags_RE)\s+)?(?<account>$account_RE)/;
my $price_RE = qr/^P\s+(?<date>$date_RE)\s+(\d\d:\d\d(:\d\d)?\s+)?(?<commodity1>$commodity_RE)\s+(?<commodity2>.*)/;

# Maximum limit for beancount commodities
my $BEANCOUNT_COMMODITY_MAX_LEN = 24;

my @beancount_root_names = qw/Assets Liabilities Equity Income Expenses/;

my @pre_output; # Used to store output of the script before automatic declarations
my @output; # Used to store the output of the script after automatic declarations
my $after_auto = 0;
# Store accounts and commodities encountered and declared
# value == undef: seen
# value == 1: declared
my %account_declared;
my %commodity_declared;
# Store some ledger directives relevant for processing
my @ledger_apply; # Capture open "apply" statements
my %ledger_alias; # Capture "alias" statements
my $ledger_bucket; # Use bucket if there's only one posting
# Conversion notes for users from ledger2beancount
my @conversion_notes;
# Keep track of all ledger accounts and commodities to check for
# collisions after remapping is done.
my %ledger_accounts;
my %ledger_commodities;
my %ledger_metadata;

# Date parsing functions
my $date_complete = DateTime::Format::Strptime->new(
    pattern  => $config->{date_format},
    on_error => "undef",
);

my $date_no_year = DateTime::Format::Strptime->new(
    pattern  => $config->{date_format_no_year},
    on_error => "undef",
);

my $date_iso = DateTime::Format::Strptime->new(
    pattern  => "%Y-%m-%d",
    on_error => "undef",
);

# Declarations
sub map_commodity($);
sub print_tags($@);


# indent some content at a given depth in beancount style
sub indent($$) {
    my ($depth, $content) = @_;

    return ' ' x ($depth * $config->{beancount_indent}) . $content;
}


sub escape_beancount_string($) {
    my ($s) = @_;
    $s =~ s/\\/\\\\/g;
    $s =~ s/"/\\"/g;
    return $s;
}

# return a beancount string literal, with a given content
sub mk_beancount_string($) {
    my ($s) = @_;
    if ($s !~ /\\|"/) {
	return '"' . $s . '"';
    } else {
	return '"' . escape_beancount_string($s) . '"';
    }
}


# Print a date in ISO 8601 format (YYYY-MM-DD)
sub pp_date($$) {
    my ($date_str, $year) = @_;

    my $date;

    # Try the date formats built into ledger
    my $date_str_iso;
    if ($date_str =~ /^(\d{4})[\/-](\d{1,2})[\/-](\d{1,2}$)/) {
	# Formats %Y/%m/%d and %Y-%m-%d
	$date_str_iso = sprintf "%4d-%02d-%02d", $1, $2, $3;
	# We don't need to parse and format the string again if it's
	# already in ISO format.
	return $date_str_iso if $date_str eq $date_str_iso;
    } elsif ($date_str =~ /^(\d{1,2})\/(\d{1,2})$/) {
	# Format %m/%d
	$date_str_iso = sprintf "%4d-%02d-%02d", $year, $1, $2;
    }
    if ($date_str_iso) {
	$date = $date_iso->parse_datetime($date_str_iso);
	return strftime("%F", $date) if $date;
    }

    # Try the configured date forms
    $date = $date_complete->parse_datetime($date_str);
    if ($date) {
	return strftime("%F", $date);
    } elsif (length $date_str >= 6) {
	die "Can't parse date $date_str (set date_format and date_format_no_year)";
    }

    $date = $date_no_year->parse_datetime($date_str);
    if ($date) {
	$date->set_year($year);
	return strftime("%F", $date);
    } else {
	die "Can't parse date $date_str (set date_format_no_year)";
    }
}


# parse a ledger value. Usually to extract "semantic" values from typed
# metadata
sub parse_ledger_value($) {
    my ($raw) = @_;
    my $value;

    if ($raw =~ /^\[(?<date>$date_RE)\]$/) {
	$value = pp_date $+{date}, 0;
    } else {
	$value = $raw;
    }

    return $value;
}


# Simple parsing state machine: we need to look ahead for payee metadata, as in
# beancount they appear on the first line of a transaction, whereas in ledger
# they appear as a regular metadata ("x-payee"). The following functions
# support the parsing state machine for this:
my ($in_txn, $in_postings,
    %cur_txn_header, @cur_txn_lines, @cur_txn_assertions, @cur_txn_tags, @cur_txn_meta);


# reset current parsing state, reinitializing it to the empty state
sub reset_cur_txn() {
    $in_txn = 0;  # whether we are currently in a txn block
    $in_postings = 0;  # whether we are currently within postings (i.e., past txn metadata)
    %cur_txn_header = ();  # txn header, i.e., its first line
    @cur_txn_lines = ();  # txn lines, i.e., all lines except the header
    @cur_txn_assertions = ();  # balance assertions related to txn
    @cur_txn_tags = ();  # posting tags
    @cur_txn_meta = ();  # posting metadata
}
reset_cur_txn();


# pretty print the transaction header (i.e., its first line) in beancount
# format
sub pp_cur_header() {
    my $buf = "";

    $buf .= $cur_txn_header{date} . " ";
    $buf .= $cur_txn_header{flag} . " ";
    if (exists $cur_txn_header{payee}) {
	$buf .= (mk_beancount_string $cur_txn_header{payee}) . " ";
    }
    $buf .= mk_beancount_string $cur_txn_header{narration};
    if (exists $cur_txn_header{comment}) {
	$buf .= " ; " . $cur_txn_header{comment};
    }
    if (exists $cur_txn_header{tags}) {
	$buf .= $cur_txn_header{tags};
    }

    return $buf;
}

# pretty print subsequent lines (all but the first) of a transaction, in
# beancount format
sub pp_cur_lines() {
    return (join "\n", @cur_txn_lines) . "\n";
}

# pretty print pending balance assertions, in beancount format
sub pp_cur_assertions() {
    return (join "\n", @cur_txn_assertions) . "\n";
}

# pretty print a single metadata key/value pair, in beancount format
sub pp_metadata($$) {
    my ($key, $value) = @_;

    return "$key: $value";
}

# Print an amount
sub pp_amount($) {
    my ($amount) = @_;

    return sprintf "%s %s", $amount->[NUMBER], $amount->[CURRENCY];
}


# Print a lot
sub pp_lot($) {
    my ($lot) = @_;

    my @info;
    push @info, pp_amount $lot->[AMOUNT] if defined $lot->[AMOUNT];
    push @info, $lot->[DATE] if defined $lot->[DATE];
    push @info, '"' . $lot->[NOTE] . '"' if defined $lot->[NOTE];
    return join ", ", @info;
}


# Print a cost
sub pp_cost($) {
    my ($lot) = @_;

    my $num = defined $lot->[TYPE] ? $lot->[TYPE] : 1;
    my $info = "{" x $num;
    $info .= pp_lot $lot;
    $info .= "}" x $num;
    return $info;
}


# Print a price
sub pp_price(@) {
    my ($lot) = @_;

    my $info = "@" x $lot->[TYPE];
    $info .= " ";
    $info .= pp_amount $lot->[AMOUNT];
    return $info;
}


# check if a tag should be a link based on link_match
sub is_link($) {
    my ($key) = @_;

    return 1 if $key =~ /^\^/;
    foreach my $link_RE (@{$config->{link_match}}) {
	return 1 if $key =~ /$link_RE/;
    }
    return 0;
}


# format string according to whether it should be a link or a tag
sub pp_tag_link(@) {
    my ($key) = @_;

    if ($key =~ /^\^/) {
	return $key;
    } elsif (is_link $key) {
	return "^" . $key;
    } else {
	return "#" . $key;
    }
}


# pretty print in-transaction tags, in beancount format
sub pp_tags(@) {
    my @tags = @_;

    return join(' ', map pp_tag_link($_), @tags);
}

# dump the current parsing state to stdout. Used for debugging purposes only
sub dump_cur_txn() {
    if ($in_txn) {
	print "D: cur_header: " . pp_cur_header() . "\n";
	print "D: cur_lines_begin\n";
	print pp_cur_lines();
	print "D: cur_lines_end\n";
    } else {
	print "D: no txn\n";
    }
}


# set the current transaction header (= first line), overriding the previous
# value (which should *usually* be empty, but it is the caller responsibility
# to ensure this is the case)
sub push_header($$$) {
    my ($date, $flag, $narration) = @_;

    $in_txn = 1;
    $cur_txn_header{date} = $date;
    $cur_txn_header{flag} = $flag;
    $cur_txn_header{narration} = $narration;
}

# set the current transaction payee, complementing the transaction header
sub push_payee($) {
    my ($payee) = @_;
    $cur_txn_header{payee} = $payee;
}

# add a transaction line. Call this multiple times to accumulate lines that will
# be emitted as soon as the transaction is over
sub push_line($$) {
    my ($depth, $line) = @_;

    push @cur_txn_lines, indent($depth, $line);
}

# add a balance assertion to be published at the end of current transaction
sub push_assertion($$) {
    my ($account, $amount) = @_;

    # beancount evaluates balance assertions at the beginning of the day,
    # whereas ledger evaluates them at the end of the txn. So we schedule the
    # balance assertion for *after* the original txn. This assumes that there
    # are no *other* txn in the same day that change the balance again.
    my $assertion_date = sprintf("%04d-%02d-%02d",
				 Add_Delta_Days(split(/-/, $cur_txn_header{date}), 1));
    push @cur_txn_assertions, "$assertion_date balance $account  $amount";
}

# add a metadata line. Wrapper around push_line() for metadata
sub push_metadata($$$) {
    my ($depth, $key, $value) = @_;
    push_line $depth, pp_metadata($key, $value);
}


# add a comment line. Wrapper around push_line() for comments
sub push_comment($$) {
    my ($depth, $comment) = @_;

    push_line $depth, "; $comment";
}


sub push_deferred_meta($) {
    my ($depth) = @_;

    foreach (@cur_txn_meta) {
	push_line $depth, $_;
    }
    @cur_txn_meta = ();
}


# Handle metadata
sub handle_metadata($$$) {
    my ($depth, $metadata, $defer) = @_;

    my $key = map_metadata($metadata->{key});
    if (not $in_postings and ($key eq $config->{payee_tag} or $key eq $config->{payer_tag})) {
	# ASSUMPTION: payer_tag always occurs later than payee_tag, which
	# is currently enforced in our ledger. This is to guarantee that we
	# promote payers to payees, because that's the sensible thing to do
	# with Beancount
	push_payee $metadata->{value};
    } else {
	# Check if we should store as metadata or as links
	# We check for $in_postings since posting-level links are not allowed
	if (lc $key ~~ [ map lc $_, @{$config->{link_tags}} ] && !$in_postings) {
	    print_tags $depth, "^$metadata->{value}";
	} else {
	    # Metadata values can be empty
	    my $value = $metadata->{value} ? $metadata->{value} : "";
	    if (defined($metadata->{typed})) {
		$value = parse_ledger_value $value;
	    } else {
		$value = mk_beancount_string $value;
	    }
	    if ($defer) {
		push @cur_txn_meta, pp_metadata($key, $value);
	    } else {
		push_metadata $depth, $key, $value;
	    }
	}
    }
}


# Process hledger tags
# hledger tags can have values (metadata) or not (tags)
sub handle_hledger_tags($$$) {
    my ($depth, $l, $defer) = @_;

    # hledger doesn't know the ledger :foo:bar: syntax.  However, this
    # is parsed as tag "foo" (without the colon!) with value "bar:".
    $l =~ s/^://;

    foreach $_ (split /\s*,\s*/, $l) {
	if (/$hledger_metadata_RE/) {
	    if ($+{key} eq "date" || $+{key} eq "date2") {
		my $key = $+{key};
		my $date = $+{value};
		my $year = $1 if $cur_txn_header{date} =~ /^(\d{4})-/;
		if ($date !~ /^$hledger_date_RE/) {
		    die "Can't parse date after hledger tag $key: $date";
		}
		if (defined $config->{postdate_tag} && $key eq "date") {
		    push @cur_txn_meta, pp_metadata $config->{postdate_tag}, pp_date $date, $year;
		}
		if (defined $config->{auxdate_tag} && $key eq "date2") {
		    push @cur_txn_meta, pp_metadata $config->{auxdate_tag}, pp_date $date, $year;
		}
		push_deferred_meta $depth if !$defer;
	    } else {
		handle_metadata $in_postings ? 2 : 1, \%+, $defer;
	    }
	} else {
	    s/:$//;
	    # The $in_postings check is a workaround since there are no posting-level tags
	    if ($defer || $in_postings) {
		push @cur_txn_tags, $_;
	    } else {
		print_tags $depth, $_;
	    }
	}
    }
}


# Process comments; in particular, look for tags
# Returns the comment (after stripping tags)
sub handle_comment($$$) {
    my ($depth, $l, $defer) = @_;

    if ($config->{hledger} && $l =~ /;\s*(?<comment>.*?)$hledger_tags_RE/) {
	handle_hledger_tags $depth, $+{tags}, $defer;
	my $comment = $+{comment};
	if ($+{tags}) {
	    $comment =~ s/,?\s*$//;
	}
	return $comment;
    } elsif ($l =~ /^$metadata_RE/) {  # metadata comment
	handle_metadata $in_postings ? 2 : 1, \%+, $defer;
	return;
    } elsif ($l =~ /^$comment_RE\s+:$tags_RE:\s*$/
	or $l =~ /^;\s+:$tags_RE:\s+(?<comment>.*)$/) {  # tags comment
	# The $in_postings check is a workaround since there are no posting-level tags
	if ($defer || $in_postings) {
	    push @cur_txn_tags, split /:/, $+{tags};
	} else {
	    print_tags $depth, split /:/, $+{tags};
	}
	return $+{comment};
    } elsif ($l =~ /^$comment_RE/) {  # (every other) comment
	return $+{comment};
    } else {
	die "Can't process comment: $l";
    }
}


# return a pretty printed transaction, resetting the current parsing state. This
# is usually called as soon as the end of a transaction (usually an empty line)
# is encountered
sub pop_txn() {
    my $buf = "";

    $buf .= pp_cur_header() . "\n";
    $buf .= pp_cur_lines();
    $buf .= "\n" . pp_cur_assertions() if @cur_txn_assertions;

    reset_cur_txn();

    return $buf;
}


# map a (ledger) metadata key to the desired (beancount) metadata key. Relies
# on the config variable metadata_map
# Beancount syntax: "Keys must begin with a lowercase character from a-z and
# may contain (uppercase or lowercase) letters, numbers, dashes and
# underscores."
sub map_metadata($) {
    my ($key) = my ($ledger_key) = @_;

    # For backwards compatibility with older ledger2beancount configs
    $key = $config->{metadata_map}{lc $key} if exists $config->{metadata_map}{lc $key};

    $key = $config->{metadata_map}{$key} if exists $config->{metadata_map}{$key};
    $key = lcfirst $key; # Make first letter lowercase
    $key =~ s/^([^\p{letter}])/x$1/; # Make sure first character is a letter
    $key .= "x" if length $key == 1;
    # Work around lack of Unicode support (beancount #161)
    $key = NFKD $key;
    $key =~ s/\p{NonspacingMark}//g;
    $key =~ s/[^a-zA-Z0-9_-]/-/g; # Replace disallowed characters
    $key = $config->{metadata_map}{$key} if exists $config->{metadata_map}{$key};

    # payee_tag and payer_tag don't show up in the beancount file, so
    # no need to warn about them.
    if ($key ne $config->{payee_tag} and $key ne $config->{payer_tag}) {
	$ledger_metadata{$ledger_key} = 1;
    }

    return $key;
}


# Apply any "apply account" statements to the account
sub map_account_apply($) {
    my ($account) = @_;

    foreach my $a (reverse @ledger_apply) {
	if (${$a}[0] eq "account") {
	    ${$a}[1] =~ s/:+$//;
	    $account = ${$a}[1] . ":" . $account;
	}
    }
    return $account;
}


# map a ledger account to a beancount account
# ledger account: can be pretty much anything, as long as it's followed
# by two spaces, a tab or the end of the line.
# beancount accounts: "account names begin with a capital letter or a
# number and are followed letters, numbers or dash (-) characters. All
# other characters are disallowed." (Letters and numbers may be UTF-8)
sub map_account($) {
    my ($account) = my ($ledger_account) = @_;

    $ledger_accounts{$account} = 1;

    # Map accounts according to the config
    $account = $config->{account_map}{$account} if exists $config->{account_map}{$account};
    foreach $_ (sort keys %{$config->{account_regex}}) {
	if ($account =~ s/$_/safe_interpolate($config->{account_regex}{$_})/eg) {
	    $config->{account_map}{$ledger_account} = $account;
	    last;
	}
    }

    # Ensure account names are valid in beancount
    $account =~ s/(^|:)(\p{lower})/$1\U$2\E/g; # Make first letter uppercase
    $account =~ s/(^|:)[^\p{letter}\p{number}]/$1X/g; # Make sure first character is a letter or number
    $account =~ s/[^\p{letter}\p{number}:-]/-/g; # Replace disallowed characters
    $account =~ s/:+$//g; # Ensure account doesn't end in a colon; this is unusual but legal in ledger
    $account = $config->{account_map}{$account} if exists $config->{account_map}{$account};
    my $root = $1 if $account =~ /([^:]+)/;
    # beancount doesn't allow just a root account (e.g. Income) as an
    # account name.  It has to be Income:Subaccount
    if ($account eq $root) {
	print_warning_once("Account $account not allowed; it needs a subaccount, e.g. $account:Subaccount");
	$account .= ":Subaccount";
    }
    if (!($root ~~ @beancount_root_names)) {
	print_warning_once("Non-standard root name $root used; please set beancount options name_*");
    }
    $account_declared{$account} = undef if not defined $account_declared{$account};
    return $account;
}


# Applies any pending account renames (e.g. ledger alias and "apply
# account") and then maps the account name.
sub apply_account($) {
    my ($account) = @_;

    if (exists $ledger_alias{$account}) {
	$account = $ledger_alias{$account};
    } else {
	$account = map_account_apply $account;
    }
    return map_account($account);
}


# map a ledger commodity to a beancount commodity
# beancount commodity: up to 24 characters long, beginning with a capital
# letter and ending with a capital letter or a number. The middle
# characters may include "_-'."
sub map_commodity($) {
    my ($commodity) = @_;

    $ledger_commodities{$commodity} = 1;

    $commodity = $config->{commodity_map}{$commodity} if exists $config->{commodity_map}{$commodity};
    $commodity =~ s/(^")|("$)//g;
    # Check again after removing the quote
    $commodity = $config->{commodity_map}{$commodity} if exists $config->{commodity_map}{$commodity};

    $commodity = substr (uc $commodity, 0, $BEANCOUNT_COMMODITY_MAX_LEN);
    # Work around lack of Unicode support (beancount #161)
    $commodity = NFKD $commodity;
    $commodity =~ s/\p{NonspacingMark}//g;
    # Dash (-) is not valid in ledger (even with quoted commodity) but valid
    # in beancount
    $commodity =~ s/[^a-zA-Z0-9_'.-]/-/g; # Replace disallowed characters
    $commodity =~ s/^[^\p{letter}]/X/g; # Make sure first character is a letter
    $commodity =~ s/[^\p{letter}\p{number}]$/X/g; # Make sure last character is a letter or number
    $commodity .= "X" if length $commodity == 1;

    $commodity = $config->{commodity_map}{$commodity} if exists $config->{commodity_map}{$commodity};

    $commodity_declared{$commodity} = undef if not defined $commodity_declared{$commodity};
    return $commodity;
}


sub get_output() {
    if (!$after_auto) {
	return \@pre_output;
    } else {
	return \@output;
    }
}

# emit a single line
sub print_line($$) {
    my ($depth, $line) = @_;

    my $out = get_output;
    push @$out, indent($depth, $line), "\n";
}


# emit a top-level comment: the comment marker ; is put as the first
# character and the rest is indented according to depth.
sub print_comment_top_level($$) {
    my ($depth, $comment) = @_;

    my $out = get_output;
    if (!$comment) {
	push @$out, ";\n";
    } else {
	push @$out, "; ", indent($depth, $comment), "\n";
    }
}


# Add warning to output file
sub print_warning($) {
    my ($warning) = @_;

    push @conversion_notes, $warning;
}

# Add warning to output file, but only once
sub print_warning_once($) {
    my ($warning) = @_;

    push @conversion_notes, $warning if !($warning ~~ @conversion_notes);
}


# Strip indentation from a line and return the depth and line
sub strip_indentation($) {
    my ($line) = @_;

    chomp $line;
    # handle line indentation once and for all
    $line =~ /^(?<indent>\s*)(?<line>.*)/;
    my $depth = ceil(length($+{indent}) / $config->{ledger_indent});
	# round up with ceil() because we mix 4 (postings) and 2 (posting tags) indent in ledger

    return ($depth, $+{line});
}


sub print_tags($@) {
    my ($depth, @ledger_tags) = @_;

    return if not scalar @ledger_tags;

    my @tags = grep {!is_link $_} @ledger_tags;
    my @links = grep {is_link $_} @ledger_tags;
    if (!$in_postings) {
	$cur_txn_header{tags} .= "\n" . indent $depth, pp_tags @ledger_tags;
    } else {
	# XXX workaround for the fact that per-posting tags are currently not
	# allowed.  See:
	# https://groups.google.com/forum/#!topic/beancount/XPtFOnqCVws
	push_line $depth, "tags: \"" . join(', ', @tags) . "\"" if @tags;
	push_line $depth, "links: \"" . join(', ', @links) . "\"" if @links;
    }
}


# Parse inline math
sub get_value_expr($) {
    my ($l) = @_;

    if ($l !~ /^(?<math>$RE{balanced}{-parens=>'()'})\s*(?<rest>.*)/) {
	die "Cannot parse inline math from $l\n";
    }
    my $math = $+{math};
    my $rest = $+{rest};

    # Strip the outer () since beancount doesn't require them.
    $math =~ s/^\(//;
    $math =~ s/\)$//;

    if ($config->{decimal_comma}) {
	$math =~ s/\.//g;
	$math =~ tr/,/./; # issue #204
    }

    # We have to move the commodity from within the inline math construct
    # to after (outside) the inline math since beancount expects the
    # format "number currency" (where number can be basic math).
    # The easiest way to do this is to remove all arithmetic operations
    # and see what's left over.  If there's a single term at the end,
    # that's the commodity we need.  If there are multiple, beancount
    # can't handle it.
    my $terms = $math;
    $terms =~ s/[.\d\(\)\*\/+-]/ /g;
    $terms =~ s/^\s+//;
    my @terms = uniq split /\s+/, $terms;
    if (scalar @terms == 0) {
	die "Failed to find commodity in inline math: $math\n";
    } elsif (scalar @terms == 1) {
	$math =~ s/\s*\Q$terms[0]\E\s*//g;
    } else {
	print_warning_once("Complex inline math not supported in beancount: $math");
    }
    my $amount;
    $amount->[NUMBER] = $math;
    $amount->[CURRENCY] = map_commodity $terms[0];
    return $amount, $rest;
}


# Parse an amount
sub get_amount($) {
    my ($l) = @_;

    # Inline math
    if ($l =~ /^\(/) {
	return get_value_expr $l;
    }

    my $amount;
    $amount->[FIXATED] = 1 if $l =~ s/^=\s*//;

    # Ledger supports three different amount formats:
    # [minus] number currency
    # [minus] currency number
    # currency [minus] number
    if ($l =~ /^(?<minus>-\s*)?(?<number>$number_RE)\s*(?<currency>$commodity_RE)\s*(?<rest>.*)/ ||
        $l =~ /^(?<minus>-\s*)?(?<currency>$commodity_RE)\s*(?<number>$number_RE)\s*(?<rest>.*)/ ||
        $l =~ /^(?<currency>$commodity_RE)\s*(?<minus>-\s*)(?<number>$number_RE)\s*(?<rest>.*)/) {
	my $amount;
	my $rest = $+{rest};
	my $minus = $+{minus} ? "-" : "";
	$amount->[NUMBER] = $+{number};
	$amount->[CURRENCY] = map_commodity $+{currency};

	if ($config->{decimal_comma}) {
	    $amount->[NUMBER] =~ s/\.//g;
	    $amount->[NUMBER] =~ tr/,/./; # issue #204
	}

	# ledger allows amounts without a leading zero (e.g. .10) but
	# beancount doesn't.
	$amount->[NUMBER] =~ s/^\./0./;

	$amount->[NUMBER] = $minus . $amount->[NUMBER];

	return $amount, "$rest";
    } else {
	die "Cannot parse amount from $l\n";
    }
}


# Parse a lot
sub get_lot($) {
    my ($l) = @_;
    my $l_orig = $l;

    my ($amount, $cost, $date);

    # A lot can contain various information but pretty much everything
    # apart from the amount is optional.  The order is fixed, though:
    # amount, cost, date, note and lot value expressions.  If no
    # optional information is specified, it's merely an amount and not
    # a lot.

    # Find the amount
    ($amount, $l) = get_amount $l;

    # Most amounts are not followed by lot information.  If the amount
    # is followed by something we know can't be a lot information,
    # stop spending time to look for lot information.  Specifically,
    # check for:
    # * Line end: $
    # * Comment: ;
    # * Price: @
    # * Virtual price: (@)
    # * Balance assertion: =
    if ($l =~ /^($|;|@|\(@|=)/) {
	return $amount, $cost, $l;
    }

    # Look for a cost: {...} or {{...}}
    if ($l =~ /^(?<type>\{\{?)\s*(?<amount>[^}]+)\}\}?\s*(?<rest>.*)/) {
	$l = $+{rest};
	$cost->[TYPE] = length $+{type};
	my $rest;
	($cost->[AMOUNT], $rest) = get_amount $+{amount};
	if ($rest) {
	    die "Cost in lot had a rest: $rest";
	}
    }

    # A lot date: dates are always in square brackets [date]
    if ($l =~ /^\[(?<date>\d+[^ =\]]+)\]\s*(?<rest>.*)/) {
	$l = $+{rest};
	$date = pp_date $+{date}, 0;
    }

    # A lot note: (note).  Don't confuse with a ((lot value expression))
    # (see below) or a virtual price: (@).
    if ($l !~ /^(\(\(|\(@)/ && $l =~ /^(?<note>$RE{balanced}{-parens=>'()'})\s*(?<rest>.*)/) {
	$l = $+{rest};
	$cost->[NOTE] = $+{note};
	$cost->[NOTE] =~ s/^\(//;
	$cost->[NOTE] =~ s/\)$//;
    }

    # A lot value expression: ((market))
    if ($l =~ /^\(\(/ && $l =~ /^(?<valuation>$RE{balanced}{-parens=>'()'})\s*(?<rest>.*)/) {
	$l = $+{rest};
	my $valuation = $+{valuation};
	$valuation =~ s/^\(\(//;
	$valuation =~ s/\)\)$//;
	$valuation =~ s/^\s+//;
	$valuation =~ s/\s+$//;
	# Print a warning unless it's "market" which is the default
	# behaviour anyway.
	if ($valuation ne "market") {
	    print_warning_once "Lot value expressions not supported in beancount: $valuation";
	}
    }

    # A date without a cost is not valid in beancount, so add the
    # amount as the cost, which essentially means that the cost
    # is 1.00 of the currency.
    if ($date) {
	if (!defined $cost->[AMOUNT]) {
	    $cost->[TYPE] = 2;
	    $cost->[AMOUNT] = $amount;
	}
	$cost->[DATE] = $date;
    }

    return $amount, $cost, $l;
}


# Parse a posting
sub parse_posting($) {
    my ($l) = @_;

    my $posting;
    $posting->[TYPE] = POSTING;

    # We parse from left to right since there is a specific order to
    # everything.  When we parse something, we put the rest of the
    # line back into $l.

    # Flag: ! or *
    if ($l =~ /^([!*])\s*(.*)/) {
	$posting->[FLAG] = $1;
	$l = $2;
    }

    # Account: an account can be pretty much anything, but we know:
    # * It's the first thing after a flag.
    # * It ends with two spaces, a tab or the end of line
    if ($l =~ /^(.*?)((  |\t| \t|\s*$)\h*)(.*)/) {
	$posting->[ACCOUNT] = $1;
	$posting->[WHITESPACE] = $2;
	$l = $4;
    }

    # Next we probably have an amount or a lot, but this is not necessarily
    # the case: it could also be a balance assignment (=) or a
    # comment (;).  If it's neither of those two, it has to be
    # an amount.
    if ($l =~ /^[^=;]/) {
	($posting->[AMOUNT], $posting->[COST], $l) = get_lot $l;
    }

    # A price: either @, @@, (@), or (@@)
    if ($l =~ /^\(?(@@?)\)?\s*(.*)/) {
	$posting->[PRICE]->[TYPE] = length $1;
	($posting->[PRICE]->[AMOUNT], $_, $l) = get_lot $2;
    }

    # An assertion (if there's no amount) or balance assignment
    # (if there's no amount).  Starts with =
    if ($l =~ /^=\s*(.*)/) {
	($posting->[ASSERTION]->[AMOUNT], $l) = get_amount $1;
    }

    # A comment: starts with ;
    if ($l =~ /^;(.*)/) {
	$l = "";
	$posting->[COMMENT] = $1;
    }

    # There should be nothing left
    if ($l) {
	die "Cannot handle: $l";
    }

    return $posting;
}


# Process a ledger transaction
sub process_txn(@) {
    my @txn = @_;

    # Count total postings and postings that have amounts.  This is needed
    # to distinguish different kinds of balance assignments.
    my $total_postings = 0;
    my $postings_with_amount = 0;
    my @txn_parsed;
    foreach my $l (@txn) {
	my $line;
	my ($depth, $l) = strip_indentation($l);
	if ($l =~ /^$comment_RE/) {
	    $line->[TYPE] = COMMENT;
	    $line->[COMMENT] = $l;
	} elsif ($l =~ /^$posting_RE/) {
	    $line = parse_posting $l;
	    $total_postings++;
	    $postings_with_amount++ if defined $line->[AMOUNT];
	} elsif ($l =~ /^\h*$/) {  # whitespace or blank line
	    $line->[TYPE] = BLANK;
	} else {
	    die "Cannot handle line: $l";
	}
	$line->[DEPTH] = $depth;
	push @txn_parsed, $line;
    }
    my @postings = grep {$_->[TYPE] == POSTING} @txn_parsed;

    my $has_amount;
    my $skipped_posting = 0;
    foreach my $i (@txn_parsed) {
	if ($i->[TYPE] == COMMENT) {
	    # If there's a virtual posting that is ignored, we have to
	    # discard the meta-data associated with the posting on
	    # following lines (if there's any).  But there might be
	    # comments worth preserving.  So just preserve all ledger
	    # comments (i.e. comments and meta-data) as is.
	    if ($skipped_posting) {
		push_line $i->[DEPTH], $i->[COMMENT];
		print_warning_once "Comment or meta-data on virtual posting preserved as comment";
	    } else {
		my $comment = handle_comment $i->[DEPTH], $i->[COMMENT], 0;
		push_comment $i->[DEPTH], $comment if $comment;
	    }
	} elsif ($i->[TYPE] == POSTING) {
	    print_tags $i->[DEPTH]+1, @cur_txn_tags;
	    @cur_txn_tags = ();

	    my $l = "";
	    $l .= "$i->[FLAG] " if defined $i->[FLAG];

	    my $account_length_old = length $i->[ACCOUNT];
	    # Check for virtual and deferred accounts
	    if ($i->[ACCOUNT] =~ /^\(/) {
		# Ignore virtual postings with parentheses
		print_warning_once "Virtual posting in parentheses ignored";
		$skipped_posting = 1;
		next;
	    } elsif ($i->[ACCOUNT] =~ /^\[(.*)\]/) {
		if ($config->{convert_virtual}) {
		    $i->[ACCOUNT] = $1; # Make account real
		} else {
		    print_warning_once "Virtual posting in bracket ignored (see convert_virtual option)";
		    $skipped_posting = 1;
		    next;
		}
	    } elsif ($i->[ACCOUNT] =~ /^<(.*)>/) {
		    $i->[ACCOUNT] = $1;
	    }
	    $skipped_posting = 0; # reset variable with each posting
	    $in_postings = 1;
	    my $postdate;
	    my $auxdate;
	    $has_amount = $+{amount} ? 1 : 0;

	    # Replace ledger account names with corresponding beancount account names
	    # while trying to keep the whitespace intact.
	    $i->[ACCOUNT] = apply_account $i->[ACCOUNT];
	    $l .= $i->[ACCOUNT];
	    my $account_length_new = length $i->[ACCOUNT];
	    my $space_diff = " "x abs($account_length_new - $account_length_old);
	    if ($account_length_new <= $account_length_old) {
		$i->[WHITESPACE] .= $space_diff;
	    } else {
		$i->[WHITESPACE] =~ s/$space_diff//;
		# # Ensure there are two spaces
		$i->[WHITESPACE] = "  " if $i->[WHITESPACE] eq " " || $i->[WHITESPACE] eq "";
	    }

	    if (defined $i->[AMOUNT]) {
		$l .= $i->[WHITESPACE] . pp_amount $i->[AMOUNT];
	    }

	    if (defined $i->[PRICE] && !defined $i->[COST]) {
		# No ledger lot cost, only price.  This one is tricky
		# because this convention can be used for two different
		# purposes:
		# 1) For conversion between currencies where you do not
		# generally wish to retain the cost.
		# 2) To acquire/dispose of commodities (e.g. shares)
		# where you want to retain the cost.
		#
		# Most currencies have 3 characters (e.g. EUR, USD, GBP)
		# whereas commodities often have more (e.g. the ISIN).
		# Therefore, we assume the cost should not be kept if
		# both currencies have 3 characters.  Since this won't
		# work in all cases, we also check for a list of
		# commodities.  Similarly, we allow users to configure
		# commodities that should be treated as currencies.
		my $commodity1 = $i->[AMOUNT]->[CURRENCY];
		my $commodity2 = $i->[PRICE]->[AMOUNT]->[CURRENCY];
		if (!defined $i->[PRICE]->[AMOUNT]->[FIXATED] && ((length $commodity1 == 3 && length $commodity2 == 3 &&
		    !($commodity1 ~~ @{$config->{currency_is_commodity}})) ||
		    $commodity1 ~~ @{$config->{commodity_is_currency}} ||
		    $commodity2 ~~ @{$config->{commodity_is_currency}})) {
		    $l .= " " . pp_price $i->[PRICE];
		} else {
		    $l .= " " . pp_cost $i->[PRICE];
		}
	    } elsif (defined $i->[COST]) {
		$l .= " " . pp_cost $i->[COST];
		# ledger requires you to specify both lot cost and lot price
		# due to a bug.  If both are the same, don't put in the price.
		if (defined $i->[PRICE]->[AMOUNT] && (!defined $i->[COST]->[AMOUNT] || pp_amount $i->[COST]->[AMOUNT] ne pp_amount $i->[PRICE]->[AMOUNT])) {
		    $l .= " " . pp_price $i->[PRICE];
		}
	    }

	    if (defined $i->[AMOUNT] && !defined $i->[COST]) {
		# Apply any fixated costs if needed
		foreach my $a (reverse @ledger_apply) {
		    if (${$a}[0] eq "fixed") {
			my $fixated = ${$a}[1];
			if ($i->[AMOUNT]->[CURRENCY] eq map_commodity $fixated->[CURRENCY]) {
			    $l .= " " . pp_cost $fixated;
			}
		    }
		}
	    }

	    if ($i->[COMMENT]) {
		# Ideally, handling of postdates and auxdates would be
		# done in handle_comment but this is difficult because
		# push_deferred_meta has to be called before push_metadata
		# of postdate and auxdate.
		$i->[COMMENT] =~ /(^\s*\[(?<postdate>$date_RE)?(=(?<auxdate>$date_RE))?\])?(\s*;?\s*(?<comment>.*))/;
		$auxdate = $+{auxdate};
		$postdate = $+{postdate};
		my $comment = handle_comment $i->[DEPTH], "; $+{comment}", 1;
		if ($comment) {
		    $l .= $i->[WHITESPACE] if !defined $i->[AMOUNT];
		    $l .= " ; $comment" if $comment;
		}
	    }

	    if ($i->[ASSERTION]) {
		push_assertion $i->[ACCOUNT], pp_amount $i->[ASSERTION]->[AMOUNT];
		if ($total_postings == 2 && $postings_with_amount == 0) {
		    # We have two postings, i.e. two accounts; remove the current
		    # account from the list of accounts to find out which account
		    # we have to pad against.
		    my @accounts = map {apply_account $_->[ACCOUNT]} @postings;
		    @accounts = grep { $_ ne $i->[ACCOUNT] } @accounts;
		    print_line 0, sprintf "%s pad %s %s", $cur_txn_header{date}, $i->[ACCOUNT], $accounts[0];
		    print_line 0, pp_cur_assertions;
		    # Skip transaction (the transaction itself is just two
		    # null postings, which are not valid in beancount)
		    pop_txn();
		    return;
		} elsif ($total_postings > 2 && ($total_postings-$postings_with_amount) == 2) {
		    print_warning_once "Balance assignments with 2 null postings not supported";
		}
	    }

	    push_line $i->[DEPTH], $l;

	    # Show all metadata that was on the same line as the posting
	    push_deferred_meta $i->[DEPTH] + 1;
	    push_metadata $i->[DEPTH] + 1, $config->{postdate_tag}, pp_date $postdate, 0 if defined $postdate && defined $config->{postdate_tag};
	    push_metadata $i->[DEPTH] + 1, $config->{auxdate_tag}, pp_date $auxdate, 0 if defined $auxdate && defined $config->{auxdate_tag};
	} elsif ($i->[TYPE] == BLANK) {
	    push_line 0, "";
	} else {  # there shouldn't be anything
	    die "Don't know how to process transaction line with type $i->TYPE\n";
	}
    }
    print_tags 2, @cur_txn_tags;

    if ($total_postings == 1) {
	if (defined $ledger_bucket) {
	    # We only saw one posting and a ledger bucket is defined
	    push_line 1, $ledger_bucket;
	} else {
	    # Transactions can have a single posting as long as the
	    # amount is 0 (otherwise it would fail to balance).  This
	    # can be used to add standalone balance assertions.
	    # If there's an amount (it has to be zero), print
	    # the transaction (using a second posting with the
	    # same account name) because it might contain an
	    # interesting narration.  If there's no amount,
	    # just use the balance assertion.
	    if (defined $postings[0]->[AMOUNT] && $postings[0]->[AMOUNT]->[NUMBER] == 0.00) {
		push_line 1, $postings[0]->[ACCOUNT];
	    } else {
		print_line 0, pp_cur_assertions if @cur_txn_assertions;
		pop_txn();
		return;
	    }
	}
    } elsif ($total_postings == 2 && $postings_with_amount == 2) {
	# Handle implicit conversions.  We only support simple implicit
	# conversions with 2 postings.

	# If there's a cost or price on any of the postings, it's not
	# an implicit conversion.  Same if the currencies on both
	# postings are the same.
	if (!$postings[0]->[COST]->[AMOUNT] && !$postings[1]->[COST]->[AMOUNT] &&
	    !$postings[0]->[PRICE]->[AMOUNT] && !$postings[0]->[PRICE]->[AMOUNT] &&
	    $postings[0]->[AMOUNT]->[CURRENCY] ne $postings[1]->[AMOUNT]->[CURRENCY]) {
	    my $amount = pp_amount $postings[0]->[AMOUNT];
	    my $price = pp_amount $postings[1]->[AMOUNT];
	    $price =~ s/^-//;  # Prices must never be negative
	    # We look for a posting that contains the amount and add the
	    # price.
	    foreach my $l (@cur_txn_lines) {
		$l =~ s/$amount/$amount @@ $price/ if $l =~ /^\s+([*!]\s+)?[\p{Uppercase_Letter}][^\s]+\s+$amount/;
	    }
	}
    }
    push @output, pop_txn();
}


# Read one ledger stanza (everything indented by whitespace)
sub read_stanza($) {
    my ($input_ref) = @_;

    my @stanza = ();
    my $l;
    do {
	$l = @{$input_ref}[0];
	push @stanza, shift @{$input_ref} if $l =~ /^\h+/;
    } while ($l =~ /^\h+/ && @{$input_ref});
    return @stanza;
}


# MAIN CONVERSION LOOP

unshift(@ARGV, '-') unless @ARGV;
open my $input, $ARGV[0] or die "Can't read $ARGV[0]";
my @input = <$input>;
close $input;

# To store year declaration
my $year = POSIX::strftime "%Y", localtime;
# The year to be used if there's no "apply year" in force; this is either
# the current year (or no date was specified) or whatever was last
# specified using the Y/year directive.
my $year_no_apply = $year;

while (@input) {
    my $l = shift @input;
    chomp $l;
    my $depth = 0;
    my @stanza;
    # The two tests for ignore_marker have to be the first thing since they
    # have to take precedence over other tests.
    if ($config->{ignore_marker} && $l =~ /;\s*:?$config->{ignore_marker}\s+begin/) {
	do {
	    $l = shift @input;
	} while $l !~ /;\s*:?$config->{ignore_marker}\s+end/;
    } elsif ($config->{ignore_marker} && $l =~ /;\s*:?$config->{ignore_marker}/) {
	next;
    } elsif ($config->{keep_marker} && $l =~ /;\s*:?$config->{keep_marker}\s+begin/) {
	$after_auto = 1;
	$l = shift @input;
	do {
	    if ($l =~ /^$comment_top_level_RE$/) {
		print_line $depth, $+{comment};
	    }
	    $l = shift @input;
	} while $l !~ /;\s*:?$config->{keep_marker}\s+end/;
    } elsif ($config->{keep_marker} && $l =~ /^$comment_top_level_RE\s*;\s*:?$config->{keep_marker}/) {
	print_line $depth, $+{comment};
    } elsif ($l =~ /^[!@]?include\s+(?<filename>.*)/) {  # include
	my $filename = $+{filename};
	$filename =~ s/(.ledger|.dat)$//;
	print_line $depth, "include \"$filename.beancount\"";
    } elsif ($l =~ /^$comment_top_level_RE/) {
	# beancount issue #282
	if ($l =~ /^\|\s?(?<comment>.*)/) {
	    print_comment_top_level $depth, $+{comment};
	} else {
	    # Rewrite the Emacs modeline
	    $l =~ s/-\*- ledger -\*-/-*- mode: beancount -*-/;
	    print_line $depth, $l;
	}
    } elsif ($l =~ /^[!@]?(?<type>alias)\s+(?<account>$account_RE)\s*=\s*(?<val>.*)/) {  # alias
	$ledger_alias{$+{account}} = map_account_apply $+{val};
    } elsif ($l =~ /^[!@]?apply\s+(?<type>account)\s+(?<val>.*)/) {  # apply account
	$after_auto = 1;
	push @ledger_apply, [$+{type}, $+{val}];
    } elsif ($l =~ /^[!@]?apply\s+(?<type>(fixed|rate))\s+(?<commodity>[^\s]+)\s*(?<fixed>.+)/) {  # apply fixed
	$after_auto = 1;
	my $fixated->[CURRENCY] = $+{commodity};
	my $rest;
	($fixated->[AMOUNT], $rest) = get_amount $+{fixed};
	$fixated->[TYPE] = 1;
	if ($rest) {
	    die "Unknown rest $rest in apply $+{type} directive: $l";
	}
	push @ledger_apply, ["fixed", $fixated];
    } elsif ($l =~ /^[!@]?apply\s+(?<type>tag)\s+(?<val>.*)/) {  # apply tag
	$after_auto = 1;
	# `apply tag` can be converted to beancount in three ways:
	# * using pushtag/poptag for tags
	# * applying links to each transactions
	# * applying metadata to each transactions
	if ("; $+{val}" =~ /$metadata_RE/) {
	    push @ledger_apply, ["metadata", {%+}];
	} elsif (is_link $+{val}) {
	    push @ledger_apply, ["link", $+{val}];
	} else {
	    print_line $depth, "pushtag " . pp_tag_link $+{val};
	    push @ledger_apply, [$+{type}, $+{val}];
	}
    } elsif ($l =~ /^[!@]?apply\s+(?<type>year)\s+(?<val>\d+)/) {  # apply year
	$after_auto = 1;
	$year = $+{val};
	push @ledger_apply, [$+{type}, $+{val}];
    } elsif ($l =~ /^[!@]?(apply\s+.*)/) {  # apply .*
	# ledger seems to silently ignore all other apply statements
	print_warning_once "Unknown '$1' directive found";
	next;
    } elsif ($l =~ /^[!@]?end/) {  # end
	next if !@ledger_apply; # end without any apply
	my $a = pop @ledger_apply;
	if (${$a}[0] eq "tag") {
	    print_line $depth, "poptag " . pp_tag_link ${$a}[1]
	} elsif (${$a}[0] eq "year") {
	    # apply year can be nested, so restore the previous year
	    my $found = 0;
	    foreach my $a (reverse @ledger_apply) {
		if (${$a}[0] eq "year") {
		    $year = ${$a}[1];
		    $found = 1;
		    last;
		}
	    }
	    $year = $year_no_apply if !$found;
	}
    } elsif ($l =~ /^[!@]?(bucket|A)\s+(.*)/) {  # bucket
	$ledger_bucket = map_account $2;
    } elsif ($l =~ /^[!@]?(comment|test)/) {  # block comment
	$l = shift @input;
	# block comments may or may not be indented.  If the first line has
	# indentation, strip the same indentation, from all other comments.
	my $strip_indent = $l =~ /^(\h+)/ ? $1 : "";
	while ($l !~ /^end\s+(comment|test)/) {
	    chomp $l;
	    $l =~ s/^$strip_indent//;
	    print_comment_top_level $depth, $l;
	    $l = shift @input;
	}
    } elsif ($l =~ /^[!@]?(define|def)\s/) {  # define
	print_warning_once "The `$1` directive is not supported";
	print_comment_top_level 0, $l;
    } elsif ($l =~ /^[!@]?(fixed|endfixed)/) {  # Fixated price
	print_warning_once "Fixated prices are not supported";
	print_comment_top_level 0, $l;
    } elsif ($l =~ /^(Y\s*|year\s+)(\d{4})/) {  # year declaration
	$year = $2;
	$year_no_apply = $2;
    } elsif ($l =~ /^$price_RE/) {
	$after_auto = 1;
	$l = sprintf "%s price %s ", pp_date($+{date}, $year), map_commodity $+{commodity1};
	my ($commodity2, $rest) = get_amount $+{commodity2};
	$l .= pp_amount $commodity2;
	if ($rest) {
	    die "Unknown rest $rest in price directive: $l";
	}
	print_line $depth, $l;
    } elsif ($l =~ /^([=~].*)/) {  # automated transaction (=) or periodic transaction (~)
	$after_auto = 1;
	print_warning_once "Automated or periodic transaction skipped";
	print_comment_top_level $depth, $1;
	@stanza = read_stanza \@input;
	foreach $l (@stanza) {
	    ($depth, $l) = strip_indentation $l;
	    print_comment_top_level $depth, $l;
	}
    } elsif ($l =~ /^[!@]?account\s+(.*)/) {  # account declaration
	my ($account, $comment);
	# account foo ; bar
	# In ledger, this is parsed as account "foo ; bar"; in hledger as
	# account "foo" with comment "bar".
	# If there are two spaces, ledger will also parse it part of the
	# account name, but such an account name is invalid so treat it
	# as a comment.
	if ($config->{hledger}) {
	    ($account, $comment) = split /\s*;\s*/, $1, 2;
	} else {
	    ($account, $comment) = split /\s\s+;\s*/, $1, 2;
	}
	$account = map_account $account;
	@stanza = read_stanza \@input;
	# Avoid duplicate account declarations if two accounts are mapped
	# to the same account and both have account declarations.
	if ($account_declared{$account}) {
	    print_warning_once "Skipped second account declaration for $account (old $1)";
	    next;
	}
	$account_declared{$account} = 1;
	print_line $depth, sprintf "$config->{account_open_date} open %s%s", $account, $comment ? " ; $comment" : "";
	foreach $l (@stanza) {
	    ($depth, $l) = strip_indentation $l;
	    if ($l =~ /^note\s+(.*)/) {  # note
		print_line $depth, pp_metadata "description", mk_beancount_string $1;
	    } elsif ($l =~ /^$metadata_RE/) {  # metadata
		print_line $depth, pp_metadata $+{key}, mk_beancount_string $+{value};
	    } else {
		print_comment_top_level $depth, $l;
	    }
	}
    } elsif ($l =~ /^[!@]?commodity\s+(.*)/) {  # commodity declaration
	my ($commodity, $comment);
	$commodity = $1;
	if ($commodity =~ /^(".*")(.*)/) {
	    $commodity = $1;
	    $comment = $1 if $2 =~ /\s*;\s*(.*)/;
	} else {
	    ($commodity, $comment) = split /\s*;\s*/, $commodity, 2;
	}
	$commodity = map_commodity $commodity;
	@stanza = read_stanza \@input;
	# Avoid duplicate commodity declarations if two commodities are
	# mapped to the same commodity and both have commodity declarations.
	if ($commodity_declared{$commodity}) {
	    print_warning_once "Skipped second commodity declaration for $commodity (old $1)";
	    next;
	}
	$commodity_declared{$commodity} = 1;
	print_line $depth, sprintf "$config->{commodities_date} commodity %s%s", $commodity, $comment ? " ; $comment" : "";
	foreach $l (@stanza) {
	    ($depth, $l) = strip_indentation $l;
	    if ($l =~ /^note\s+(.*)/) {  # note
		print_line $depth, pp_metadata "name", mk_beancount_string $1;
	    } elsif ($l =~ /^format\s+(.*)/) {  # format
		next;  # skip directive, not needed in beancount
	    } elsif ($l =~ /^$metadata_RE/) {  # metadata
		print_line $depth, pp_metadata $+{key}, mk_beancount_string $+{value};
	    } else {
		print_comment_top_level $depth, $l;
	    }
	}
    } elsif ($l =~ /^[!@]?(payee\s+.*)/) {  # payee declaration
	print_comment_top_level $depth, $1;
	@stanza = read_stanza \@input;
	foreach $l (@stanza) {
	    ($depth, $l) = strip_indentation $l;
	    print_comment_top_level $depth, $l;
	}
    } elsif ($l =~ /^[!@]?(import\s+)/) {  # import (Python)
	# There's no equivalent
	($depth, $l) = strip_indentation $l;
	print_comment_top_level $depth, $l;
    } elsif ($l =~ /^[!@]?(python)/) {  # Python
	$after_auto = 1;
	# The python directive is special in the sense that empty
	# lines don't end the directive.
	do {
	    print_comment_top_level $depth, $l;
	    $l = shift @input;
	    chomp $l;
	} while ($l =~ /^(\s+|$)/ && @input);
	unshift @input, $l;
    } elsif ($l =~ /^[!@]?(tag\s+.*)/) {  # tag declaration
	# Not needed in beancount and there's no equivalent
	read_stanza \@input if $input[0] =~ /^\h+/;
    } elsif ($l =~ /^[!@]?(N|D|C|I|i|O|o|b|h|assert|check|expr|eval|value)(\s|$)/) {
	$after_auto = 1;
	    print_warning_once "Unsupported directive `$1` skipped";
	    # Not supported in beancount
	    print_comment_top_level $depth, $l;
    } elsif ($l =~ /^[0-9]/) {
	$after_auto = 1;
	if ($l =~ /$txn_header_RE/) {  # txn header
	    $in_postings = 0;
	    # You can have a comment on the same line as the payee
	    my ($narration, $comment) = split /  +;\s*|\t+;\s*/, $+{narration}, 2;
	    $narration = "" if not $narration;
	    push_header pp_date($+{date}, $year), $+{flag} ? $+{flag} : "txn", $narration;
	    $comment = handle_comment $depth + 1, "; $comment", 0 if $comment;
	    $cur_txn_header{comment} = $comment if $comment;
	    push_metadata $depth + 1, $config->{auxdate_tag}, pp_date($+{auxdate}, $year) if defined $+{auxdate} && defined $config->{auxdate_tag};
	    push_metadata $depth + 1, $config->{code_tag}, mk_beancount_string $+{code} if defined $+{code} && defined $config->{code_tag};
	    # Determine payee based on the narration field
	    if ($config->{hledger} && $narration =~ /$hledger_payee_narration_RE/) {
		push_payee $+{payee};
		$cur_txn_header{narration} = $+{narration};
	    }
	    foreach my $custom_narration_RE (@{$config->{payee_split}}) {
		if ($narration =~ /$custom_narration_RE/) {
		    push_payee $+{payee};
		    $cur_txn_header{narration} = $+{narration};
		    last;
		}
	    }
	    # Config `payee_match` is an array of hashes
	    my @payee_match = @{$config->{payee_match}};
	    my $match = 0;
	    while (!$match && @payee_match) {
		my $payee_match = shift @payee_match;
		foreach my $custom_narration_RE (keys %{$payee_match}) {
		    if ($narration =~ /$custom_narration_RE/) {
			push_payee ${$payee_match}{$custom_narration_RE};
			$match = 1;
		    }
		}
	    }
	    # ledger "apply tag"
	    foreach my $a (reverse @ledger_apply) {
		if (${$a}[0] eq "metadata") {
		    handle_metadata 1, ${$a}[1], 0;
		} elsif (${$a}[0] eq "link") {
		    print_tags 1, ${$a}[1];
		}
	    }
	} elsif ($l !~ /^$date_RE/) {
	    die "Cannot process date in transaction header: $l\n";
	} else {
	    die "Cannot process transaction header: $l\n";
	}
	@stanza = read_stanza \@input;
	process_txn @stanza;
    } elsif ($l =~ /^\h*$/) {
	print_line 0, "";
    } elsif ($l =~ /^--/) {  # ledger option
	next;
    } else {
	print_warning "Unknown line. Please report. Line: $l";
	print_line 0, $l;
    }
}

# Check for renames
foreach (sort keys %ledger_accounts) {
    my $map = map_account $_;
    if ($_ ne $map && !($map ~~ [values %{$config->{account_map}}])) {
        print_warning "Account $_ renamed to $map";
    }
}

foreach (sort keys %ledger_commodities) {
    my $map = map_commodity $_;
    if ($_ ne $map && $_ ne qq("$map") && !($map ~~ [values %{$config->{commodity_map}}])) {
        print_warning "Commodity $_ renamed to $map";
    }
}

foreach (sort keys %ledger_metadata) {
    my $map = map_metadata $_;
    if ($_ ne $map && !($map ~~ [values %{$config->{metadata_map}}])) {
        print_warning "Metadata key $_ renamed to $map";
    }
}

# Check for collisions
my %mapped_accounts;
foreach (keys %ledger_accounts) {
    push @{$mapped_accounts{map_account $_}}, $_;
}
foreach (sort keys %mapped_accounts) {
    if (@{$mapped_accounts{$_}} > 1) {
	print_warning "Collision for account $_: " . join ", ", sort @{$mapped_accounts{$_}};
    }
}

my %mapped_commodities;
foreach (keys %ledger_commodities) {
    push @{$mapped_commodities{map_commodity $_}}, $_;
}
foreach (sort keys %mapped_commodities) {
    if (@{$mapped_commodities{$_}} > 1) {
	print_warning "Collision for commodity $_: " . join ", ", sort @{$mapped_commodities{$_}};
    }
}

my %mapped_metadata;
foreach (keys %ledger_metadata) {
    push @{$mapped_metadata{map_metadata $_}}, $_;
}
foreach (sort keys %mapped_metadata) {
    if (@{$mapped_metadata{$_}} > 1) {
	print_warning "Collision for metadata $_: " . join ", ", sort @{$mapped_metadata{$_}};
    }
}


# Print everything

if (@conversion_notes) {
    print ";", "-"x70, "\n";
    print "; ledger2beancount conversion notes:\n";
    print ";\n";
    print ";   - $_\n" foreach @conversion_notes;
    print ";", "-"x70, "\n";
    print "\n";
}

print "option \"operating_currency\" \"$_\"\n" foreach @{$config->{operating_currencies}};

if ($config->{beancount_header}) {
    open my $beancount_header, $config->{beancount_header} or
	die "Can't file beancount header: $config->{beancount_header}";
    print foreach <$beancount_header>;
    close $beancount_header;
}

print $_ for (@pre_output);

if ($config->{automatic_declarations}) {
    # Print missing account and commodity declarations
    my $out;

    $out = "";
    for my $a (sort keys %account_declared) {
	$out .= sprintf "$config->{account_open_date} open $a\n" if not defined $account_declared{$a};
    }
    if ($out) {
	print $out;
	print "\n";
    }

    $out = "";
    for my $c (sort keys %commodity_declared) {
	$out .= sprintf "$config->{commodities_date} commodity $c\n" if not defined $commodity_declared{$c};
    }
    if ($out) {
	print $out;
	print "\n";
    }
}

# Print the converted beancount output
print $_ for (@output);

