#!/usr/bin/env python
# -*- mode: python; coding: utf-8 -*-
# Copyright © 2001, 2002, 2003, 2004, 2006, 2007, 2014 Translation Project.
# Copyright © 1998, 1999, 2000, 2001 Progiciels Bourbeau-Pinard inc.
# François Pinard <pinard@iro.umontreal.ca>, 1998.
# Benno Schulenberg <benno@vertaalt.nl>, 2014.

"""\
Attempt automatic processing of PO file submissions.

Usage: tp-robot [-n] <MESSAGE

  -n   dry-run, do not send mails, do not change things
"""

import os, sys, subprocess, shutil, types
import re, string, datetime, getopt
import email, email.header, email.utils

sys.path.insert(0, sys.path[0]+'/../lib')
import config, po, registry, run

# Find sendmail in /usr/sbin or /usr/lib.
os.environ['PATH'] = '/usr/lib:/usr/sbin:' + os.environ['PATH']

# Unset possible localisation.
try: del os.environ['LANGUAGE']
except KeyError:pass
try: del os.environ['LANG']
except KeyError:pass


def _(text):
    """Return TEXT unchanged.

    Every message for submitters is passed through this function; for
    now it is a plain pass-through.
    """
    unchanged = text
    return unchanged

class AbortRobot(Exception):
    """Raised to stop the processing of the current submission.

    Deriving from Exception makes this a proper exception class: it can
    be raised on any Python version, and it is still caught by handlers
    that name AbortRobot explicitly as well as by bare 'except:' clauses.
    """

def abort_robot():
    """Stop handling the current submission by raising AbortRobot."""
    raise AbortRobot()

def main(*arguments):
    options, arguments = getopt.getopt(arguments, 'n')
    for (option, value) in options:
        if option == '-n':
            run.dry = 1
    if arguments:
        print "No arguments are allowed: use standard input."
        sys.exit(2)
    os.umask(002)
    run.hints = registry.hints()
    try:
        # Read message and extract sender and recipient.
        msg = email.message_from_file(sys.stdin)
        sender = email.utils.getaddresses(msg.get_all("from"))[0]
        run.translator_address = sender[1]
        run.translator_name, encoding = email.header.decode_header(sender[0])[0]
        if encoding:
            run.translator_name = run.translator_name.decode(encoding)
        else:
            run.translator_name = run.translator_name.decode('utf-8')
        recipient = email.utils.getaddresses(msg.get_all("to"))[0][1]

        # Save the message headers for an email to the team leader.
        run.envelope = msg.items()

        # Find the message part that contains a file.
        content = "empty..." * 100
        for part in msg.walk():
            filename = part.get_filename()
            stuff = part.get_payload(decode=True)
            if stuff and filename and ('.po' in filename):
                content = stuff
                break
            if stuff and (len(stuff) > len(content)):
                content = stuff

        # Check if the content is UU-encoded.
        if (content[:12] == "begin-base64") or (content[:7] == "begin 6"):
            endofline = content.find('\n')
            realname = content[content[:endofline].rfind(' ')+1:endofline]
            filename = 'encodedfile'

        # Require that the email contains a Subject line.
        if msg.get_all("subject") == None:
            run.reject(_("""
Your email does not contain a Subject line.
"""), "[no subject]")
            abort_robot()

        # Get filename from Subject line.
        match = re.search('([^ \t\'\"]+\.po)[^t]*', msg.get_all("subject")[0])
        if match:
            subjectname = match.group(1)
        else:
            subjectname = ''
        if not filename and not subjectname:
            run.reject(_("""\
Your submission does not contain any PO file, or it doesn't have a name.
"""), "[no name]")
            abort_robot()
        if not filename:
            filename = subjectname
        elif ('UTF-8' in filename) and subjectname:
            filename = subjectname

        # Save the file.
        thefile = open(filename, 'w')
        thefile.write(content)
        thefile.close()

        # UU-decode the file if needed.
        if filename == 'encodedfile':
            decoding = subprocess.Popen(['uudecode', 'encodedfile'], stderr=subprocess.PIPE)
            barf = decoding.stderr.read()
            if decoding.wait() != 0:
                run.reject(_("""\
Decoding your submission failed -- uudecode said:
%s""" % barf), "[uudecode failure]")
                abort_robot()
            os.remove('encodedfile')
            filename = realname

        # Unzip the file if needed.
        if filename.endswith(".gz"):
            unzipping = subprocess.Popen(['gunzip', filename], stderr=subprocess.PIPE)
            barf = unzipping.stderr.read()
            if unzipping.wait() != 0:
                run.reject(_("""\
Uncompressing your submission failed -- gunzip said:
%s""" % barf), "[gunzip failure]")
                abort_robot()
            filename = filename[:-3]

        # Set better subject line for response from robot.
        run.subject = filename

        # Reject a tarred up file -- too awkward.
        if filename.endswith(".tar") or filename.endswith(".tgz"):
            run.reject(_("""\
Please don't use tar to wrap your file, but just gzip.
"""), "[don't use tar]")
            abort_robot()

        # Check if the email was sent to an old address.
        if (recipient != 'robot@translationproject.org'):
            run.submitter.write(_("""\
Instead of using the strange address <%s> for PO file submissions,
please use my current address, which is: <robot@translationproject.org>.
""") % recipient)

        # Rename the file if the name in the Subject line is better.
        if len(subjectname) > len(filename):
            os.rename(filename, subjectname)
            filename = subjectname

        # Look at the name and entries of the file.
        extract.hints_from_name(filename)
        check_mail.header()
        entries = po.read(filename)
        if not entries:
            run.reject(_("""\
Your PO file seems to have no entries at all, like if it were empty.
"""), "[no entries]")
            abort_robot()

        # Check that the submitted file passes 'msgfmt -c'.
        archive_base = run.hints.archive_base()
        check_po_file.contents(filename, archive_base, "first")
        # Check the header fields of the PO file.
        header = po.header(entries)
        check_po_file.header(header)
        check_registry.translator()
        if check_registry.have_pot():
            # Check that also the merged file passes 'msgfmt -c'.
            process.make_canonical(filename, archive_base, entries, header)
            check_po_file.contents(filename, archive_base, "second")
        check_po_file.special_instructions(filename)
        # The submission is acceptable, unless it is a duplicate.
        check_registry.preexisting(filename)
        if run.rejected:
            abort_robot()
        process.congratulate()
        process.store_po_file(filename)
    except AbortRobot:
        try: os.remove(filename)
        except OSError: pass
    except:
        import traceback
        # Print the exception to get a message from cron.
        traceback.print_exc()
        tb = "".join(traceback.format_exception(*sys.exc_info()))
        run.reject(_("""\
The Translation Project robot failed in a rather unexpected way.
Please report all details at <coordinator@translationproject.org>.
"""), "[robot bug]")
        run.submitter.write_nofill(tb)
        run.coordinator.write_nofill(tb)
    # Push some feedback out, whatever happened.
    run.coordinator.complete()
    run.submitter.complete()


class globals:
    """Plain namespace for the state that the various checks share."""

    # Language spoken by this team.
    team_language = None
    # Email address for the team.
    team_mailto = None
    # Alternative email addresses.
    team_mailtos = list()
    # Addresses already reported, to avoid reporting the same twice.
    reported_unknown = list()
    # Extra paragraph for the report about an unknown address.
    unknown_address_insert = _("""\
The Translation Project registry holds the preferred email address for all
translators, and where necessary some aliases as well.  But it is better to
avoid the proliferation of alternate email addresses and stick to only one,
whenever possible.  In any case, please keep the translation coordinator
informed of them.""")


class Extract:
    """Derive the submission hints from the name of the submitted file."""

    def hints_from_name(self, input_name):
        """Merge INPUT_NAME into run.hints and refuse unusable names.

        The submission is rejected, and processing aborted, when the
        hints say it is a POT file, when domain or version are missing,
        or when no team is known.  Otherwise the language and the mail
        address of the team are stored in globals.
        """
        try:
            run.hints.merge(input_name)
        except (KeyError, ValueError):
            # ValueError: merge did not find a matching regular expression.
            pass
        hints = run.hints

        if hints.pot:
            run.reject(_("""\
The translation coordinator does not yet trust me enough to let me handle
a POT file all alone.  So, for this one, I'll go disturb him!
"""), "[robot does not support pot submission]")
            abort_robot()

        name_is_complete = hints.domain and hints.version
        if not name_is_complete:
            run.reject(_("""\
As file name you supplied '%s'.  In this %s.  The file name should look
like 'DOMAINNAME-VERSIONNUMBER.TEAMCODE.po' (all in lowercase).
""") % (input_name, unknown_elements_comment()), "[ill-formatted file name]")
            abort_robot()

        if not hints.team:
            run.reject(_("""\
Within the Translation Project, no team has the code '%s'.
""") % hints.team, "[unknown team code]")
            abort_robot()

        team = hints.team
        globals.team_language = team.language
        # Fall back on the address of the translator when the team has
        # no address of its own.
        globals.team_mailto = team.mailto[0] or run.translator_address

extract = Extract()


#################### Verifying the submission details ##############
class CheckMail:
    """Checks that use the sender of the email and the hints found so far."""

    def header(self):
        """Verify the sender address, the domain, the version and the team.

        A missing sender address or a missing domain or version aborts
        the processing.  An unknown team only rejects the submission;
        placeholder values for the team are then stored in globals, so
        that later checks can still run.
        """
        # Check if the mail contained a sender address.
        if not run.translator_address:
            run.reject(_("""\
I cannot find a translator's email address in your submission.
"""), "[no email address]")
            abort_robot()

        # Check if domain and version of PO file exist.
        if not (run.hints.domain and run.hints.version):
            run.reject(_("""\
In the Subject line %s.

As a reminder, the Subject line of your message should look like this:

    Subject: DOMAIN-VERSION.TEAM.po

DOMAIN is normally the package name, VERSION is the version number, and
TEAM is the two-letter language code for your team, all in lower case.
""") % unknown_elements_comment(), "[domain or version is missing]")
            abort_robot()

        # Check if a team for this language exists.
        team = run.hints.team
        if not team:
            run.reject(_("""\
Within the Translation Project, there is no team with the code '%s'.
If you simply mistyped the code, I invite you to correct the error
and retry the submission.

If however the code is correct, I presume you would like a new translation
team to be created.  This is a simple matter, really.  See
https://translationproject.org/leaders.html for all the details.
""") % team, "[unknown team]")
            globals.team_language = _('Unknown_Language')
            globals.team_mailto = run.translator_address
            return

        globals.team_language = team.language
        globals.team_mailto = team.mailto[0] or run.translator_address
        globals.team_mailtos = team.mailto[:]

check_mail = CheckMail()


#################### Verifying the PO header and contents ##########
class CheckPoFile:

    def header(self, header):
        """Run all header checks on HEADER, the header of the PO file.

        When the name of the translator is already known, the
        'Language-Team' field is checked before the 'Last-Translator'
        field; otherwise the order of these two is reversed.  The other
        checks always run afterwards, in a fixed order.
        """
        leading = [self.language_team, self.last_translator]
        if not run.translator_name:
            leading.reverse()
        remaining = [
            self.title,
            self.copyright,
            self.license,
            self.last_author,
            self.project_id_version,
            self.both_dates,
            self.mime_fields,
            ]
        for check in leading + remaining:
            check(header)

    def title(self, header):
        if not header['TITLE']:
            run.reject(_("""\
The PO title line seems to be missing.  The very first line of any PO
file should start with '# ' followed by a one-line description of what
the file is about.  The title line for this PO file could be:

    # %s translation for %s.
""") % (globals.team_language, run.hints.domain), "[no title header]")

    def copyright(self, header):
        text = header['COPYRIGHT']
        if isinstance(text, types.ListType):
            # Ignore other copyrights, just look at the first one.
            text = text[0]
        try:
            utext = po.decfunc(header)(text)[0]
        except UnicodeDecodeError:
            run.reject(_("""\
There is something wrong with the encoding of your submission.  Please
verify that the character set specified by the Content-Type field in
your file matches the actual encoding of the file."""))
            abort_robot()
        match = re.match('Copyright (\\(C\\)|\xa9|\xc2\xa9).* '
                         '(?P<first>(199[4-9]|20[01][0-9]|202[01234]|YEAR))'
                         '(, ((19)?9[4-9]|(20)?[01][0-9]|(20)?2[01234]))* '
                         '(?P<author>.*)', text)
        if match:
            # FIXME: Better validate year lists.
            author = 'Free Software Foundation, Inc.'
            definite_author = 0
            if run.hints.domain and run.hints.domain.potcopyright:
                definite_author = 1
                author = run.hints.domain.potcopyright
            if match.group('first') == "YEAR":
                run.reject(_("""\
Please replace YEAR in the copyright line with the year the corresponding
POT file was released, probably %s.
""") % datetime.date.today().strftime('%Y'), "[YEAR should be numbers]")
            if author == 'bsdzero':
                return
            if match.group('author') == author:
                return
            if run.hints.domain and \
               (run.hints.domain.disclaim or run.hints.domain.potcopyright):
                if not definite_author:
                    comment = _("""\
For the time being, I merely guess that the Free Software Foundation is to
be listed as the copyright holder for any package requiring translation
disclaimers.  Yet, the truth is that this requirement goes further than
the spirit of the disclaimer.  If this creates any problem in real life,
please write to the translation coordinator, so we can discuss the matter,
and so he could eventually change me (the robot) for handling such cases.
""")
                    reason = "[Copyright not FSF]"
                else:
                    comment = ""
                    reason = "[Copyright not %s]" % author
                run.reject(_("""\
The POT copyright should be assigned to "%s".  So the copyright line
should look like this:

    # Copyright (C) YEAR %s

Maybe there is some extra text or a superfluous period?  YEAR should
normally be the year the last package version was published.  %s
""") % (author, author, comment), reason)
        elif run.hints.domain and run.hints.domain.disclaim:
            if text:
                comment = (_("""\
The copyright comment is not fully correct:

    # %s

It should instead look like:
""") % utext)
            else:
                comment = _("""\
The copyright line, which should appear as the second line of the whole
PO file, seems to be missing.  The copyright line should look like:
""")
            run.reject(_("""\
%s
    # Copyright (C) YEAR Free Software Foundation, Inc.

The YEAR should be either a single four-digit year, or a list of years
separated by commas, each comma followed by a space.  Years have to be
explicitly enumerated, range notations are not accepted.  The '(C)'
triplet (case matters) may be replaced with a single character if the
encoding of your file allows it: either the single Latin-1 byte
(decimal code 169) or the equivalent UTF-8 sequence (\\xc2\\xa9).
""") % comment, "[copyright incorrectly formatted]")

    def license(self, header):
        packagename = run.hints.domain.package or run.hints.domain.name
        nameandversion = "%s-%s" % (packagename, run.hints.version)
        if run.hints.domain.potcopyright == 'bsdzero':
            clause = re.match('# This file is published under the BSD Zero Clause License.', header['LICENSE'])
            spdx = re.match('# SPDX-License-Identifier: 0BSD', header['LICENSE'])
            if not clause and not spdx:
                run.reject_nofill(_("""\
Your PO file should contain a comment line saying that
the file is published under the BSD Zero Clause License.
Please add exactly this line near the beginning:

    # This file is published under the BSD Zero Clause License.
"""), "[license should be BSD zero]")
            return
        if re.match('# This file is put in the public domain.', header['LICENSE']):
            return
        if not header['LICENSE']:
            run.reject_nofill(_("""\
Your PO file does not seem to contain a comment line specifying which
license covers the file.  Such a line is now mandatory.  It must be placed
before the author lines, and should say exactly this:

    # This file is distributed under the same license as the %s package.
""") % packagename, "[missing license line]")
            return
        match = re.match('# This file is distributed under the same licen[cs]e'
                         ' as the (.*) package.', header['LICENSE'])
        if not match or (string.lower(match.group(1)) != packagename and
                         string.lower(match.group(1)) != nameandversion):
            run.reject_nofill(_("""\
The license line is not fully correct:

    %s

It should say exactly this, the final period included:

    # This file is distributed under the same license as the %s package.
""") % (header['LICENSE'], packagename), "[license line incorrect]")


    def last_author(self, header):
        match1 = re.search('.*; *(.*[^ ]) *<(.*)>.*?([0-9]*)$',
                          header['AUTHORS'])
        match2 = re.search('.*; ([^ ].+[^ ]) <([^ ]+)>, .*(199[4-9]|20[01][0-9]|202[01234])$',
                          header['AUTHORS'])
        if not match1:
            run.reject(_("""\
There is no author comment line in your PO file, or it is not formatted
correctly.  Author lines should come after the copyright lines, and look
like this:

    # NAME OF AUTHOR <EMAIL@ADDRESS>, YEAR.

There may be several of these lines, normally in chronological order.
"""), "[missing or ill-formatted author line]")
            run.submitter.write(_("""\
If you wish to add some general comments to the PO file, you can put them
after the title, copyright, and author lines.  There should be a space
after the '#' that starts each comment line, and no blank line before
the very first 'msgid' (the empty one).
"""))
            return
        if ((match1 and not match2) or
            (match1.group(1, 2, 3) != match2.group(1, 2, 3))):
            run.submitter.write(_("""\
There is a slight formatting error in one of the author lines:
superfluous spaces, no name, a lacking comma, or a bad year.
"""))
        last_name, last_address, last_year = match1.group(1, 2, 3)
        last_name = po.decfunc(header)(last_name)[0]
        try:
            translator = registry.translator(run.hints.team,
                                             last_name, last_address)
        except KeyError:
            return  # FIXME: maybe change this to pass.
        if last_name != run.translator_name:
            run.submitter.write(_("""\
The last name in the author lines differs from the name in the header
field.  The last author line mentions '%s', while the 'Last-Translator'
field says '%s'.  Maybe the author lines are in the wrong order?  The
expected order is chronological: the earliest translator first, and
your own author line last.
""") % (last_name.encode('utf-8'), run.translator_name.encode('utf-8')))
        elif last_address != run.translator_address:
            run.submitter.write(_("""\
Your submission contains contradicting addresses for you.  The last of
the introductory author lines says <%s>, while the 'Last-Translator'
field says <%s>.  Please arrange to have a single preferred address to
reach you, and use it consistently in all your translation files.
""") % (last_address, run.translator_address))
        elif last_year != datetime.date.today().strftime('%Y'):
            run.submitter.write(_("""\
The final year (%s) given in the last author line (which should be yours)
is not the current year.  Please correct either the year or the order of
the author lines: the earliest author first, the latest last.
""") % last_year)

    def project_id_version(self, header):
        match = re.search('^(Free |GNU )?(?P<dom>%s)?[- ]?(?P<ver>%s)?$' %
                          (registry.DOMAIN, registry.VERSION),
                          header['project-id-version'])
        if match:
            if match.group('dom') != run.hints.domain.name:
                run.reject(_("""\
The 'Project-Id-Version' field of your PO file says that the textual domain
is '%s', instead of '%s' as the file name says.  They should be the same. 
The 'Project-Id-Version' field should contain both the domain name and the
version number.
""") % (match.group('dom'), run.hints.domain),
                           "[domain in project-id conflicts with file name]")
            if match.group('ver') != run.hints.version.name:
                run.reject(_("""\
The 'Project-Id-Version' field of your file says the translation
is meant for version '%s' of '%s', but the file name says it is for
version '%s'.  Please adjust either the 'Project-Id-Version' field
or the file name, whichever is appropriate.
""") % (match.group('ver'), run.hints.domain.name, run.hints.version.name),
                           "[version in project-id conflicts with file name]")
        else:
            run.reject(_("""\
The 'Project-Id-Version' field was not found in your PO file header,
or is not formatted properly.  The line should look like:

    "Project-Id-Version: PACKAGE VERSION\\n"

in which PACKAGE should be replaced by the textual domain of your
translation, probably '%s' in this case, and VERSION by the appropriate
version number, probably '%s'.
""") % (run.hints.domain.name, run.hints.version.name),
                       "[no project-id-version]")

    def both_dates(self, header):
        date = {}
        for field in 'POT-Creation-Date', 'PO-Revision-Date':
            value = header[string.lower(field)]
            match = re.match('(199[4-9]|20[01][0-9]|202[01234])-[01][0-9]-[0-3][0-9] '
                             '[0-2][0-9]:[0-5][0-9]( ?[-+][0-1][0-9](:?00)?)?',
                             value)
            if match:
                date[field] = value
            elif field == 'POT-Creation-Date':
                run.submitter.write(_("""\
The 'POT-Creation-Date' field is missing from your PO file header, or
misformatted.  I will copy the original line from the POT file.
"""))
            else:
                run.reject(_("""\
The 'PO-Revision-Date' field was not found in your PO file header,
or is not formatted properly.  The line should look like:

    "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\\n"
"""), "[missing revision date]")
                run.submitter.write(_("""\
In the above line, YEAR is a four-digit number giving the year,
MO is a two-digit number giving the month (from 01 to 12), and DA
is a two-digit number giving the day within the month (from 01 to 31).
The time of day is provided by a two-digit HO for the hour (from
00 to 23) and a two-digit MI for the minutes (from 00 to 59).

As for +ZONE, it should have the format +HHMM or -HHMM.
The '+' sign is used east of Greenwich, the '-' sign west of Greenwich.
HH gives the number of zone hours (from 00 to 11),
while MM gives the number of zone minutes.
"""))
        if ('POT-Creation-Date' in date and 'PO-Revision-Date' in date):
            po_stamp = date['PO-Revision-Date']
            pot_stamp = date['POT-Creation-Date']
            if time_to_same_zone(po_stamp) <= time_to_same_zone(pot_stamp):
                run.reject(_("""\
The PO revision date is '%s', which is older than the POT creation date,
'%s'.  Please update the revision date field before submitting a PO file.
""") % (date['PO-Revision-Date'], date['POT-Creation-Date']),
                           "[revision date before creation date]")

    def last_translator(self, header):
        text = header['last-translator']
        match = re.match('(.*?)( +)<(.+)>$', text)
        if match:
            last_fullname, last_address = match.group(1, 3)
            last_fullname = po.decfunc(header)(last_fullname)[0]
            if len(match.group(2)) != 1:
                run.submitter.write(_("""\
There is a slight formatting error.  In the 'Last-Translator' header line,
please use no more than a single space between '%s' and '<%s>'.
""") % (last_fullname.encode('utf-8'), last_address))
            if not run.translator_name:
                run.translator_name = last_fullname
            if not run.translator_address:
                run.translator_address = last_address
            try:
                translator = registry.translator(run.hints.team,
                                                 last_fullname, last_address)
            except KeyError:
                run.reject(_("""\
According to my notes, %s is not a member of the %s team. 
If you wish to join the %s team, then please write to its leader
(see %s/team/%s.html), or else to the project coordinators. 
If it is only a matter of spelling, then please tell the project
coordinators about the possible alternatives.
""") % (last_fullname.encode('utf-8'), _(globals.team_language),
        _(globals.team_language), config.site_base, run.hints.team.code),
                           "[translator not in team]")
                abort_robot()
            if last_address not in translator.mailto:
                if last_address not in globals.reported_unknown:
                    run.reject(_("""\
%s is the last translator of this PO file.  But according to my notes,
<%s> is not a good address to reach that translator.  %s %s
""") % (last_fullname.encode('utf-8'), last_address,
        say_alias_list(translator.mailto), globals.unknown_address_insert),
                           "[unknown address]")
                    globals.reported_unknown.append(last_address)
                    globals.unknown_address_insert = ''
            elif last_fullname.encode('utf-8') != run.translator_name.encode('utf-8'):
                run.submitter.write(_("""\
The header of your email says that you are '%s', while the
'Last-Translator' field of the PO file header says your name
is '%s'.  If the difference is merely a spelling variation,
there is no problem.  As I expect you have most control over your
PO files, I'll consider the second name as the most dependable.
""") % (run.translator_name.encode('utf-8'), last_fullname.encode('utf-8')))
            elif last_address != run.translator_address:
                run.submitter.write(_("""\
Your message was sent from '%s', but the 'Last-Translator' field in the
PO file says that your email address is '%s'.  I assume that the latter
is correct.
""") % (run.translator_address, last_address))
            # Override email data with those found in PO file.
            run.translator_name = last_fullname
            run.translator_address = last_address
        else:
            run.reject(_("""\
The 'Last-Translator' field was not found in your PO file header,
or is not formatted properly.  The line should look like:

    "Last-Translator: FULL NAME <EMAIL@ADDRESS>\\n"

In this line, FULL NAME has to be replaced by the full name of the current
translator for the '%s' textual domain (which most probably means you :-);
EMAIL@ADDRESS gives a way to reach that translator by email.
""") % run.hints.domain, "[last-translator missing]")

    def language_team(self, header):
        text = header['language-team']
        match = re.match('(.*?)( *)<(.+)>$', text)
        if match:
            maybe_language, maybe_mailto = match.group(1, 3)
            try:
                maybe_language.decode('ascii')  # language name should be ASCII
            except UnicodeError:
                m = []
                for c in maybe_language:
                    if ord(c) < 128: m.append(c)
                    else: m.append('\\%2x' % ord(c))
                maybe_language = ''.join(m)
            if len(match.group(2)) > 1:
                run.submitter.write(_("""\
There is a slight formatting error.  In the 'Language-Team' header line,
please use no more than a single space between '%s' and '<%s>'.
""") % (maybe_language, maybe_mailto))
            if maybe_language != globals.team_language:
                run.reject(_("""\
You wrote '%s' in the 'Language-Team' field of the PO file header, while
I think it should have been '%s'.
""") % (maybe_language, _(globals.team_language)), "[language-team incorrect]")
            if not globals.team_mailto:
                run.reject(_("""\
Each translation team should publish some mailing list address, meant
to appear (between angular brackets) after the language name in the
'Language-Team' field of the PO file header.  The Translation Project
does not have such an address for the %s team.  You might already know
some list dedicated to internationalisation efforts for %s, which may
fit your needs.  In case you know none, waiting to get better organised,
you might use the address of one of the team members, maybe yours if you
are alone in the team!  In any case, please inform the translation
coordinator of the address your team decides to use.
""") % (_(globals.team_language), _(globals.team_language)),
                           "[team address missing]")
            elif maybe_mailto != globals.team_mailto and \
                     maybe_mailto not in globals.team_mailtos:
                run.reject_nofill(_("""\
The 'Language-Team' field of the PO file header contains
  <%s> as the team's
email address, but the team's current address is
  <%s>.
""") % (maybe_mailto, globals.team_mailto), "[team address incorrect]")
        else:
            run.reject(_("""\
The 'Language-Team' field was not found in your PO file header,
or is not formatted properly.  The line should look like:

    "Language-Team: LANGUAGE <TEAM-EMAIL-ADDRESS>\\n"

In this line, LANGUAGE is the English name of your language, capitalizing
the first letter of its words and leaving the rest in lower case.
In several cases the TEAM-EMAIL-ADDRESS takes the form of
'translation-team-TEAM@lists.sourceforge.net', where TEAM is the ISO-639
code for your language (not to be confused with country codes, which are
a different standard).  If I had to write this line myself, right now,
according to the information which is available to me, I would use:

    "Language-Team: %s <%s>\\n"
""") % (globals.team_language, globals.team_mailto), "[language team missing]")

    def mime_fields(self, header):
        if header['mime-version'] != '1.0':
            run.reject(_("""\
The 'MIME-Version' field was not found in your PO file header,
or is not formatted properly.  The line should be this one:

    "MIME-Version: 1.0\\n"
"""), "[mime-version is not 1.0]")
        charsets = [
            'us-ascii',
            'ISO-8859-1',
            'ISO-8859-2',
            'ISO-8859-3',
            'ISO-8859-7',
            'ISO-8859-8',
            'ISO-8859-9',
            'ISO-8859-13',
            'ISO-8859-15',
            'koi8-r',
            'koi8-u',
            'EUC-KR', 'big5', 'big5-hkscs',
            'ISO-2022-JP', 'EUC-JP',
            'gb2312', 'gb18030',
            'UTF-8',
            ]
        match = re.match('text/plain; charset=(.*)', header['content-type'])
        if match:
            charset = match.group(1)
            if string.lower(charset) not in map(string.lower, charsets):
                run.reject(_("""\
The 'Content-Type' field introduced an unusual character set '%s'.
If this is not an error, please merely inform the translation coordinator
that '%s' be added to the list of acceptable charsets.
This list currently holds: %s.
""") % (charset, charset, say_list(_('or'), charsets)),
                           "[unknown charset in content-type]")
            run.po_charset = charset
        else:
            run.reject(_("""\
The 'Content-Type' field was not found in your PO file header,
or is not formatted properly.  The line should look like:

    "Content-Type: text/plain; charset=CHARSET\\n"

where CHARSET should be the name of a character set, like one of %s.
""") % say_list(_('or'), charsets),
                       "[content-type missing or ill-formatted]")
        encodings = ['8-bit', '8bit', 'Base64']
        cte = header['content-transfer-encoding']
        if cte:
            if cte not in encodings:        # FIXME: ignore case
                run.reject(_("""\
The 'Content-Transfer-Encoding' field introduced an unusual encoding
'%s'.  If this is not an error, please inform the translation
coordinator that '%s' be added to the list of acceptable charsets.
This list currently holds: %s.
""") % (cte, cte, say_list(_('and'), encodings)),
                           "[unknown content-transfer-encoding]")
        else:
            run.reject(_("""\
The 'Content-Transfer-Encoding' field was not found in your PO file header,
or is not formatted properly.  The line should look like:

    "Content-Transfer-Encoding: ENCODING\\n"

where ENCODING should be the name of an encoding, like one of %s.
""") % say_list(_('or'), encodings), "[content-transfer-encoding missing]")

    def scan_gettext_output(self, lines, work, virtual):
        """Turn the output LINES of msgfmt into lines to quote in a reply.

        A line that merely gives the number of translated messages is
        dropped, whether it uses the singular or the plural form.  In
        every other line a leading WORK (the name of the work file) is
        replaced with VIRTUAL, and the line gets prefixed with '> '.
        Returns the list of resulting lines.
        """
        comment = []
        for line in lines:
            # For a single message msgfmt says 'message', not 'messages'.
            if re.match(r'[0-9]+ translated messages?\.$', line):
                continue
            if line.startswith(work):
                line = virtual + line[len(work):]
            comment.append('> %s' % line)
        return comment

    def contents(self, work, virtual, cycle):
        try:
            mfmt = subprocess.Popen(['msgfmt', '-cv', work], stderr=subprocess.PIPE)
            lines = mfmt.stderr.readlines()
            reject = mfmt.wait()
        except:
            run.reject(_("""\
I am unable to run the 'msgfmt' program.  Sorry.  Something needs
to be fixed here.  I'll go warn the translation coordinator.
"""), "[msgfmt failed]")
            abort_robot()
        comment = self.scan_gettext_output(lines, work, virtual)
        if reject:
            run.reject_nofill(_("""\
As a last check, I ran 'msgfmt -cv -o /dev/null' on your PO file.  It
told me that I cannot let the file pass.  It reported these errors:
%s""") % ('').join(comment), "[msgfmt errors]")
            abort_robot()
        if comment and (cycle == "first"):
            run.submitter.write_nofill(_("""\
Merely for your information, let me share with you what the
'msgfmt' program has to say about your PO file:
%s""") % ('').join(comment))
##            # Extract the translated, fuzzy, and untranslated amounts.
##            match = re.match('> ([0-9]+) .*?([0-9]+) .*?([0-9]*)', comment[0])
##            if match:
##                done = eval(match.group(1))
##                left = eval(match.group(2)) + (eval(match.group(3)+'0') / 10)
##                if done < left:
##                    run.reject(_("""\
##The number of translated strings is less than half of the total number
##of strings.  Please translate more before submitting a file.
##"""), "[too few translations]")
##                    abort_robot()

    def special_instructions(self, work):
        if run.hints.domain.name == 'util-linux':
            contents = open(work, 'r').read()
            if contents.find('Permission is granted to freely copy and distribute') == -1:
                run.submitter.write(_("""\
At first sight it seems your translation does not contain a clause that
permits redistribution.  Please consider inserting a sentence like
"""))
                run.submitter.write_nofill("""\
# Permission is granted to freely copy and distribute
# this file and modified versions, provided that this
# header is not removed and modified versions are marked
# as such.
""")
                
check_po_file = CheckPoFile()


#################### Verifying submitter, existence, permission ####
class CheckRegistry:
    """Checks of a submission against the registry and the archives.

    Problems are reported through run.reject() or run.reject_nofill().
    """

    def translator(self):
        """Verify that the sender may submit a file for the current domain.

        Looks up the sender in the registry by team, name and address, and
        then checks in turn: that the sending address is a registered one,
        that the textual domain is known, that the sender is allowed to
        submit for this domain, and that a disclaimer is on file when the
        domain requires one.  Each failed check produces a rejection.
        Returns nothing.
        """
        # NOTE(review): no definition of `globals` is visible in this part
        # of the file; the attribute accesses below presumably rely on a
        # module-level object of that name -- confirm that it exists.
        try:
            translator = registry.translator(run.hints.team,
                                             run.translator_name,
                                             run.translator_address)
        except KeyError:
            run.reject(_("""\
According to my notes, you are not a member of the %s team. Or at least,
you are not listed there under the name '%s'.  If this is only a matter
of spelling differences, then I need to know exactly which alternate
spellings of your name you use.  Write to the translation coordinator
and explain why you need several different names.  But it is usually
best to avoid such variance, and stick to a single preferred spelling.
""") % (_(globals.team_language), run.translator_name),
                       "[translator not found]")
            return
        # Complain about a given unregistered address only once.
        if (run.translator_address not in translator.mailto
                and run.translator_address not in globals.reported_unknown):
            run.reject(_("""\
According to my notes, the address <%s> is not a valid way to reach you. 
%s %s
""") % (run.translator_address, say_alias_list(translator.mailto),
        globals.unknown_address_insert),
                       "[translator address not in registry]")
            globals.reported_unknown.append(run.translator_address)
            globals.unknown_address_insert = ''
        if not run.hints.domain:
            run.reject(_("""\
The '%s' textual domain is not known to the Translation Project, at
least not under that spelling.  It may also be that the maintainer of
that project did not make arrangements yet for the Translation Project to
handle its PO files.  In this case, please invite the project maintainer to
contact us.  See https://translationproject.org/domain/index.html
for the list of domains that are currently handled.
""") % run.hints.domain, "[unknown domain]")
            # Without a known domain the permission and disclaimer checks
            # below have nothing to work on, so stop here, just like the
            # translator-not-found case above does.
            return
        ok, reason = self.translator_may_submit(translator, run.hints.domain)
        if not ok:
            run.reject(_("""\
You are not the usual translator for '%s'.  If you wish to have this
domain assigned to you, send an email to the TP coordinator with a CC
to your team leader.
""") % run.hints.domain, reason)
        elif run.hints.domain.disclaim and not translator.disclaimer:
            run.reject(_("""\
According to my notes, the Free Software Foundation did not acknowledge the
receipt of a translation disclaimer for you.  Such a disclaimer is required
for '%s'.  See https://translationproject.org/html/whydisclaim.html for an
explanation of this.
""") % run.hints.domain, "[translator has no disclaimer]")

    def have_pot(self):
        """Tell whether a template file exists for the submission.

        Returns 1 when the file at run.hints.template_path() exists;
        otherwise rejects the submission and returns 0.
        """
        if os.path.isfile(run.hints.template_path()):
            return 1
        run.reject(_("""\
You submitted a PO file for which the Translation Project does not have
a template -- %s.  If appropriate, please urge the package maintainer
to send this POT file, or the URL of the corresponding tarball, to the TP
coordinators.
""") % run.hints.template_base(), "[there is no corresponding pot file]")
        return 0

    def preexisting(self, work):
        """Reject the file at `work` when the archive copy compares equal.

        The comparison is delegated to registry.compare_files().
        """
        if registry.compare_files(work, run.hints.archive_path()):
            run.reject_nofill(_("""\
The Translation Project already holds an exact copy of your submission,
which you may find as:

    %s
""") % run.hints.archive_url(), "[duplicate submission]")

    def translator_may_submit(self, translator, domain):
        """Decide whether `translator` may submit a file for `domain`.

        Returns a pair (flag, reason): flag is 1 when the submission is
        allowed and 0 when it is not, and reason is a short bracketed
        explanation of the decision.
        """
        # If it is the assigned translator or the team leader, accept.
        if domain in translator.do:
            return 1, "[translator is assigned]"
        if translator == run.hints.team.leader:
            return 1, "[team leader can upload any domain]"
        # If someone else is assigned or it is external, reject.
        if run.hints.team.translator_for_domain(domain):
            return 0, "[someone else is assigned to this domain]"
        if run.hints.team.code in domain.ext:
            return 0, "[the domain is externally translated]"
        # For Finnish, require explicit assignments.
        if run.hints.team.code in ["fi"]:
            return 0, "[not explicitly assigned]"
        return 1, "[any team member can do unassigned/non-external domain]"

##      # If there is no preexisting translation, accept.
##      file = run.hints.maintainer_path()
##      if not os.path.exists(file):
##          return 1, "[first submission to this domain]"
##      # If it exists, find its submitter, the "last maker".
##      import data
##      stats = data.load_postats()
##      hints = registry.Hints(os.readlink(file))
##      try:
##          st = stats[(hints.domain.name, hints.version.name, hints.team.name)]
##          last_maker, email = st[0], st[1]
##      except KeyError:
##          # Stats not updated yet, need to read PO file.
##          content = po.read(file)
##          last_maker, email = po.last_translator(po.header(content))
##      try:
##          last_maker = registry.translator(hints.team, last_maker, email)
##      except KeyError:
##          # If the last maker is not a team member, allow the new submission.
##          return 1, "[unknown last translator]"
##      # If the current submitter equals the last maker, accept.
##      if translator == last_maker:
##          return 1, "[translator equals last translator]"
##      else:
##          return 0, "[someone else made the last submission]"

# Module-level CheckRegistry instance.
check_registry = CheckRegistry()


#################### Canonicalizing and storing the file ###########
class Process:
    """Canonicalise an accepted PO file, store it, and tell the submitter."""

    def make_canonical(self, where, virtual, entries, header):
        """Rewrite the PO file at `where` in canonical form, in place.

        The file is recoded and merged against the domain's template with
        'msgmerge -q --previous --no-wrap', and the result is handed to the
        add-x-bugs-field.sh script.  A unified diff between the recoded
        original and the result, labelled with `virtual`, is shown to the
        submitter (cut off after 50 lines) when the two differ.

        `entries` and `header` are accepted but not used here.  Calls
        abort_robot() when the diff cannot be obtained or when the
        canonical file cannot be moved into place.
        """
        # NOTE(review): the file names are interpolated unquoted into shell
        # command lines -- confirm that they can never contain shell
        # metacharacters.
        os.system('recode -f /cl <%s | msgmerge -q --previous --no-wrap - %s >%s.norm'
                  % (where, run.hints.template_path(), where))
        os.system('%s/bin/add-x-bugs-field.sh %s.norm' % (config.progs_path, where))
        try:
            lines = os.popen('recode -f /cl <%s | diff -u -L %s~ -L %s - %s.norm'
                             % (where, virtual, virtual, where)).readlines()
        except Exception:
            # Ordinary errors only: KeyboardInterrupt and SystemExit must
            # not be mistaken for a failing tool.
            run.reject(_("""\
I am unable to run the 'diff' program.  Sorry.  Something needs
to be fixed here.  I'll go warn the translation coordinator.
"""), "[diff or recode failed]")
            abort_robot()
        # FIXME: Maybe detect and better explain `msgid' reformatting, trailing
        # space elimination, and other less evident matters.
        try:
            os.remove(where)
            os.rename('%s.norm' % where, where)
        except Exception:
            run.reject(_("""\
I am unable to canonicalise your PO file.  Sorry.  Something needs
to be fixed here.  I'll go warn the translation coordinator.
"""), "[remove or rename failed]")
            abort_robot()
        if not lines:
            return
        if len(lines) > 50:
            lines = lines[:50] + ["[truncated]\n"]
        run.submitter.write(_("""\
The Translation Project is trying to reach a consistent presentation for
PO files, over all domains and languages.  Your submission was not fully
canonical, so the robot has made the following modifications to your PO
file.  You can use 'patch' to apply these changes to your local file,
but you do not need to: the robot will continue making them patiently and
automatically here.  This message continues after the (maybe long) diff.
"""))
        # Possibly non-ASCII byte strings, don't try to translate.
        run.submitter.write_nofill("""\
---------------------------------------------------------------------->
%s----------------------------------------------------------------------<
"""
                                   % ''.join(lines))

    def congratulate(self):
        """Mark the reply as accepted and thank the submitter.

        When the sender is found in the registry and has no `autosend`
        setting, the automatic mailing of updated PO files is advertised.
        """
        run.subject = 'TP: %s [ACCEPTED]' % run.shorten(run.subject)
        run.submitter.write(_("""\
Your file has been accepted and stored in the archives.  Thank you!
"""))
        try:
            translator = registry.translator(run.hints.team,
                                             run.translator_name,
                                             run.translator_address)
        except KeyError:
            # Sender not found in the registry: nothing more to say.
            return
        if not translator.autosend:
            run.submitter.write(_("""\
If you wish, I can email you an updated copy of this PO file whenever
a new template file for it is registered at the TP.  If you want this
service for yourself, ask the translation coordinator.
"""))

    def store_po_file(self, work_name):
        """Move the file `work_name` into place and register it.

        The file is moved to config.temp_path under its archive name and
        'po-register' is run on it there (with '-n' in dry mode).  The
        original message headers and the output of 'po-register' are then
        written to the coordinator.
        """
        archive_base = run.hints.archive_base()
        incoming = '%s/%s' % (config.temp_path, archive_base)
        # Remove any file left over from a previous try in dry mode.
        if os.path.isfile(incoming):
            os.remove(incoming)
        shutil.move(work_name, incoming)
        dry_flag = '-n' if run.dry else ''
        try:
            lines = os.popen(
                'cd %s && %s/bin/po-register %s %s 2>&1'
                % (config.temp_path, config.progs_path, dry_flag, archive_base)
                ).readlines()
        except Exception:
            # Ordinary errors only: KeyboardInterrupt and SystemExit must
            # not be mistaken for a failing tool.
            run.reject(_("""\
I am unable to run the 'po-register' program.  Sorry.  Something needs
to be fixed here.  I'll go warn the translation coordinator.
"""), "[po-register failed]")
            return
        report = []
        if run.envelope:
            report.append(_("Original message header:\n\n"))
            report.extend('>    {0}: {1}\n'.format(item[0], item[1])
                          for item in run.envelope)
        report.append(_("\nDiagnostics issued by 'po-register':\n\n"))
        report.extend('>    ' + line for line in lines)
        report.append(_("\nIf something needs changing, write to:\n"))
        report.append(_(" <coordinator@translationproject.org>\n"))
        run.coordinator.write_nofill(''.join(report))

# Module-level Process instance.
process = Process()


def time_to_same_zone(stamp):
    """Shift the clock part of a time stamp by its time zone offset.

    `stamp` is expected to look like 'YYYY-MM-DD HH:MM+hhmm'; the zone
    offset may be absent, in which case nothing is shifted.  The result is
    'YYYY-MM-DD H:M', where H and M are the hour and the minute with the
    zone offset subtracted and then biased by 200, so that both remain
    positive three-digit numbers.  No carry is done between minutes, hours
    and date, so the result is not a real clock time.

    The numeric fields are parsed with int() rather than eval(), so that
    the contents of the stamp are never evaluated as code.  Raises
    ValueError when a numeric field does not hold a number.
    """
    date = stamp[:10]
    clock = stamp[11:16]
    # Hours and minutes must stay positive and three-digit, also after shift.
    hour = 200 + int(clock[:2])
    minute = 200 + int(clock[-2:])
    sign = stamp[-5:-4]
    if sign in ("+", "-"):
        factor = -1 if sign == "-" else 1
        hour -= factor * int(stamp[-4:-2])
        minute -= factor * int(stamp[-2:])
    return "%s %s:%s" % (date, hour, minute)

## Diagnostics.

def unknown_elements_comment():
    """Return a sentence naming the hints that could not be determined.

    Each of run.hints.domain, run.hints.version and run.hints.team that
    is false contributes its description to the list, in that order.
    """
    candidates = (
        (run.hints.domain, _("the domain name")),
        (run.hints.version, _("the version number")),
        (run.hints.team, _("the team code")),
    )
    missing = [label for value, label in candidates if not value]
    return _("I cannot find %s") % say_list(_('nor'), missing)

def say_alias_list(items):
    """Return a sentence stating which addresses in `items` are registered.

    The wording depends on whether there are none, one, or several
    addresses; several addresses are listed in sorted order.
    """
    count = len(items)
    if count > 1:
        return _("""\
The only registered addresses for this translator are: %s.
""") % say_list(_('and'), sorted(items))
    if count == 1:
        return _("""\
The only registered address for this translator is <%s>.
""") % items[0]
    return _("""\
No official email address is registered for this translator.
""")

def say_list(word, items):
    """Join `items` into an enumeration whose last item follows `word`.

    Returns 'none' for no items, the item itself for a single one, and
    otherwise the items separated by commas with `word` placed before the
    last one.
    """
    count = len(items)
    if count == 0:
        return 'none'
    if count == 1:
        return items[0]
    leading = ', '.join(items[:-1])
    final = items[-1]
    return '%s, %s %s' % (leading, word, final)


# Matches one encoded word of the form =?charset?encoding?atom?= (the
# RFC 2047 syntax for non-ASCII text in mail headers) and captures its
# three parts in the named groups 'charset', 'encoding' and 'atom'.
ecre = re.compile(r'''
  =\?                   # literal =?
  (?P<charset>[^?]*?)   # non-greedy up to the next ? is the charset
  \?                    # literal ?
  (?P<encoding>[qb])    # either a "q" or a "b", case insensitive
  \?                    # literal ?
  (?P<atom>.*?)         # non-greedy up to the next ?= is the atom
  \?=                   # literal ?=
  ''', re.VERBOSE | re.IGNORECASE)


if __name__ == '__main__':
    # Hand the command-line arguments to main() as separate positional
    # arguments; argument unpacking replaces the deprecated apply() builtin.
    main(*sys.argv[1:])