changeset 3821:158fce02dc40

Teach convert-repo to deal with mixed charsets in git
author Matt Mackall <mpm@selenic.com>
date Thu, 07 Dec 2006 18:03:28 -0600
parents 4f056896c093
children 28134d82db9b ed5a9b27bedc
files contrib/convert-repo
diffstat 1 files changed, 15 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- a/contrib/convert-repo	Wed Dec 06 17:59:19 2006 -0600
+++ b/contrib/convert-repo	Thu Dec 07 18:03:28 2006 -0600
@@ -21,8 +21,20 @@
 # interrupted and can be run repeatedly to copy new commits.
 
 import sys, os, zlib, sha, time
+
+os.environ["HGENCODING"] = "utf-8"  # set before the mercurial import below; presumably read at import time -- TODO confirm
+
 from mercurial import hg, ui, util
 
+def recode(s):  # return byte string s re-encoded as UTF-8, guessing its source charset
+    try:
+        return s.decode("utf-8").encode("utf-8")  # first guess: input is already UTF-8
+    except UnicodeError:  # narrowed from bare except so KeyboardInterrupt/SystemExit propagate
+        try:
+            return s.decode("latin-1").encode("utf-8")  # second guess: Latin-1
+        except UnicodeError:  # last resort: lossy decode, bad bytes become U+FFFD
+            return s.decode("utf-8", "replace").encode("utf-8")
+
 class convert_git:
     def __init__(self, path):
         self.path = path
@@ -55,6 +67,7 @@
         c = self.catfile(version, "commit") # read the commit hash
         end = c.find("\n\n")
         message = c[end+2:]
+        message = recode(message)
         l = c[:end].splitlines()
         manifest = l[0].split()[1]
         parents = []
@@ -65,11 +78,13 @@
                 tm, tz = p[-2:]
                 author = " ".join(p[:-2])
                 if author[0] == "<": author = author[1:-1]
+                author = recode(author)
             if n == "committer":
                 p = v.split()
                 tm, tz = p[-2:]
                 committer = " ".join(p[:-2])
                 if committer[0] == "<": committer = committer[1:-1]
+                committer = recode(committer)
                 message += "\ncommitter: %s\n" % v
             if n == "parent": parents.append(v)