Skip to content

Commit

Permalink
Verify IETF and ISO language tag mappings, set Language using MkvProp…
Browse files Browse the repository at this point in the history
…Edit not LanguageIetf.
  • Loading branch information
ptr727 committed Apr 2, 2023
1 parent 71a2e90 commit e0bda7e
Show file tree
Hide file tree
Showing 8 changed files with 159 additions and 72 deletions.
79 changes: 65 additions & 14 deletions PlexCleaner/Language.cs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ namespace PlexCleaner;

public static class Language
{
// Get the IETF/RFC-5646/BCP-47 tag from a ISO-639-2B or similar tag
public static string GetIetfTag(string language, bool nullOnFailure)
{
if (string.IsNullOrEmpty(language))
Expand All @@ -28,22 +29,22 @@ public static string GetIetfTag(string language, bool nullOnFailure)
return None;
}

// Handle "chi" as "zho"
// Handle "chi" as "zho" for Matroska
// https://gitlab.com/mbunkus/mkvtoolnix/-/issues/1149
if (language.Equals("chi", StringComparison.OrdinalIgnoreCase))
{
return Chinese;
}

// Get ISO639-3 record
var iso6393 = GetIso6393(language);
// Get ISO639 record
var iso6393 = GetIso639(language);
if (iso6393 == null)
{
Log.Logger.Error("ISO639-3 language match not found : {Language}", language);
Log.Logger.Error("ISO639 language match not found : {Language}", language);
return nullOnFailure ? null : Undefined;
}

// Get a CultureInfo from the 639-3 3 letter code
// Get a CultureInfo from the ISO639-3 3 letter code
// E.g. afr -> afr
// E.g. ger -> deu
// E.g. fre -> fra
Expand All @@ -52,14 +53,69 @@ public static string GetIetfTag(string language, bool nullOnFailure)
var cultureInfo = CreateCultureInfo(iso6393.Id) ?? CreateCultureInfo(iso6393.Part1);
if (cultureInfo == null)
{
Log.Logger.Error("CultureInfo match not found : {Language}", language);
Log.Logger.Warning("CultureInfo not found : {Language}", language);
return nullOnFailure ? null : Undefined;
}

// Return the IETF tag
// Return the IETF tag
return cultureInfo.IetfLanguageTag;
}

/// <summary>
/// Get the ISO-639-2B tag from an IETF / RFC-5646 / BCP-47 tag.
/// </summary>
/// <param name="language">IETF language tag to convert, may be null or empty.</param>
/// <param name="nullOnFailure">
/// When true a failed conversion returns null, when false it returns <see cref="Undefined"/>.
/// </param>
/// <returns>
/// The ISO-639-2B code for the language, or the failure value when no usable code could be found.
/// </returns>
public static string GetIso639Tag(string language, bool nullOnFailure)
{
    // Single failure value shared by every not-found path
    var notFound = nullOnFailure ? null : Undefined;

    if (string.IsNullOrEmpty(language))
    {
        return notFound;
    }

    // Undefined "und"
    if (language.Equals(Undefined, StringComparison.OrdinalIgnoreCase))
    {
        return Undefined;
    }

    // No linguistic content "zxx"
    if (language.Equals(None, StringComparison.OrdinalIgnoreCase))
    {
        return None;
    }

    // Handle "chi" as "zho" for Matroska
    // https://gitlab.com/mbunkus/mkvtoolnix/-/issues/1149
    if (language.Equals(Chinese, StringComparison.OrdinalIgnoreCase))
    {
        return "chi";
    }

    // Get ISO639 record by matching the tag as is
    var iso639 = GetIso639(language);
    if (iso639 == null)
    {
        // No direct match, resolve the tag through CultureInfo and retry with its ISO code
        var cultureInfo = CreateCultureInfo(language);
        if (cultureInfo == null)
        {
            Log.Logger.Warning("CultureInfo not found : {Language}", language);
            return notFound;
        }

        // Get ISO639 record from cultureInfo ISO code
        iso639 = GetIso639(cultureInfo.ThreeLetterISOLanguageName);
        if (iso639 == null)
        {
            Log.Logger.Warning("ISO639 not found : {Language}", cultureInfo.ThreeLetterISOLanguageName);
            return notFound;
        }
    }

    // A matched record is only useful if it carries a Part 2B code, otherwise an empty
    // value would be returned and the nullOnFailure contract would be bypassed
    // NOTE(review): assumes Part2B is null or empty for records without an ISO 639-2 code, confirm against the Iso6393 data
    if (string.IsNullOrEmpty(iso639.Part2B))
    {
        Log.Logger.Warning("ISO639-2B not found : {Language}", language);
        return notFound;
    }

    // Return the Part 2B code
    return iso639.Part2B;
}

public static CultureInfo CreateCultureInfo(string language)
{
// Get a CultureInfo representation
Expand Down Expand Up @@ -135,14 +191,9 @@ public static List<string> GetLanguageList(IEnumerable<TrackInfo> tracks)
return languages.ToList();
}

/// <summary>
/// Case insensitive ordinal comparison of two language tags.
/// </summary>
/// <param name="source">First tag, may be null.</param>
/// <param name="target">Second tag, may be null.</param>
/// <returns>True when both tags are equal ignoring case, or when both are null.</returns>
public static bool IsEqual(string source, string target)
{
    // Use the static Equals so a null source compares as not equal instead of throwing
    return string.Equals(source, target, StringComparison.OrdinalIgnoreCase);
}

public static Iso6393 GetIso6393(string language)
/// <summary>
/// Look up the ISO 639 record for a language tag.
/// Delegates to Iso6393.FromString over Iso6393List; callers in this file treat a null result as no match.
/// </summary>
public static Iso6393 GetIso639(string language) =>
    // Match with any record
    Iso6393.FromString(language, Iso6393List);

Expand Down
39 changes: 13 additions & 26 deletions PlexCleaner/MkvPropEditTool.cs
Original file line number Diff line number Diff line change
Expand Up @@ -36,22 +36,13 @@ public bool SetTrackLanguage(string fileName, MediaInfo mediaInfo)
StringBuilder commandline = new();
DefaultArgs(fileName, commandline);

// Set the language property not the language-ietf property
// https://gitlab.com/mbunkus/mkvtoolnix/-/wikis/Languages-in-Matroska-and-MKVToolNix#mkvpropedit

// TODO: Should we be skipping und?
// Only set tracks that are set and not undefined
var trackList = mediaInfo.GetTrackList().Where(item => !string.IsNullOrEmpty(item.LanguageAny) && !Language.IsEqual(item.LanguageAny, Language.Undefined));
foreach (var trackItem in trackList)
{
// Set language or language-ietf property
commandline.Append($"--edit track:@{trackItem.Number} ");
if (!string.IsNullOrEmpty(trackItem.LanguageIetf))
{
commandline.Append($"--set language-ietf={trackItem.LanguageIetf} ");
}
else
{
commandline.Append($"--set language={trackItem.Language} ");
}
}
var trackList = mediaInfo.GetTrackList().Where(item => !Language.IsUndefined(item.LanguageAny)).ToList();
trackList.ForEach(item => commandline.Append($"--edit track:@{item.Number} --set language={item.LanguageAny} "));

// Set language on all unknown tracks
int exitCode = Command(commandline.ToString());
Expand All @@ -72,21 +63,17 @@ public bool SetTrackFlags(string fileName, MediaInfo mediaInfo)
{
// Setting a flag does not unset the counter flag, e.g. setting default on one track does not unset default on other tracks
// TODO: Should we set all flags for all tracks (the command line gets very long), or only the flags that are set?
// var flagList = TrackInfo.GetFlags().ToList();

// Iterate over all known flags
/*
foreach (var flagType in TrackInfo.GetFlags())
// Get flags list for this track
var flagList = TrackInfo.GetFlags(trackItem.Flags).ToList();
if (flagList.Count > 0)
{
// Set flag
commandline.Append($"--edit track:@{trackItem.Number} --set {GetTrackFlag(flagType)}={(trackItem.Flags.HasFlag(flagType) ? 1 : 0)} ");
}
*/
// Edit track
commandline.Append($"--edit track:@{trackItem.Number} ");

// Iterate over set flags
foreach (var flagType in TrackInfo.GetFlags(trackItem.Flags))
{
// Set flag
commandline.Append($"--edit track:@{trackItem.Number} --set {GetTrackFlag(flagType)}=1 ");
// Set flag by name
flagList.ForEach(item => commandline.Append($"--set {GetTrackFlag(item)}=1 "));
}
}

Expand Down
6 changes: 3 additions & 3 deletions PlexCleaner/Process.cs
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ private static bool ProcessFile(string fileName, out bool modified, out SidecarF
bool result;

// Process in jump loop
for (; ; )
for (;;)
{
// Skip the file if it is in the ignore list
if (Program.Config.ProcessOptions.FileIgnoreList.Contains(fileName))
Expand Down Expand Up @@ -254,7 +254,7 @@ private static bool ProcessFile(string fileName, out bool modified, out SidecarF
break;
}

// FfMpeg or HandBrake could undo the prrevious cleanup, repeat
// FfMpeg or HandBrake could undo the previous cleanup, repeat
if (!processFile.RepairMetadataErrors(ref modified) ||
!processFile.SetUnknownLanguageTracks(ref modified) ||
!processFile.RemoveTags(ref modified) ||
Expand Down Expand Up @@ -284,7 +284,7 @@ private static bool ProcessFile(string fileName, out bool modified, out SidecarF
break;
}

// Return current state and fileinfo
// Return current state and file info
if (processFile != null)
{
state = processFile.State;
Expand Down
6 changes: 5 additions & 1 deletion PlexCleaner/ProcessFile.cs
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ public bool DeleteMismatchedSidecarFile(ref bool modified)

// File deleted, do not continue processing
modified = true;
SidecarFile.State |= SidecarFile.StatesType.FileDeleted;
return false;
}

Expand Down Expand Up @@ -284,6 +285,9 @@ public bool RepairMetadataRemux(ref bool modified)
return true;
}

// Per MkvToolNix docs remux is the recommended approach to correcting language tags
// Honor Program.Config.ProcessOptions.SetIetfLanguageTags as lots of older media does not have IETF tags set

// Any tracks need remuxing
if (!HasMetadataErrors(TrackInfo.StateType.Remove) &&
!HasMetadataErrors(TrackInfo.StateType.ReMux) &&
Expand Down Expand Up @@ -1787,7 +1791,7 @@ public SelectMediaInfo FindDuplicateTracks()
foreach (var language in languageList)
{
// Get all tracks matching this language
var trackLanguageList = trackList.FindAll(item => Language.IsEqual(language, item.LanguageIetf));
var trackLanguageList = trackList.FindAll(item => language.Equals(item.LanguageIetf, StringComparison.OrdinalIgnoreCase));

// If multiple audio tracks exist for this language, keep the preferred audio codec track
var audioTrackList = trackLanguageList.FindAll(item => item.GetType() == typeof(AudioInfo));
Expand Down
2 changes: 1 addition & 1 deletion PlexCleaner/SubtitleInfo.cs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ internal SubtitleInfo(MediaInfoToolXmlSchema.Track track) : base(track)
// Set track error and recommend remove, remux does not fix this error
HasErrors = true;
State = StateType.Remove;
Log.Logger.Warning("MediaInfoToolXmlSchema : MuxingMode not specified for S_VOBSUB Codec (Recommend: {State})", State);
Log.Logger.Warning("MediaInfoToolXmlSchema : MuxingMode not specified for S_VOBSUB Codec : {State}", State);
}
}
}
59 changes: 43 additions & 16 deletions PlexCleaner/TrackInfo.cs
Original file line number Diff line number Diff line change
Expand Up @@ -72,44 +72,71 @@ internal TrackInfo(MkvToolJsonSchema.Track trackJson)
Flags |= FlagsType.Forced;
}

// ISO 639-3 tag
// ISO 639-2B tag
Language = trackJson.Properties.Language;
// IETF / BCP 47 / RFC 5646 tag

// IETF / RFC 5646 BCP 47 tag
// https://gitlab.com/mbunkus/mkvtoolnix/-/wikis/Languages-in-Matroska-and-MKVToolNix
// https://r12a.github.io/app-subtags/
LanguageIetf = trackJson.Properties.LanguageIetf;

// Language but no IETF language
// If both Language and LanguageIetf are set, verify they match
if (!string.IsNullOrEmpty(Language) && !string.IsNullOrEmpty(LanguageIetf))
{
// Lookup the ISO-639-2B from LanguageIetf and compare with Language
var lookupLanguage = PlexCleaner.Language.GetIso639Tag(LanguageIetf, true);
if (string.IsNullOrEmpty(lookupLanguage) ||
!Language.Equals(lookupLanguage, StringComparison.OrdinalIgnoreCase))
{
// Set track error and recommend ReMux
HasErrors = true;
State = StateType.ReMux;
Log.Logger.Warning("MkvToolJsonSchema : LanguageIetf to Language Mismatch : {LanguageIetf} !-> {Language} : {State}", LanguageIetf, Language, State);
}
}

// Language is set but IETF language is not set
if (!string.IsNullOrEmpty(Language) && string.IsNullOrEmpty(LanguageIetf))
{
// Set track error and recommend SetLanguage
// Set track error and recommend SetLanguage (ReMux with check for SetIetfLanguageTags)
HasErrors = true;
State = StateType.SetLanguage;

// Convert the ISO-639-3 tag to RFC-5646
// Get the RFC-5646 tag from the ISO-639-2B tag
var lookupLanguage = PlexCleaner.Language.GetIetfTag(Language, true);
if (string.IsNullOrEmpty(lookupLanguage))
{
// No matching language found
Log.Logger.Warning("MkvToolJsonSchema : Failed to lookup IETF language from ISO639-3 language : {Language} (Recommend: {State})", Language, State);
Log.Logger.Warning("MkvToolJsonSchema : IETF language not set, failed to lookup IETF language from ISO639 language : {Language} : {State}", Language, State);
}
else
{
// Set IETF from lookup
LanguageIetf = lookupLanguage;
Log.Logger.Warning("MkvToolJsonSchema : IETF language not set, converting ISO639-3 to IETF : {Language} -> {IetfLanguage} (Recommend: {State})", Language, lookupLanguage, State);
Log.Logger.Warning("MkvToolJsonSchema : IETF language not set, converting ISO639 to IETF : {Language} -> {LanguageIetf} : {State}", Language, LanguageIetf, State);
}
}

// If either Language or LanguageIetf is undefined, the other has to be undefined as well
if (PlexCleaner.Language.IsEqual(Language, PlexCleaner.Language.Undefined) &&
!PlexCleaner.Language.IsEqual(LanguageIetf, PlexCleaner.Language.Undefined))
// Language is not set but IETF language is set
if (string.IsNullOrEmpty(Language) && !string.IsNullOrEmpty(LanguageIetf))
{
// Set track error and recommend ReMux
// Set track error and recommend remux
HasErrors = true;
State = StateType.ReMux;
Log.Logger.Warning("MkvToolJsonSchema : Undefined Language and LanguageIetf Mismatch : {Language} != {LanguageIetf} (Recommend: {State})", Language, LanguageIetf, State);

// Get the ISO-639-2B tag from the RFC-5646 tag
var lookupLanguage = PlexCleaner.Language.GetIso639Tag(LanguageIetf, true);
if (string.IsNullOrEmpty(lookupLanguage))
{
// No matching language found
Log.Logger.Warning("MkvToolJsonSchema : ISO639 language not set, failed to lookup ISO639 language from IETF language : {Language} : {State}", LanguageIetf, State);
}
else
{
// Set ISO-639-2B from lookup
Language = lookupLanguage;
Log.Logger.Warning("MkvToolJsonSchema : ISO639 language not set, converting IETF to ISO639: {LanguageIetf} -> {Language} : {State}", LanguageIetf, Language, State);
}
}

// If the "language" and "tag_language" fields are set FfProbe uses the tag language instead of the track language
Expand All @@ -121,7 +148,7 @@ internal TrackInfo(MkvToolJsonSchema.Track trackJson)
// Set track error and recommend remux
HasErrors = true;
State = StateType.ReMux;
Log.Logger.Warning("MkvToolJsonSchema : Tag Language and Track Language Mismatch : {TagLanguage} != {Language} (Recommend: {State})",
Log.Logger.Warning("MkvToolJsonSchema : Tag Language and Track Language Mismatch : {TagLanguage} != {Language} : {State}",
trackJson.Properties.TagLanguage, trackJson.Properties.Language, State);
}

Expand Down Expand Up @@ -191,7 +218,7 @@ internal TrackInfo(FfMpegToolJsonSchema.Stream trackJson)
// Set track error and recommend remux
HasErrors = true;
State = StateType.ReMux;
Log.Logger.Warning("FfMpegToolJsonSchema : Invalid Language : {Language} (Recommend: {State})", Language, State);
Log.Logger.Warning("FfMpegToolJsonSchema : Invalid Language : {Language} : {State}", Language, State);
}

// Leave the Language as is, no need to verify
Expand Down Expand Up @@ -329,7 +356,7 @@ public void SetFlagsFromTitle(string log)
HasErrors = true;
State = StateType.SetFlags;
Flags |= tuple.Item2;
Log.Logger.Warning("{Log} : Setting track Flag from Title : {Title} -> {Flag} (Recommend: {State})", log, Title, tuple.Item2, State);
Log.Logger.Warning("{Log} : Setting track Flag from Title : {Title} -> {Flag} : {State}", log, Title, tuple.Item2, State);
}
}
}
Expand Down
15 changes: 15 additions & 0 deletions PlexCleanerTests/LanguageTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -41,4 +41,19 @@ public void NotMatch_Language_Tags(string prefix, string tag)
{
Assert.False(PlexCleaner.Language.IsMatch(prefix, tag));
}

// Each case pairs an IETF tag with the ISO-639-2B tag GetIso639Tag is expected to return
[Theory]
[InlineData("af", "afr")]
[InlineData("de", "ger")]
[InlineData("fr", "fre")]
[InlineData("en", "eng")]
[InlineData("", "und")]
[InlineData("und", "und")]
[InlineData("zxx", "zxx")]
[InlineData("zh", "chi")]
[InlineData("xxx", "und")]
public void Convert_LanguageIetf_Tags(string ietf, string iso639)
{
    // Act, nullOnFailure is false so a failed lookup is reported as "und" and not null
    var actual = PlexCleaner.Language.GetIso639Tag(ietf, false);

    // Assert
    Assert.Equal(iso639, actual);
}
}
Loading

0 comments on commit e0bda7e

Please sign in to comment.