archanox/RGBDS2CIL

View on GitHub
RGBDS2CIL/Parser.cs

Summary

Maintainability
F
6 days
Test Coverage
using System;
using System.Collections.Generic;
using System.Collections.Immutable;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Text.RegularExpressions;
using Newtonsoft.Json;

namespace RGBDS2CIL
{
    public static class Parser
    {
        // Matches one of three alternatives: a ';' comment running to the end of the line (group 1, with
        // the line terminator or end-of-input captured in group 2), a double-quoted string with backslash
        // escapes (group 3), or an @"..." verbatim-style string (group 4). The string alternatives exist so
        // that the callers below can put strings back untouched and only drop matches that start with ';'.
        private static readonly Regex CommentRegex = new(@"(;.*?(\r?\n|$))|(""(?:\\[^\n]|[^""\n])*"")|(@(?:""[^""]*"")+)", RegexOptions.Compiled);

        // Matches text enclosed in a pair of identical single or double quotes, where the delimiting quote
        // is not preceded by a backslash. Group 1 is the quote character, group 2 the text between the quotes.
        private static readonly Regex GetStringsRegex = new("((?<![\\\\])['\"])((?:.(?!(?<![\\\\])\\1))*.?)\\1", RegexOptions.Compiled | RegexOptions.IgnoreCase);

        // Directory that INCLUDE / INCBIN file names are combined with in ParseLine.
        internal static string RootFolder { get; set; }

        // Definitions recorded by ParseLine as it meets them. These lists are static and are not cleared
        // anywhere in this class, so they accumulate across every file parsed in the process.
        private static List<LabelLine> Labels { get; } = new();
        private static List<ConstantLine> Constants { get; } = new();
        private static List<MacroLine> Macros { get; } = new();

        /// <summary>
        /// Serialises the parsed assembly lines to indented JSON text.
        /// </summary>
        /// <param name="parsedLines">The lines to serialise.</param>
        /// <returns>The JSON document as a string.</returns>
        public static string ExportJson(List<IAsmLine> parsedLines) =>
            JsonConvert.SerializeObject(
                parsedLines,
                new JsonSerializerSettings
                {
                    Formatting = Formatting.Indented,
                    // Auto writes a type name wherever the runtime type differs from the declared one,
                    // which is the case for every element of a list declared as IAsmLine.
                    TypeNameHandling = TypeNameHandling.Auto
                });

        /// <summary>
        /// Joins lines whose code ends with a backslash with the line(s) that follow them.
        /// </summary>
        /// <remarks>
        /// The list is edited in place: the first line of a continuation receives the joined text and
        /// every consumed follow-up line is set to <c>null</c>. Comments found on the joined lines are
        /// concatenated and re-attached after a single ';'. A continuation ends at the first follow-up
        /// line that has no code ending in a backslash (including blank or comment-only lines) or at the
        /// end of the input; a backslash on the very last line is left as it is.
        /// </remarks>
        /// <param name="fileLines">The raw lines; must allow element assignment.</param>
        /// <returns>A new array holding the remaining (non-null) lines in their original order.</returns>
        internal static string[] FlattenMultiLine(IList<string> fileLines)
        {
            for (var i = 0; i < fileLines.Count; i++)
            {
                if (string.IsNullOrWhiteSpace(fileLines[i])) continue;

                var code = RemoveCommentFromCode(fileLines[i]);
                if (code?.EndsWith('\\') != true) continue;

                var rowSkip = i + 1;
                var hasMore = true;

                // The bounds check keeps a dangling '\' on the last line from indexing past the list.
                while (hasMore && rowSkip < fileLines.Count)
                {
                    // Re-read line i every round: it now holds the text joined so far.
                    var comment = GetComment(fileLines[i]);
                    code = RemoveCommentFromCode(fileLines[i]);

                    var nextLine = fileLines[rowSkip];
                    var nextComment = GetComment(nextLine);
                    var nextCode = RemoveCommentFromCode(nextLine);

                    // nextCode is null for a blank or comment-only line; that simply ends the continuation.
                    hasMore = nextCode?.EndsWith('\\') == true;

                    var separator = !string.IsNullOrWhiteSpace(comment) || !string.IsNullOrWhiteSpace(nextComment)
                        ? "; "
                        : " ";
                    var joinedComment = (comment + " " + nextComment).Trim();

                    fileLines[i] = $"{code.TrimEnd('\\')} {nextCode} {separator}{joinedComment}".Trim();

                    // Mark the consumed line so it is skipped by the outer loop and dropped below.
                    fileLines[rowSkip] = null;

                    rowSkip++;
                }
            }

            return fileLines.Where(x => x is not null).ToArray();
        }

        /// <summary>
        /// Parses every line of a file, in order, into its line objects.
        /// </summary>
        /// <param name="fileLines">The lines to parse.</param>
        /// <param name="fileName">Name recorded on each parsed line.</param>
        /// <returns>All parsed lines; each input line is parsed with its zero-based position as line number.</returns>
        public static List<IAsmLine> GetLines(IEnumerable<string> fileLines, string fileName)
        {
            // Deliberately sequential: ParseLine appends to the shared static symbol lists, and later
            // lines are matched against what earlier lines registered.
            var parsed = fileLines.SelectMany((text, index) => ParseLine(text, fileName, index));
            return parsed.ToList();
        }

        /// <summary>
        /// Classifies one source line and wraps it in the matching <see cref="IAsmLine"/> implementation.
        /// </summary>
        /// <remarks>
        /// The tests run top to bottom and the first match wins, so their order is significant.
        /// Label, constant and macro definitions are also appended to the static <c>Labels</c>,
        /// <c>Constants</c> and <c>Macros</c> lists; a later line whose first token names one of them is
        /// then recognised as a label call, constant assignment or macro call.
        /// INCLUDE lines are expanded recursively: the included file is read, flattened and parsed into
        /// the include line's <c>Lines</c>.
        /// </remarks>
        /// <param name="fileLine">Raw text of the line.</param>
        /// <param name="fileName">File name passed through to the created line object.</param>
        /// <param name="line">Line number passed through to the created line object (GetLines supplies the zero-based index).</param>
        /// <returns>The parsed line objects; every branch currently produces exactly one.</returns>
        private static IEnumerable<IAsmLine> ParseLine(string fileLine, string fileName, int line)
        {
            var parsedLines = new List<IAsmLine>();

            var comment = GetComment(fileLine);
            var code = RemoveCommentFromCode(fileLine);

            if (code == null)
            {
                // Nothing but whitespace and/or a comment on this line.
                parsedLines.Add(new CommentLine(fileLine, comment, fileName, line));
                return parsedLines;
            }

            var codeLine = new CodeLine(code, fileLine, comment, fileName, line, GetStrings(code));

            // RemoveCommentFromCode returns trimmed, non-empty text, so tokens[0] is always a real token.
            var tokens = code.Split();

            if (code.CommandName("INCLUDE"))
            {
                var includeFile = new IncludeLine(codeLine, false);
                var path = Path.Combine(RootFolder, includeFile.IncludeFile);
                if (File.Exists(path))
                    includeFile.Lines.AddRange(GetLines(FlattenMultiLine(File.ReadAllLines(path)), includeFile.IncludeFile));
                else
                    Console.WriteLine($"Could not include file, {includeFile.IncludeFile}");

                parsedLines.Add(includeFile);
            }
            else if (code.CommandName("INCBIN"))
            {
                var binary = new IncludeLine(codeLine, true);
                var path = Path.Combine(RootFolder, binary.IncludeFile);

                //TODO: requires preprocessing of images? A missing binary is silently skipped for now.
                if (File.Exists(path))
                    binary.ReadBinaryFile(path);

                parsedLines.Add(binary);
            }
            else if (code.CommandName("SECTION"))
                parsedLines.Add(new SectionLine(codeLine));
            else if (code.CommandName("JP"))
                parsedLines.Add(new JumpLine(codeLine));

            // Label or macro definition: ".local", "Name:", "Name::", optionally followed by more text.
            else if (code.StartsWith('.') || code.EndsWith(':') || tokens[0].EndsWith(':'))
            {
                if (string.Equals(tokens[^1], "MACRO", StringComparison.OrdinalIgnoreCase))
                {
                    var macro = new MacroLine(codeLine, tokens[0]);
                    parsedLines.Add(macro);
                    Macros.Add(macro);
                }
                else
                {
                    //TODO: anything that follows the label on the same line is not parsed yet.
                    codeLine.Code = tokens[0];
                    var label = new LabelLine(codeLine);
                    Labels.Add(label);

                    parsedLines.Add(label);
                }
            }
            else if (code.CommandName("ENDM"))
                parsedLines.Add(new EndMacroLine(codeLine));

            else if (tokens.Contains("EQU", StringComparer.OrdinalIgnoreCase))
            {
                var constant = new ConstantLine(codeLine, "EQU");
                Constants.Add(constant);
                parsedLines.Add(constant);
            }
            else if (tokens.Contains("EQUS", StringComparer.OrdinalIgnoreCase))
            {
                var constant = new ConstantLine(codeLine, "EQUS");
                Constants.Add(constant);
                parsedLines.Add(constant);
            }
            else if (tokens.Length > 1 && tokens[1] == "=")
            {
                var constant = new ConstantLine(codeLine, "=");
                Constants.Add(constant);
                parsedLines.Add(constant);
            }
            else if (code.CommandName("SET"))
                parsedLines.Add(new VariableLine(codeLine));
            else if (string.Equals(code, "NOP", StringComparison.OrdinalIgnoreCase))
                parsedLines.Add(new NopLine(codeLine));
            else if (code.CommandName("LD"))
                parsedLines.Add(new LoadLine(codeLine, "LD"));
            else if (code.CommandName("LDI"))
                parsedLines.Add(new LoadLine(codeLine, "LDI"));
            else if (code.CommandName("LDD"))
                parsedLines.Add(new LoadLine(codeLine, "LDD"));
            else if (code.CommandName("CALL"))
                parsedLines.Add(new CallLine(codeLine));
            else if (code.CommandName("RST"))
                parsedLines.Add(new RestartLine(codeLine));
            else if (code.CommandName("CP"))
                parsedLines.Add(new SubtractCompareLine(codeLine));
            else if (code.CommandName("PUSHO"))
                parsedLines.Add(new PushOptionLine(codeLine));
            else if (code.CommandName("POPO"))
                parsedLines.Add(new PopOptionLine(codeLine));
            else if (code.CommandName("DI"))
                parsedLines.Add(new DisableInterruptsLine(codeLine));
            else if (code.CommandName("HALT"))
                parsedLines.Add(new HaltLine(codeLine));
            else if (code.CommandName("JR"))
                parsedLines.Add(new RelativeJumpLine(codeLine));
            else if (code.CommandName("XOR"))
                parsedLines.Add(new ExclusiveOrLine(codeLine));
            else if (code.CommandName("ADD"))
                parsedLines.Add(new AddLine(codeLine, false));
            else if (code.CommandName("ADC"))
                parsedLines.Add(new AddLine(codeLine, true));
            else if (code.CommandName("INC"))
                parsedLines.Add(new IncrementLine(codeLine));
            else if (code.CommandName("DEC"))
                parsedLines.Add(new DecrementLine(codeLine));
            else if (code.CommandName("SUB"))
                parsedLines.Add(new SubtractLine(codeLine, false));
            else if (code.CommandName("SBC"))
                parsedLines.Add(new SubtractLine(codeLine, true));
            else if (code.CommandName("DB")) //byte 8bit
                parsedLines.Add(new DefineLine(codeLine, typeof(byte)));
            else if (code.CommandName("DW")) //word 16bit (short)
                parsedLines.Add(new DefineLine(codeLine, typeof(short)));
            else if (code.CommandName("DL")) //double-word/long 32bit (int)
                parsedLines.Add(new DefineLine(codeLine, typeof(int)));
            else if (code.CommandName("ENDR"))
                parsedLines.Add(new EndRepeatLine(codeLine));
            else if (code.CommandName("ENDC"))
                parsedLines.Add(new EndConditionLine(codeLine));
            else if (code.CommandName("WARN"))
                parsedLines.Add(new WarnLine(codeLine));
            else if (code.CommandName("FAIL"))
                parsedLines.Add(new FailLine(codeLine));
            else if (code.CommandName("CHARMAP"))
                parsedLines.Add(new CharMapLine(codeLine));
            else if (code.CommandName("IF"))
                parsedLines.Add(new IfLine(codeLine, false));
            else if (code.CommandName("ELIF"))
                parsedLines.Add(new IfLine(codeLine, true));
            else if (code.CommandName("ELSE"))
                parsedLines.Add(new ElseLine(codeLine));
            else if (code.CommandName("RET"))
                parsedLines.Add(new ReturnLine(codeLine, false));
            else if (code.CommandName("RETI"))
                parsedLines.Add(new ReturnLine(codeLine, true));
            else if (string.Equals(code, "EI", StringComparison.OrdinalIgnoreCase))
                parsedLines.Add(new EnableInterruptsLine(codeLine));
            else if (code.CommandName("PURGE"))
                parsedLines.Add(new PurgeLine(codeLine));
            else if (code.CommandName("REPT"))
                parsedLines.Add(new RepeatLine(codeLine, "REPT"));
            else if (code.CommandName("FOR"))
                parsedLines.Add(new RepeatLine(codeLine, "FOR"));
            else if (code.CommandName("SHIFT"))
                parsedLines.Add(new ShiftLine(codeLine));
            else if (code.CommandName("POP"))
                parsedLines.Add(new PopLine(codeLine));
            else if (code.CommandName("PUSH"))
                parsedLines.Add(new PushLine(codeLine));
            else if (code.CommandName("RL"))
                parsedLines.Add(new RotateLeftLine(codeLine));
            else if (code.CommandName("RLA"))
                parsedLines.Add(new RotateALeftLine(codeLine));
            else if (code.CommandName("BIT"))
                parsedLines.Add(new BitLine(codeLine));
            else if (code.CommandName("LDH"))
                parsedLines.Add(new LoadHighLine(codeLine));
            else if (code.CommandName("AND"))
                parsedLines.Add(new AndLine(codeLine));
            else if (code.CommandName("CPL"))
                parsedLines.Add(new ComplementLine(codeLine));
            else if (code.CommandName("RRCA"))
                parsedLines.Add(new RotateRegisterARightLine(codeLine));
            else if (code.CommandName("RLCA"))
                parsedLines.Add(new RotateRegisterALeftLine(codeLine));
            else if (code.CommandName("RES"))
                parsedLines.Add(new ResetByteLine(codeLine));
            else if (code.CommandName("SCF"))
                parsedLines.Add(new SetCarryFlagLine(codeLine));
            else if (code.CommandName("CCF"))
                parsedLines.Add(new ComplementCarryFlagLine(codeLine));
            else if (code.CommandName("OPT"))
                parsedLines.Add(new OptionLine(codeLine));
            else if (code.CommandName("OR"))
                parsedLines.Add(new OrLine(codeLine));
            else if (code.CommandName("DS"))
                parsedLines.Add(new DeclareSpaceLine(codeLine));
            else if (code.CommandName("SWAP"))
                parsedLines.Add(new SwapLine(codeLine));
            else if (code.CommandName("RR"))
                parsedLines.Add(new RotateRegisterRightLine(codeLine));
            else if (code.CommandName("SRL"))
                parsedLines.Add(new ShiftRightLogicLine(codeLine));
            else if (code.CommandName("SLA"))
                parsedLines.Add(new ShiftLeftArithmeticLine(codeLine));
            else if (code.CommandName("DAA"))
                parsedLines.Add(new DecimalAdjustAccumulatorLine(codeLine));
            // NOTE(review): RRA (here) and RRC (below) produce the same line type as RR, whereas
            // RLA and RLC have line types of their own - confirm this is intended.
            else if (code.CommandName("RRA"))
                parsedLines.Add(new RotateRegisterRightLine(codeLine));
            else if (code.CommandName("STOP"))
                parsedLines.Add(new StopLine(codeLine));
            else if (code.CommandName("ASSERT"))
                parsedLines.Add(new AssertLine(codeLine));
            else if (code.CommandName("RRC"))
                parsedLines.Add(new RotateRegisterRightLine(codeLine));
            else if (code.CommandName("RLC"))
                parsedLines.Add(new RotateRegisterLeftLine(codeLine));
            else if (code.CommandName("SRA"))
                parsedLines.Add(new ShiftRightArithmeticLine(codeLine));
            else if (code.CommandName("LOAD"))
                parsedLines.Add(new Load2Line(codeLine));
            else if (code.CommandName("ENDL"))
                parsedLines.Add(new EndLoadLine(codeLine));

            //else if (code.CommandName("UNION"))
            //    parsedLines.Add(new UnionLine(codeLine));
            //else if (code.CommandName("NEXTU"))
            //    parsedLines.Add(new NextUnionLine(codeLine));
            //else if (code.CommandName("ENDU"))
            //    parsedLines.Add(new EndUnionLine(codeLine));

            //TODO:
            //PUSHC
            //POPC
            //SETCHARMAP
            //NEWCHARMAP
            //RSRESET
            //rsset

            //https://rgbds.gbdev.io/docs/master/rgblink.5#ORG
            //note: moves out the address
            //else if (code.CommandName("ORG"))
            //    parsedLines.Add(new OrgLine(codeLine));

            //ifdef
            // c# = "#if (DEBUG)"
            //endif
            // c# = "#endif"

            // Backslash followed by a digit. The length check keeps a line that is a lone
            // backslash from indexing past the end of the string.
            else if (code.Length > 1 && code[0] == '\\' && char.IsDigit(code[1]))
                parsedLines.Add(new MacroArgumentLine(codeLine));
            else
                parsedLines.Add(ResolveSymbolReference(codeLine));

            return parsedLines;
        }

        /// <summary>
        /// Matches the first token of a line against the labels, constants and macros registered so far
        /// (in that order) and returns the corresponding call/assignment line, or the plain code line
        /// when nothing matches.
        /// </summary>
        /// <param name="codeLine">The line that matched none of the known directives or instructions.</param>
        /// <returns>The line object to record for <paramref name="codeLine"/>.</returns>
        private static IAsmLine ResolveSymbolReference(CodeLine codeLine)
        {
            var name = codeLine.Code.Split()[0];

            // Labels are matched ignoring case. When several labels share a name the first one wins.
            //TODO: need to remove already declared labels, unless they are scoped differently?
            //TODO: get proper name spaced label
            var label = Labels.FirstOrDefault(x => string.Equals(x.LabelName, name, StringComparison.OrdinalIgnoreCase));
            if (label is not null)
                return new LabelCallLine(codeLine, label);

            // Constants are matched on the exact name, so the constant handed on is always the one
            // that made this branch apply.
            var constant = Constants.FirstOrDefault(x => string.Equals(x.ConstantName, name, StringComparison.Ordinal));
            if (constant is not null)
                return new ConstantAssignLine(codeLine, constant);

            // Macro names were stored as written in the definition, so strip the ':' before comparing.
            var macro = Macros.FirstOrDefault(x => string.Equals(x.Name.Trim(':'), name, StringComparison.Ordinal));
            if (macro is not null)
                return new MacroCallLine(codeLine, macro);

            // Unrecognised instruction: keep it as a generic code line rather than failing.
            return codeLine;
        }

        /// <summary>
        /// Returns the code part of a line with any ';' comment removed.
        /// </summary>
        /// <param name="fileLine">The raw line.</param>
        /// <returns>The trimmed code, or <c>null</c> when only whitespace remains.</returns>
        public static string RemoveCommentFromCode(string fileLine)
        {
            // Quoted strings are matched as well and put back unchanged; only matches that start
            // with ';' are replaced, and then only by their captured line ending.
            static string DropCommentKeepString(Match match) =>
                match.Value.StartsWith(";") ? match.Groups[2].Value : match.Value;

            var stripped = CommentRegex.Replace(fileLine, DropCommentKeepString).Trim();

            return string.IsNullOrWhiteSpace(stripped) ? null : stripped;
        }

        /// <summary>
        /// Extracts the comment text of a line, without its leading ';'.
        /// </summary>
        /// <param name="fileLine">The raw line.</param>
        /// <returns>The trimmed comment, or <c>null</c> when the line has none.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="fileLine"/> is <c>null</c>.</exception>
        public static string GetComment(string fileLine)
        {
            if (fileLine == null)
                throw new ArgumentNullException(nameof(fileLine));

            var codeOnly = CommentRegex
                .Replace(fileLine, m => m.Value.StartsWith(";") ? m.Groups[2].Value : m.Value)
                .Trim();

            // Whatever is left after cutting the code out of the raw line is the comment.
            // NOTE(review): this relies on the code being one contiguous run inside fileLine, which
            // holds for a single line; confirm callers never pass text with embedded newlines.
            var codeStart = fileLine.IndexOf(codeOnly, StringComparison.Ordinal);
            var remainder = fileLine.Remove(codeStart, codeOnly.Length).Trim();

            if (string.IsNullOrWhiteSpace(remainder))
                return null;

            var text = remainder.StartsWith(';') ? remainder[1..] : remainder;
            return text.Trim();
        }

        /// <summary>
        /// Collects the quoted strings that appear in a piece of code.
        /// </summary>
        /// <param name="code">The code to scan.</param>
        /// <returns>
        /// The matched strings with leading and trailing double quotes stripped; an empty list when
        /// <paramref name="code"/> is null or whitespace; <c>null</c> when the code contains no quoted text.
        /// </returns>
        public static List<string> GetStrings(string code)
        {
            if (string.IsNullOrWhiteSpace(code)) return new List<string>();

            var found = new List<string>();
            foreach (Match match in GetStringsRegex.Matches(code))
            {
                // Only double quotes are stripped; a single-quoted match keeps its quotes.
                found.Add(match.Value.Trim('"'));
            }

            return found.Count > 0 ? found : null;
        }

        /// <summary>
        /// Splits a comma-separated parameter list on its top-level commas.
        /// </summary>
        /// <remarks>
        /// Commas inside double-quoted strings or inside (possibly nested) parentheses do not split.
        /// Each parameter is returned without surrounding whitespace. A run of consecutive commas is
        /// treated as one separator, while a comma at the very start yields an empty first parameter.
        /// </remarks>
        /// <param name="code">The comma-separated text to split.</param>
        /// <returns>The parameters in order; an empty list when <paramref name="code"/> is null or whitespace.</returns>
        public static List<string> GetParameters(string code)
        {
            if (string.IsNullOrWhiteSpace(code)) return new List<string>();

            code = code.Trim();
            var matches = new List<string>();

            while (code.Length > 0)
            {
                var parameter = GetParameter(code);

                // Store the value without the blanks in front of the separator,
                // but advance by the raw length so nothing is skipped or re-read.
                matches.Add(parameter.TrimEnd());
                code = code[parameter.Length..].TrimStart(',').Trim();
            }

            // The loop runs at least once for non-blank input, so the list is never empty here.
            return matches;
        }

        /// <summary>
        /// Returns the leading part of <paramref name="code"/> up to, but not including, the first comma
        /// that is outside any double-quoted string and outside any parentheses.
        /// </summary>
        /// <param name="code">Non-empty text starting at the beginning of a parameter.</param>
        /// <returns>The raw parameter text, or all of <paramref name="code"/> when no such comma exists.</returns>
        private static string GetParameter(string code)
        {
            var depth = 0;
            var insideString = false;

            for (var i = 0; i < code.Length; i++)
            {
                var current = code[i];

                if (insideString)
                {
                    if (current == '\\')
                        i++; // skip the escaped character so \" does not close the string
                    else if (current == '"')
                        insideString = false;
                    continue;
                }

                switch (current)
                {
                    case '"':
                        insideString = true;
                        break;
                    case '(':
                        depth++;
                        break;
                    case ')':
                        // Ignore an unbalanced ')' instead of letting the depth go negative.
                        if (depth > 0) depth--;
                        break;
                    case ',' when depth == 0:
                        return code[..i];
                }
            }

            return code;
        }
    }
}