Validate file content type for txt, log, JSON file in C#

Surajit Kumar Shah 0 Reputation points
2025-01-08T17:16:17.0733333+00:00

/// <summary>
/// Checks whether the leading bytes of an uploaded file match any signature
/// registered in <c>_fileSignatures</c>, regardless of the file's extension.
/// </summary>
/// <param name="file">The uploaded file to inspect. May be null.</param>
/// <returns>
/// <c>true</c> when the start of the file equals at least one known signature;
/// <c>false</c> otherwise, including when <paramref name="file"/> is null or empty.
/// </returns>
public static bool IsFileValid(IFormFile file)
{
    // A missing or empty upload cannot match any signature; report it as invalid
    // instead of failing with a NullReferenceException on OpenReadStream().
    if (file is null || file.Length == 0)
    {
        return false;
    }

    // Flatten all signatures to a single list; the extension keys are not used here.
    var signatures = _fileSignatures.Values.SelectMany(x => x).ToList();
    if (signatures.Count == 0)
    {
        return false;
    }

    // Only as many bytes as the longest signature are ever needed.
    var longestSignature = signatures.Max(signature => signature.Length);

    using var reader = new BinaryReader(file.OpenReadStream());

    // ReadBytes returns fewer bytes when the file is shorter than requested;
    // Take + SequenceEqual then simply fails to match the longer signatures.
    var headerBytes = reader.ReadBytes(longestSignature);

    return signatures.Any(signature => headerBytes.Take(signature.Length).SequenceEqual(signature));
}

// Known leading-byte signatures ("magic numbers") keyed by file extension.
// Each extension maps to one or more alternative byte sequences.
// Plain-text formats (.txt without a BOM, .log, .json) have no signature and
// therefore cannot be recognised through this table.
private static readonly Dictionary<string, List<byte[]>> _fileSignatures = new()
{
    { ".gif", new List<byte[]> { new byte[] { 0x47, 0x49, 0x46, 0x38 } } },                          // "GIF8"
    { ".png", new List<byte[]> { new byte[] { 0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A } } },  // "\x89PNG\r\n\x1A\n"
    { ".jpeg", new List<byte[]>
        {
            new byte[] { 0xFF, 0xD8, 0xFF, 0xE0 },
            new byte[] { 0xFF, 0xD8, 0xFF, 0xE2 },
            new byte[] { 0xFF, 0xD8, 0xFF, 0xE3 },
            new byte[] { 0xFF, 0xD8, 0xFF, 0xEE },
            new byte[] { 0xFF, 0xD8, 0xFF, 0xDB },
        }
    },
    { ".jpeg2000", new List<byte[]> { new byte[] { 0x00, 0x00, 0x00, 0x0C, 0x6A, 0x50, 0x20, 0x20, 0x0D, 0x0A, 0x87, 0x0A } } },
    { ".jpg", new List<byte[]>
        {
            new byte[] { 0xFF, 0xD8, 0xFF, 0xE0 },
            new byte[] { 0xFF, 0xD8, 0xFF, 0xE1 },
            new byte[] { 0xFF, 0xD8, 0xFF, 0xE8 },
            new byte[] { 0xFF, 0xD8, 0xFF, 0xEE },
            new byte[] { 0xFF, 0xD8, 0xFF, 0xDB },
        }
    },
    { ".zip", new List<byte[]> //also docx, xlsx, pptx, ...
        {
            new byte[] { 0x50, 0x4B, 0x03, 0x04 },
            new byte[] { 0x50, 0x4B, 0x4C, 0x49, 0x54, 0x45 },
            new byte[] { 0x50, 0x4B, 0x53, 0x70, 0x58 },
            new byte[] { 0x50, 0x4B, 0x05, 0x06 },
            new byte[] { 0x50, 0x4B, 0x07, 0x08 },
            new byte[] { 0x57, 0x69, 0x6E, 0x5A, 0x69, 0x70 },
        }
    },
    { ".pdf", new List<byte[]> { new byte[] { 0x25, 0x50, 0x44, 0x46 } } },                          // "%PDF"
    { ".z", new List<byte[]>
        {
            new byte[] { 0x1F, 0x9D },
            new byte[] { 0x1F, 0xA0 }
        }
    },
    // NOTE(review): in a tar archive the "ustar" magic is stored at byte offset 257
    // of the header, not at the start of the file, so a comparison against the
    // leading bytes (as IsFileValid does) will not match real tar archives.
    { ".tar", new List<byte[]>
        {
            new byte[] { 0x75, 0x73, 0x74, 0x61, 0x72, 0x00, 0x30 , 0x30 },
            new byte[] { 0x75, 0x73, 0x74, 0x61, 0x72, 0x20, 0x20 , 0x00 },
        }
    },
    { ".tar.z", new List<byte[]>
        {
            new byte[] { 0x1F, 0x9D },
            new byte[] { 0x1F, 0xA0 }
        }
    },
    { ".tif", new List<byte[]>
        {
            new byte[] { 0x49, 0x49, 0x2A, 0x00 },   // "II*\0" little-endian
            new byte[] { 0x4D, 0x4D, 0x00, 0x2A }    // "MM\0*" big-endian
        }
    },
    { ".tiff", new List<byte[]>
        {
            new byte[] { 0x49, 0x49, 0x2A, 0x00 },   // "II*\0" little-endian
            new byte[] { 0x4D, 0x4D, 0x00, 0x2A }    // "MM\0*" big-endian
        }
    },
    { ".rar", new List<byte[]>
        {
            new byte[] { 0x52, 0x61, 0x72, 0x21, 0x1A, 0x07 , 0x00 },
            new byte[] { 0x52, 0x61, 0x72, 0x21, 0x1A, 0x07 , 0x01, 0x00 },
        }
    },
    { ".7z", new List<byte[]>
        {
            new byte[] { 0x37, 0x7A, 0xBC, 0xAF, 0x27 , 0x1C },
        }
    },
    // Text files are only recognisable when they start with a byte-order mark.
    { ".txt", new List<byte[]>
        {
            new byte[] { 0xEF, 0xBB , 0xBF },        // UTF-8 BOM
            new byte[] { 0xFF, 0xFE},                // UTF-16 LE BOM (also the prefix of the UTF-32 LE BOM)
            new byte[] { 0xFE, 0xFF },               // UTF-16 BE BOM
            new byte[] { 0x00, 0x00, 0xFE, 0xFF },   // UTF-32 BE BOM
        }
    },
    { ".mp3", new List<byte[]>
        {
            new byte[] { 0xFF, 0xFB },
            new byte[] { 0xFF, 0xF3},
            new byte[] { 0xFF, 0xF2},
            // ID3v2 tag marker "ID3"; the third byte is '3' (0x33), not 'C' (0x43).
            new byte[] { 0x49, 0x44, 0x33},
        }
    },
};

Hello, I found the above code for validating file content in C#. It is helpful for validating the content of some known file types: the signature comparison works fine for types such as jpeg, gif, mp3, doc and docx. However, the logic doesn't work for file types like txt, log and JSON. Is there any solution to validate the content type of txt, log and JSON files? I tried to match the signatures of txt, log and JSON files, but the leading bytes are different for every file.

.NET
.NET
Microsoft Technologies based on the .NET software framework.
4,035 questions
ASP.NET Core
ASP.NET Core
A set of technologies in the .NET Framework for building web applications and XML web services.
4,731 questions
ASP.NET
ASP.NET
A set of technologies in the .NET Framework for building web applications and XML web services.
3,561 questions
JavaScript API
JavaScript API
An Office service that supports add-ins to interact with objects in Office client applications.
1,019 questions
C#
C#
An object-oriented and type-safe programming language that has its roots in the C family of languages and includes support for component-oriented programming.
11,185 questions
0 comments No comments
{count} votes

2 answers

Sort by: Most helpful
  1. Bruce (SqlWork.com) 69,121 Reputation points
    2025-01-08T17:51:35.44+00:00

    Txt and log files have no defined format and can contain any characters, so they are hard to validate.

    To validate a JSON file you need to read the entire file to check whether it is valid. If you just want to validate the starting characters, a JSON file can take one of several forms:

    • an object, then the file starts with a <whitespace>{ and ends with a }<whitespace>
    • an array, then the file starts with a <whitespace>[ and ends with ]<whitespace>
    • bool value, then <whitespace>true<whitespace> or <whitespace>false<whitespace>
    • null value, then <whitespace>null<whitespace>
    • string value, then <whitespace>" and ends with "<whitespace>
    • numeric value, then <whitespace><numeric string><whitespace>. A numeric string can be in integer, decimal, or exponent format (JSON does not allow hexadecimal numbers).

    see:

    https://www.json.org/json-en.html

    0 comments No comments

  2. SurferOnWww 3,696 Reputation points
    2025-01-09T00:54:31.8266667+00:00

    Is there any solution to validate the content type of txt, log, JSON files ?

    No, there is no practical solution since they are all text files. If they have a BOM you will be able to guess that they are text files. However, there is no way to differentiate among txt, log and JSON files.


Your answer

Answers can be marked as Accepted Answers by the question author, which helps users to know the answer solved the author's problem.