#r "System.IO"
#r "System.Text.RegularExpressions"

using CsvHelper;
using System;
using System.IO;
using System.Text.RegularExpressions;

// Pattern a passenger name must satisfy before it is split: an optional alphabetic
// surname, an optional comma, then any mix of letters, whitespace and commas.
// Built once rather than once per row.
private static readonly Regex PasNamePattern = new Regex(@"^([A-Za-z]*),?\s*([A-Za-z\s,]*)$");

/// <summary>
/// Process a csv file containing the details of Titanic survivors.
///
/// The passenger name column (pas_name) is expected to be in the format "surname, title forenames".
/// This function will reformat this to "title forenames surname" and store the result in a
/// new column (pas_name2).
///
/// If any errors are found in the pas_name format a 1 will be placed in a new column
/// called "name_format_error" on the corresponding row.
/// </summary>
/// <param name="myBlob">Stream over the input csv blob.</param>
/// <param name="name">Name of the input blob; only used in the log output.</param>
/// <param name="outputBlob">Stream the processed csv is written to.</param>
/// <param name="log">Writer used for informational and error logging.</param>
public static void Run(Stream myBlob, string name, Stream outputBlob, TraceWriter log)
{
    log.Info($"C# Blob trigger function Processed blob\n Name:{name}.csv \n Size: {myBlob.Length} Bytes");

    int recordsRead = 0;
    int recordsWritten = 0;
    int recordsInError = 0;

    try
    {
        using (var reader = new StreamReader(myBlob))
        using (var csvReader = new CsvReader(reader))
        {
            // Configure the CsvReader to ignore missing fields. This is a bit of a hack that allows us
            // to use the same record format for both the input file and output file. Since we're adding
            // two new columns to the output file we need to tell CsvReader to ignore errors
            // due to not finding these columns in the input file.
            csvReader.Configuration.MissingFieldFound = null;

            // Read the input file header
            csvReader.Read();
            csvReader.ReadHeader();

            // The using blocks guarantee the output is flushed and released even if a row
            // fails part-way through the file.
            using (var writer = new StreamWriter(outputBlob))
            using (var csvWriter = new CsvWriter(writer))
            {
                // Write out a corresponding header to the new output file
                csvWriter.WriteHeader<TitanicSurvivor>();
                csvWriter.NextRecord();

                // Iterate through the input file rows, process each one and write the processed
                // output to the output file
                while (csvReader.Read())
                {
                    recordsRead++;

                    TitanicSurvivor record = csvReader.GetRecord<TitanicSurvivor>();

                    string reformatted;
                    if (TryReformatName(record.pas_name, out reformatted))
                    {
                        record.name_format_error = 0;
                    }
                    else
                    {
                        // Every kind of name failure is flagged on the row AND counted.
                        record.name_format_error = 1;
                        recordsInError++;
                    }
                    record.pas_name2 = reformatted;

                    csvWriter.WriteRecord(record);
                    csvWriter.NextRecord();
                    recordsWritten++;
                }
            }

            log.Info($"{recordsRead} records read. {recordsWritten} records processed. {recordsInError} errors");
        }
    }
    catch (Exception ex)
    {
        // Best effort: report the failure through the function log rather than rethrowing.
        log.Error($"Error Occurred. {ex}");
    }
}

/// <summary>
/// Converts a name in the form "surname, title forenames[, suffix]" into
/// "title forenames surname [suffix]", e.g. "Thomas, Mr John, Jr" becomes "Mr John Thomas Jr".
/// </summary>
/// <param name="pasName">Raw pas_name value; a null value is treated as an empty name.</param>
/// <param name="result">The reformatted name on success, otherwise a message describing the problem.</param>
/// <returns>true when the name was reformatted; false when it is invalid.</returns>
private static bool TryReformatName(string pasName, out string result)
{
    // A missing column arrives as null; treat it as an empty name so it is reported as a
    // row-level format error instead of throwing and aborting the whole file.
    string candidate = pasName ?? string.Empty;

    // Here we're using a simple regex to check for the name pattern we expect.
    // If we don't find it we flag the row as an error row.
    if (!PasNamePattern.IsMatch(candidate))
    {
        result = "** pas_name contains invalid characters. Must be alphabetic characters, comma or SPC only **";
        return false;
    }

    // The names should be in the format <surname>, <title forenames>, <suffix>
    // e.g. Thomas, Mr John, Jr
    // Simple string manipulation creates a new name in the format
    // <title forenames> <surname> <suffix>, e.g. Mr John Thomas Jr
    string[] parts = candidate.Split(',');
    switch (parts.Length)
    {
        case 2:
            result = $"{parts[1].Trim()} {parts[0].Trim()}";
            return true;

        case 3:
            result = $"{parts[1].Trim()} {parts[0].Trim()} {parts[2].Trim()}";
            return true;

        default:
            // No comma at all, or more than two commas.
            result = "** pas_name is incorrectly formatted. Should be <surname>, <title forenames>, [<suffix>]";
            return false;
    }
}

/// <summary>
/// POCO representing a row in the csv file with the following format:
///
/// Id,pas_name,class,pas_age,sex_name,is_survive,sex_code
/// 1,"Allen, Miss Elisabeth Walton",1st,29,female,1,1
/// </summary>
class TitanicSurvivor
{
    public string Id { get; set; }
    public string pas_name { get; set; }
    public string @class { get; set; }
    public string pas_age { get; set; }
    public string sex_name { get; set; }
    public int is_survive { get; set; }
    public int sex_code { get; set; }

    // These columns will be added to the output file
    public string pas_name2 { get; set; }
    public int name_format_error { get; set; }
}