I recently needed to parse a tab-delimited file and I figured it would be easier if I could manipulate it in DataSet form. I came across Dave O’s article on The Code Project which was close, but not quite what I needed. First, I wanted to read the files a line at a time, because some of them are pretty big. Granted, the DataSet will be in-memory the whole time, but I didn’t want to allocate all that ram twice — once for the initial parsing and then again for the DataSet. Second, I needed some extra massaging on the data (e.g. the values are encapsulated in quotes).
So, here’s my updated code, which I’ll upload to the Code Project one of these days.
using System; using System.Data; using System.IO; namespace AssessmentDownload { class TextToDataSet { public static DataSet Convert(string fileName, string tableName, string delimiter) { //The DataSet to Return DataSet result = new DataSet(); //Open the file in a stream reader StreamReader s = new StreamReader(fileName); //Split the first line into the columns string[] columns = s.ReadLine().Split(delimiter.ToCharArray()); //Add the new DataTable to the RecordSet result.Tables.Add(tableName); // Cycle the colums, adding those that don't exist yet // and sequencing the one that do. for(int columnIndex = 0; columnIndex < columns.Length; columnIndex++) { string col = columns[columnIndex]; bool added = false; string next = ""; int i = 0; // used in case there are duplicate column names while (!added) { //Build the column name and remove any unwanted characters. string columnname = col + next; columnname = columnname.Replace("#", ""); columnname = columnname.Replace("'", ""); columnname = columnname.Replace("&", ""); columnname = columnname.Trim('"'); //See if the column already exists if (!result.Tables[tableName].Columns.Contains(columnname)) { //if it doesn't then we add it here and mark it as added result.Tables[tableName].Columns.Add(columnname); added = true; } else { // if it did exist then we increment the sequencer and try again // with a modified column name (e.g. MyColumn_2) i++; next = "_" + i.ToString(); } } // now update the column array with the newly cleaned column name columns[columnIndex] = col; } // read file a line at a time string row; while ((row = s.ReadLine()) != null) { //Split the row at the delimiter. string[] items = row.Split(delimiter.ToCharArray()); // clean up the data a bit (mostly removing surrounding quotes) for (int i = 0; i < items.Length; i++) { string item = items[i]; if (item.StartsWith("'") && item.EndsWith("'")) item = item.Trim('''); if (item.StartsWith(""") && item.EndsWith(""")) item = item.Trim('"'); items[i] = item; } //Add the item result.Tables[tableName].Rows.Add(items); } //Return the imported data. return result; } } }