Converting a Tab-Delimited File to a DataSet

I recently needed to parse a tab-delimited file and I figured it would be easier if I could manipulate it in DataSet form. I came across Dave O’s article on The Code Project which was close, but not quite what I needed. First, I wanted to read the files a line at a time, because some of them are pretty big. Granted, the DataSet will be in-memory the whole time, but I didn’t want to allocate all that ram twice — once for the initial parsing and then again for the DataSet. Second, I needed some extra massaging on the data (e.g. the values are encapsulated in quotes).

So, here’s my updated code, which I’ll upload to the Code Project one of these days.

using System;
using System.Data;
using System.IO;

namespace AssessmentDownload
{
    class TextToDataSet
    {
        public static DataSet Convert(string fileName,
         string tableName, string delimiter)
        {

            //The DataSet to Return
            DataSet result = new DataSet();

            //Open the file in a stream reader
            StreamReader s = new StreamReader(fileName);

            //Split the first line into the columns
            string[] columns = s.ReadLine().Split(delimiter.ToCharArray());

            //Add the new DataTable to the RecordSet
            result.Tables.Add(tableName);

            // Cycle the colums, adding those that don't exist yet
            // and sequencing the one that do.
            for(int columnIndex = 0; columnIndex < columns.Length; columnIndex++)
            {
                string col = columns[columnIndex];
                bool added = false;
                string next = "";
                int i = 0;  // used in case there are duplicate column names
                while (!added)
                {
                    //Build the column name and remove any unwanted characters.
                    string columnname = col + next;
                    columnname = columnname.Replace("#", "");
                    columnname = columnname.Replace("'", "");
                    columnname = columnname.Replace("&", "");
                    columnname = columnname.Trim('"');

                    //See if the column already exists
                    if (!result.Tables[tableName].Columns.Contains(columnname))
                    {
                        //if it doesn't then we add it here and mark it as added
                        result.Tables[tableName].Columns.Add(columnname);
                        added = true;
                    }
                    else
                    {
                        // if it did exist then we increment the sequencer and try again
                        // with a modified column name (e.g. MyColumn_2)
                        i++;
                        next = "_" + i.ToString();
                    }
                }

                // now update the column array with the newly cleaned column name
                columns[columnIndex] = col;
            }

            // read file a line at a time
            string row;
            while ((row = s.ReadLine()) != null)
            {

                //Split the row at the delimiter.
                string[] items = row.Split(delimiter.ToCharArray());

                // clean up the data a bit (mostly removing surrounding quotes)
                for (int i = 0; i < items.Length; i++)
                {
                    string item = items[i];
                    if (item.StartsWith("'") && item.EndsWith("'"))
                        item = item.Trim(''');
                    if (item.StartsWith(""") && item.EndsWith("""))
                        item = item.Trim('"');
                    items[i] = item;
                }

                //Add the item
                result.Tables[tableName].Rows.Add(items);
            }


            //Return the imported data.
            return result;
        }
    }
}
0