I am trying to read a CSV file using Jet OLEDB provider.
The file (badimport.csv) looks like this:
Name,Description,DefaultPrice_$_per_m3
S6,Structural sawlog MSG6,65
S8,Structural sawlog MSG8,75
S10,Structural sawlog MSG10,90
S12,Structural sawlog MSG12,120
Using an OLE DB connection the first column seems to be getting
interpreted as a Decimal and not a string. All the records in the name
field come in with the first character (i.e. S) truncated.
My pared back code looks like this:
using System;
using System.Collections;
using System.IO;
using System.Data;
using System.Data.OleDb;
public class MyClass
{
public static void Main()
{
string csvFileName = "C:\\badimport.csv";
// break to debugger
// System.Diagnostics.Debugger.Break();
DataTable csvTable = ReadFromCSVFile( csvFileName );
DataColumn nameColumn = csvTable.Columns[ "Name" ];
DataColumn descriptionColumn = csvTable.Columns[ "Description" ];
DataColumn priceColumn = csvTable.Columns[ "Price" ];
for ( int row = 0; row < csvTable.Rows.Count; row++ ) {
try {
DataRow csvRow = csvTable.Rows[row];
string name = GetAsString( csvRow, "Name");
string description = (string)csvRow[ descriptionColumn ];
int price = (int)csvRow[ priceColumn];
Console.WriteLine(" Row {0}: Name = {1}, Description = {2}, Price =
{3}", row, name, description, price);
}
catch ( Exception e ) {
throw new ApplicationException( "Error importing
LogProductDefinition on line " + (row+2) + " of file '" + csvFileName +
"': " + e.Message + e.StackTrace );
}
}
}
private static System.Data.DataTable ReadFromCSVFile( string
csvFileName ) {
// can use a file-specific SCHEMA.INI file to control the importing
process
// specifically the MaxScanRows=0 entry forces the entire file to be
scanned to determine the column types.
// Format of this file is defined here:
http://msdn.microsoft.com/library/default.asp?url=/library/en-us/odbc/htm/odbcjetschema_ini_file.asp
//
if ( !File.Exists( csvFileName ) ) {
throw new ApplicationException( " Cannot find CSV file : \"" +
csvFileName + "\"." );
}
string csvPath = Path.GetDirectoryName( Path.GetFullPath( csvFileName
) );
OleDbConnection connection = new OleDbConnection(
"Provider=Microsoft.Jet.OLEDB.4.0;" +
"Data Source=" +
csvPath + ";" +
"Extended
Properties=\"Text;HDR=Yes;FMT=Delimited\"" );
return ReadDataTable( connection, Path.GetFileName(csvFileName) );
}
private static System.Data.DataTable ReadDataTable( OleDbConnection
connection, string tableName ) {
connection.Open();
try {
// Read all data into a data table.
OleDbDataAdapter adapter = new OleDbDataAdapter( "SELECT * FROM [" +
tableName + "]", connection );
System.Data.DataTable dataTable = new System.Data.DataTable(
tableName );
adapter.Fill( dataTable );
return dataTable;
} finally {
connection.Close();
}
}
private static string GetAsString( DataRow row, string columnName ) {
DataColumn column = row.Table.Columns[ columnName ];
if ( column == null ) {
throw new ApplicationException( "Column '" + columnName + "' not
found" );
}
object content = row[column];
if ( content == System.DBNull.Value ) {
return ""; // empty cell is OK
}
try {
content = row[column];
if ( content.GetType() == typeof( System.String ) ) {
return (string)content;
}
else {
return content.ToString();
}
}
catch ( Exception e ) {
throw new ApplicationException( "Unable to convert value \"" +
content.ToString() + "\" in column \"" + column.ColumnName + "\" to a
string. " + e.Message );
}
}
}
If is set up a SCHEMA.INI file with the follwoing contents in the same
directory the name column is read correctly:
[badimport.csv]
ColNameHeader=True
MaxScanRows=0
Anyone have any odea why the first column is interpreted as a decimal?
Thanks,
John.