I love the IDataReader interface. To me it’s just an easy, quick way to iterate over data when you know what that data is going to be. The ease of binding it to ASP.NET Webform and Winform grids (plus the many extensions I have for exporting them to Excel and text files) makes it extremely useful and efficient for me. I’ve created a few readers recently that I’ll share over a blog post or two. This one takes an HTML table (provided as a string) and turns its contents into a DataReader. The one stipulation is that the table must have the same number of columns on every row (having a colspan attribute on one row will mess it up). I may compensate for that in the future, but I don’t have a need to right now. I did this in about 10 minutes, so it may be a little sloppy, but it’s worked for the few tests I’ve thrown at it. This class makes use of the HTML Agility Pack to parse the HTML (it is highly useful and I highly recommend it if you plan on doing any HTML parsing). The HTML Agility Pack is open source and you can download the code or the library at: http://htmlagilitypack.codeplex.com/
Imports System.Data
Imports System.Net
Imports System.Collections
Imports System.Collections.Specialized
Imports System.Text
Imports System.Text.RegularExpressions
Imports HtmlAgilityPack
Imports Iuf.Extensions
Namespace Argus.Data
''' <summary>
''' A class that reads an HTML table and returns it as a DataReader. This class depends on and is coded against
''' the HtmlAgilityPack. It requires that the table has a consistent cell count in each row; it does not handle
''' tables where a colspan appears on some rows but not on others.
''' </summary>
''' <remarks>
''' </remarks>
Public Class HtmlTableReader
Implements IDataReader
'*********************************************************************************************************************
'
' Class: HtmlTableReader
' Initial Date: 09/13/2011
' Last Updated: 09/14/2011
' Programmer(s): Blake Pell, bpell@indiana.edu
'
'*********************************************************************************************************************
''' <summary>
''' Parses the first table found in the supplied HTML and loads its rows into an in-memory
''' DataTable, then opens a DataTableReader over that table.
''' </summary>
''' <param name="html">The HTML markup that contains the table to read.</param>
''' <param name="firstRowContainsHeader">
''' True to use the cell text of the first row as the column names (that row is not returned as data).
''' False to name the columns Column1, Column2, ... and return the first row as data.
''' </param>
''' <remarks>
''' If the HTML contains no table, or the table has no rows with cells, the reader is created over an
''' empty DataTable. Every column is typed as String. A row that has more cells than the first row
''' is not supported (DataTable.Rows.Add rejects it); a row with fewer cells leaves the trailing
''' columns unset.
''' </remarks>
Public Sub New(ByVal html As String, ByVal firstRowContainsHeader As Boolean)
    Me.Html = html
    Dim hd As New HtmlDocument()
    hd.LoadHtml(html)
    Dim dt As New DataTable("HtmlTableReader")
    ' Only the first table in the document is parsed. The HtmlAgilityPack select methods return
    ' Nothing (not an empty collection) when nothing matches, so every result is checked.
    Dim table As HtmlNode = hd.DocumentNode.SelectSingleNode("//table")
    Dim rows As HtmlNodeCollection = Nothing
    If table IsNot Nothing Then
        ' ".//tr" is relative to the table node; a bare "//tr" would search the whole document
        ' and pull in rows that belong to other tables.
        rows = table.SelectNodes(".//tr")
    End If
    If rows IsNot Nothing Then
        Dim columnsDefined As Boolean = False
        For Each tr As HtmlNode In rows
            ' Header rows are commonly written with <th> cells, so accept both cell kinds.
            Dim cells As HtmlNodeCollection = tr.SelectNodes("th|td")
            If cells Is Nothing Then
                Continue For
            End If
            If Not columnsDefined Then
                ' The first row that has cells determines the column layout.
                Dim fieldCounter As Integer = 0
                For Each cell As HtmlNode In cells
                    fieldCounter += 1
                    If firstRowContainsHeader Then
                        dt.Columns.Add(cell.InnerText, GetType(String))
                    Else
                        dt.Columns.Add("Column" & fieldCounter.ToString, GetType(String))
                    End If
                Next
                columnsDefined = True
                If firstRowContainsHeader Then
                    ' The header row only supplies names; it is not a data row.
                    Continue For
                End If
                ' Without a header the first row is ordinary data, so fall through and add it.
            End If
            Dim fields(cells.Count - 1) As String
            For index As Integer = 0 To cells.Count - 1
                fields(index) = cells(index).InnerText
            Next
            dt.Rows.Add(fields)
        Next
    End If
    Me.DataTable = dt
    Me.DataReader = dt.CreateDataReader
End Sub
' Backing field for the DataReader property.
Private _dataReader As DataTableReader
''' <summary>
''' The underlying DataTableReader that this class wraps.
''' </summary>
''' <remarks>
''' Kept private: the reader's members are surfaced through the IDataReader members implemented
''' by this class, so the reader itself does not need to be exposed.
''' </remarks>
Private Property DataReader() As DataTableReader
    Get
        Return _dataReader
    End Get
    Set(ByVal reader As DataTableReader)
        _dataReader = reader
    End Set
End Property
' Backing field for the DataTable property.
Private _dataTable As DataTable
''' <summary>
''' The underlying DataTable that holds the rows parsed from the supplied HTML.
''' </summary>
''' <remarks>
''' Kept private so that the DataTable cannot be tampered with while a caller is iterating
''' over the DataReader.
''' </remarks>
Private Property DataTable() As DataTable
    Get
        Return _dataTable
    End Get
    Set(ByVal table As DataTable)
        _dataTable = table
    End Set
End Property
' Backing field for the Html property; starts out as an empty string.
Private _html As String = ""
''' <summary>
''' The HTML text that represents the table.
''' </summary>
''' <remarks>
''' The setter only stores the new text; it does not re-parse it or rebuild the reader.
''' </remarks>
Public Property Html() As String
    Get
        Return _html
    End Get
    Set(ByVal markup As String)
        _html = markup
    End Set
End Property
''' <summary>
''' Discards the current underlying DataReader and, when a DataTable is available, creates a
''' fresh reader over it so that iteration starts again from the first row.
''' </summary>
Public Sub MoveFirst()
    Dim current As DataTableReader = Me.DataReader
    If current IsNot Nothing Then
        current.Close()
        Me.DataReader = Nothing
    End If
    Dim source As DataTable = Me.DataTable
    If source IsNot Nothing Then
        Me.DataReader = source.CreateDataReader
    End If
End Sub
''' <summary>
''' Closes the underlying DataReader. Does nothing when no reader has been created.
''' </summary>
Public Sub Close() Implements System.Data.IDataReader.Close
    ' The reader can be Nothing (for example after MoveFirst when there is no DataTable),
    ' and Close is typically called from cleanup code that should not fail.
    If Me.DataReader IsNot Nothing Then
        Me.DataReader.Close()
    End If
End Sub
''' <summary>
''' The number of records held in the underlying DataTable.
''' </summary>
''' <returns>The row count, or 0 when no DataTable has been built.</returns>
Public Function RowCount() As Integer
    If Me.DataTable Is Nothing Then
        ' Return an Integer explicitly; the previous "Return False" relied on an implicit
        ' Boolean-to-Integer conversion and does not compile under Option Strict On.
        Return 0
    End If
    Return Me.DataTable.Rows.Count
End Function
''' <summary>
''' The Depth value reported by the wrapped DataTableReader.
''' </summary>
Public ReadOnly Property Depth() As Integer Implements System.Data.IDataReader.Depth
    Get
        Dim reader As DataTableReader = Me.DataReader
        Return reader.Depth
    End Get
End Property
''' <summary>
''' Returns the schema table produced by the wrapped DataTableReader.
''' </summary>
Public Function GetSchemaTable() As System.Data.DataTable Implements System.Data.IDataReader.GetSchemaTable
    Dim reader As DataTableReader = Me.DataReader
    Return reader.GetSchemaTable()
End Function
''' <summary>
''' Whether the wrapped DataTableReader reports itself as closed.
''' </summary>
Public ReadOnly Property IsClosed() As Boolean Implements System.Data.IDataReader.IsClosed
    Get
        Dim reader As DataTableReader = Me.DataReader
        Return reader.IsClosed
    End Get
End Property
''' <summary>
''' Forwards to NextResult on the wrapped DataTableReader and returns its result.
''' </summary>
Public Function NextResult() As Boolean Implements System.Data.IDataReader.NextResult
    Dim reader As DataTableReader = Me.DataReader
    Return reader.NextResult()
End Function
''' <summary>
''' Forwards to Read on the wrapped DataTableReader and returns its result.
''' </summary>
Public Function Read() As Boolean Implements System.Data.IDataReader.Read
    Dim reader As DataTableReader = Me.DataReader
    Return reader.Read()
End Function
''' <summary>
''' The RecordsAffected value reported by the wrapped DataTableReader.
''' </summary>
Public ReadOnly Property RecordsAffected() As Integer Implements System.Data.IDataReader.RecordsAffected
    Get
        Dim reader As DataTableReader = Me.DataReader
        Return reader.RecordsAffected
    End Get
End Property
''' <summary>
''' The number of fields reported by the wrapped DataTableReader.
''' </summary>
Public ReadOnly Property FieldCount() As Integer Implements System.Data.IDataRecord.FieldCount
    Get
        Dim reader As DataTableReader = Me.DataReader
        Return reader.FieldCount
    End Get
End Property
''' <summary>
''' Forwards to GetBoolean on the wrapped DataTableReader.
''' </summary>
''' <param name="i">The zero-based field index.</param>
Public Function GetBoolean(ByVal i As Integer) As Boolean Implements System.Data.IDataRecord.GetBoolean
    Dim reader As DataTableReader = Me.DataReader
    Return reader.GetBoolean(i)
End Function
''' <summary>
''' Forwards to GetByte on the wrapped DataTableReader.
''' </summary>
''' <param name="i">The zero-based field index.</param>
Public Function GetByte(ByVal i As Integer) As Byte Implements System.Data.IDataRecord.GetByte
    Dim reader As DataTableReader = Me.DataReader
    Return reader.GetByte(i)
End Function
''' <summary>
''' Forwards to GetBytes on the wrapped DataTableReader, passing every argument through unchanged.
''' </summary>
''' <param name="i">The zero-based field index.</param>
''' <param name="fieldOffset">Offset within the field at which to start reading.</param>
''' <param name="buffer">Destination buffer.</param>
''' <param name="bufferoffset">Offset within the buffer at which to start writing.</param>
''' <param name="length">Maximum number of bytes to copy.</param>
Public Function GetBytes(ByVal i As Integer, ByVal fieldOffset As Long, ByVal buffer() As Byte, ByVal bufferoffset As Integer, ByVal length As Integer) As Long Implements System.Data.IDataRecord.GetBytes
    Dim reader As DataTableReader = Me.DataReader
    Return reader.GetBytes(i, fieldOffset, buffer, bufferoffset, length)
End Function
''' <summary>
''' Forwards to GetChar on the wrapped DataTableReader.
''' </summary>
''' <param name="i">The zero-based field index.</param>
Public Function GetChar(ByVal i As Integer) As Char Implements System.Data.IDataRecord.GetChar
    Dim reader As DataTableReader = Me.DataReader
    Return reader.GetChar(i)
End Function
''' <summary>
''' Forwards to GetChars on the wrapped DataTableReader, passing every argument through unchanged.
''' </summary>
''' <param name="i">The zero-based field index.</param>
''' <param name="fieldoffset">Offset within the field at which to start reading.</param>
''' <param name="buffer">Destination buffer.</param>
''' <param name="bufferoffset">Offset within the buffer at which to start writing.</param>
''' <param name="length">Maximum number of characters to copy.</param>
Public Function GetChars(ByVal i As Integer, ByVal fieldoffset As Long, ByVal buffer() As Char, ByVal bufferoffset As Integer, ByVal length As Integer) As Long Implements System.Data.IDataRecord.GetChars
    Dim reader As DataTableReader = Me.DataReader
    Return reader.GetChars(i, fieldoffset, buffer, bufferoffset, length)
End Function
''' <summary>
''' Not supported by this reader; always throws NotImplementedException.
''' </summary>
''' <param name="i">The zero-based field index (unused).</param>
Public Function GetData(ByVal i As Integer) As System.Data.IDataReader Implements System.Data.IDataRecord.GetData
    Throw New NotImplementedException()
End Function
''' <summary>
''' Forwards to GetDataTypeName on the wrapped DataTableReader.
''' </summary>
''' <param name="i">The zero-based field index.</param>
Public Function GetDataTypeName(ByVal i As Integer) As String Implements System.Data.IDataRecord.GetDataTypeName
    Dim reader As DataTableReader = Me.DataReader
    Return reader.GetDataTypeName(i)
End Function
''' <summary>
''' Forwards to GetDateTime on the wrapped DataTableReader.
''' </summary>
''' <param name="i">The zero-based field index.</param>
''' <returns>The value the wrapped reader returns for the field.</returns>
''' <remarks>
''' NOTE(review): this class creates every column as String, so the wrapped reader may reject
''' typed reads such as this one - confirm against DataTableReader's behavior.
''' </remarks>
Public Function GetDateTime(ByVal i As Integer) As Date Implements System.Data.IDataRecord.GetDateTime
    ' Previously this returned GetDataTypeName(i), i.e. the type-name string, which cannot be
    ' converted to a Date.
    Return Me.DataReader.GetDateTime(i)
End Function
''' <summary>
''' Forwards to GetDecimal on the wrapped DataTableReader.
''' </summary>
''' <param name="i">The zero-based field index.</param>
Public Function GetDecimal(ByVal i As Integer) As Decimal Implements System.Data.IDataRecord.GetDecimal
    Dim reader As DataTableReader = Me.DataReader
    Return reader.GetDecimal(i)
End Function
''' <summary>
''' Forwards to GetDouble on the wrapped DataTableReader.
''' </summary>
''' <param name="i">The zero-based field index.</param>
Public Function GetDouble(ByVal i As Integer) As Double Implements System.Data.IDataRecord.GetDouble
    Dim reader As DataTableReader = Me.DataReader
    Return reader.GetDouble(i)
End Function
''' <summary>
''' Forwards to GetFieldType on the wrapped DataTableReader.
''' </summary>
''' <param name="i">The zero-based field index.</param>
Public Function GetFieldType(ByVal i As Integer) As System.Type Implements System.Data.IDataRecord.GetFieldType
    Dim reader As DataTableReader = Me.DataReader
    Return reader.GetFieldType(i)
End Function
''' <summary>
''' Forwards to GetFloat on the wrapped DataTableReader.
''' </summary>
''' <param name="i">The zero-based field index.</param>
Public Function GetFloat(ByVal i As Integer) As Single Implements System.Data.IDataRecord.GetFloat
    Dim reader As DataTableReader = Me.DataReader
    Return reader.GetFloat(i)
End Function
''' <summary>
''' Forwards to GetGuid on the wrapped DataTableReader.
''' </summary>
''' <param name="i">The zero-based field index.</param>
Public Function GetGuid(ByVal i As Integer) As System.Guid Implements System.Data.IDataRecord.GetGuid
    Dim reader As DataTableReader = Me.DataReader
    Return reader.GetGuid(i)
End Function
''' <summary>
''' Forwards to GetInt16 on the wrapped DataTableReader.
''' </summary>
''' <param name="i">The zero-based field index.</param>
Public Function GetInt16(ByVal i As Integer) As Short Implements System.Data.IDataRecord.GetInt16
    Dim reader As DataTableReader = Me.DataReader
    Return reader.GetInt16(i)
End Function
''' <summary>
''' Forwards to GetInt32 on the wrapped DataTableReader.
''' </summary>
''' <param name="i">The zero-based field index.</param>
Public Function GetInt32(ByVal i As Integer) As Integer Implements System.Data.IDataRecord.GetInt32
    Dim reader As DataTableReader = Me.DataReader
    Return reader.GetInt32(i)
End Function
''' <summary>
''' Forwards to GetInt64 on the wrapped DataTableReader.
''' </summary>
''' <param name="i">The zero-based field index.</param>
Public Function GetInt64(ByVal i As Integer) As Long Implements System.Data.IDataRecord.GetInt64
    Dim reader As DataTableReader = Me.DataReader
    Return reader.GetInt64(i)
End Function
''' <summary>
''' Forwards to GetName on the wrapped DataTableReader.
''' </summary>
''' <param name="i">The zero-based field index.</param>
Public Function GetName(ByVal i As Integer) As String Implements System.Data.IDataRecord.GetName
    Dim reader As DataTableReader = Me.DataReader
    Return reader.GetName(i)
End Function
''' <summary>
''' Forwards to GetOrdinal on the wrapped DataTableReader.
''' </summary>
''' <param name="name">The field name to look up.</param>
Public Function GetOrdinal(ByVal name As String) As Integer Implements System.Data.IDataRecord.GetOrdinal
    Dim reader As DataTableReader = Me.DataReader
    Return reader.GetOrdinal(name)
End Function
''' <summary>
''' Forwards to GetString on the wrapped DataTableReader.
''' </summary>
''' <param name="i">The zero-based field index.</param>
Public Function GetString(ByVal i As Integer) As String Implements System.Data.IDataRecord.GetString
    Dim reader As DataTableReader = Me.DataReader
    Return reader.GetString(i)
End Function
''' <summary>
''' Forwards to GetValue on the wrapped DataTableReader.
''' </summary>
''' <param name="i">The zero-based field index.</param>
Public Function GetValue(ByVal i As Integer) As Object Implements System.Data.IDataRecord.GetValue
    Dim reader As DataTableReader = Me.DataReader
    Return reader.GetValue(i)
End Function
''' <summary>
''' Forwards to GetValues on the wrapped DataTableReader and returns its result.
''' </summary>
''' <param name="values">The array handed to the wrapped reader to fill.</param>
Public Function GetValues(ByVal values() As Object) As Integer Implements System.Data.IDataRecord.GetValues
    Dim reader As DataTableReader = Me.DataReader
    Return reader.GetValues(values)
End Function
''' <summary>
''' Forwards to IsDBNull on the wrapped DataTableReader.
''' </summary>
''' <param name="i">The zero-based field index.</param>
Public Function IsDBNull(ByVal i As Integer) As Boolean Implements System.Data.IDataRecord.IsDBNull
    Dim reader As DataTableReader = Me.DataReader
    Return reader.IsDBNull(i)
End Function
''' <summary>
''' Returns the value the wrapped DataTableReader exposes for the field at the given index.
''' </summary>
''' <param name="i">The zero-based field index.</param>
Default Public Overloads ReadOnly Property Item(ByVal i As Integer) As Object Implements System.Data.IDataRecord.Item
    Get
        Dim reader As DataTableReader = Me.DataReader
        Return reader.Item(i)
    End Get
End Property
''' <summary>
''' Returns the value the wrapped DataTableReader exposes for the field with the given name.
''' </summary>
''' <param name="name">The field name.</param>
Default Public Overloads ReadOnly Property Item(ByVal name As String) As Object Implements System.Data.IDataRecord.Item
    Get
        Dim reader As DataTableReader = Me.DataReader
        Return reader.Item(name)
    End Get
End Property
Private disposedValue As Boolean = False ' Set once Dispose has run, so repeat calls do no work
''' <summary>
''' Releases the resources held by this reader. The cleanup runs at most once, and only when
''' called with disposing = True.
''' </summary>
''' <param name="disposing">True when called from Dispose(); gates the managed cleanup.</param>
''' <remarks>
''' The cleanup is delegated to Iuf.Data.DatabaseUtils.CleanupDbResources, a helper defined
''' outside this file. NOTE(review): presumably it closes/disposes the reader and the table it
''' is given - confirm against the helper's implementation.
''' </remarks>
Protected Overridable Sub Dispose(ByVal disposing As Boolean)
    If disposing AndAlso Not Me.disposedValue Then
        Iuf.Data.DatabaseUtils.CleanupDbResources(Nothing, Me.DataReader, Nothing, Nothing, Me.DataTable)
    End If
    ' Marked as disposed on every call, whether or not the cleanup above ran.
    Me.disposedValue = True
End Sub
#Region " IDisposable Support "
' This code added by Visual Basic to correctly implement the disposable pattern.
''' <summary>
''' Public entry point for disposal: runs Dispose(True) and then tells the garbage collector
''' that finalization is not needed for this instance.
''' </summary>
Public Sub Dispose() Implements IDisposable.Dispose
' Do not change this code. Put cleanup code in Dispose(ByVal disposing As Boolean) above.
Dispose(True)
GC.SuppressFinalize(Me)
End Sub
#End Region
End Class
End Namespace