B
Brent
I'd like to think that my code* is pretty simple, but I'm running into
memory errors when loading larger documents.
The document at the URL you see below in the first line of the Page_Load
function is about 3 MB. That document then gets parsed with several regular
expressions.
The code often runs out of memory. I'm guessing that each time the
document is parsed, a new string of 3 MB is created -- the several
regular expressions I use consume memory rapidly.
One option I have explored is reading in each row. The problem is that
the regular expression parsing looks at multiple lines, and I can't
guarantee where the lines I need will occur.
I'm a bit frustrated at this point, as the code works fine on smaller
documents. I'd sure appreciate any help.
-- Brent
*==============================================================
/// <summary>
/// Downloads the filing text from the hard-coded URL and extracts the "G2"
/// group of several regular-expression patterns from it.
/// </summary>
/// <param name="sender">Event source (not used here).</param>
/// <param name="e">Event arguments (not used here).</param>
public void Page_Load(Object sender, EventArgs e)
{
    // The document is fetched once and the same string instance is handed to
    // every extraction below.
    string strResponse =
        getText("http://www.sec.gov/Archives/edgar/data/1085158/0001085158-99-000008.txt");

    string report_date = getRegExGroupValue(strResponse, regExPattern1, "G2");

    // NOTE(review): this reuses regExPattern1, exactly as the original did.
    // Given the 1/3/4 numbering of the other calls, a regExPattern2 was
    // probably intended -- confirm before changing.
    string report_header = getRegExGroupValue(strResponse, regExPattern1, "G2");

    string report_companyname = getRegExGroupValue(strResponse, regExPattern3, "G2");

    // The original declared a second local named report_date here, which does
    // not compile. Renamed so both values are kept.
    // NOTE(review): pick the real field name for the regExPattern4 result.
    string report_date_pattern4 = getRegExGroupValue(strResponse, regExPattern4, "G2");
}
/// <summary>
/// Fetches the resource at <paramref name="strURL"/> over HTTP and returns
/// the whole response body as a single string.
/// </summary>
/// <param name="strURL">Absolute URL of the document to download.</param>
/// <returns>The complete response body.</returns>
public string getText(string strURL)
{
    HttpWebRequest oRequest = (HttpWebRequest)WebRequest.Create(strURL);
    // 10 minutes, for long files. Timeout is in milliseconds
    // (10000 = 10 seconds).
    oRequest.Timeout = 10*60000;
    oRequest.UserAgent = "Web Client";

    // The original called myStream.Close() after the return statement, so it
    // never ran and the response, stream and reader were never released.
    // The using blocks dispose all three, even when reading throws.
    using (HttpWebResponse oResponse = (HttpWebResponse)oRequest.GetResponse())
    using (Stream myStream = oResponse.GetResponseStream())
    using (StreamReader sr = new StreamReader(myStream))
    {
        return sr.ReadToEnd();
    }
}
/// <summary>
/// Runs <paramref name="strPat"/> against <paramref name="strText"/>
/// (multiline, case-insensitive) and returns the trimmed value of the named
/// group <paramref name="strGroup"/> from the first match.
/// </summary>
/// <param name="strText">Text to search.</param>
/// <param name="strPat">Regular-expression pattern.</param>
/// <param name="strGroup">Name of the capture group to return.</param>
/// <returns>
/// The trimmed group value from the first match, or "0" when the pattern
/// does not match at all.
/// </returns>
public string getRegExGroupValue(string strText, string strPat, string strGroup)
{
    // One scan instead of IsMatch followed by Match. The original guard used
    // IgnoreCase but the extraction did not, so a case-insensitive hit passed
    // the guard and then yielded an empty string; both now use the same options.
    Match firstMatch = Regex.Match(
        strText, strPat, RegexOptions.Multiline | RegexOptions.IgnoreCase);

    if (!firstMatch.Success)
    {
        return "0";
    }

    // The original also called Regex.Replace(strText, @"\s", " ") here and
    // discarded the result. That only built a throwaway copy of the whole
    // document on every call, so it has been removed.
    return firstMatch.Groups[strGroup].Value.Trim();
}
==========================================================
memory errors when loading larger documents.
The document at the URL you see below in the first line of the Page_Load
function is about 3 MB. That document then gets parsed with several regular
expressions.
The code often runs out of memory. I'm guessing that each time the
document is parsed, a new string of 3 MB is created -- the several
regular expressions I use consume memory rapidly.
One option I have explored is reading in each row. The problem is that
the regular expression parsing looks at multiple lines, and I can't
guarantee where the lines I need will occur.
I'm a bit frustrated at this point, as the code works fine on smaller
documents. I'd sure appreciate any help.
-- Brent
*==============================================================
/// <summary>
/// Downloads the filing text from the hard-coded URL and extracts the "G2"
/// group of several regular-expression patterns from it.
/// </summary>
/// <param name="sender">Event source (not used here).</param>
/// <param name="e">Event arguments (not used here).</param>
public void Page_Load(Object sender, EventArgs e)
{
    // The document is fetched once and the same string instance is handed to
    // every extraction below.
    string strResponse =
        getText("http://www.sec.gov/Archives/edgar/data/1085158/0001085158-99-000008.txt");

    string report_date = getRegExGroupValue(strResponse, regExPattern1, "G2");

    // NOTE(review): this reuses regExPattern1, exactly as the original did.
    // Given the 1/3/4 numbering of the other calls, a regExPattern2 was
    // probably intended -- confirm before changing.
    string report_header = getRegExGroupValue(strResponse, regExPattern1, "G2");

    string report_companyname = getRegExGroupValue(strResponse, regExPattern3, "G2");

    // The original declared a second local named report_date here, which does
    // not compile. Renamed so both values are kept.
    // NOTE(review): pick the real field name for the regExPattern4 result.
    string report_date_pattern4 = getRegExGroupValue(strResponse, regExPattern4, "G2");
}
/// <summary>
/// Fetches the resource at <paramref name="strURL"/> over HTTP and returns
/// the whole response body as a single string.
/// </summary>
/// <param name="strURL">Absolute URL of the document to download.</param>
/// <returns>The complete response body.</returns>
public string getText(string strURL)
{
    HttpWebRequest oRequest = (HttpWebRequest)WebRequest.Create(strURL);
    // 10 minutes, for long files. Timeout is in milliseconds
    // (10000 = 10 seconds).
    oRequest.Timeout = 10*60000;
    oRequest.UserAgent = "Web Client";

    // The original called myStream.Close() after the return statement, so it
    // never ran and the response, stream and reader were never released.
    // The using blocks dispose all three, even when reading throws.
    using (HttpWebResponse oResponse = (HttpWebResponse)oRequest.GetResponse())
    using (Stream myStream = oResponse.GetResponseStream())
    using (StreamReader sr = new StreamReader(myStream))
    {
        return sr.ReadToEnd();
    }
}
/// <summary>
/// Runs <paramref name="strPat"/> against <paramref name="strText"/>
/// (multiline, case-insensitive) and returns the trimmed value of the named
/// group <paramref name="strGroup"/> from the first match.
/// </summary>
/// <param name="strText">Text to search.</param>
/// <param name="strPat">Regular-expression pattern.</param>
/// <param name="strGroup">Name of the capture group to return.</param>
/// <returns>
/// The trimmed group value from the first match, or "0" when the pattern
/// does not match at all.
/// </returns>
public string getRegExGroupValue(string strText, string strPat, string strGroup)
{
    // One scan instead of IsMatch followed by Match. The original guard used
    // IgnoreCase but the extraction did not, so a case-insensitive hit passed
    // the guard and then yielded an empty string; both now use the same options.
    Match firstMatch = Regex.Match(
        strText, strPat, RegexOptions.Multiline | RegexOptions.IgnoreCase);

    if (!firstMatch.Success)
    {
        return "0";
    }

    // The original also called Regex.Replace(strText, @"\s", " ") here and
    // discarded the result. That only built a throwaway copy of the whole
    // document on every call, so it has been removed.
    return firstMatch.Groups[strGroup].Value.Trim();
}
==========================================================