前几天看到同事在网上复制、粘贴管理方面的文章,一遍一遍地重复,这让我想到可不可以写一个程序来完成呢?于是上网查资料,终于帮他解决了,代码如下:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Web;
using System.Web.UI;
using System.Web.UI.WebControls;
using System.Text.RegularExpressions;
using System.IO;
using System.Text;

namespace WebUI
{
    /// <summary>
    /// Web Forms page that downloads numbered article pages, strips the HTML
    /// markup from them and appends the remaining text to local files.
    /// </summary>
    public partial class TestWebClient : System.Web.UI.Page
    {
        // Inclusive range of article ids to download. Both bounds are currently
        // the same value, so exactly one page is fetched per click.
        private const int FirstArticleId = 1507;
        private const int LastArticleId = 1507;

        // Matches the named entities this page decodes plus any decimal numeric
        // character reference; group 1 is the entity name or "#<digits>".
        private static readonly Regex EntityPattern = new Regex(
            @"&(quot|amp|lt|gt|nbsp|iexcl|cent|pound|copy|#\d+);",
            RegexOptions.IgnoreCase);

        // Replacement text for each supported entity, keyed by name or by
        // "#<code>". Numeric references that are not listed here are removed.
        private static readonly Dictionary<string, string> EntityReplacements =
            new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase)
            {
                { "quot", "\"" }, { "#34", "\"" },
                { "amp", "&" }, { "#38", "&" },
                { "lt", "<" }, { "#60", "<" },
                { "gt", ">" }, { "#62", ">" },
                { "nbsp", " " }, { "#160", " " },
                { "iexcl", "\xa1" }, { "#161", "\xa1" },
                { "cent", "\xa2" }, { "#162", "\xa2" },
                { "pound", "\xa3" }, { "#163", "\xa3" },
                { "copy", "\xa9" }, { "#169", "\xa9" }
            };

        protected void Page_Load(object sender, EventArgs e)
        {
        }

        /// <summary>
        /// Click handler: for every article id in the configured range, writes
        /// the article URL to the response and saves the page text via
        /// <see cref="SetLog"/>.
        /// </summary>
        protected void btnDownLoad_Click(object sender, EventArgs e)
        {
            for (int i = FirstArticleId; i <= LastArticleId; i++)
            {
                string url = "http://www.ccmcsz.com/management/" + i + ".htm";
                Response.Write(url);
                SetLog(url, i.ToString());
                Response.Write("<br/>");
            }
        }

        /// <summary>
        /// Downloads <paramref name="url"/>, decodes it as gb2312, strips the
        /// markup with <see cref="DelHTML"/> and appends the result to
        /// D:\Test163\&lt;name&gt;.txt.
        /// </summary>
        /// <param name="url">Address of the page to download.</param>
        /// <param name="name">File name (without extension) for the saved text.</param>
        /// <remarks>
        /// Best effort: a failure is reported in the response output instead of
        /// being rethrown, so one bad page does not abort the whole request.
        /// </remarks>
        public void SetLog(string url, string name)
        {
            try
            {
                string filepath = @"D:\Test163\";
                Encoding defaultencode = Encoding.GetEncoding("gb2312");
                string FileName = name + ".txt";
                string NewFilePath = Path.Combine(filepath, FileName);

                // CreateDirectory does nothing when the directory already exists.
                Directory.CreateDirectory(filepath);

                string message;
                // using blocks release the connection and stream even when the
                // download or the read throws.
                using (System.Net.WebClient wc = new System.Net.WebClient())
                using (Stream ss = wc.OpenRead(url))
                using (StreamReader rd = new StreamReader(ss, defaultencode))
                {
                    message = rd.ReadToEnd();
                }

                message = DelHTML(message);

                // Second argument true: append to an existing file.
                using (StreamWriter Sw = new StreamWriter(NewFilePath, true, defaultencode))
                {
                    Sw.Write(message);
                }
            }
            catch (Exception ex)
            {
                // Report which URL failed and why; the message is encoded because
                // it is written into the HTML response.
                Response.Write(url + " - " + HttpUtility.HtmlEncode(ex.Message) + "<br/>");
            }
        }

        /// <summary>
        /// Removes script blocks, tags and comment remnants from an HTML string
        /// and decodes a small set of character entities.
        /// </summary>
        /// <param name="Htmlstring">HTML text to clean; may be null or empty.</param>
        /// <returns>
        /// The remaining text, or an empty string when
        /// <paramref name="Htmlstring"/> is null or empty.
        /// </returns>
        public static string DelHTML(string Htmlstring)
        {
            if (string.IsNullOrEmpty(Htmlstring))
            {
                return string.Empty;
            }

            // Remove line breaks together with the whitespace that follows them.
            Htmlstring = Regex.Replace(Htmlstring, @"([\r\n])[\s]+", "", RegexOptions.IgnoreCase);

            // Remove scripts. Singleline lets '.' cross line feeds so that script
            // bodies spanning several lines are removed as well.
            Htmlstring = Regex.Replace(Htmlstring, @"<script[^>]*?>.*?</script>", "",
                RegexOptions.IgnoreCase | RegexOptions.Singleline);

            // Remove HTML tags.
            Htmlstring = Regex.Replace(Htmlstring, @"<(.[^>]*)>", "", RegexOptions.IgnoreCase);
            Htmlstring = Regex.Replace(Htmlstring, @"([\r\n])[\s]+", "", RegexOptions.IgnoreCase);

            // Remove what is left of HTML comments.
            Htmlstring = Regex.Replace(Htmlstring, @"-->", "", RegexOptions.IgnoreCase);
            Htmlstring = Regex.Replace(Htmlstring, @"<!--.*", "", RegexOptions.IgnoreCase);

            // Strip leftover angle brackets and CR/LF pairs. string.Replace returns
            // a new string, so the result has to be assigned. This runs before
            // entity decoding so that text decoded from &lt; / &gt; is kept.
            Htmlstring = Htmlstring.Replace("<", "").Replace(">", "").Replace("\r\n", "");

            // Decode entities in a single pass so that already-decoded text is not
            // decoded a second time (e.g. "&amp;lt;" becomes "&lt;", not "<").
            return EntityPattern.Replace(Htmlstring, DecodeEntity);
        }

        // Maps one matched entity to its replacement text; numeric references
        // without an entry in EntityReplacements are replaced by an empty string.
        private static string DecodeEntity(Match match)
        {
            string replacement;
            return EntityReplacements.TryGetValue(match.Groups[1].Value, out replacement)
                ? replacement
                : string.Empty;
        }
    }
}
等待更新...