Примеры решения задачки по парсингу на Ruby, Perl, Java, Groovy, Python, C#, JavaScript, PHP. Собственно, задача: есть файл, в котором содержатся строки вида:
(xxx, yyy)\t(xxx, yyy)\t(xxx, yyy)\t\n(xxx, yyy)\t(xxx, yyy)\t(xxx, yyy)\t\n
xxx — действительные части, yyy — мнимые. Нужно сделать из этого файла таблицу действительных частей (значения xxx, разделённые табуляцией), а результат сохранить в файл с именем oldname.res.txt.
Ruby:

# Extracts the first component of each "(xxx, yyy)" pair from lines holding
# three such pairs and saves them as a tab-separated table in
# "<input>.res.txt".
class ParseText
  # Matches three "(xxx, yyy)" pairs, each followed by whitespace, and
  # captures the first component of every pair.
  LINE_MASK = /\((\w{3}), \w{3}\)\s+\((\w{3}), \w{3}\)\s+\((\w{3}), \w{3}\)\s+/

  # Writes +data+ to +filename+, replacing existing content. The block form
  # closes (and therefore flushes) the file even if the write raises.
  def writeToFile(filename, data)
    File.open(filename, 'w') { |file| file.write(data) }
  end

  # Returns one "a\tb\tc" string per element of +array+ that matches
  # LINE_MASK; non-matching elements are skipped.
  def filterArray(array)
    result = []
    array.each do |item|
      # The modifier-if runs the match first, so $1..$3 are set when used.
      result << [$1, $2, $3].join("\t") if item =~ LINE_MASK
    end
    result
  end

  # Filters +input+ (an array of lines), writes the table next to the file
  # named by @fname and returns a summary message.
  def convert(input)
    result = filterArray(input)
    writeToFile(@fname + ".res.txt", result.join("\n"))
    result.size.to_s + " lines were converted\n"
  end

  # Entry point: +args+ is the command-line argument list.
  def main(args)
    if args.length == 0
      puts "Usage: ParseText filedata.txt"
    else
      @fname = args[0]
      print convert(File.readlines(@fname))
    end
  end
end

ParseText.new.main(ARGV)

Perl:

package ParseText;

use strict;
use warnings;

# Constructor. The object carries no state; it only groups the methods.
sub new {
    my ($class) = @_;
    return bless {}, $class;
}

# Writes $data to $filename, replacing existing content.
# Dies if the file cannot be opened or if closing it fails.
sub writeToFile {
    my ($self, $filename, $data) = @_;
    # Three-argument open: the file name can never be parsed as a mode.
    open(my $out, '>', $filename) or die "Can't open $filename $!";
    print {$out} $data;
    close($out) or die "Can't close $filename";
    return;
}

# Takes a reference to an array of lines and returns a reference to an array
# with one "a\tb\tc" string per line that holds three "(xxx, yyy)" pairs;
# only the first component of each pair is kept.
sub filterArray {
    my ($self, $array) = @_;
    my $result = [];
    for my $item (@$array) {
        if ($item =~ /\((\w{3}), \w{3}\)\s+\((\w{3}), \w{3}\)\s+\((\w{3}), \w{3}\)\s+/) {
            push @$result, join("\t", $1, $2, $3);
        }
    }
    return $result;
}

# Reads $fname, writes the filtered table to "$fname.res.txt" and returns a
# summary message. Dies if the input file cannot be opened or closed.
sub convert {
    my ($self, $fname) = @_;
    open(my $in, '<', $fname) or die "Can't open $fname $!";
    my @input = <$in>;    # list context: read every line at once
    close($in) or die "Can't close $fname";
    my $result = $self->filterArray(\@input);
    $self->writeToFile($fname . ".res.txt", join("\n", @$result));
    return scalar(@$result) . " lines were converted\n";
}

# Entry point: $args is a reference to the command-line argument list.
sub main {
    my ($self, $args) = @_;
    if (scalar(@$args) == 0) {
        print "Usage: ParseText filedata.txt\n";
    }
    else {
        print $self->convert($args->[0]);
    }
    return;
}

my $parseObj = ParseText->new();
$parseObj->main(\@ARGV);

Java:

import java.io.BufferedReader;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileNotFoundException;
import java.io.InputStreamReader;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Extracts the first component of each "(xxx, yyy)" pair from lines holding
 * three such pairs and saves them as a tab-separated table in
 * "&lt;input&gt;.res.txt".
 */
class ParseText {
    /** Three "(xxx, yyy)" pairs; compiled once instead of once per line. */
    private static final Pattern LINE_MASK = Pattern.compile(
        "\\((\\w{3}), \\w{3}\\)\\s+\\((\\w{3}), \\w{3}\\)\\s+\\((\\w{3}), \\w{3}\\)\\s+");

    /**
     * Writes the items to the file separated by "\n", with no trailing newline.
     *
     * @throws IOException if the file cannot be opened or a write fails
     */
    private void writeToFile(String filename, List<String> data)
            throws IOException, FileNotFoundException {
        PrintWriter pw = new PrintWriter(new FileOutputStream(filename));
        try {
            int index = 0;
            for (String item : data) {
                pw.write(item);
                if (index != data.size() - 1) {
                    pw.write("\n");
                }
                index += 1;
            }
            // PrintWriter swallows I/O errors; checkError() flushes and reports them.
            if (pw.checkError()) {
                throw new IOException("Can't write " + filename);
            }
        } finally {
            pw.close();
        }
    }

    /** Returns one "a\tb\tc" string per line that contains a match of LINE_MASK. */
    private List<String> filterArray(List<String> stream) {
        List<String> result = new ArrayList<String>();
        for (String item : stream) {
            Matcher search = LINE_MASK.matcher(item);
            if (search.find()) {
                result.add(search.group(1) + "\t" + search.group(2) + "\t" + search.group(3));
            }
        }
        return result;
    }

    /**
     * Reads the file, writes the filtered table to fname + ".res.txt" and
     * returns a summary message.
     */
    public String convert(String fname) throws IOException, FileNotFoundException {
        List<String> data = new ArrayList<String>();
        BufferedReader br = new BufferedReader(
            new InputStreamReader(new FileInputStream(new File(fname))));
        try {
            String line = null;
            while ((line = br.readLine()) != null) {
                data.add(line);
            }
        } finally {
            br.close();
        }
        List<String> result = filterArray(data);
        writeToFile(fname + ".res.txt", result);
        return result.size() + " lines were converted";
    }

    public static void main(String[] args) throws IOException, FileNotFoundException {
        ParseText self = new ParseText();
        if (args.length == 0) {
            System.out.println("Usage: ParseText filedata.txt");
        } else {
            System.out.println(self.convert(args[0]));
        }
    }
}

Groovy:

// Extracts the first component of each "(xxx, yyy)" pair from lines holding
// three such pairs and saves them as a tab-separated table in "<input>.res.txt".
class ParseTextClass {
    // Writes data to the file, replacing existing content.
    def writeToFile(filename, data) {
        new File(filename).write(data)
    }

    // Returns one "a\tb\tc" string per item that matches the mask as a whole.
    def filterArray(array) {
        def result = []
        for (item in array) {
            def search = (item =~ /\((\w{3}), \w{3}\)\s+\((\w{3}), \w{3}\)\s+\((\w{3}), \w{3}\)\s+/)
            if (search.matches()) {
                result << search[0][1] + "\t" + search[0][2] + "\t" + search[0][3]
            }
        }
        return result
    }

    // Reads fname, writes the filtered table to fname + ".res.txt" and
    // returns a summary message.
    def convert(fname) {
        def input = []
        new File(fname).eachLine { line -> input << line }
        def result = filterArray(input)
        writeToFile(fname + ".res.txt", result.join("\n"))
        // size() must be called as a method: property access on a list is
        // applied to the elements, not to the list itself.
        return result.size() + " lines were converted\n"
    }

    def main(args) {
        if (args.size() == 0) {
            print "Usage: ParseText filedata.txt"
        } else {
            print convert(args[0])
        }
    }
}

new ParseTextClass().main(this.args)

Python:

import re
import sys


class ParseText:
    """Extract the first component of each "(xxx, yyy)" pair from lines
    holding three such pairs and save them as a tab-separated table in
    "<input>.res.txt".
    """

    # Name of the input file; set by main() before convert() is called.
    fname = ''

    # Three "(xxx, yyy)" pairs; compiled once instead of once per line.
    mask = re.compile(r'\((\w{3}), \w{3}\)\s+\((\w{3}), \w{3}\)\s+\((\w{3}), \w{3}\)\s+')

    def writeToFile(self, filename, data):
        """Write data to filename, replacing existing content."""
        # The with-block closes (and flushes) the file even if write() raises.
        with open(filename, 'w') as target:
            target.write(data)

    def filterArray(self, stream):
        """Return one "a\\tb\\tc" string per item of stream that contains a match."""
        result = []
        for item in stream:
            search = self.mask.search(item)
            if search:
                result.append(search.group(1) + "\t" + search.group(2) + "\t" + search.group(3))
        return result

    def convert(self, data):
        """Filter data (an iterable of lines), write the table next to
        self.fname and return a summary message.
        """
        result = self.filterArray(data)
        self.writeToFile(self.fname + ".res.txt", '\n'.join(result))
        return str(len(result)) + " lines were converted\n"

    def main(self, args):
        """Entry point: args is sys.argv (program name first)."""
        # print(...) with a single argument behaves the same on Python 2 and 3.
        if len(args) < 2:
            print("Usage: ParseText filedata.txt\n")
        else:
            self.fname = args[1]
            with open(self.fname, 'r') as source:
                print(self.convert(source))


if __name__ == '__main__':
    parser = ParseText()
    parser.main(sys.argv)

C#:

using System;
using System.IO;
using System.Text.RegularExpressions;
using System.Collections;

/// <summary>
/// Extracts the first component of each "(xxx, yyy)" pair from lines holding
/// three such pairs and saves them as a tab-separated table in
/// "&lt;input&gt;.res.txt".
/// </summary>
class ParseText
{
    // Three "(xxx, yyy)" pairs; built once instead of once per line.
    private static readonly Regex LineMask = new Regex(
        @"\((\w{3}), \w{3}\)\s+\((\w{3}), \w{3}\)\s+\((\w{3}), \w{3}\)\s+",
        RegexOptions.Multiline);

    /// <summary>Writes the items one per line, with no trailing line break.</summary>
    public void writeToFile(string filename, ICollection data)
    {
        using (FileStream fileStream = new FileStream(filename, FileMode.Create, FileAccess.Write, FileShare.None))
        {
            using (StreamWriter streamWriter = new StreamWriter(fileStream))
            {
                int index = 0;
                foreach (string item in data)
                {
                    streamWriter.Write(item);
                    if (index != data.Count - 1)
                    {
                        streamWriter.WriteLine();
                    }
                    index += 1;
                }
            }
        }
    }

    /// <summary>Returns one "a\tb\tc" string per line that contains a match.</summary>
    private ArrayList filterArray(ArrayList stream)
    {
        ArrayList result = new ArrayList();
        foreach (string item in stream)
        {
            MatchCollection search = LineMask.Matches(item);
            if (search.Count > 0)
            {
                result.Add(search[0].Groups[1].Value + "\t" + search[0].Groups[2].Value + "\t" + search[0].Groups[3].Value);
            }
        }
        return result;
    }

    /// <summary>
    /// Reads the file, writes the filtered table to fname + ".res.txt" and
    /// returns a summary message.
    /// </summary>
    public string convert(String fname)
    {
        ArrayList input = new ArrayList();
        using (FileStream FS = new FileStream(fname, FileMode.Open, FileAccess.Read, FileShare.Read))
        {
            using (StreamReader sr = new StreamReader(FS))
            {
                // ReadLine() returns null at end of input.
                string line;
                while ((line = sr.ReadLine()) != null)
                {
                    input.Add(line);
                }
            }
        }
        ArrayList result = filterArray(input);
        writeToFile(fname + ".res.txt", result);
        return result.Count + " lines were converted";
    }

    static void Main(string[] args)
    {
        ParseText self = new ParseText();
        if (args.Length == 0)
        {
            Console.WriteLine("Usage: ParseText filedata.txt");
        }
        else
        {
            Console.WriteLine(self.convert(args[0]));
        }
    }
}

JavaScript:

// Windows Script Host (JScript) version: file access goes through the
// Scripting.FileSystemObject ActiveX object.
function ParseText() {
}

// Writes data to the file, replacing existing content.
ParseText.prototype.writeToFile = function (filename, data) {
    var fso = new ActiveXObject("Scripting.FileSystemObject");
    var tf = fso.CreateTextFile(filename, true);
    tf.Write(data);
    tf.Close();
};

// Returns one "a\tb\tc" string per element of array that holds three
// "(xxx, yyy)" pairs; only the first component of each pair is kept.
ParseText.prototype.filterArray = function (array) {
    var regexp = /\((\w{3}), \w{3}\)\s+\((\w{3}), \w{3}\)\s+\((\w{3}), \w{3}\)\s+/;
    var result = new Array();
    var index = 0;
    for (var i = 0; i < array.length; ++i) {
        var match = regexp.exec(array[i]);
        if (match != null) {
            result[index++] = match[1] + "\t" + match[2] + "\t" + match[3];
        }
    }
    return result;
};

// Reads fname, writes the filtered table to fname + ".res.txt" and returns
// a summary message.
ParseText.prototype.convert = function (fname) {
    var fso = new ActiveXObject("Scripting.FileSystemObject");
    var ts = fso.OpenTextFile(fname, 1);
    var input = new Array();
    var index = 0;
    while (!ts.AtEndOfStream) {
        input[index++] = ts.ReadLine();
    }
    ts.Close();
    // Declared with var so the result does not leak into the global scope.
    var result = this.filterArray(input);
    this.writeToFile(fname + ".res.txt", result.join("\n"));
    return result.length + " lines were converted";
};

// Entry point: args is the WScript.Arguments collection.
ParseText.prototype.main = function (args) {
    var ws = WScript.CreateObject("WScript.Shell");
    if (args.length == 0) {
        ws.Popup("Usage: ParseText filedata.txt");
    } else {
        ws.Popup(this.convert(args.Item(0)));
    }
};

var parseobj = new ParseText();
parseobj.main(WScript.Arguments);

PHP:

<?php
include ("Console/Getopt.php");

/**
 * Extracts the first component of each "(xxx, yyy)" pair from lines holding
 * three such pairs and saves them as a tab-separated table in
 * "<input>.res.txt".
 */
class ParseText {
    /** Writes $data to $filename, replacing existing content. */
    private function writeToFile($filename, $data) {
        file_put_contents($filename, $data);
    }

    /** Returns one "a\tb\tc" string per item of $data that contains a match. */
    private function filterArray($data) {
        $result = array();
        foreach ($data as $item) {
            if (preg_match('/\((\w{3}), \w{3}\)\s+\((\w{3}), \w{3}\)\s+\((\w{3}), \w{3}\)\s+/', $item, $search)) {
                $result[] = $search[1] . "\t" . $search[2] . "\t" . $search[3];
            }
        }
        return $result;
    }

    /**
     * Reads $fname, writes the filtered table to "$fname.res.txt" and
     * returns a summary message.
     */
    public function convert($fname) {
        $input = file($fname);
        $result = $this->filterArray($input);
        // Separator first: the reversed argument order is rejected by PHP 8.
        $this->writeToFile($fname . ".res.txt", implode("\n", $result));
        return count($result) . " lines were converted";
    }

    /** Entry point: $args is the argument vector (script name first). */
    public function main($args) {
        if (count($args) <= 1) {
            print "Usage: ParseText filedata.txt";
        } else {
            print $this->convert($args[1]);
        }
    }
}

$cg = new Console_Getopt();
$args = $cg->readPHPArgv();
$parseobj = new ParseText();
$parseobj->main($args);
Вы должны быть зарегистрированы, чтобы оставить комментарий.