R project

profileJasperZ511626
11_WebScrapingInfoRep.pdf

Reading and Writing Data Files

Unstructured vs Structured Plain-text Data

State of the Union Speeches
***
State of the Union Address
George Washington
December 8, 1790

Fellow-Citizens of the Senate and House of Representatives:
In meeting you again I feel much satisfaction in being able to repeat my
congratulations on the favorable prospects which continue to distinguish
our public affairs. The abundant fruits of another year have blessed our
country with plenty and with the means of a flourishing commerce.

Web Log Entries

169.237.46.168 -- [26/Jan/2004:10:47:58 -0800] "GET /stat141/Winter04 HTTP/1.1" 301 328 "http://anson.ucdavis.edu/courses/" "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0; .NET CLR 1.1.4322)"
169.237.46.168 -- [26/Jan/2004:10:47:58 -0800] "GET /stat141/Winter04/ HTTP/1.1" 200 2585 "http://anson.ucdavis.edu/courses/" "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0; .NET CLR 1.1.4322)"

Web Log Entries – extract & omit

169.237.46.168 -- [26/Jan/2004:10:47:58 -0800] "GET /stat141/Winter04 HTTP/1.1" 301 328 "http://anson.ucdavis.edu/courses/" "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0; .NET CLR 1.1.4322)"
169.237.46.168 -- [26/Jan/2004:10:47:58 -0800] "GET /stat141/Winter04/ HTTP/1.1" 200 2585 "http://anson.ucdavis.edu/courses/" "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0; .NET CLR 1.1.4322)"

readLines()

• The readLines() function reads each line of text in a file and creates a character vector with one element per line

• We can then use regular expressions to extract the data we want.

wlist = strsplit(wl, " \"| -- \\[|\" ")

wlist[[1]]
[1] "169.237.46.168"
[2] "26/Jan/2004:10:47:58 -0800]"
[3] "GET /stat141/Winter04 HTTP/1.1"
[4] "301 328"
[5] "http://anson.ucdavis.edu/courses/"
[6] "\"Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0; .NET CLR 1.1.4322)\""

SPLIT on either  blank"  or  blank--blank[  or  "blank

> wlist[[1]][3]
[1] "GET /stat141/Winter04 HTTP/1.1"

Eliminate the unwanted characters in the third element

> sapply(wlist,
         function(x) gsub(" .*$", "", x[3]))
[1] "GET" "GET"

Fixed-width formats

read.fwf()

• The read.fwf() function is handy if the pieces of information are always the same width

• “fwf” stands for fixed-width-format
• The web log data is close to a fwf

123456789012345678901234567890123456789…
ip is 1-14 | skip| |
169.237.46.168 -- [26/Jan/2004:10:47:58…

read.fwf(fileLoc,
         widths = c(14,5,2,1,3,1,4,18,3))

              V1   V2 V3 V4  V5 V6   V7 …
1 169.237.46.168 -- [ 26  / Jan  / 2004 …
2 169.237.46.168 -- [ 26  / Jan  / 2004 …

Delimited data

Reading data into R

• Many data sets are stored in text files.
• The easiest way to read these into R is using either the read.table or read.csv function, both of which return a data frame.

• Consider the data at the following site

fileLoc = "http://www-958.ibm.com/software/data/cognos/manyeyes/datasets/olympic2012withgdp/versions/1.txt"

The data
ISO  Gold/medals  Silver/medals  Bronze/medals  …
ABW  0  0  0  0  0  2,456,000,000.00  108,000…
AFG  0  0  1  1  1  20,343,461,030.00  34,385,000 …
AGO  0  0  0  0  0  100,990,000,000.00 …
ALB  0  0  0  0  0  12,959,563,902.00  3,205,000 …

These data are tab delimited
The variable names have slashes in them
The numbers have commas in them

read.table() or read.csv()

• These functions are useful for reading delimited plain text files

• They have quite a few options. Some of the important ones are:
  – file - name or URL
  – header - are column names at the top of the file?
  – sep - what divides elements of the table
  – na.strings - symbol for missing values, like 9999
  – skip - number of lines at the top of the file to ignore


> ctry = read.csv(
    fileLoc, skip = 1, sep = "\t", header = FALSE,
    colClasses = c("character", rep("numeric", 5),
                   rep("character", 3)))

> head(ctry)
   V1 V2 V3 V4 V5 V6                 V7         V8         V9
1 ABW  0  0  0  0  0   2,456,000,000.00    108,000 22740.7407
2 AFG  0  0  1  1  1  20,343,461,030.00 34,385,000   591.6377
3 AGO  0  0  0  0  0 100,990,000,000.00 19,082,000  5292.4222
4 ALB  0  0  0  0  0  12,959,563,902.00  3,205,000  4043.5457
5 AND  0  0  0  0  0   3,491,000,000.00     84,864 41136.4065
6 ARE  0  0  0  0  0 360,245,000,000.00  7,512,000 47955.9372

Note we skipped the first row because the names would be problematic
Next we need to:
Clean up the GDP and POP by removing the commas and converting character strings to numeric

> head(ctry$V7)
[1] "2,456,000,000.00"  "20,343,461,030.00" "100,990,000,000.00"
[4] "12,959,563,902.00" "3,491,000,000.00"  "360,245,000,000.00"

> fix7 = as.numeric(
    gsub(",", "", ctry$V7))
> head(fix7)
[1]   2456000000  20343461030 100990000000
[4]  12959563902   3491000000 360245000000

Data Available on the Web

• HTML
  – Table (e.g., your simulation results)
  – plain text format (e.g., the ManyEyes data)

• Other Format:
  – JSON
  – XML

HTML

Scraping data from a Web page

• Means to write code to automatically extract data from one or more web pages.

• HTML is like XML – We can use parsing capabilities in the XML package. htmlParse() can create a tree structure from ill-formed HTML.

• The information is all in text and we may need to use regular expressions to extract the relevant pieces

• Much of the data available on the web is not provided as a separate downloadable file; it’s embedded in the website itself.

• Web pages are created when your browser software represents or “renders” a specially formatted (HTML) text file. Most browsers allow you to see this file under something like View > Page Source.

Our goal: Extract this information and put it in a dataframe

JSON

JavaScript Object Notation

• Text format
• Lightweight data-interchange
• Easy for humans to read and write.
• Easy for machines to parse and generate

JSON Structure

• JSON is built on two structures:
• An unordered collection of comma-separated name:value pairs

{"lender_id":"matt", "loan_count":23}
• An ordered array of values
[1, [true, false, true], [1, 2, 10, 20], {"lender_id":"skylar", "loan_count":1}]

Comparison to XML

• JSON is simpler
• Not as rich – no attributes, unordered, no schema for describing acceptable format

• Compressed JSON and XML not much different in size

{"header":{"total":"576803","page":1,"date":"2010-01-29T20:00:23Z","page_size":1000},
 "lenders":[
  {"lender_id":"matt",
   "name":"Matt",
   "image":{"id":12829,"template_id":1},
   "whereabouts":"San Francisco CA",
   "country_code":"US",
   "uid":"matt",
   "member_since":"2006-01-01T09:01:01Z",
   "personal_url":"www.socialedge.org\/blogs\/kiva-chronicles",
   "occupation":"Entrepreneur",
   "loan_because":"I love the stories. ",
   "occupational_info":"I co-founded a startup nonprofit (this one!) and I work with an amazing group of people dreaming up ways to alleviate poverty through personal lending. ",
   "loan_count":89,
   "invitee_count":23},
  {"lender_id":"jessica",
   "name":"Jessica",
   "image":{"id":197292,
   "template_id":1}, …

XML

eXtensible Markup Language

Most of the data sets we have seen have been in the form of ASCII tables.
Date       Time        Lat     Lon       Depth Mag
1968/01/12 22:19:10.35 36.6453 -121.2497  6.84 3.00
1968/02/09 13:42:37.05 37.1527 -121.5448  8.49 3.00
1968/02/21 14:39:48.10 37.1783 -121.5780  6.95 3.80
1968/03/02 04:25:53.94 36.8343 -121.5447  5.35 3.00
1968/03/17 15:07:02.12 37.3088 -121.6615  4.39 3.00
1968/03/21 21:54:59.94 37.0378 -121.7407 11.86 4.30

• Advantages:
  – easy to read, write, and process
  – in standard cases, don't need a lot of extra information

• But these advantages can quickly disappear....

XML is a standard for semantic, hierarchical representation of data

<state>
  <gml:name abbreviation="AL"> ALABAMA </gml:name>
  <county>
    <gml:name> Autauga County </gml:name>
    <gml:location>
      <gml:coord>
        <gml:X> -86641472</gml:X>
        <gml:Y> 32542207</gml:Y>
      </gml:coord>
    </gml:location>
  </county>

Relationships between pieces of data reflect relationships in the real world.

Pros

• data is self-describing
• format separates content from structure
• data can be easily merged and exchanged
• file is human-readable
• file is also easily machine-generated
• standards are widely adopted

Cons

• XML documents can be very verbose and hard to read
• It’s so general that it’s hard to develop tools for all cases
• Files can be quite large due to high amount of redundancy

• XML has become quite popular in many scientific fields, and it is standard in many web applications for the exchange and visualization of data.

• We'll learn how to
  – create it and
  – read/process it.

• We'll do both of these things from within R, but first let's start with an overview of what XML documents look like.

<lender>
  <lender_id>matt</lender_id>
  <name>Matt</name>
  <image>
    <id>12829</id>
    <template_id>1</template_id>
  </image>
  <whereabouts>San Francisco CA</whereabouts>
  <country_code>US</country_code>
  <uid>matt</uid>
  <member_since>2006-01-01T09:01:01Z</member_since>
  <personal_url>www.socialedge.org/blogs/kiva-chronicles
  </personal_url>
  <occupation>Entrepreneur</occupation>
  <loan_because>I love the stories. </loan_because>
  <occupational_info>I co-founded a startup nonprofit (this one!)
    and I work with an amazing group of people dreaming up ways to
    alleviate poverty through personal lending.
  </occupational_info>
  <loan_count>89</loan_count>
  <invitee_count>23</invitee_count>
</lender>

Snippet of Kiva Data for one lender

Snippet of exchange data
<Cube>
  <Cube time="2008-04-21">
    <Cube currency="USD" rate="1.5898"/>
    <Cube currency="JPY" rate="164.43"/>
    <Cube currency="BGN" rate="1.9558"/>
    <Cube currency="CZK" rate="25.091"/>
  </Cube>
  <Cube time="2008-04-17">
    <Cube currency="USD" rate="1.5872"/>
    <Cube currency="JPY" rate="162.74"/>
    <Cube currency="BGN" rate="1.9558"/>
    <Cube currency="CZK" rate="24.975"/>
  </Cube>
</Cube>

<event id="00068404" network-code="ak"
       time-stamp="2008/09/16_22:17:31 " version="2">
  <param name="year" value="2008"/>
  <param name="month" value="09"/>
  <param name="day" value="14"/>
  <param name="hour" value="00"/>
  <param name="minute" value="59"/>
  <param name="second" value="04.0"/>
  <param name="latitude" value="51.8106"/>
  <param name="longitude" value="-175.9250"/>
  <param name="depth" value="146.0"/>
  <param name="magnitude" value="3.8"/>
  <param name="num-stations" value="10"/>
  <param name="num-phases" value="15"/>
  <param name="dist-first-station" value="126.1"/>
  <param name="azimuthal-gap" value="53"/>
  <param name="magnitude-type" value="L"/>
  <param name="magnitude-type-ext"
         value="Ml = local magnitude (synthetic Wood-Anderson)"/>
  <param name="location-method" value="a"/>
  <param name="location-method-ext"
         value="Auryn (Confirmed by human review)"/>
</event>
<event>

Snippet of USGS earthquake catalog data

<actions>
  <action datetime="2009-01-26">
    <text>Referred to the Committee on Appropriations, and in addition
          to the Committee on the Budget, for a period to be
          subsequently determined by the Speaker, in each case for
          consideration of such provisions as fall within the
          jurisdiction of the committee concerned.
    </text>
  </action>
  <action datetime="2009-01-26">
    <text>Referred to House Appropriations</text>
  </action>
</actions>
..
<relatedbills>
  <bill relation="rule" session="111" type="hr" number="88" />
</relatedbills>

Snippet of US Congress data

<Placemark id="217">
<name>8.2</name>
<description>
Date: 2008-9-15
Magnitude: 1.5
Depth: 8.2 km
</description>
<styleUrl>#ball1-2</styleUrl>
<Point>
<coordinates>-147.426, 60.929, 0</coordinates>
</Point>
</Placemark>

Snippet of KML for one earthquake

XML Syntax

Syntax
The basic unit of XML code is called an “element” or “node.” It is made up of both markup and content. Markup consists of tags, attributes, and comments.
<CYL> 6 </CYL> <!-- elem with content 6 -->

<CYL> </CYL>
<CYL type="numeric"/>
<CYL size="2"> 6 </CYL>

Start tag   End tag   Content   Comment - can go anywhere

An attribute

Elements with no content

Well-formed
• Tag names are case-sensitive; start and end tags must match exactly.

• No spaces are allowed between the < and the tag name.

• Tag names must begin with a letter and contain only alphanumeric characters.

• An element must have both an open and closing tag unless it is empty.

• An empty element that does not have a closing tag must be of the form <tagname/>.

• Tags must nest properly. (Inner tags must close before outer ones.)

<?xml version="1.0" encoding="ISO-8859-1"?>
<!-- Edited with XML Spy v2006 (http://www.altova.com) -->
<CATALOG>

  <PLANT>
    <COMMON>Bloodroot</COMMON>
    <BOTANICAL>Sanguinaria canadensis</BOTANICAL>
    <ZONE>4</ZONE>
    <LIGHT>Mostly Shady</LIGHT>
    <PRICE>$2.44</PRICE>
    <AVAILABILITY>031599</AVAILABILITY>
  </PLANT>
  <PLANT>
    <COMMON>Columbine</COMMON>
    <BOTANICAL>Aquilegia canadensis</BOTANICAL>
    <ZONE>3</ZONE>
    <LIGHT>Mostly Shady</LIGHT>
    <PRICE>$9.37</PRICE>
    <AVAILABILITY>030699</AVAILABILITY>
  </PLANT>
  <PLANT>
    <COMMON>Marsh Marigold</COMMON>
    <BOTANICAL>Caltha palustris</BOTANICAL>
    <ZONE>4</ZONE>
    <LIGHT>Mostly Sunny</LIGHT>
    <PRICE>$6.81</PRICE>
    <AVAILABILITY>051799</AVAILABILITY>
  </PLANT>

</CATALOG>

Note how indentation makes it easier to check that the tags are correctly nested.

XML declaration and processing instructions

Well formed XML ctd.:

• All attributes must appear in quotes in the format:

name = "value"
• Isolated markup characters must be specified via entity references. < is specified by &lt; and > is specified by &gt;.

• All XML documents must contain a root node containing all the other nodes.

Tree Representation

<Envelope>
  <subject>Reference rates</subject>
  <Sender>
    <name>European Central Bank</name>
  </Sender>
  <Cube>
    <Cube time="2008-04-21">
      <Cube currency="USD" rate="1.5898"/>
      <Cube currency="JPY" rate="164.43"/>
      <Cube currency="BGN" rate="1.9558"/>
      <Cube currency="CZK" rate="25.091"/>
    </Cube>
    <Cube time="2008-04-17">
      <Cube currency="USD" rate="1.5872"/>
      <Cube currency="JPY" rate="162.74"/>
      <Cube currency="BGN" rate="1.9558"/>
      <Cube currency="CZK" rate="24.975"/>
    </Cube>
  </Cube>
</Envelope>

Envelope

subject Sender Cube

Cube

Cube

Cube

CubeCubeCube

name

Europea...

Referenc...

... ...

Tree terminology

• There is only one root or document node in the tree, and all the other nodes are contained within it.

• We think of these other nodes as being descendants of the root node.

• We use the language of a family tree to refer to relationships between nodes. Parents, children, siblings, ancestors, descendants

• The terminal nodes in a tree are also known as leaf nodes. Content always falls in a leaf node.

Note:
• We’ll learn to create and process XML documents from within R, but always keep in mind that R and XML are two separate things.

• In particular, it will be helpful to have in your mind the structure of the XML document before you do anything in R, especially when you’re creating a new XML document.

Representation of Numbers

Representation of Colors

Colors: (rgb)

(255, 0, 0)

(255, 255, 0)

(100, 149, 237)

#E41A1C99

#FF0000

#FFFF00

#6495ED

Representation of Data

HTML table, Excel Spreadsheet, plain text

ManyEyes html

ManyEyes text

ASCII & Unicode
Character   ASCII       Unicode

A           0100 0001   0000 0000 0100 0001

a           0110 0001   0000 0000 0110 0001

α           —           0000 0011 1011 0001

ManyEyes xlsx

txt   html   xlsx

browser   Render w/ no markup

Format according to markup

Open file in Excel

Excel   Display as Excel

Display as Excel

Display

TextEditor   Display ASCII characters

See markup as well as content

See nothing or gibberish

Hypertext Markup Language

Tree

Tree Data Structure

html

body

h1 ph2

BML Report Introduction Research

by

head

img shows

that

D'Souza

a

Text nodes are octagons

Tree Hierarchy

• One root node
• Root node has child nodes and each of these can have child nodes and so on

• Any node must have one and only one parent

Examples of HTML

Table in HTML
<table>
 <tr>
  <th>A</th>  <th>B</th>
 </tr>
 <tr>
  <td>1</td> <td>25,000</td>
 </tr>
 <tr>
  <td>7</td> <td>100,000</td>
 </tr>
</table>

Appears as:

A        B
1   25,000
7  100,000

Can you draw the tree for this document?

table   <table>

table

tr

<table>
  <tr>

table

tr

th

A

<table>
  <tr>
    <th> A </th>

table

tr

th

A

th

B

<table>
  <tr>
    <th> A </th>
    <th> B </th>
  </tr>

table

tr

th

A

th

B

tr

td

1

td

25,000

table

tr

th

A

th

B

tr

td

1

td

25,000

tr

td

7

td

100,000

An HTML Table

• Tables are defined with the <table> tag.
• A table has rows marked up with the <tr> tag.
• Each row is divided into data cells with the <td> tag. (td stands for table data).

• A data cell can contain text, images, lists, paragraphs, forms, horizontal rules, tables, etc.

• Headings in a table are defined with the <th> tag.

Modified Table
<table cellpadding="6"
       border="2">
  <tr>
    <th>A</th>
    <th>B</th>
  </tr>
  <tr align="right">
    <td>1</td>
    <td>25,000</td>
  </tr>
  <tr align="right">
    <td>7</td>
    <td>100,000</td>
  </tr>
</table>

Appears as:

Unordered Lists

• Unordered lists have items marked with bullets.

<ul>
 <li>Coffee</li>
 <li>Milk</li>
</ul>
• Paragraphs, line breaks, images, links, other lists, etc. can be placed in a list

Appears as:

• Coffee
• Milk

Ordered Lists

• Ordered lists have items marked with numbers.

<ol>
 <li>Coffee</li>
 <li>Milk</li>
</ol>

Appears as:

1. Coffee
2. Milk

Paragraphs and Sections

<h1>
My BML Report
</h1>
<h2>Introduction</h2>
<p>
The BML model is a simple traffic model...
</p>
<p> We studied the BML model behavior for...
</p>

Appears as:

Images

• The img tag is used to embed images in a Web page

<img src = "images/bml34.png"
     width ="400"/>

• The src attribute gives the file name for the image

• The width attribute is optional
• This tag is empty – the start and end tag are collapsed.

Appears as:

Links
<a href = "http://mae.ucdavis.edu/dsouza/">D'Souza</a> discovered ....

Appears as:

<a> is an anchor tag
The content is the text that is “clickable”
The link can be to another place within the document

Element Syntax

• Each HTML element has an element name, e.g.
  – body : the main content of the page
  – h1 : largest header
  – p : paragraph
  – br : line break


Element Syntax

<h1>     This is a title     </h1>

Start tag    Text Content      End tag
• The end tag is a slash and the name surrounded by angle brackets </h1>

• Some HTML elements have no content <br/> is for a line break

Element Content

• Simple content is plain text:
<h1>  This is a title   </h1>
• Complex content includes other elements.
<p>This paragraph includes <a href="http://…">a link</a> and sentences.</p>

How many child elements does this <p> node contain?
3: the text before the <a>, the <a> node and the text after the <a> node

Attribute Syntax

• Attributes provide additional information to an HTML element.

• Attributes always come in name/value pairs like this: name="value"

• Attributes are always specified in the start tag of an HTML element.

Well-formed XHTML

• Well-formed HTML is called XHTML.
• Tag names follow strict rules for matching case

• Attribute values must be in quotes
• Elements must be properly nested (i.e. you can draw a tree with it)


A BML Report

Raw HTML for the Stylized Report
<html>
 <head></head>
 <body>
  <h1>BML Model Simulation Study</h1>
   <h2>Introduction</h2>
   <p> The BML model is a simple traffic model... </p>
   <h2>Earlier Findings</h2>
   <p>
    <a href="http://mae.ucdavis.edu/dsouza/">D'Souza</a> discovered ....
   </p>
   <p>
    A total traffic jam might look like this
    <img src="images/bml34.png" width ="200"/>
   </p>
 </body>
</html>

A prettied up BML Report

Raw HTML for the Stylized Report
<html>
 <head>
  <link rel="stylesheet" type="text/css" href="bmlStyle.css" />
 </head>
 <body>
  <h1>BML Model Simulation Study</h1>
   <h2>Introduction</h2>
   <p> The BML model is a simple traffic model... </p>
   <h2 class="bml">Earlier Findings</h2>
   <p>
    <a href="http://mae.ucdavis.edu/dsouza/">D'Souza</a> discovered ....
   </p>
   <p>
    A total traffic jam might look like this
    <img src="images/bml34.png" width ="200"/>
   </p>
 </body>
</html>

Cascading Style Sheet

body
{ background-color:#d0e4fe; }
h1
{ color:orange; text-align:center; }
h2.bml
{ color:green; text-align:center; }
p
{ font-family:"Times New Roman"; font-size:20px; }

CSS

selector {property: value; }

Selector may be:
• HTML tag name            h1 {color: green;}
• attribute value for id   #idXYZ {color:blue;}
• class                    .bml {font-size: 2em}